-
Couldn't load subscription status.
- Fork 2.8k
[NPU] Model serialization/deserialization without weights copies #31939
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 38 commits
d717f92
55adf77
8f3e760
ea58a81
7800811
7393f90
f2e1e64
553b23e
f93ef2b
63117b1
a2362fe
5a49dea
5b34fc2
57201e3
38bb262
cf12d7e
a27cddf
bbad999
5313bb6
7a097d4
932627e
8a165a4
9e2010f
731dafd
1152407
7a74cf4
fde9c7a
5a2ad5a
9f322d0
0b8f541
695f227
5bba959
742ee91
afca0f8
bf11a90
38ca6e6
352f853
9fd1b2b
5fdf849
24aa96d
7f892aa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| // Copyright (C) 2018-2025 Intel Corporation | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
| // | ||
|
|
||
| #pragma once | ||
|
|
||
| #include "openvino/core/runtime_attribute.hpp" | ||
|
|
||
| namespace intel_npu { | ||
|
|
||
| /** | ||
| * @brief Attribute containing the memory address of a weights buffer and the size of the buffer in bytes. | ||
| * @details Used as part of the serialization/deserialization algorithms in order to allow processing models without | ||
| * copying weights. The address is kept as a "size_t" integer (converted from the pointer with "reinterpret_cast" in | ||
| * the constructor) instead of as a pointer. The class only records the address and the size; nothing in it copies or | ||
| * frees the buffer. NOTE(review): the buffer presumably has to stay alive for as long as the stored address is used - | ||
| * confirm against the callers. | ||
| */ | ||
| class WeightsPointerAttribute : public ov::RuntimeAttribute { | ||
| public: | ||
| OPENVINO_RTTI("WeightsPointerAttribute", "0", RuntimeAttribute); | ||
|
|
||
| WeightsPointerAttribute() = delete; | ||
|
|
||
| WeightsPointerAttribute(const void* pointer, const size_t size) | ||
| : memory_pointer(reinterpret_cast<size_t>(pointer)), | ||
| byte_size(size) {} | ||
|
|
||
| /** | ||
| * @brief Passes both fields to the given visitor: the stored address under the name "mp" and the buffer size in | ||
| * bytes under the name "ms". | ||
| * @param visitor The attribute visitor that receives the two fields. | ||
| * @return Always "true". | ||
| * @note The names of the attributes have been kept short in order to save some memory (there may be a lot of | ||
| * "ov::Constant" nodes in a model). Also, two characters should be sufficient to avoid collisions. "mp" stands for | ||
| * "memory pointer", "ms" for "memory size". | ||
| */ | ||
| bool visit_attributes(ov::AttributeVisitor& visitor) override { | ||
| visitor.on_attribute("mp", memory_pointer); | ||
| visitor.on_attribute("ms", byte_size); | ||
| return true; | ||
| } | ||
|
|
||
| size_t memory_pointer; | ||
| size_t byte_size; | ||
| }; | ||
|
|
||
| } // namespace intel_npu |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -75,6 +75,11 @@ class writer_streambuf final : public std::streambuf { | |
| } | ||
| } | ||
|
|
||
| pos_type seekpos(pos_type pos, std::ios_base::openmode which) override { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The new serialization algorithm is based on |
||
| writeIt = startIt + pos; | ||
| return pos; | ||
| } | ||
|
|
||
| OutputIt startIt; | ||
| OutputIt writeIt; | ||
| }; | ||
|
|
||
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will tune this later to see which value yields the best performance. For now, we assume 0 is the best candidate (only weights pointers & sizes are stored).