Skip to content
Open
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
d717f92
Implementing the NPU plugin deserializer, it doesn't do anything spec…
razvanapetroaie Sep 2, 2025
55adf77
Adding a serializer that does nothing special
razvanapetroaie Sep 18, 2025
8f3e760
Adding a new config option
razvanapetroaie Sep 18, 2025
ea58a81
Starting to refactor the plugin-driver adapter
razvanapetroaie Sep 18, 2025
7800811
Done refactoring
razvanapetroaie Sep 22, 2025
7393f90
Tweaking the deserializer. First weights-copy solution that seems to …
razvanapetroaie Sep 24, 2025
f2e1e64
Adding the same extensions used by the driver-compiler adapter
razvanapetroaie Sep 24, 2025
553b23e
Storing the first serializer attempt
razvanapetroaie Sep 25, 2025
f93ef2b
Second attempt
razvanapetroaie Sep 25, 2025
63117b1
First solution that seems to be working
razvanapetroaie Sep 26, 2025
a2362fe
Merge remote-tracking branch 'upstream/master' into CVS-169982-weight…
razvanapetroaie Oct 2, 2025
5a49dea
Linux time measurements
razvanapetroaie Oct 2, 2025
5b34fc2
windowstime measurements
razvanapetroaie Oct 2, 2025
57201e3
Renaming the config
razvanapetroaie Oct 6, 2025
38bb262
Adding a new config option for setting the weights size threshold
razvanapetroaie Oct 6, 2025
cf12d7e
Revert "windowstime measurements"
razvanapetroaie Oct 6, 2025
a27cddf
SERIALIZATION_WEIGHTS_SIZE_THRESHOLD amend
razvanapetroaie Oct 6, 2025
bbad999
Renamed to VCLSerializer
razvanapetroaie Oct 6, 2025
5313bb6
Added a weights size threshold
razvanapetroaie Oct 6, 2025
7a097d4
Adding one more time measurement
razvanapetroaie Oct 6, 2025
932627e
Avoiding one model clone
razvanapetroaie Oct 6, 2025
8a165a4
Shorter tags for the new attribute
razvanapetroaie Oct 7, 2025
9e2010f
Serializer - writing custom data using the OV interface
razvanapetroaie Oct 7, 2025
731dafd
Moving the deserializer code
razvanapetroaie Oct 7, 2025
1152407
Merge remote-tracking branch 'upstream/master' into CVS-169982-weight…
razvanapetroaie Oct 7, 2025
7a74cf4
Comments, code style
razvanapetroaie Oct 8, 2025
fde9c7a
Removing measurements
razvanapetroaie Oct 8, 2025
5a2ad5a
Test tweak
razvanapetroaie Oct 9, 2025
9f322d0
Merge remote-tracking branch 'upstream/master' into CVS-169982-weight…
razvanapetroaie Oct 9, 2025
0b8f541
more test tweak
razvanapetroaie Oct 10, 2025
695f227
Merge remote-tracking branch 'upstream/master' into CVS-169982-weight…
razvanapetroaie Oct 15, 2025
5bba959
just comments and attribute tags
razvanapetroaie Oct 15, 2025
742ee91
virtual dtor
razvanapetroaie Oct 15, 2025
afca0f8
Basic test for weightless serializer
razvanapetroaie Oct 16, 2025
bf11a90
reduced copy-pasta in the "serialize_model_to_stream" functions
razvanapetroaie Oct 16, 2025
38ca6e6
just a comment
razvanapetroaie Oct 16, 2025
352f853
Reusing the weightless writer -> significant time boost if weights are
razvanapetroaie Oct 23, 2025
9fd1b2b
Merge remote-tracking branch 'upstream/master' into CVS-169982-weight…
razvanapetroaie Oct 27, 2025
5fdf849
post-merge build fix
razvanapetroaie Oct 27, 2025
24aa96d
ubuntu measurements
razvanapetroaie Oct 27, 2025
7f892aa
windows measurements
razvanapetroaie Oct 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/plugins/intel_npu/src/al/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ target_link_libraries(${TARGET_NAME}
PUBLIC
openvino::npu_logger_utils
openvino::runtime::dev
openvino_xml_util
)

set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1426,4 +1426,18 @@ struct USE_BASE_MODEL_SERIALIZER final : OptionBase<USE_BASE_MODEL_SERIALIZER, b
}
};

// Plugin option of type size_t bound to the "ov::intel_npu::serialization_weights_size_threshold" property.
struct SERIALIZATION_WEIGHTS_SIZE_THRESHOLD final : OptionBase<SERIALIZATION_WEIGHTS_SIZE_THRESHOLD, size_t> {
// The option key is the name of the corresponding public property.
static std::string_view key() {
return ov::intel_npu::serialization_weights_size_threshold.name();
}

// Value used when the option is not set explicitly: 0.
static size_t defaultValue() {
return 0;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will tune this later to see which value yields the best performance. For now, we assume 0 is the best candidate (only weights pointers & sizes are stored).

}

// The option is reported with the run-time option mode.
static OptionMode mode() {
return OptionMode::RunTime;
}
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -357,12 +357,21 @@ static constexpr ov::Property<bool> weightless_blob{"NPU_WEIGHTLESS_BLOB"};
*
* The base serializer is the OV implementation of the "XmlSerializer" without any extensions. All weights are copied in
* a separate buffer. By turning this off, the NPU extension of the serializer is enabled. This allows optimizing the
* process by avoiding copies into a separate weights buffer. However, this solution may be less reliable.
*
* @note This option doesn't actually do anything right now, it has been registered in advance.
* process by reducing the amount of weights that will be copied in a separate buffer. However, this solution may be
* less reliable.
*/
static constexpr ov::Property<bool> use_base_model_serializer{"NPU_USE_BASE_MODEL_SERIALIZER"};

/**
* @brief [Only for NPU Plugin]
* Type: size_t. Default is 0.
*
* Effective only if "use_base_model_serializer" is set to false. All "ov::Constant" buffers smaller than this value
* (byte size) will be copied in a separate buffer. The rest of the weights will be reconstructed at deserialization
* time using buffer pointers.
*/
static constexpr ov::Property<size_t> serialization_weights_size_threshold{"NPU_SERIALIZATION_WEIGHTS_SIZE_THRESHOLD"};

/**
* @brief [Experimental, only for NPU Plugin]
* Type: integer.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/core/runtime_attribute.hpp"

namespace intel_npu {

/**
 * @brief Runtime attribute recording where a weights buffer lives in memory and how many bytes it spans.
 * @details The serialization/deserialization algorithms rely on it so that models can be processed without
 * copying weights.
 */
class WeightsPointerAttribute : public ov::RuntimeAttribute {
public:
    OPENVINO_RTTI("WeightsPointerAttribute", "0", RuntimeAttribute);

    // Default construction is disabled: an address and a size must always be supplied.
    WeightsPointerAttribute() = delete;

    /**
     * @param pointer Address of the weights buffer; it is stored as an integer value.
     * @param size Size of the buffer, in bytes.
     */
    WeightsPointerAttribute(const void* pointer, const size_t size)
        : memory_pointer{reinterpret_cast<size_t>(pointer)},
          byte_size{size} {}

    /**
     * @brief Exposes both fields to the given attribute visitor.
     * @note The tags are deliberately short to save memory, since a model may hold a large number of
     * "ov::Constant" nodes. Two characters should still be enough to avoid collisions: "mp" means
     * "memory pointer" and "ms" means "memory size".
     * @return Always true.
     */
    bool visit_attributes(ov::AttributeVisitor& visitor) override {
        visitor.on_attribute("mp", memory_pointer);
        visitor.on_attribute("ms", byte_size);
        return true;
    }

    // Address of the weights buffer, kept as an integer.
    size_t memory_pointer;
    // Size of the weights buffer, in bytes.
    size_t byte_size;
};

} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@

#pragma once

#include "intel_npu/common/filtered_config.hpp"
#include "intel_npu/common/igraph.hpp"

namespace intel_npu {

class ICompilerAdapter {
public:
virtual std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model,
const Config& config) const = 0;
const FilteredConfig& config) const = 0;

/**
* @brief Compiles the model, weights separation enabled.
Expand All @@ -27,7 +28,8 @@ class ICompilerAdapter {
* "icompiler.hpp".
* @return A "WeightlessGraph" type of object.
*/
virtual std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model, const Config& config) const = 0;
virtual std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model,
const FilteredConfig& config) const = 0;

/**
* @brief Parses the provided binary objects and returns a wrapper over the resulted L0 handles. The model may also
Expand All @@ -44,11 +46,12 @@ class ICompilerAdapter {
*/
virtual std::shared_ptr<IGraph> parse(
ov::Tensor mainBlob,
const Config& config,
const FilteredConfig& config,
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const = 0;

virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const = 0;
virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model,
const FilteredConfig& config) const = 0;
virtual uint32_t get_version() const = 0;
virtual std::vector<std::string> get_supported_options() const = 0;
virtual bool is_option_supported(std::string optname) const = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ class writer_streambuf final : public std::streambuf {
}
}

pos_type seekpos(pos_type pos, std::ios_base::openmode which) override {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new serialization algorithm is based on ov::pass::StreamSerialize, which uses a few seek operations (unlike the old algorithm).

writeIt = startIt + pos;
return pos;
}

OutputIt startIt;
OutputIt writeIt;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "intel_npu/config/config.hpp"
#include "intel_npu/utils/logger/logger.hpp"
#include "intel_npu/utils/zero/zero_init.hpp"
#include "vcl_serializer.hpp"
#include "ze_graph_ext_wrappers.hpp"

namespace intel_npu {
Expand All @@ -18,17 +19,20 @@ class DriverCompilerAdapter final : public ICompilerAdapter {
public:
DriverCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct);

std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model,
const FilteredConfig& config) const override;

std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model, const Config& config) const override;
std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model,
const FilteredConfig& config) const override;

std::shared_ptr<IGraph> parse(
ov::Tensor mainBlob,
const Config& config,
const FilteredConfig& config,
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model,
const FilteredConfig& config) const override;

std::vector<std::string> get_supported_options() const override;

Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,20 @@ class PluginCompilerAdapter final : public ICompilerAdapter {
public:
PluginCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct);

std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model,
const FilteredConfig& config) const override;

std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model, const Config& config) const override;
std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model,
const FilteredConfig& config) const override;

std::shared_ptr<IGraph> parse(
ov::Tensor mainBlob,
const Config& config,
const FilteredConfig& config,
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model,
const FilteredConfig& config) const override;

std::vector<std::string> get_supported_options() const override;

Expand Down
Loading
Loading