@@ -42,11 +42,11 @@ class ICompilerAdapter {
* @return A wrapper over the corresponding L0 graph handles (multiple only if "initBlobs" has been provided). This
* wrapper further details the compiled model and brings it in a state closer to execution.
*/
virtual std::shared_ptr<IGraph> parse(
ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const = 0;
virtual std::shared_ptr<IGraph> parse(ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt,
std::optional<int64_t> batchSize = std::nullopt) const = 0;

virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const = 0;
virtual uint32_t get_version() const = 0;
@@ -36,6 +36,8 @@ class IGraph : public std::enable_shared_from_this<IGraph> {

virtual void set_argument_value(uint32_t argi, const void* argv) const = 0;

virtual void set_metadata(NetworkMetadata metadata) = 0;

virtual void initialize(const Config& config) = 0;

virtual ~IGraph() = default;
@@ -25,11 +25,11 @@ class DriverCompilerAdapter final : public ICompilerAdapter {

std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(
ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const override;
std::shared_ptr<IGraph> parse(ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt,
std::optional<int64_t> batchSize = std::nullopt) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

@@ -35,6 +35,8 @@ class Graph : public IGraph {

void set_argument_value(uint32_t argi, const void* argv) const override;

void set_metadata(NetworkMetadata metadata) override;

void initialize(const Config& config) override;

const NetworkMetadata& get_metadata() const override;
@@ -23,11 +23,11 @@ class PluginCompilerAdapter final : public ICompilerAdapter {

std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(
ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const override;
std::shared_ptr<IGraph> parse(ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt,
std::optional<int64_t> batchSize = std::nullopt) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

@@ -45,7 +45,8 @@ class ZeGraphExtWrappers {

GraphDescriptor getGraphDescriptor(void* data, size_t size) const;

NetworkMetadata getNetworkMeta(GraphDescriptor& graphDescriptor) const;
NetworkMetadata getNetworkMeta(GraphDescriptor& graphDescriptor,
std::optional<int64_t> batchSize = std::nullopt) const;

void destroyGraph(GraphDescriptor& graphDescriptor);

@@ -70,7 +71,8 @@ class ZeGraphExtWrappers {
void getMetadata(ze_graph_handle_t graphHandle,
uint32_t index,
std::vector<IODescriptor>& inputs,
std::vector<IODescriptor>& outputs) const;
std::vector<IODescriptor>& outputs,
std::optional<int64_t> batchSize) const;

void initializeGraphThroughCommandList(ze_graph_handle_t graphHandle, uint32_t commandQueueGroupOrdinal) const;

@@ -357,19 +357,19 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::compileWS(const std::shared_ptr<o
config);
}

std::shared_ptr<IGraph> DriverCompilerAdapter::parse(
ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs,
const std::optional<std::shared_ptr<const ov::Model>>& model) const {
std::shared_ptr<IGraph> DriverCompilerAdapter::parse(ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs,
const std::optional<std::shared_ptr<const ov::Model>>& model,
std::optional<int64_t> batchSize) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse");

_logger.debug("parse start");
auto mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size());
_logger.debug("parse end");

OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta");
auto networkMeta = _zeGraphExt->getNetworkMeta(mainGraphDesc);
auto networkMeta = _zeGraphExt->getNetworkMeta(mainGraphDesc, batchSize);

// exporting the blob when we get it from cache or ov::hint::compiled_blob property
// shall be available
4 changes: 4 additions & 0 deletions src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp
@@ -42,6 +42,10 @@ Graph::Graph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
}
}

void Graph::set_metadata(NetworkMetadata metadata) {
_metadata = metadata;
}

const NetworkMetadata& Graph::get_metadata() const {
return _metadata;
}
@@ -249,11 +249,11 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
_compiler);
}

std::shared_ptr<IGraph> PluginCompilerAdapter::parse(
ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs,
const std::optional<std::shared_ptr<const ov::Model>>& model) const {
std::shared_ptr<IGraph> PluginCompilerAdapter::parse(ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs,
const std::optional<std::shared_ptr<const ov::Model>>& model,
std::optional<int64_t> batchSize) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse");

_logger.debug("parse start");
@@ -264,6 +264,17 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::parse(
network.clear();
network.shrink_to_fit();

for (auto& in : networkMeta.inputs) {
if (in.shapeFromIRModel.has_value() && batchSize.has_value()) {
in.shapeFromIRModel.value()[intel_npu::utils::BATCH_AXIS] = ov::Dimension(1, batchSize.value());
}
}
for (auto& out : networkMeta.outputs) {
if (out.shapeFromIRModel.has_value() && batchSize.has_value()) {
out.shapeFromIRModel.value()[intel_npu::utils::BATCH_AXIS] = ov::Dimension(1, batchSize.value());
}
}

Review comment (Contributor Author) on lines +267 to +277 — suggested change: perform the batchSize check once, outside the loops:

if (batchSize.has_value()) {
    for (auto& in : networkMeta.inputs) {
        if (in.shapeFromIRModel.has_value()) {
            in.shapeFromIRModel.value()[intel_npu::utils::BATCH_AXIS] = ov::Dimension(1, batchSize.value());
        }
    }
    for (auto& out : networkMeta.outputs) {
        if (out.shapeFromIRModel.has_value()) {
            out.shapeFromIRModel.value()[intel_npu::utils::BATCH_AXIS] = ov::Dimension(1, batchSize.value());
        }
    }
}

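Note: to illustrate what the loop above produces (values invented for the example, assuming intel_npu::utils::BATCH_AXIS == 0):

// Hypothetical descriptor state; not code from this PR.
ov::PartialShape shapeFromIRModel{4, 3, 224, 224};  // IR-side shape of one input
const int64_t batchSize = 4;                        // original batch value passed to parse()

shapeFromIRModel[intel_npu::utils::BATCH_AXIS] = ov::Dimension(1, batchSize);
// shapeFromIRModel is now [1..4,3,224,224]: the batch axis becomes a bounded dynamic
// dimension, so any batch between 1 and the original value maps onto the batch-1 graph.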
GraphDescriptor mainGraphDesc;

if (_zeGraphExt) {
@@ -434,7 +434,8 @@ GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(void* blobData, size_t bl
* @returns A descriptor object containing the metadata converted in OpenVINO specific structures.
*/
static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
const std::optional<ze_graph_argument_metadata_t>& metadata) {
const std::optional<ze_graph_argument_metadata_t>& metadata,
std::optional<int64_t> batchSize) {
auto logger = Logger::global().clone("getIODescriptor");
ov::element::Type_t precision = zeroUtils::toOVElementType(arg.devicePrecision);
ov::Shape shapeFromCompiler;
Expand All @@ -451,7 +452,9 @@ static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
const auto dynamicDim = std::numeric_limits<uint64_t>::max();
shapeFromIRModel.reserve(metadata->shape_size);
for (uint32_t id = 0; id < metadata->shape_size; id++) {
if (metadata->shape[id] != dynamicDim) {
if (batchSize.has_value() && id == utils::BATCH_AXIS) {
shapeFromIRModel.push_back(ov::Dimension(1, batchSize.value()));
} else if (metadata->shape[id] != dynamicDim) {
shapeFromIRModel.push_back(metadata->shape[id]);
} else {
// lower bound is ignored, so we set it to 1 just to satisfy the Dimension constructor,
@@ -516,7 +519,8 @@ static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,
uint32_t index,
std::vector<IODescriptor>& inputs,
std::vector<IODescriptor>& outputs) const {
std::vector<IODescriptor>& outputs,
std::optional<int64_t> batchSize) const {
if (NotSupportArgumentMetadata(_graphExtVersion)) {
ze_graph_argument_properties_3_t arg = {};
_logger.debug("getMetadata - perform pfnGetArgumentProperties3");
@@ -525,10 +529,10 @@ void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,

switch (arg.type) {
case ZE_GRAPH_ARGUMENT_TYPE_INPUT: {
inputs.push_back(getIODescriptor(arg, std::nullopt));
inputs.push_back(getIODescriptor(arg, std::nullopt, batchSize));
} break;
case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT: {
outputs.push_back(getIODescriptor(arg, std::nullopt));
outputs.push_back(getIODescriptor(arg, std::nullopt, batchSize));
} break;
default: {
OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ",
@@ -556,10 +560,10 @@ void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,

switch (arg.type) {
case ZE_GRAPH_ARGUMENT_TYPE_INPUT: {
inputs.push_back(getIODescriptor(arg, optionalMetadata));
inputs.push_back(getIODescriptor(arg, optionalMetadata, batchSize));
} break;
case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT: {
outputs.push_back(getIODescriptor(arg, optionalMetadata));
outputs.push_back(getIODescriptor(arg, optionalMetadata, batchSize));
} break;
default: {
OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ",
@@ -569,7 +573,8 @@ void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,
}
}

NetworkMetadata ZeGraphExtWrappers::getNetworkMeta(GraphDescriptor& graphDescriptor) const {
NetworkMetadata ZeGraphExtWrappers::getNetworkMeta(GraphDescriptor& graphDescriptor,
std::optional<int64_t> batchSize) const {
ze_graph_properties_t graphProperties = {};
graphProperties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;

Expand All @@ -578,7 +583,7 @@ NetworkMetadata ZeGraphExtWrappers::getNetworkMeta(GraphDescriptor& graphDescrip
THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetProperties", result, _zeroInitStruct->getGraphDdiTable());
NetworkMetadata meta;
for (uint32_t index = 0; index < graphProperties.numGraphArgs; ++index) {
getMetadata(graphDescriptor._handle, index, meta.inputs, meta.outputs);
getMetadata(graphDescriptor._handle, index, meta.inputs, meta.outputs, batchSize);
}
// TODO: support this information in CiD [track: E#33479]
meta.numStreams = 1;
45 changes: 44 additions & 1 deletion src/plugins/intel_npu/src/plugin/include/metadata.hpp
@@ -43,6 +43,11 @@ class MetadataBase {
*/
virtual std::optional<std::vector<uint64_t>> get_init_sizes() const = 0;

/**
* @returns Batch size. Populated in case of plugin batching.
*/
virtual std::optional<int64_t> get_batch_size() const = 0;

virtual ~MetadataBase() = default;

static std::streampos getFileSize(std::istream& stream);
@@ -101,11 +106,12 @@ constexpr std::string_view MAGIC_BYTES = "OVNPU";
*/
constexpr uint32_t METADATA_VERSION_2_0{MetadataBase::make_version(2, 0)};
constexpr uint32_t METADATA_VERSION_2_1{MetadataBase::make_version(2, 1)};
constexpr uint32_t METADATA_VERSION_2_2{MetadataBase::make_version(2, 2)};

/**
* @brief Current metadata version.
*/
constexpr uint32_t CURRENT_METADATA_VERSION{METADATA_VERSION_2_1};
constexpr uint32_t CURRENT_METADATA_VERSION{METADATA_VERSION_2_2};

constexpr uint16_t CURRENT_METADATA_MAJOR_VERSION{MetadataBase::get_major(CURRENT_METADATA_VERSION)};
constexpr uint16_t CURRENT_METADATA_MINOR_VERSION{MetadataBase::get_minor(CURRENT_METADATA_VERSION)};
@@ -210,6 +216,8 @@ class Metadata<METADATA_VERSION_2_0> : public MetadataBase {

std::optional<std::vector<uint64_t>> get_init_sizes() const override;

std::optional<int64_t> get_batch_size() const override;

size_t get_metadata_size() const override;

protected:
@@ -242,13 +250,48 @@ class Metadata<METADATA_VERSION_2_1> : public Metadata<METADATA_VERSION_2_0> {

std::optional<std::vector<uint64_t>> get_init_sizes() const override;

std::optional<int64_t> get_batch_size() const override;

size_t get_metadata_size() const override;

private:
std::optional<std::vector<uint64_t>> _initSizes;
uint64_t _numberOfInits = 0;
};

/**
* @brief The version that adds support for batch value storage.
*/
template <>
class Metadata<METADATA_VERSION_2_2> : public Metadata<METADATA_VERSION_2_1> {
public:
Metadata(uint64_t blobSize,
std::optional<OpenvinoVersion> ovVersion = std::nullopt,
const std::optional<std::vector<uint64_t>> initSizes = std::nullopt,
const std::optional<int64_t> batchSize = std::nullopt);

/**
* @details The batch size is read in addition to the information provided by the previous metadata versions.
*/
void read(std::istream& stream) override;

void read(const ov::Tensor& tensor) override;

/**
* @details The batch size is written in addition to the information registered by the previous metadata versions.
*/
void write(std::ostream& stream) override;

std::optional<int64_t> get_batch_size() const override;

size_t get_metadata_size() const override;

private:
std::optional<int64_t> _batchSize;
};
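Note: the read()/write() bodies of the new specialization are not visible in this diff; as a minimal sketch of one possible layout (the presence flag and field order below are assumptions for illustration, not the PR's actual format):

// Hypothetical sketch only — NOT the implementation from this PR.
void Metadata<METADATA_VERSION_2_2>::write(std::ostream& stream) {
    Metadata<METADATA_VERSION_2_1>::write(stream);  // serialize everything covered by v2.1 first

    // Assumed layout: a presence flag followed by the batch value itself.
    const bool hasBatch = _batchSize.has_value();
    stream.write(reinterpret_cast<const char*>(&hasBatch), sizeof(hasBatch));
    if (hasBatch) {
        const int64_t batch = *_batchSize;
        stream.write(reinterpret_cast<const char*>(&batch), sizeof(batch));
    }
}

std::optional<int64_t> Metadata<METADATA_VERSION_2_2>::get_batch_size() const {
    return _batchSize;
}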

/**
* @brief Creates a Metadata object.
*
18 changes: 17 additions & 1 deletion src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
@@ -11,6 +11,7 @@
#include "intel_npu/common/itt.hpp"
#include "intel_npu/config/config.hpp"
#include "intel_npu/config/options.hpp"
#include "intel_npu/utils/utils.hpp"
#include "metadata.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "openvino/runtime/properties.hpp"
@@ -90,7 +91,22 @@ void CompiledModel::export_model(std::ostream& stream) const {

auto [blobSizesBeforeVersioning, initBlobSizes] = _graph->export_blob(stream);

Metadata<CURRENT_METADATA_VERSION>(blobSizesBeforeVersioning, CURRENT_OPENVINO_VERSION, initBlobSizes)
std::optional<int64_t> originalBatchSize = std::nullopt;
auto metadata = _graph->get_metadata();
auto inputMeta = metadata.inputs;
for (auto in : inputMeta) {
// Plugin batching applied, saving original batch value
if (in.shapeFromIRModel.has_value() && in.shapeFromCompiler[intel_npu::utils::BATCH_AXIS] == 1) {
originalBatchSize =
std::optional(in.shapeFromIRModel.value()[intel_npu::utils::BATCH_AXIS].get_max_length());
break;
}
}

Metadata<CURRENT_METADATA_VERSION>(blobSizesBeforeVersioning,
CURRENT_OPENVINO_VERSION,
initBlobSizes,
originalBatchSize)
.write(stream);
}
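Note: a worked example of the detection above (invented values, assuming intel_npu::utils::BATCH_AXIS == 0):

// Hypothetical descriptor contents; not code from this PR.
ov::PartialShape shapeFromIRModel{4, 3, 224, 224};  // shape declared by the IR model
ov::Shape shapeFromCompiler{1, 3, 224, 224};        // compiled with plugin batching -> batch == 1

const bool pluginBatchingApplied = shapeFromCompiler[intel_npu::utils::BATCH_AXIS] == 1;
const int64_t originalBatch = shapeFromIRModel[intel_npu::utils::BATCH_AXIS].get_max_length();  // 4
// originalBatch is the value written into the version 2.2 metadata appended after the blob.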
