Skip to content

Commit a2f5de8

Browse files
Investigate refactoring opportunities for batch management in Plugin and Compiler - review - WIP
1 parent 55fb7d7 commit a2f5de8

File tree

10 files changed: +60 -26 lines changed

src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,8 @@ class ICompilerAdapter {
4646
ov::Tensor mainBlob,
4747
const Config& config,
4848
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
49-
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const = 0;
49+
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt,
50+
std::optional<ov::Dimension> batchSize = std::nullopt) const = 0;
5051

5152
virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const = 0;
5253
virtual uint32_t get_version() const = 0;

src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,8 @@ class DriverCompilerAdapter final : public ICompilerAdapter {
2929
ov::Tensor mainBlob,
3030
const Config& config,
3131
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
32-
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const override;
32+
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt,
33+
std::optional<ov::Dimension> batchSize = std::nullopt) const override;
3334

3435
ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
3536

src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,8 @@ class PluginCompilerAdapter final : public ICompilerAdapter {
2727
ov::Tensor mainBlob,
2828
const Config& config,
2929
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
30-
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const override;
30+
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt,
31+
std::optional<ov::Dimension> batchSize = std::nullopt) const override;
3132

3233
ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
3334

src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ class ZeGraphExtWrappers {
4545

4646
GraphDescriptor getGraphDescriptor(void* data, size_t size) const;
4747

48-
NetworkMetadata getNetworkMeta(GraphDescriptor& graphDescriptor) const;
48+
NetworkMetadata getNetworkMeta(GraphDescriptor& graphDescriptor, std::optional<ov::Dimension> batchSize = std::nullopt) const;
4949

5050
void destroyGraph(GraphDescriptor& graphDescriptor);
5151

@@ -70,7 +70,8 @@ class ZeGraphExtWrappers {
7070
void getMetadata(ze_graph_handle_t graphHandle,
7171
uint32_t index,
7272
std::vector<IODescriptor>& inputs,
73-
std::vector<IODescriptor>& outputs) const;
73+
std::vector<IODescriptor>& outputs,
74+
std::optional<ov::Dimension> batchSize) const;
7475

7576
void initializeGraphThroughCommandList(ze_graph_handle_t graphHandle, uint32_t commandQueueGroupOrdinal) const;
7677

src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -361,15 +361,16 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::parse(
361361
ov::Tensor mainBlob,
362362
const Config& config,
363363
std::optional<std::vector<ov::Tensor>> initBlobs,
364-
const std::optional<std::shared_ptr<const ov::Model>>& model) const {
364+
const std::optional<std::shared_ptr<const ov::Model>>& model,
365+
std::optional<ov::Dimension> batchSize) const {
365366
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse");
366367

367368
_logger.debug("parse start");
368369
auto mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size());
369370
_logger.debug("parse end");
370371

371372
OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta");
372-
auto networkMeta = _zeGraphExt->getNetworkMeta(mainGraphDesc);
373+
auto networkMeta = _zeGraphExt->getNetworkMeta(mainGraphDesc, batchSize);
373374

374375
// exporting the blob when we get it from cache or ov::hint::compiled_blob property
375376
// shall be available

src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -253,7 +253,8 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::parse(
253253
ov::Tensor mainBlob,
254254
const Config& config,
255255
std::optional<std::vector<ov::Tensor>> initBlobs,
256-
const std::optional<std::shared_ptr<const ov::Model>>& model) const {
256+
const std::optional<std::shared_ptr<const ov::Model>>& model,
257+
std::optional<ov::Dimension> batchSize) const {
257258
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse");
258259

259260
_logger.debug("parse start");

src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -434,7 +434,8 @@ GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(void* blobData, size_t bl
434434
* @returns A descriptor object containing the metadata converted in OpenVINO specific structures.
435435
*/
436436
static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
437-
const std::optional<ze_graph_argument_metadata_t>& metadata) {
437+
const std::optional<ze_graph_argument_metadata_t>& metadata,
438+
std::optional<ov::Dimension> batchSize) {
438439
auto logger = Logger::global().clone("getIODescriptor");
439440
ov::element::Type_t precision = zeroUtils::toOVElementType(arg.devicePrecision);
440441
ov::Shape shapeFromCompiler;
@@ -451,7 +452,9 @@ static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
451452
const auto dynamicDim = std::numeric_limits<uint64_t>::max();
452453
shapeFromIRModel.reserve(metadata->shape_size);
453454
for (uint32_t id = 0; id < metadata->shape_size; id++) {
454-
if (metadata->shape[id] != dynamicDim) {
455+
if (batchSize.has_value() && id == utils::BATCH_AXIS) {
456+
shapeFromIRModel.push_back(ov::Dimension(1, batchSize.value().get_max_length()));
457+
} else if (metadata->shape[id] != dynamicDim) {
455458
shapeFromIRModel.push_back(metadata->shape[id]);
456459
} else {
457460
// lower bound is ignored, so we set it to 1 just to satisfy the Dimension constructor,
@@ -516,7 +519,8 @@ static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
516519
void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,
517520
uint32_t index,
518521
std::vector<IODescriptor>& inputs,
519-
std::vector<IODescriptor>& outputs) const {
522+
std::vector<IODescriptor>& outputs,
523+
std::optional<ov::Dimension> batchSize) const {
520524
if (NotSupportArgumentMetadata(_graphExtVersion)) {
521525
ze_graph_argument_properties_3_t arg = {};
522526
_logger.debug("getMetadata - perform pfnGetArgumentProperties3");
@@ -525,10 +529,10 @@ void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,
525529

526530
switch (arg.type) {
527531
case ZE_GRAPH_ARGUMENT_TYPE_INPUT: {
528-
inputs.push_back(getIODescriptor(arg, std::nullopt));
532+
inputs.push_back(getIODescriptor(arg, std::nullopt, batchSize));
529533
} break;
530534
case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT: {
531-
outputs.push_back(getIODescriptor(arg, std::nullopt));
535+
outputs.push_back(getIODescriptor(arg, std::nullopt, batchSize));
532536
} break;
533537
default: {
534538
OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ",
@@ -556,10 +560,10 @@ void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,
556560

557561
switch (arg.type) {
558562
case ZE_GRAPH_ARGUMENT_TYPE_INPUT: {
559-
inputs.push_back(getIODescriptor(arg, optionalMetadata));
563+
inputs.push_back(getIODescriptor(arg, optionalMetadata, batchSize));
560564
} break;
561565
case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT: {
562-
outputs.push_back(getIODescriptor(arg, optionalMetadata));
566+
outputs.push_back(getIODescriptor(arg, optionalMetadata, batchSize));
563567
} break;
564568
default: {
565569
OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ",
@@ -569,7 +573,7 @@ void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,
569573
}
570574
}
571575

572-
NetworkMetadata ZeGraphExtWrappers::getNetworkMeta(GraphDescriptor& graphDescriptor) const {
576+
NetworkMetadata ZeGraphExtWrappers::getNetworkMeta(GraphDescriptor& graphDescriptor, std::optional<ov::Dimension> batchSize) const {
573577
ze_graph_properties_t graphProperties = {};
574578
graphProperties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;
575579

@@ -578,7 +582,7 @@ NetworkMetadata ZeGraphExtWrappers::getNetworkMeta(GraphDescriptor& graphDescrip
578582
THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetProperties", result, _zeroInitStruct->getGraphDdiTable());
579583
NetworkMetadata meta;
580584
for (uint32_t index = 0; index < graphProperties.numGraphArgs; ++index) {
581-
getMetadata(graphDescriptor._handle, index, meta.inputs, meta.outputs);
585+
getMetadata(graphDescriptor._handle, index, meta.inputs, meta.outputs, batchSize);
582586
}
583587
// TODO: support this information in CiD [track: E#33479]
584588
meta.numStreams = 1;

src/plugins/intel_npu/src/plugin/include/metadata.hpp

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,11 @@ class MetadataBase {
4343
*/
4444
virtual std::optional<std::vector<uint64_t>> get_init_sizes() const = 0;
4545

46+
/**
47+
* @returns Batch size. Populated in case of plugin batching.
48+
*/
49+
virtual std::optional<ov::Dimension> get_batch_size() const = 0;
50+
4651
virtual ~MetadataBase() = default;
4752

4853
static std::streampos getFileSize(std::istream& stream);
@@ -211,6 +216,8 @@ class Metadata<METADATA_VERSION_2_0> : public MetadataBase {
211216

212217
std::optional<std::vector<uint64_t>> get_init_sizes() const override;
213218

219+
std::optional<ov::Dimension> get_batch_size() const override;
220+
214221
size_t get_metadata_size() const override;
215222

216223
protected:
@@ -243,6 +250,8 @@ class Metadata<METADATA_VERSION_2_1> : public Metadata<METADATA_VERSION_2_0> {
243250

244251
std::optional<std::vector<uint64_t>> get_init_sizes() const override;
245252

253+
std::optional<ov::Dimension> get_batch_size() const override;
254+
246255
size_t get_metadata_size() const override;
247256

248257
private:
@@ -275,7 +284,7 @@ class Metadata<METADATA_VERSION_2_2> : public Metadata<METADATA_VERSION_2_1> {
275284
*/
276285
void write(std::ostream& stream) override;
277286

278-
virtual std::optional<ov::Dimension> get_batch_size() const;
287+
std::optional<ov::Dimension> get_batch_size() const override;
279288

280289
private:
281290
std::optional<ov::Dimension> _batchSize;

src/plugins/intel_npu/src/plugin/src/metadata.cpp

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -131,13 +131,14 @@ void Metadata<METADATA_VERSION_2_2>::read(std::istream& stream) {
131131

132132
void Metadata<METADATA_VERSION_2_2>::read(const ov::Tensor& tensor) {
133133
Metadata<METADATA_VERSION_2_1>::read(tensor);
134-
auto roiTensor = ov::Tensor(tensor,
135-
ov::Coordinate{sizeof(decltype(std::declval<OpenvinoVersion>().get_major())) +
136-
sizeof(decltype(std::declval<OpenvinoVersion>().get_minor())) +
137-
sizeof(decltype(std::declval<OpenvinoVersion>().get_patch()))},
138-
ov::Coordinate{tensor.get_byte_size()});
139134

140-
_batchSize = *reinterpret_cast<const decltype(_batchSize)*>(roiTensor.data<const char>());
135+
// Calculate the offset where the batch size is stored in the tensor
136+
auto offset = sizeof(decltype(std::declval<OpenvinoVersion>().get_major())) +
137+
sizeof(decltype(std::declval<OpenvinoVersion>().get_minor())) +
138+
sizeof(decltype(std::declval<OpenvinoVersion>().get_patch())) +
139+
sizeof(uint64_t) * (get_init_sizes() ? get_init_sizes()->size() : 0);
140+
141+
_batchSize = *reinterpret_cast<const decltype(_batchSize)*>(tensor.data<const char>() + offset);
141142
}
142143

143144
void MetadataBase::append_padding_blob_size_and_magic(std::ostream& stream) {
@@ -175,7 +176,9 @@ void Metadata<METADATA_VERSION_2_1>::write(std::ostream& stream) {
175176
void Metadata<METADATA_VERSION_2_2>::write(std::ostream& stream) {
176177
Metadata<METADATA_VERSION_2_1>::write(stream);
177178

178-
stream.write(reinterpret_cast<const char*>(&_batchSize), sizeof(_batchSize));
179+
if (_batchSize.has_value()) {
180+
stream.write(reinterpret_cast<const char*>(&_batchSize), sizeof(_batchSize));
181+
}
179182

180183
append_padding_blob_size_and_magic(stream);
181184
}
@@ -335,6 +338,14 @@ std::optional<std::vector<uint64_t>> Metadata<METADATA_VERSION_2_1>::get_init_si
335338
return _initSizes;
336339
}
337340

341+
std::optional<ov::Dimension> Metadata<METADATA_VERSION_2_0>::get_batch_size() const {
342+
return std::nullopt;
343+
}
344+
345+
std::optional<ov::Dimension> Metadata<METADATA_VERSION_2_1>::get_batch_size() const {
346+
return std::nullopt;
347+
}
348+
338349
std::optional<ov::Dimension> Metadata<METADATA_VERSION_2_2>::get_batch_size() const {
339350
return _batchSize;
340351
}

src/plugins/intel_npu/src/plugin/src/plugin.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -951,13 +951,15 @@ std::shared_ptr<ov::ICompiledModel> Plugin::parse(const ov::Tensor& tensorBig,
951951

952952
uint64_t mainSize = tensorBig.get_byte_size();
953953
std::optional<std::vector<uint64_t>> initSizes;
954+
std::optional<ov::Dimension> batchSize;
954955

955956
if (metadata) {
956957
size_t accumulator = 0;
957958
initSizes = metadata->get_init_sizes();
958959
mainSize = initSizes.has_value()
959960
? metadata->get_blob_size() - std::accumulate(initSizes->begin(), initSizes->end(), accumulator)
960961
: metadata->get_blob_size();
962+
batchSize = metadata->get_batch_size();
961963
} else {
962964
_logger.info("Blob compatibility check skipped.");
963965
}
@@ -1024,8 +1026,10 @@ std::shared_ptr<ov::ICompiledModel> Plugin::parse(const ov::Tensor& tensorBig,
10241026
auto graph = compiler->parse(std::move(tensorMain),
10251027
localConfig,
10261028
weightsSeparationEnabled ? std::make_optional(std::move(tensorsInits)) : std::nullopt,
1027-
weightsSeparationEnabled ? std::make_optional(originalModel) : std::nullopt);
1029+
weightsSeparationEnabled ? std::make_optional(originalModel) : std::nullopt,
1030+
batchSize);
10281031
graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++));
1032+
10291033
const std::shared_ptr<ov::Model> modelDummy =
10301034
create_dummy_model(graph->get_metadata().inputs, graph->get_metadata().outputs);
10311035

0 commit comments

Comments
 (0)