Skip to content

Commit 6cb439f

Browse files
Investigate refactoring opportunities for batch management in Plugin and Compiler - review - WIP
1 parent a2f5de8 commit 6cb439f

File tree

11 files changed: 59 additions (+59) and 40 deletions (-40)

src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ class ICompilerAdapter {
4747
const Config& config,
4848
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
4949
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt,
50-
std::optional<ov::Dimension> batchSize = std::nullopt) const = 0;
50+
std::optional<int64_t> batchSize = std::nullopt) const = 0;
5151

5252
virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const = 0;
5353
virtual uint32_t get_version() const = 0;

src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter {
3030
const Config& config,
3131
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
3232
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt,
33-
std::optional<ov::Dimension> batchSize = std::nullopt) const override;
33+
std::optional<int64_t> batchSize = std::nullopt) const override;
3434

3535
ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
3636

src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ class PluginCompilerAdapter final : public ICompilerAdapter {
2828
const Config& config,
2929
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
3030
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt,
31-
std::optional<ov::Dimension> batchSize = std::nullopt) const override;
31+
std::optional<int64_t> batchSize = std::nullopt) const override;
3232

3333
ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
3434

src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ class ZeGraphExtWrappers {
4545

4646
GraphDescriptor getGraphDescriptor(void* data, size_t size) const;
4747

48-
NetworkMetadata getNetworkMeta(GraphDescriptor& graphDescriptor, std::optional<ov::Dimension> batchSize = std::nullopt) const;
48+
NetworkMetadata getNetworkMeta(GraphDescriptor& graphDescriptor, std::optional<int64_t> batchSize = std::nullopt) const;
4949

5050
void destroyGraph(GraphDescriptor& graphDescriptor);
5151

@@ -71,7 +71,7 @@ class ZeGraphExtWrappers {
7171
uint32_t index,
7272
std::vector<IODescriptor>& inputs,
7373
std::vector<IODescriptor>& outputs,
74-
std::optional<ov::Dimension> batchSize) const;
74+
std::optional<int64_t> batchSize) const;
7575

7676
void initializeGraphThroughCommandList(ze_graph_handle_t graphHandle, uint32_t commandQueueGroupOrdinal) const;
7777

src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -362,7 +362,7 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::parse(
362362
const Config& config,
363363
std::optional<std::vector<ov::Tensor>> initBlobs,
364364
const std::optional<std::shared_ptr<const ov::Model>>& model,
365-
std::optional<ov::Dimension> batchSize) const {
365+
std::optional<int64_t> batchSize) const {
366366
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse");
367367

368368
_logger.debug("parse start");

src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,7 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::parse(
254254
const Config& config,
255255
std::optional<std::vector<ov::Tensor>> initBlobs,
256256
const std::optional<std::shared_ptr<const ov::Model>>& model,
257-
std::optional<ov::Dimension> batchSize) const {
257+
std::optional<int64_t> batchSize) const {
258258
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse");
259259

260260
_logger.debug("parse start");

src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -435,7 +435,7 @@ GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(void* blobData, size_t bl
435435
*/
436436
static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
437437
const std::optional<ze_graph_argument_metadata_t>& metadata,
438-
std::optional<ov::Dimension> batchSize) {
438+
std::optional<int64_t> batchSize) {
439439
auto logger = Logger::global().clone("getIODescriptor");
440440
ov::element::Type_t precision = zeroUtils::toOVElementType(arg.devicePrecision);
441441
ov::Shape shapeFromCompiler;
@@ -453,7 +453,7 @@ static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
453453
shapeFromIRModel.reserve(metadata->shape_size);
454454
for (uint32_t id = 0; id < metadata->shape_size; id++) {
455455
if (batchSize.has_value() && id == utils::BATCH_AXIS) {
456-
shapeFromIRModel.push_back(ov::Dimension(1, batchSize.value().get_max_length()));
456+
shapeFromIRModel.push_back(ov::Dimension(1, batchSize.value()));
457457
} else if (metadata->shape[id] != dynamicDim) {
458458
shapeFromIRModel.push_back(metadata->shape[id]);
459459
} else {
@@ -520,7 +520,7 @@ void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,
520520
uint32_t index,
521521
std::vector<IODescriptor>& inputs,
522522
std::vector<IODescriptor>& outputs,
523-
std::optional<ov::Dimension> batchSize) const {
523+
std::optional<int64_t> batchSize) const {
524524
if (NotSupportArgumentMetadata(_graphExtVersion)) {
525525
ze_graph_argument_properties_3_t arg = {};
526526
_logger.debug("getMetadata - perform pfnGetArgumentProperties3");
@@ -573,7 +573,7 @@ void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,
573573
}
574574
}
575575

576-
NetworkMetadata ZeGraphExtWrappers::getNetworkMeta(GraphDescriptor& graphDescriptor, std::optional<ov::Dimension> batchSize) const {
576+
NetworkMetadata ZeGraphExtWrappers::getNetworkMeta(GraphDescriptor& graphDescriptor, std::optional<int64_t> batchSize) const {
577577
ze_graph_properties_t graphProperties = {};
578578
graphProperties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;
579579

src/plugins/intel_npu/src/plugin/include/metadata.hpp

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ class MetadataBase {
4646
/**
4747
* @returns Batch size. Populated in case of plugin batching.
4848
*/
49-
virtual std::optional<ov::Dimension> get_batch_size() const = 0;
49+
virtual std::optional<int64_t> get_batch_size() const = 0;
5050

5151
virtual ~MetadataBase() = default;
5252

@@ -216,7 +216,7 @@ class Metadata<METADATA_VERSION_2_0> : public MetadataBase {
216216

217217
std::optional<std::vector<uint64_t>> get_init_sizes() const override;
218218

219-
std::optional<ov::Dimension> get_batch_size() const override;
219+
std::optional<int64_t> get_batch_size() const override;
220220

221221
size_t get_metadata_size() const override;
222222

@@ -250,7 +250,7 @@ class Metadata<METADATA_VERSION_2_1> : public Metadata<METADATA_VERSION_2_0> {
250250

251251
std::optional<std::vector<uint64_t>> get_init_sizes() const override;
252252

253-
std::optional<ov::Dimension> get_batch_size() const override;
253+
std::optional<int64_t> get_batch_size() const override;
254254

255255
size_t get_metadata_size() const override;
256256

@@ -268,7 +268,7 @@ class Metadata<METADATA_VERSION_2_2> : public Metadata<METADATA_VERSION_2_1> {
268268
Metadata(uint64_t blobSize,
269269
std::optional<OpenvinoVersion> ovVersion = std::nullopt,
270270
const std::optional<std::vector<uint64_t>> initSizes = std::nullopt,
271-
const std::optional<ov::Dimension> batchSize = std::nullopt);
271+
const std::optional<int64_t> batchSize = std::nullopt);
272272

273273
/**
274274
* @details The number of init schedules, along with the size of each init binary object are read in addition to the
@@ -284,10 +284,12 @@ class Metadata<METADATA_VERSION_2_2> : public Metadata<METADATA_VERSION_2_1> {
284284
*/
285285
void write(std::ostream& stream) override;
286286

287-
std::optional<ov::Dimension> get_batch_size() const override;
287+
std::optional<int64_t> get_batch_size() const override;
288+
289+
size_t get_metadata_size() const override;
288290

289291
private:
290-
std::optional<ov::Dimension> _batchSize;
292+
std::optional<int64_t> _batchSize;
291293
};
292294

293295
/**

src/plugins/intel_npu/src/plugin/src/compiled_model.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -91,14 +91,15 @@ void CompiledModel::export_model(std::ostream& stream) const {
9191

9292
auto [blobSizesBeforeVersioning, initBlobSizes] = _graph->export_blob(stream);
9393

94-
std::optional<ov::Dimension> originalBatchSize = std::nullopt;
94+
std::optional<int64_t> originalBatchSize = std::nullopt;
9595
auto metadata = _graph->get_metadata();
9696
auto inputMeta = metadata.inputs;
9797
for (auto in : inputMeta) {
9898
// Plugin batching applied, saving original batch value
99-
if (in.shapeFromIRModel.has_value() && in.shapeFromIRModel.value()[intel_npu::utils::BATCH_AXIS].is_dynamic() &&
99+
if (in.shapeFromIRModel.has_value() &&
100100
in.shapeFromCompiler[intel_npu::utils::BATCH_AXIS] == 1) {
101-
originalBatchSize = std::optional(in.shapeFromIRModel.value()[intel_npu::utils::BATCH_AXIS]);
101+
originalBatchSize = std::optional(in.shapeFromIRModel.value()[intel_npu::utils::BATCH_AXIS].get_max_length());
102+
break;
102103
}
103104
}
104105

src/plugins/intel_npu/src/plugin/src/metadata.cpp

Lines changed: 30 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ Metadata<METADATA_VERSION_2_1>::Metadata(uint64_t blobSize,
7474
Metadata<METADATA_VERSION_2_2>::Metadata(uint64_t blobSize,
7575
std::optional<OpenvinoVersion> ovVersion,
7676
const std::optional<std::vector<uint64_t>> initSizes,
77-
const std::optional<ov::Dimension> batchSize)
77+
const std::optional<int64_t> batchSize)
7878
: Metadata<METADATA_VERSION_2_1>{blobSize, ovVersion, initSizes},
7979
_batchSize{batchSize} {
8080
_version = METADATA_VERSION_2_2;
@@ -126,19 +126,31 @@ void Metadata<METADATA_VERSION_2_1>::read(const ov::Tensor& tensor) {
126126
void Metadata<METADATA_VERSION_2_2>::read(std::istream& stream) {
127127
Metadata<METADATA_VERSION_2_1>::read(stream);
128128

129-
stream.read(reinterpret_cast<char*>(&_batchSize), sizeof(_batchSize));
129+
int64_t batchSize;
130+
stream.read(reinterpret_cast<char*>(&batchSize), sizeof(batchSize));
131+
132+
if (batchSize) {
133+
_batchSize = std::optional(batchSize);
134+
}
130135
}
131136

132137
void Metadata<METADATA_VERSION_2_2>::read(const ov::Tensor& tensor) {
133138
Metadata<METADATA_VERSION_2_1>::read(tensor);
134139

135-
// Calculate the offset where the batch size is stored in the tensor
136-
auto offset = sizeof(decltype(std::declval<OpenvinoVersion>().get_major())) +
137-
sizeof(decltype(std::declval<OpenvinoVersion>().get_minor())) +
138-
sizeof(decltype(std::declval<OpenvinoVersion>().get_patch())) +
139-
sizeof(uint64_t) * (get_init_sizes() ? get_init_sizes()->size() : 0);
140+
auto roiTensor = ov::Tensor(tensor,
141+
ov::Coordinate{sizeof(decltype(std::declval<OpenvinoVersion>().get_major())) +
142+
sizeof(decltype(std::declval<OpenvinoVersion>().get_minor())) +
143+
sizeof(decltype(std::declval<OpenvinoVersion>().get_patch())) +
144+
sizeof(uint64_t) +
145+
sizeof(uint64_t) * (get_init_sizes() ? get_init_sizes()->size() : 0)},
146+
ov::Coordinate{tensor.get_byte_size()});
147+
148+
int64_t batchSize;
149+
batchSize = *reinterpret_cast<const decltype(batchSize)*>(roiTensor.data<const char>());
140150

141-
_batchSize = *reinterpret_cast<const decltype(_batchSize)*>(tensor.data<const char>() + offset);
151+
if (batchSize) {
152+
_batchSize = std::optional(batchSize);
153+
}
142154
}
143155

144156
void MetadataBase::append_padding_blob_size_and_magic(std::ostream& stream) {
@@ -169,8 +181,6 @@ void Metadata<METADATA_VERSION_2_1>::write(std::ostream& stream) {
169181
stream.write(reinterpret_cast<const char*>(&initSize), sizeof(initSize));
170182
}
171183
}
172-
173-
append_padding_blob_size_and_magic(stream);
174184
}
175185

176186
void Metadata<METADATA_VERSION_2_2>::write(std::ostream& stream) {
@@ -195,7 +205,7 @@ std::unique_ptr<MetadataBase> create_metadata(uint32_t version, uint64_t blobSiz
195205
case METADATA_VERSION_2_1:
196206
return std::make_unique<Metadata<METADATA_VERSION_2_1>>(blobSize, std::nullopt);
197207
case METADATA_VERSION_2_2:
198-
return std::make_unique<Metadata<METADATA_VERSION_2_1>>(blobSize, std::nullopt);
208+
return std::make_unique<Metadata<METADATA_VERSION_2_2>>(blobSize, std::nullopt);
199209
default:
200210
OPENVINO_THROW("Metadata version is not supported!");
201211
}
@@ -338,15 +348,15 @@ std::optional<std::vector<uint64_t>> Metadata<METADATA_VERSION_2_1>::get_init_si
338348
return _initSizes;
339349
}
340350

341-
std::optional<ov::Dimension> Metadata<METADATA_VERSION_2_0>::get_batch_size() const {
351+
std::optional<int64_t> Metadata<METADATA_VERSION_2_0>::get_batch_size() const {
342352
return std::nullopt;
343353
}
344354

345-
std::optional<ov::Dimension> Metadata<METADATA_VERSION_2_1>::get_batch_size() const {
355+
std::optional<int64_t> Metadata<METADATA_VERSION_2_1>::get_batch_size() const {
346356
return std::nullopt;
347357
}
348358

349-
std::optional<ov::Dimension> Metadata<METADATA_VERSION_2_2>::get_batch_size() const {
359+
std::optional<int64_t> Metadata<METADATA_VERSION_2_2>::get_batch_size() const {
350360
return _batchSize;
351361
}
352362

@@ -366,4 +376,10 @@ size_t Metadata<METADATA_VERSION_2_1>::get_metadata_size() const {
366376
return metadataSize;
367377
}
368378

379+
size_t Metadata<METADATA_VERSION_2_2>::get_metadata_size() const {
380+
size_t metadataSize = Metadata<METADATA_VERSION_2_1>::get_metadata_size() + sizeof(_batchSize);
381+
382+
return metadataSize;
383+
}
384+
369385
} // namespace intel_npu

0 commit comments

Comments
 (0)