@@ -42,11 +42,11 @@ class ICompilerAdapter {
* @return A wrapper over the corresponding L0 graph handles (multiple only if "initBlobs" has been provided). This
* wrapper further details the compiled model and brings it in a state closer to execution.
*/
virtual std::shared_ptr<IGraph> parse(
ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const = 0;
virtual std::shared_ptr<IGraph> parse(ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt,
std::optional<int64_t> batchSize = std::nullopt) const = 0;

virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const = 0;
virtual uint32_t get_version() const = 0;
@@ -36,6 +36,8 @@ class IGraph : public std::enable_shared_from_this<IGraph> {

virtual void set_argument_value(uint32_t argi, const void* argv) const = 0;

virtual void set_metadata(NetworkMetadata metadata) = 0;

virtual void initialize(const Config& config) = 0;

virtual ~IGraph() = default;
@@ -25,11 +25,11 @@ class DriverCompilerAdapter final : public ICompilerAdapter {

std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(
ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const override;
std::shared_ptr<IGraph> parse(ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt,
std::optional<int64_t> batchSize = std::nullopt) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

@@ -35,6 +35,8 @@ class Graph : public IGraph {

void set_argument_value(uint32_t argi, const void* argv) const override;

void set_metadata(NetworkMetadata metadata) override;

void initialize(const Config& config) override;

const NetworkMetadata& get_metadata() const override;
@@ -23,11 +23,11 @@ class PluginCompilerAdapter final : public ICompilerAdapter {

std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(
ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const override;
std::shared_ptr<IGraph> parse(ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt,
std::optional<int64_t> batchSize = std::nullopt) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

@@ -45,7 +45,8 @@ class ZeGraphExtWrappers {

GraphDescriptor getGraphDescriptor(void* data, size_t size) const;

NetworkMetadata getNetworkMeta(GraphDescriptor& graphDescriptor) const;
NetworkMetadata getNetworkMeta(GraphDescriptor& graphDescriptor,
std::optional<int64_t> batchSize = std::nullopt) const;

void destroyGraph(GraphDescriptor& graphDescriptor);

@@ -70,7 +71,8 @@ class ZeGraphExtWrappers {
void getMetadata(ze_graph_handle_t graphHandle,
uint32_t index,
std::vector<IODescriptor>& inputs,
std::vector<IODescriptor>& outputs) const;
std::vector<IODescriptor>& outputs,
std::optional<int64_t> batchSize) const;

void initializeGraphThroughCommandList(ze_graph_handle_t graphHandle, uint32_t commandQueueGroupOrdinal) const;

@@ -357,19 +357,19 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::compileWS(const std::shared_ptr<o
config);
}

std::shared_ptr<IGraph> DriverCompilerAdapter::parse(
ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs,
const std::optional<std::shared_ptr<const ov::Model>>& model) const {
std::shared_ptr<IGraph> DriverCompilerAdapter::parse(ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs,
const std::optional<std::shared_ptr<const ov::Model>>& model,
std::optional<int64_t> batchSize) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse");

_logger.debug("parse start");
auto mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size());
_logger.debug("parse end");

OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta");
auto networkMeta = _zeGraphExt->getNetworkMeta(mainGraphDesc);
auto networkMeta = _zeGraphExt->getNetworkMeta(mainGraphDesc, batchSize);

// exporting the blob when we get it from cache or ov::hint::compiled_blob property
// shall be available
4 changes: 4 additions & 0 deletions src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp
@@ -42,6 +42,10 @@ Graph::Graph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
}
}

void Graph::set_metadata(NetworkMetadata metadata) {
_metadata = metadata;
}

const NetworkMetadata& Graph::get_metadata() const {
return _metadata;
}
@@ -249,11 +249,11 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<o
_compiler);
}

std::shared_ptr<IGraph> PluginCompilerAdapter::parse(
ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs,
const std::optional<std::shared_ptr<const ov::Model>>& model) const {
std::shared_ptr<IGraph> PluginCompilerAdapter::parse(ov::Tensor mainBlob,
const Config& config,
std::optional<std::vector<ov::Tensor>> initBlobs,
const std::optional<std::shared_ptr<const ov::Model>>& model,
std::optional<int64_t> batchSize) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse");

_logger.debug("parse start");
@@ -264,6 +264,17 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::parse(
network.clear();
network.shrink_to_fit();

for (auto& in : networkMeta.inputs) {
if (in.shapeFromIRModel.has_value() && batchSize.has_value()) {
in.shapeFromIRModel.value()[intel_npu::utils::BATCH_AXIS] = ov::Dimension(1, batchSize.value());
}
}
for (auto& out : networkMeta.outputs) {
if (out.shapeFromIRModel.has_value() && batchSize.has_value()) {
out.shapeFromIRModel.value()[intel_npu::utils::BATCH_AXIS] = ov::Dimension(1, batchSize.value());
}
}

Review comment (Contributor Author) on lines +267 to +277 — suggested change: perform the batchSize check once, outside the loops:

if (batchSize.has_value()) {
    for (auto& in : networkMeta.inputs) {
        if (in.shapeFromIRModel.has_value()) {
            in.shapeFromIRModel.value()[intel_npu::utils::BATCH_AXIS] = ov::Dimension(1, batchSize.value());
        }
    }
    for (auto& out : networkMeta.outputs) {
        if (out.shapeFromIRModel.has_value()) {
            out.shapeFromIRModel.value()[intel_npu::utils::BATCH_AXIS] = ov::Dimension(1, batchSize.value());
        }
    }
}

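Note: to illustrate what the loop above produces (values invented for the example, assuming intel_npu::utils::BATCH_AXIS == 0):

// Hypothetical descriptor state; not code from this PR.
ov::PartialShape shapeFromIRModel{4, 3, 224, 224};  // IR-side shape of one input
const int64_t batchSize = 4;                        // original batch value passed to parse()

shapeFromIRModel[intel_npu::utils::BATCH_AXIS] = ov::Dimension(1, batchSize);
// shapeFromIRModel is now [1..4,3,224,224]: the batch axis becomes a bounded dynamic
// dimension, so any batch between 1 and the original value maps onto the batch-1 graph.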
GraphDescriptor mainGraphDesc;

if (_zeGraphExt) {
@@ -434,7 +434,8 @@ GraphDescriptor ZeGraphExtWrappers::getGraphDescriptor(void* blobData, size_t bl
* @returns A descriptor object containing the metadata converted in OpenVINO specific structures.
*/
static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
const std::optional<ze_graph_argument_metadata_t>& metadata) {
const std::optional<ze_graph_argument_metadata_t>& metadata,
std::optional<int64_t> batchSize) {
auto logger = Logger::global().clone("getIODescriptor");
ov::element::Type_t precision = zeroUtils::toOVElementType(arg.devicePrecision);
ov::Shape shapeFromCompiler;
Expand All @@ -451,7 +452,9 @@ static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
const auto dynamicDim = std::numeric_limits<uint64_t>::max();
shapeFromIRModel.reserve(metadata->shape_size);
for (uint32_t id = 0; id < metadata->shape_size; id++) {
if (metadata->shape[id] != dynamicDim) {
if (batchSize.has_value() && id == utils::BATCH_AXIS) {
shapeFromIRModel.push_back(ov::Dimension(1, batchSize.value()));
} else if (metadata->shape[id] != dynamicDim) {
shapeFromIRModel.push_back(metadata->shape[id]);
} else {
// lower bound is ignored, so we set it to 1 just to satisfy the Dimension constructor,
@@ -516,7 +519,8 @@ static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,
uint32_t index,
std::vector<IODescriptor>& inputs,
std::vector<IODescriptor>& outputs) const {
std::vector<IODescriptor>& outputs,
std::optional<int64_t> batchSize) const {
if (NotSupportArgumentMetadata(_graphExtVersion)) {
ze_graph_argument_properties_3_t arg = {};
_logger.debug("getMetadata - perform pfnGetArgumentProperties3");
@@ -525,10 +529,10 @@ void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,

switch (arg.type) {
case ZE_GRAPH_ARGUMENT_TYPE_INPUT: {
inputs.push_back(getIODescriptor(arg, std::nullopt));
inputs.push_back(getIODescriptor(arg, std::nullopt, batchSize));
} break;
case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT: {
outputs.push_back(getIODescriptor(arg, std::nullopt));
outputs.push_back(getIODescriptor(arg, std::nullopt, batchSize));
} break;
default: {
OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ",
@@ -556,10 +560,10 @@ void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,

switch (arg.type) {
case ZE_GRAPH_ARGUMENT_TYPE_INPUT: {
inputs.push_back(getIODescriptor(arg, optionalMetadata));
inputs.push_back(getIODescriptor(arg, optionalMetadata, batchSize));
} break;
case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT: {
outputs.push_back(getIODescriptor(arg, optionalMetadata));
outputs.push_back(getIODescriptor(arg, optionalMetadata, batchSize));
} break;
default: {
OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ",
@@ -569,7 +573,8 @@ void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,
}
}

NetworkMetadata ZeGraphExtWrappers::getNetworkMeta(GraphDescriptor& graphDescriptor) const {
NetworkMetadata ZeGraphExtWrappers::getNetworkMeta(GraphDescriptor& graphDescriptor,
std::optional<int64_t> batchSize) const {
ze_graph_properties_t graphProperties = {};
graphProperties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;

Expand All @@ -578,7 +583,7 @@ NetworkMetadata ZeGraphExtWrappers::getNetworkMeta(GraphDescriptor& graphDescrip
THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetProperties", result, _zeroInitStruct->getGraphDdiTable());
NetworkMetadata meta;
for (uint32_t index = 0; index < graphProperties.numGraphArgs; ++index) {
getMetadata(graphDescriptor._handle, index, meta.inputs, meta.outputs);
getMetadata(graphDescriptor._handle, index, meta.inputs, meta.outputs, batchSize);
}
// TODO: support this information in CiD [track: E#33479]
meta.numStreams = 1;
45 changes: 44 additions & 1 deletion src/plugins/intel_npu/src/plugin/include/metadata.hpp
@@ -43,6 +43,11 @@ class MetadataBase {
*/
virtual std::optional<std::vector<uint64_t>> get_init_sizes() const = 0;

/**
* @returns Batch size. Populated in case of plugin batching.
*/
virtual std::optional<int64_t> get_batch_size() const = 0;

virtual ~MetadataBase() = default;

static std::streampos getFileSize(std::istream& stream);
@@ -101,11 +106,12 @@ constexpr std::string_view MAGIC_BYTES = "OVNPU";
*/
constexpr uint32_t METADATA_VERSION_2_0{MetadataBase::make_version(2, 0)};
constexpr uint32_t METADATA_VERSION_2_1{MetadataBase::make_version(2, 1)};
constexpr uint32_t METADATA_VERSION_2_2{MetadataBase::make_version(2, 2)};

/**
* @brief Current metadata version.
*/
constexpr uint32_t CURRENT_METADATA_VERSION{METADATA_VERSION_2_1};
constexpr uint32_t CURRENT_METADATA_VERSION{METADATA_VERSION_2_2};

constexpr uint16_t CURRENT_METADATA_MAJOR_VERSION{MetadataBase::get_major(CURRENT_METADATA_VERSION)};
constexpr uint16_t CURRENT_METADATA_MINOR_VERSION{MetadataBase::get_minor(CURRENT_METADATA_VERSION)};
@@ -210,6 +216,8 @@ class Metadata<METADATA_VERSION_2_0> : public MetadataBase {

std::optional<std::vector<uint64_t>> get_init_sizes() const override;

std::optional<int64_t> get_batch_size() const override;

size_t get_metadata_size() const override;

protected:
@@ -242,13 +250,48 @@ class Metadata<METADATA_VERSION_2_1> : public Metadata<METADATA_VERSION_2_0> {

std::optional<std::vector<uint64_t>> get_init_sizes() const override;

std::optional<int64_t> get_batch_size() const override;

size_t get_metadata_size() const override;

private:
std::optional<std::vector<uint64_t>> _initSizes;
uint64_t _numberOfInits = 0;
};

/**
* @brief The version that adds support for batch value storage.
*/
template <>
class Metadata<METADATA_VERSION_2_2> : public Metadata<METADATA_VERSION_2_1> {
public:
Metadata(uint64_t blobSize,
std::optional<OpenvinoVersion> ovVersion = std::nullopt,
const std::optional<std::vector<uint64_t>> initSizes = std::nullopt,
const std::optional<int64_t> batchSize = std::nullopt);

/**
* @details The batch size is read in addition to the information provided by the previous metadata versions.
*/
void read(std::istream& stream) override;

void read(const ov::Tensor& tensor) override;

/**
* @details The batch size is written in addition to the information registered by the previous metadata versions.
*/
void write(std::ostream& stream) override;

std::optional<int64_t> get_batch_size() const override;

size_t get_metadata_size() const override;

private:
std::optional<int64_t> _batchSize;
};
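Note: the read()/write() bodies of the new specialization are not visible in this diff; as a minimal sketch of one possible layout (the presence flag and field order below are assumptions for illustration, not the PR's actual format):

// Hypothetical sketch only — NOT the implementation from this PR.
void Metadata<METADATA_VERSION_2_2>::write(std::ostream& stream) {
    Metadata<METADATA_VERSION_2_1>::write(stream);  // serialize everything covered by v2.1 first

    // Assumed layout: a presence flag followed by the batch value itself.
    const bool hasBatch = _batchSize.has_value();
    stream.write(reinterpret_cast<const char*>(&hasBatch), sizeof(hasBatch));
    if (hasBatch) {
        const int64_t batch = *_batchSize;
        stream.write(reinterpret_cast<const char*>(&batch), sizeof(batch));
    }
}

std::optional<int64_t> Metadata<METADATA_VERSION_2_2>::get_batch_size() const {
    return _batchSize;
}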

/**
* @brief Creates a Metadata object.
*
18 changes: 17 additions & 1 deletion src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
@@ -11,6 +11,7 @@
#include "intel_npu/common/itt.hpp"
#include "intel_npu/config/config.hpp"
#include "intel_npu/config/options.hpp"
#include "intel_npu/utils/utils.hpp"
#include "metadata.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "openvino/runtime/properties.hpp"
@@ -90,7 +91,22 @@ void CompiledModel::export_model(std::ostream& stream) const {

auto [blobSizesBeforeVersioning, initBlobSizes] = _graph->export_blob(stream);

Metadata<CURRENT_METADATA_VERSION>(blobSizesBeforeVersioning, CURRENT_OPENVINO_VERSION, initBlobSizes)
std::optional<int64_t> originalBatchSize = std::nullopt;
auto metadata = _graph->get_metadata();
auto inputMeta = metadata.inputs;
for (auto in : inputMeta) {
// Plugin batching applied, saving original batch value
if (in.shapeFromIRModel.has_value() && in.shapeFromCompiler[intel_npu::utils::BATCH_AXIS] == 1) {
originalBatchSize =
std::optional(in.shapeFromIRModel.value()[intel_npu::utils::BATCH_AXIS].get_max_length());
break;
}
}

Metadata<CURRENT_METADATA_VERSION>(blobSizesBeforeVersioning,
CURRENT_OPENVINO_VERSION,
initBlobSizes,
originalBatchSize)
.write(stream);
}
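Note: a worked example of the detection above (invented values, assuming intel_npu::utils::BATCH_AXIS == 0):

// Hypothetical descriptor contents; not code from this PR.
ov::PartialShape shapeFromIRModel{4, 3, 224, 224};  // shape declared by the IR model
ov::Shape shapeFromCompiler{1, 3, 224, 224};        // compiled with plugin batching -> batch == 1

const bool pluginBatchingApplied = shapeFromCompiler[intel_npu::utils::BATCH_AXIS] == 1;
const int64_t originalBatch = shapeFromIRModel[intel_npu::utils::BATCH_AXIS].get_max_length();  // 4
// originalBatch is the value written into the version 2.2 metadata appended after the blob.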
