Commit b8e010e

Investigate refactoring opportunities for batch management in Plugin and Compiler - clean up
1 parent: 1d54957

3 files changed: +89 additions, −65 deletions


src/plugins/intel_npu/src/plugin/include/plugin.hpp

Lines changed: 6 additions & 0 deletions
@@ -63,6 +63,12 @@ class Plugin : public ov::IPlugin {
     ov::SupportedOpsMap query_model(const std::shared_ptr<const ov::Model>& model,
                                     const ov::AnyMap& properties) const override;
 
+    void handleDynamicBatching(std::shared_ptr<ov::Model>& modelForCompilation,
+                               Config& localConfig,
+                               const std::function<void(ov::intel_npu::BatchMode)>& updateBatchMode) const;
+
+    void encodeDynamicBatchInfo(std::shared_ptr<ov::Model> model) const;
+
 private:
     void init_options();
     void filter_config_by_compiler_support(FilteredConfig& cfg) const;
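The std::function parameter lets the new helper report its decision without knowing how the configuration is stored. In compile_model the callback is the local updateBatchMode lambda, whose tail is visible as context in the last hunk of plugin.cpp below; a sketch of that wiring (the lambda's serialization line is an assumption, only its final lines appear in this diff):

    // Inside Plugin::compile_model, before the batching logic runs:
    auto updateBatchMode = [&localConfig](ov::intel_npu::BatchMode mode) {
        std::stringstream strStream;
        strStream << mode;  // assumed serialization step, not shown in the diff
        localConfig.update({{ov::intel_npu::batch_mode.name(), strStream.str()}});
    };
    handleDynamicBatching(modelForCompilation, localConfig, updateBatchMode);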

src/plugins/intel_npu/src/plugin/src/plugin.cpp

Lines changed: 81 additions & 65 deletions
@@ -57,10 +57,11 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
     ov::ParameterVector parameters;
     ov::ResultVector results;
 
-    // Helper function to check if a tensor was originally dynamic
+    // Check if a tensor was originally dynamic by looking for encoded markers
+    // This information is needed to restore the original dynamic batching behavior
     auto wasOriginallyDynamic = [](const std::unordered_set<std::string>& tensorNames) -> bool {
         for (const auto& name : tensorNames) {
-            if (name.find("_DYNBATCH_ORIG") != std::string::npos) {
+            if (name.find(intel_npu::utils::DYNBATCH_SUFFIX) != std::string::npos) {
                 return true;
             }
         }
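The marker check is a plain substring search across every name registered on a tensor. A self-contained sketch of the same logic (the suffix value comes from this commit; the free-function wrapper is illustrative):

    #include <string>
    #include <unordered_set>

    const std::string DYNBATCH_SUFFIX = "_DYNBATCH_ORIG";

    // True if any registered tensor name carries the dynamic-batch marker.
    bool wasOriginallyDynamic(const std::unordered_set<std::string>& tensorNames) {
        for (const auto& name : tensorNames) {
            if (name.find(DYNBATCH_SUFFIX) != std::string::npos) {
                return true;
            }
        }
        return false;
    }

    // {"input0", "input0_DYNBATCH_ORIG"} -> true; {"input0"} -> false.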
@@ -557,7 +558,8 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
     return _properties->get_property(name, npu_plugin_properties);
 }
 
-// Helper function to check if shape has dynamic dimensions other than batch dimension
+// Helper function to detect if a shape contains dynamic dimensions other than the batch dimension
+// Plugin-side batch handling can only be applied when batch is the sole dynamic dimension
 bool hasOtherDynamicDims(const ov::PartialShape& shape) {
     for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
         if (shape[dim_idx].is_dynamic()) {
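Note that the loop starts at index 1: the batch axis (BATCH_AXIS = 0, see utils.hpp below) is skipped deliberately. A simplified, self-contained sketch of the same check, with ov::PartialShape replaced by a plain vector and -1 marking a dynamic dimension:

    #include <cstdint>
    #include <vector>

    // Simplified stand-in for the ov::PartialShape check: -1 marks a dynamic dim.
    bool hasOtherDynamicDims(const std::vector<int64_t>& shape) {
        for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {  // skip batch axis 0
            if (shape[dim_idx] == -1) {
                return true;
            }
        }
        return false;
    }

    // {-1, 3, 224, 224} -> false: only the batch is dynamic, plugin can handle it.
    // {-1, -1, 224}     -> true: another dynamic dim, fall back to the compiler.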
@@ -712,6 +714,69 @@ void deBatchModel(std::shared_ptr<ov::Model>& model, ov::Dimension newBatch) {
     model->reshape(newShapes);
 }
 
+void Plugin::encodeDynamicBatchInfo(std::shared_ptr<ov::Model> model) const {
+    const std::string suffix = intel_npu::utils::DYNBATCH_SUFFIX;
+
+    // Encode info in input tensor names
+    for (auto& input : model->inputs()) {
+        const std::string originalName = input.get_any_name();
+        input.get_tensor().set_names({originalName, originalName + suffix});
+    }
+    // Encode info in output tensor names
+    for (auto& output : model->outputs()) {
+        const std::string originalName = output.get_any_name();
+        output.get_tensor().set_names({originalName, originalName + suffix});
+    }
+}
+
+void Plugin::handleDynamicBatching(std::shared_ptr<ov::Model>& modelForCompilation,
+                                   Config& localConfig,
+                                   const std::function<void(ov::intel_npu::BatchMode)>& updateBatchMode) const {
+    // Avoiding risks with static models. TODO: common solution.
+    if (!modelForCompilation->is_dynamic()) {
+        return;
+    }
+
+    const auto batchMode = localConfig.get<BATCH_MODE>();
+    const bool isAutoOrPluginBatch =
+        (batchMode == ov::intel_npu::BatchMode::PLUGIN || batchMode == ov::intel_npu::BatchMode::AUTO);
+
+    try {
+        const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
+
+        if (!isAutoOrPluginBatch || !pluginBatchingIsSupported) {
+            _logger.info("Batching will be handled by compiler.");
+            updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+            return;
+        }
+
+        _logger.info("Attempting to handle batching on the plugin side.");
+
+        // Preserve dynamic batch metadata by encoding it in tensor names
+        // Avoids introducing new metadata fields by leveraging the existing naming system
+        encodeDynamicBatchInfo(modelForCompilation);
+
+        try {
+            ov::set_batch(modelForCompilation, ov::Dimension(1));
+        } catch (const std::exception& ex) {
+            _logger.warning("The plugin couldn't resize a batched model due to exception: %s.\n"
+                            "Trying to debatch it...",
+                            ex.what());
+
+            deBatchModel(modelForCompilation, ov::Dimension(1));
+            if (!modelForCompilation) {
+                OPENVINO_THROW("Cannot debatch a model");
+            }
+            _logger.info("The model has been debatched successfully");
+        }
+        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+    } catch (const std::exception& ex) {
+        _logger.info("Couldn't validate and reshape the model. Batching will be handled by compiler. Error: %s",
+                     ex.what());
+        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+    }
+}
+
 std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<const ov::Model>& model,
                                                           const ov::AnyMap& properties) const {
     OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::compile_model");
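Unlike the removed lambda in the next hunk, which overwrote each tensor name with the suffixed one, encodeDynamicBatchInfo registers the suffixed name alongside the original, so lookups by the original name keep working. An illustrative round trip (assumes an ov::Model handle with a single input named "data"):

    auto input = model->input();
    const std::string originalName = input.get_any_name();  // "data"
    input.get_tensor().set_names({originalName, originalName + "_DYNBATCH_ORIG"});
    // The tensor now answers to both names; the alias survives into the compiled
    // model's I/O metadata, where wasOriginallyDynamic() can detect it.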
@@ -770,71 +835,22 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
         localConfig.update({{ov::intel_npu::batch_mode.name(), strStream.str()}});
     };
 
-    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name()) &&
-        !localConfig.has(ov::intel_npu::batch_mode.name())) {
-        updateBatchMode(ov::intel_npu::BatchMode::AUTO);
-    }
-
-    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name()) && !model->get_variables().empty()) {
-        if (localConfig.get<BATCH_MODE>() == ov::intel_npu::BatchMode::PLUGIN) {
-            OPENVINO_THROW("This model contains states, thus it is not supported when handling batching on the plugin");
+    // Handle batch mode configuration
+    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name())) {
+        // Set default batch mode if not configured
+        if (!localConfig.has(ov::intel_npu::batch_mode.name())) {
+            updateBatchMode(ov::intel_npu::BatchMode::AUTO);
         }
 
-        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
-    }
-
-    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name())) {
-        bool autoOrPluginBatch = localConfig.get<BATCH_MODE>() == ov::intel_npu::BatchMode::PLUGIN ||
-                                 localConfig.get<BATCH_MODE>() == ov::intel_npu::BatchMode::AUTO;
-        if (modelForCompilation->is_dynamic()) {  // Avoiding risks with static models. TODO: common solution.
-            try {
-                const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
-
-                if (autoOrPluginBatch && pluginBatchingIsSupported) {
-                    _logger.info("Attempting to handle batching on the plugin side.");
-
-                    // Store dynamic batch info in tensor names BEFORE reshaping
-                    auto encodeDynamicBatchInfo = [](std::shared_ptr<ov::Model> model) {
-                        // Encode info in input tensor names
-                        for (auto& input : model->inputs()) {
-                            std::string originalName = input.get_any_name();
-                            std::string newName = originalName + "_DYNBATCH_ORIG";
-                            input.get_tensor().set_names({newName});
-                        }
-
-                        // Encode info in output tensor names
-                        for (auto& output : model->outputs()) {
-                            std::string originalName = output.get_any_name();
-                            std::string newName = originalName + "_DYNBATCH_ORIG";
-                            output.get_tensor().set_names({newName});
-                        }
-                    };
-
-                    try {
-                        encodeDynamicBatchInfo(modelForCompilation);
-                        ov::set_batch(modelForCompilation, ov::Dimension(1));
-                        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
-                    } catch (const std::exception& ex) {
-                        _logger.warning("The plugin couldn't resize a batched model due to exception: %s.\n"
-                                        "Trying to debatch it...",
-                                        ex.what());
-                        encodeDynamicBatchInfo(modelForCompilation);
-                        deBatchModel(modelForCompilation, ov::Dimension(1));
-                        if (!modelForCompilation) {
-                            OPENVINO_THROW("Cannot debatch a model");
-                        }
-                        _logger.info("The model has been debatched successfully");
-                        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
-                    }
-                } else {
-                    _logger.info("Batching will be handed by compiler.");
-                    updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
-                }
-            } catch (const std::exception& ex) {
-                _logger.info("Couldn't validate and reshape the model. Batching will be handed by compiler.",
-                             ex.what());
-                updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+        // Handle models with variables (states)
+        if (!model->get_variables().empty()) {
+            if (localConfig.get<BATCH_MODE>() == ov::intel_npu::BatchMode::PLUGIN) {
+                OPENVINO_THROW(
+                    "This model contains states, thus it is not supported when handling batching on the plugin");
             }
+            updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+        } else {
+            handleDynamicBatching(modelForCompilation, localConfig, updateBatchMode);
         }
     }
 
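After this restructuring the decision tree is: default to AUTO when the property is unset; force COMPILER for stateful models (throwing if PLUGIN was requested explicitly); otherwise defer to handleDynamicBatching. A hedged usage sketch from the application side, using only the property and enum names that appear in this diff (the include for the intel_npu property definitions is assumed and omitted, and "PLUGIN" assumes the usual string form of the enum value):

    ov::Core core;
    auto model = core.read_model("model.xml");
    // Request plugin-side batch handling; per the logic above, the plugin
    // silently falls back to COMPILER whenever it cannot handle the batch.
    auto compiled = core.compile_model(model, "NPU",
                                       {{ov::intel_npu::batch_mode.name(), "PLUGIN"}});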

src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp

Lines changed: 2 additions & 0 deletions
@@ -17,6 +17,8 @@ constexpr std::size_t STANDARD_PAGE_SIZE = 4096;
 constexpr std::size_t DEFAULT_BATCH_SIZE = 1;
 constexpr std::size_t BATCH_AXIS = 0;
 
+const std::string DYNBATCH_SUFFIX = "_DYNBATCH_ORIG";
+
 struct AlignedAllocator {
 public:
     AlignedAllocator(const size_t align_size) : _align_size(align_size) {}
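Since the suffix is appended to a copy of the original name rather than replacing it, the original name can be recovered by trimming the tail. A hypothetical helper, not part of this commit, sketching that inverse operation:

    #include <string>

    // Hypothetical: strip the marker to recover the original tensor name.
    std::string stripDynBatchSuffix(const std::string& name) {
        const std::string suffix = "_DYNBATCH_ORIG";  // value of DYNBATCH_SUFFIX above
        if (name.size() >= suffix.size() &&
            name.compare(name.size() - suffix.size(), suffix.size(), suffix) == 0) {
            return name.substr(0, name.size() - suffix.size());
        }
        return name;  // no marker present
    }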
