@@ -57,10 +57,11 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
5757 ov::ParameterVector parameters;
5858 ov::ResultVector results;
5959
60- // Helper function to check if a tensor was originally dynamic
60+ // Check if tensor was originally dynamic by looking for encoded markers
61+ // This information is needed to restore the original dynamic batching behavior
6162 auto wasOriginallyDynamic = [](const std::unordered_set<std::string>& tensorNames) -> bool {
6263 for (const auto & name : tensorNames) {
63- if (name.find (" _DYNBATCH_ORIG " ) != std::string::npos) {
64+ if (name.find (intel_npu::utils::DYNBATCH_SUFFIX ) != std::string::npos) {
6465 return true ;
6566 }
6667 }
@@ -557,7 +558,8 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
557558 return _properties->get_property (name, npu_plugin_properties);
558559}
559560
560- // Helper function to check if shape has dynamic dimensions other than batch dimension
561+ // Helper function to detect if shape contains dynamic dimensions other than the batch dimension
562+ // Plugin-side batch handling can only be applied when batch is the sole dynamic dimension
561563bool hasOtherDynamicDims (const ov::PartialShape& shape) {
562564 for (size_t dim_idx = 1 ; dim_idx < shape.size (); dim_idx++) {
563565 if (shape[dim_idx].is_dynamic ()) {
@@ -712,6 +714,69 @@ void deBatchModel(std::shared_ptr<ov::Model>& model, ov::Dimension newBatch) {
712714 model->reshape (newShapes);
713715}
714716
717+ void Plugin::encodeDynamicBatchInfo (std::shared_ptr<ov::Model> model) const {
718+ const std::string suffix = intel_npu::utils::DYNBATCH_SUFFIX;
719+
720+ // Encode info in input tensor names
721+ for (auto & input : model->inputs ()) {
722+ const std::string originalName = input.get_any_name ();
723+ input.get_tensor ().set_names ({originalName, originalName + suffix});
724+ }
725+ // Encode info in output tensor names
726+ for (auto & output : model->outputs ()) {
727+ const std::string originalName = output.get_any_name ();
728+ output.get_tensor ().set_names ({originalName, originalName + suffix});
729+ }
730+ }
731+
732+ void Plugin::handleDynamicBatching (std::shared_ptr<ov::Model>& modelForCompilation,
733+ Config& localConfig,
734+ const std::function<void (ov::intel_npu::BatchMode)>& updateBatchMode) const {
735+ // Avoiding risks with static models. TODO: common solution.
736+ if (!modelForCompilation->is_dynamic ()) {
737+ return ;
738+ }
739+
740+ const auto batchMode = localConfig.get <BATCH_MODE>();
741+ const bool isAutoOrPluginBatch =
742+ (batchMode == ov::intel_npu::BatchMode::PLUGIN || batchMode == ov::intel_npu::BatchMode::AUTO);
743+
744+ try {
745+ const bool pluginBatchingIsSupported = validateModelBatch (modelForCompilation, _logger);
746+
747+ if (!isAutoOrPluginBatch || !pluginBatchingIsSupported) {
748+ _logger.info (" Batching will be handled by compiler." );
749+ updateBatchMode (ov::intel_npu::BatchMode::COMPILER);
750+ return ;
751+ }
752+
753+ _logger.info (" Attempting to handle batching on the plugin side." );
754+
755+ // Preserve dynamic batch metadata by encoding it in tensor names
756+ // Avoids introducing new metadata fields by leveraging existing naming system
757+ encodeDynamicBatchInfo (modelForCompilation);
758+
759+ try {
760+ ov::set_batch (modelForCompilation, ov::Dimension (1 ));
761+ } catch (const std::exception& ex) {
762+ _logger.warning (" The plugin couldn't resize a batched model due to exception: %s.\n "
763+ " Trying to debatch it..." ,
764+ ex.what ());
765+
766+ deBatchModel (modelForCompilation, ov::Dimension (1 ));
767+ if (!modelForCompilation) {
768+ OPENVINO_THROW (" Cannot debatch a model" );
769+ }
770+ _logger.info (" The model has been debatched successfully" );
771+ }
772+ updateBatchMode (ov::intel_npu::BatchMode::COMPILER);
773+ } catch (const std::exception& ex) {
774+ _logger.info (" Couldn't validate and reshape the model. Batching will be handled by compiler. Error: %s" ,
775+ ex.what ());
776+ updateBatchMode (ov::intel_npu::BatchMode::COMPILER);
777+ }
778+ }
779+
715780std::shared_ptr<ov::ICompiledModel> Plugin::compile_model (const std::shared_ptr<const ov::Model>& model,
716781 const ov::AnyMap& properties) const {
717782 OV_ITT_SCOPED_TASK (itt::domains::NPUPlugin, " Plugin::compile_model" );
@@ -770,71 +835,22 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
770835 localConfig.update ({{ov::intel_npu::batch_mode.name (), strStream.str ()}});
771836 };
772837
773- if (localConfig.isAvailable (ov::intel_npu::batch_mode.name ()) &&
774- !localConfig.has (ov::intel_npu::batch_mode.name ())) {
775- updateBatchMode (ov::intel_npu::BatchMode::AUTO);
776- }
777-
778- if (localConfig.isAvailable (ov::intel_npu::batch_mode.name ()) && !model->get_variables ().empty ()) {
779- if (localConfig.get <BATCH_MODE>() == ov::intel_npu::BatchMode::PLUGIN) {
780- OPENVINO_THROW (" This model contains states, thus it is not supported when handling batching on the plugin" );
838+ // Handle batch mode configuration
839+ if (localConfig.isAvailable (ov::intel_npu::batch_mode.name ())) {
840+ // Set default batch mode if not configured
841+ if (!localConfig.has (ov::intel_npu::batch_mode.name ())) {
842+ updateBatchMode (ov::intel_npu::BatchMode::AUTO);
781843 }
782844
783- updateBatchMode (ov::intel_npu::BatchMode::COMPILER);
784- }
785-
786- if (localConfig.isAvailable (ov::intel_npu::batch_mode.name ())) {
787- bool autoOrPluginBatch = localConfig.get <BATCH_MODE>() == ov::intel_npu::BatchMode::PLUGIN ||
788- localConfig.get <BATCH_MODE>() == ov::intel_npu::BatchMode::AUTO;
789- if (modelForCompilation->is_dynamic ()) { // Avoiding risks with static models. TODO: common solution.
790- try {
791- const bool pluginBatchingIsSupported = validateModelBatch (modelForCompilation, _logger);
792-
793- if (autoOrPluginBatch && pluginBatchingIsSupported) {
794- _logger.info (" Attempting to handle batching on the plugin side." );
795-
796- // Store dynamic batch info in tensor names BEFORE reshaping
797- auto encodeDynamicBatchInfo = [](std::shared_ptr<ov::Model> model) {
798- // Encode info in input tensor names
799- for (auto & input : model->inputs ()) {
800- std::string originalName = input.get_any_name ();
801- std::string newName = originalName + " _DYNBATCH_ORIG" ;
802- input.get_tensor ().set_names ({newName});
803- }
804-
805- // Encode info in output tensor names
806- for (auto & output : model->outputs ()) {
807- std::string originalName = output.get_any_name ();
808- std::string newName = originalName + " _DYNBATCH_ORIG" ;
809- output.get_tensor ().set_names ({newName});
810- }
811- };
812-
813- try {
814- encodeDynamicBatchInfo (modelForCompilation);
815- ov::set_batch (modelForCompilation, ov::Dimension (1 ));
816- updateBatchMode (ov::intel_npu::BatchMode::COMPILER);
817- } catch (const std::exception& ex) {
818- _logger.warning (" The plugin couldn't resize a batched model due to exception: %s.\n "
819- " Trying to debatch it..." ,
820- ex.what ());
821- encodeDynamicBatchInfo (modelForCompilation);
822- deBatchModel (modelForCompilation, ov::Dimension (1 ));
823- if (!modelForCompilation) {
824- OPENVINO_THROW (" Cannot debatch a model" );
825- }
826- _logger.info (" The model has been debatched successfully" );
827- updateBatchMode (ov::intel_npu::BatchMode::COMPILER);
828- }
829- } else {
830- _logger.info (" Batching will be handed by compiler." );
831- updateBatchMode (ov::intel_npu::BatchMode::COMPILER);
832- }
833- } catch (const std::exception& ex) {
834- _logger.info (" Couldn't validate and reshape the model. Batching will be handed by compiler." ,
835- ex.what ());
836- updateBatchMode (ov::intel_npu::BatchMode::COMPILER);
845+ // Handle models with variables (states)
846+ if (!model->get_variables ().empty ()) {
847+ if (localConfig.get <BATCH_MODE>() == ov::intel_npu::BatchMode::PLUGIN) {
848+ OPENVINO_THROW (
849+ " This model contains states, thus it is not supported when handling batching on the plugin" );
837850 }
851+ updateBatchMode (ov::intel_npu::BatchMode::COMPILER);
852+ } else {
853+ handleDynamicBatching (modelForCompilation, localConfig, updateBatchMode);
838854 }
839855 }
840856
0 commit comments