get ready.

sayakpaul · sayakpaul · commit 22b179924af4 · 2025-10-05T18:08:09.000+05:30
diff --git a/src/diffusers/modular_pipelines/qwenimage/before_denoise.py b/src/diffusers/modular_pipelines/qwenimage/before_denoise.py
@@ -640,50 +640,6 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -
         return components, state
 
 
-class QwenImageEditPlusRoPEInputsStep(QwenImageEditRoPEInputsStep):
-    model_name = "qwenimage-edit-plus"
-    # TODO: Is there a better way to handle this name? It's used in
-    # `QwenImageEditPlusProcessImagesInputStep` as well. We can later
-    # keep these things as a module-level constant.
-    _image_size_output_name = "vae_image_sizes"
-
-    @property
-    def inputs(self) -> List[InputParam]:
-        inputs_list = super().inputs
-        return inputs_list + [
-            InputParam(name=self._image_size_output_name, required=True),
-        ]
-
-    def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -> PipelineState:
-        block_state = self.get_block_state(state)
-        vae_image_sizes = getattr(block_state, self._image_size_output_name)
-        height, width = block_state.image_height, block_state.image_width
-
-        # for edit, image size can be different from the target size (height/width)
-        block_state.img_shapes = [
-            [
-                (1, height // components.vae_scale_factor // 2, width // components.vae_scale_factor // 2),
-                *[
-                    (1, vae_height // components.vae_scale_factor // 2, vae_width // components.vae_scale_factor // 2)
-                    for vae_width, vae_height in vae_image_sizes
-                ],
-            ]
-        ] * block_state.batch_size
-
-        block_state.txt_seq_lens = (
-            block_state.prompt_embeds_mask.sum(dim=1).tolist() if block_state.prompt_embeds_mask is not None else None
-        )
-        block_state.negative_txt_seq_lens = (
-            block_state.negative_prompt_embeds_mask.sum(dim=1).tolist()
-            if block_state.negative_prompt_embeds_mask is not None
-            else None
-        )
-
-        self.set_block_state(state, block_state)
-
-        return components, state
-
-
 ## ControlNet inputs for denoiser
 class QwenImageControlNetBeforeDenoiserStep(ModularPipelineBlocks):
     model_name = "qwenimage"
diff --git a/src/diffusers/modular_pipelines/qwenimage/encoders.py b/src/diffusers/modular_pipelines/qwenimage/encoders.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Optional, Union
 
 import PIL
 import torch
@@ -855,12 +855,6 @@ def description(self) -> str:
     def inputs(self) -> List[InputParam]:
         return [InputParam("vae_image"), InputParam("image"), InputParam("height"), InputParam("width")]
 
-    @property
-    def intermediate_outputs(self) -> List[OutputParam]:
-        return super().intermediate_outputs + [
-            OutputParam(name="vae_image_sizes", type_hint=List[Tuple[int, int]]),
-        ]
-
     @torch.no_grad()
     def __call__(self, components: QwenImageModularPipeline, state: PipelineState):
         block_state = self.get_block_state(state)
@@ -879,18 +873,11 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState):
                 image=image, height=height, width=width
             )
         else:
-            vae_image_sizes = []
+            width, height = block_state.vae_image[0].size
             image = block_state.vae_image
-            for img in image:
-                width, height = img.size
-                vae_width, vae_height, _ = calculate_dimensions(self.vae_image_size, width / height)
-                vae_image_sizes.append((vae_width, vae_height))
-
-            block_state.vae_image_sizes = vae_image_sizes
 
-            width, height = block_state.vae_image[0].size
             block_state.processed_image = components.image_processor.preprocess(
-                image=image, height=vae_height, width=vae_width
+                image=image, height=height, width=width
             )
 
         self.set_block_state(state, block_state)
diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_blocks.py b/src/diffusers/modular_pipelines/qwenimage/modular_blocks.py
@@ -18,7 +18,6 @@
 from .before_denoise import (
     QwenImageControlNetBeforeDenoiserStep,
     QwenImageCreateMaskLatentsStep,
-    QwenImageEditPlusRoPEInputsStep,
     QwenImageEditRoPEInputsStep,
     QwenImagePrepareLatentsStep,
     QwenImagePrepareLatentsWithStrengthStep,
@@ -929,41 +928,17 @@ def description(self) -> str:
         ("input", QwenImageEditInputStep()),
         ("prepare_latents", QwenImagePrepareLatentsStep()),
         ("set_timesteps", QwenImageSetTimestepsStep()),
-        ("prepare_rope_inputs", QwenImageEditPlusRoPEInputsStep()),
+        ("prepare_rope_inputs", QwenImageEditRoPEInputsStep()),
         ("denoise", QwenImageEditDenoiseStep()),
         ("decode", QwenImageDecodeStep()),
     ]
 )
 
 
-## 3.2 QwenImage-Edit Plus/auto before denoise
-# compose the steps into a BeforeDenoiseStep for edit tasks before combining into an auto step
-
-#### QwenImage-Edit/edit before denoise
-QwenImageEditPlusBeforeDenoiseBlocks = InsertableDict(
-    [
-        ("prepare_latents", QwenImagePrepareLatentsStep()),
-        ("set_timesteps", QwenImageSetTimestepsStep()),
-        # Different from QwenImage Edit.
-        ("prepare_rope_inputs", QwenImageEditPlusRoPEInputsStep()),
-    ]
-)
-
-
-class QwenImageEditPlusBeforeDenoiseStep(SequentialPipelineBlocks):
-    model_name = "qwenimage"
-    block_classes = QwenImageEditPlusBeforeDenoiseBlocks.values()
-    block_names = QwenImageEditPlusBeforeDenoiseBlocks.keys()
-
-    @property
-    def description(self):
-        return "Before denoise step that prepare the inputs (timesteps, latents, rope inputs etc.) for the denoise step for edit task."
-
-
 # auto before_denoise step for edit tasks
 class QwenImageEditPlusAutoBeforeDenoiseStep(AutoPipelineBlocks):
     model_name = "qwenimage-edit-plus"
-    block_classes = [QwenImageEditPlusBeforeDenoiseStep]
+    block_classes = [QwenImageEditBeforeDenoiseStep]
     block_names = ["edit"]
     block_trigger_inputs = ["image_latents"]
 
@@ -977,7 +952,7 @@ def description(self):
         )
 
 
-## 3.3 QwenImage-Edit Plus/auto encoders
+## 3.2 QwenImage-Edit Plus/auto encoders
 
 
 class QwenImageEditPlusAutoVaeEncoderStep(AutoPipelineBlocks):
@@ -997,7 +972,7 @@ def description(self):
         )
 
 
-## 3.4 QwenImage-Edit/auto blocks & presets
+## 3.3 QwenImage-Edit Plus/auto blocks & presets
 
 
 class QwenImageEditPlusCoreDenoiseStep(SequentialPipelineBlocks):