fix: remove the standalone generation-only CUDA graph warmup run from the per-batch-size loop

QiJune · QiJune · commit 2eb30309c314 · 2025-08-11T13:36:47.000+08:00
Signed-off-by: junq <22017000+QiJune@users.noreply.github.com>
diff --git a/tensorrt_llm/_torch/pyexecutor/model_engine.py b/tensorrt_llm/_torch/pyexecutor/model_engine.py
@@ -750,18 +750,6 @@ def disable_optimization(backend: Backend):
                 if bs > self.batch_size:
                     # skip batch size larger than self.batch_size
                     continue
-                with release_batch(get_cuda_graph_warmup_request(bs)) as batch:
-                    if batch is None:
-                        # No KV cache space!
-                        return
-                    logger.info(
-                        f"Run generation only CUDA graph warmup for batch size={bs}"
-                    )
-                    self.cuda_graph_model_engine.execute(
-                        batch,
-                        new_tensors_device=None,
-                        resource_manager=resource_manager)
-                    torch.cuda.synchronize()
 
                 for draft_len in draft_lengths:
                     with release_batch(