Skip to content

Commit 2eb3030

Browse files
committed
fix
Signed-off-by: junq <[email protected]>
1 parent fdfacbc commit 2eb3030

File tree

1 file changed

+0 −12 lines changed (0 additions, 12 deletions)

1 file changed

+0
-12
lines changed

tensorrt_llm/_torch/pyexecutor/model_engine.py

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -750,18 +750,6 @@ def disable_optimization(backend: Backend):
750 750
if bs > self.batch_size:
751 751
# skip batch size larger than self.batch_size
752 752
continue
753-
with release_batch(get_cuda_graph_warmup_request(bs)) as batch:
754-
if batch is None:
755-
# No KV cache space!
756-
return
757-
logger.info(
758-
f"Run generation only CUDA graph warmup for batch size={bs}"
759-
)
760-
self.cuda_graph_model_engine.execute(
761-
batch,
762-
new_tensors_device=None,
763-
resource_manager=resource_manager)
764-
torch.cuda.synchronize()
765 753

766 754
for draft_len in draft_lengths:
767 755
with release_batch(

0 commit comments

Comments
 (0)