From aeae914521005c5f7fc98453b0b9b98f6c67d8b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E5=AE=87?= <506610466@qq.com>
Date: Tue, 16 Sep 2025 23:25:10 +0800
Subject: [PATCH] Optimize AsyncEngine generation method

---
 lmdeploy/serve/async_engine.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lmdeploy/serve/async_engine.py b/lmdeploy/serve/async_engine.py
index 53baca50f8..f48be2d98e 100644
--- a/lmdeploy/serve/async_engine.py
+++ b/lmdeploy/serve/async_engine.py
@@ -628,7 +628,8 @@ async def _get_prompt_input(self,
                                                    sequence_start,
                                                    tools=tools,
                                                    enable_thinking=enable_thinking,
-                                                   reasoning_effort=reasoning_effort)
+                                                   reasoning_effort=reasoning_effort,
+                                                   **kwargs)
         if prompt is None:
             raise ValueError(
                 f'You are using base template to handle chat task. Please specify a `--chat-template` name chosen from `lmdeploy list` if you want to use OpenAI messages input.'  # noqa
@@ -736,7 +737,8 @@ async def generate(
                                                     adapter_name,
                                                     tools=tools,
                                                     reasoning_effort=reasoning_effort,
-                                                    enable_thinking=enable_thinking)
+                                                    enable_thinking=enable_thinking,
+                                                    **kwargs)
         prompt = prompt_input['prompt']
         input_ids = prompt_input['input_ids']
         self.request_logger.log_inputs(session_id=session_id,
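
For context, the patch forwards arbitrary extra keyword arguments from generate() through _get_prompt_input() into the chat template, so template-specific options can reach the template without another AsyncEngine signature change. Below is a minimal, self-contained Python sketch of that forwarding pattern only; ToyTemplate, ToyEngine, and the custom_flag keyword are hypothetical stand-ins for illustration, not lmdeploy classes or API.

    import asyncio


    class ToyTemplate:
        """Stand-in for a chat template (hypothetical, not lmdeploy's)."""

        def messages2prompt(self, messages, sequence_start, tools=None,
                            enable_thinking=None, reasoning_effort=None,
                            **kwargs):
            # Template-specific extras arrive here via **kwargs without
            # the engine having to know about them.
            extras = ' '.join(f'{k}={v}' for k, v in kwargs.items())
            return f'{messages} [{extras}]'


    class ToyEngine:
        """Stand-in engine showing the kwargs pass-through the patch adds."""

        def __init__(self):
            self.chat_template = ToyTemplate()

        async def _get_prompt_input(self, messages, sequence_start, **kwargs):
            # Mirrors the first hunk: **kwargs is forwarded to the template.
            prompt = self.chat_template.messages2prompt(
                messages, sequence_start, **kwargs)
            return {'prompt': prompt}

        async def generate(self, messages, session_id, **kwargs):
            # Mirrors the second hunk: before the patch, **kwargs stopped
            # here; now it continues down to _get_prompt_input.
            prompt_input = await self._get_prompt_input(
                messages, sequence_start=True, **kwargs)
            return prompt_input['prompt']


    if __name__ == '__main__':
        engine = ToyEngine()
        out = asyncio.run(engine.generate('hi', session_id=0, custom_flag=1))
        print(out)  # prints: hi [custom_flag=1]

One design note: because both call sites forward **kwargs verbatim, any keyword the template does not declare lands in its own **kwargs catch-all, so unknown options are ignored rather than raising a TypeError at the engine layer.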