Skip to content

Commit 01c37e2

Browse files
committed
update
1 parent a3fd2e3 commit 01c37e2

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

lmdeploy/model.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -748,7 +748,8 @@ def __init__(self, model_path: str = '', **kwargs):
748748
if hasattr(self.tokenizer, 'eot_token') and self.tokenizer.eot_token is not None:
749749
self.stop_words.append(self.tokenizer.eot_token)
750750
cfg = PretrainedConfig.from_pretrained(model_path, trust_remote_code=True)
751-
if cfg.architectures[0] == 'GptOssForCausalLM':
751+
self.is_gpt_oss = getattr(cfg, 'architectures', [''])[0] == 'GptOssForCausalLM'
752+
if self.is_gpt_oss:
752753
self.stop_words.append('<|call|>')
753754
except Exception as e:
754755
raise ValueError(f'Try apply_chat_template failed: {e}')
@@ -790,6 +791,9 @@ def messages2prompt(self, messages, sequence_start=True, **kwargs):
790791

791792
if messages[-1]['role'] == 'assistant' and len(self.assistant_end) > 0:
792793
prompt = prompt[:-len(self.assistant_end)] # prefix of response to let the model complete the response
794+
if self.is_gpt_oss and not kwargs.get('tools'):
795+
# for the gpt-oss model, removing this seems more conducive to instruction following.
796+
prompt = prompt.replace('commentary, ', '', 1)
793797
return prompt
794798

795799
def _role_instruction(self, role):

lmdeploy/serve/async_engine.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -633,9 +633,6 @@ async def _get_prompt_input(self,
633633
raise ValueError(
634634
f'You are using base template to handle chat task. Please specify a `--chat-template` name chosen from `lmdeploy list` if you want to use OpenAI messages input.' # noqa
635635
)
636-
if self.arch == 'GptOssForCausalLM' and (tools is None or len(tools) == 0):
637-
# for the gpt-oss model, removing this seems more conducive to instruction following.
638-
prompt = prompt.replace('commentary, ', '', 1)
639636
input_ids = self.tokenizer.encode(prompt, add_bos=sequence_start)
640637
return {'prompt': prompt, 'input_ids': input_ids}
641638

0 commit comments

Comments
 (0)