Skip to content
This repository was archived by the owner on Sep 20, 2025. It is now read-only.

Commit d72b12b

Browse files
committed
modify qwq-32b deploy
1 parent 2845bc3 commit d72b12b

File tree

4 files changed

+15
-5
lines changed

4 files changed

+15
-5
lines changed

src/emd/models/engines.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,13 @@ class KtransformersEngine(OpenAICompitableEngine):
135135
"default_cli_args": " --chat-template emd/models/chat_templates/qwq_32b_add_prefill_chat_template.jinja --max_model_len 16000 --max_num_seq 10 --disable-log-stats --enable-auto-tool-choice --tool-call-parser hermes"
136136
})
137137

138+
vllm_qwq_engine082 = VllmEngine(**{
139+
**vllm_qwen25vl72b_engine073.model_dump(),
140+
"engine_dockerfile_config": {"VERSION":"v0.8.2"},
141+
"environment_variables": "export VLLM_ATTENTION_BACKEND=FLASHINFER && export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True",
142+
"default_cli_args": " --chat-template emd/models/chat_templates/qwq_32b_add_prefill_chat_template.jinja --max_model_len 16000 --max_num_seq 10 --disable-log-stats --enable-auto-tool-choice --tool-call-parser hermes --enable-reasoning --reasoning-parser deepseek_r1"
143+
})
144+
138145

139146
vllm_internvl2d5_76b_engine064 = VllmEngine(**{
140147
**vllm_engine064.model_dump(),

src/emd/models/llms/qwen.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
tgi_qwen2d5_on_inf2,
88
tgi_qwen2d5_72b_on_inf2,
99
vllm_qwen2d5_72b_engine064,
10-
vllm_qwq_engine073
10+
vllm_qwq_engine073,
11+
vllm_qwq_engine082
1112
)
1213
from ..services import (
1314
sagemaker_service,
@@ -471,7 +472,7 @@
471472
Model.register(
472473
dict(
473474
model_id = "QwQ-32B",
474-
supported_engines=[vllm_qwq_engine073],
475+
supported_engines=[vllm_qwq_engine082],
475476
supported_instances=[
476477
g5d12xlarge_instance,
477478
g5d24xlarge_instance,

src/emd/models/services.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@
9191
"ServiceType":"service_type",
9292
"EngineType":"engine_type",
9393
"Region": "region",
94+
"DesiredCapacity": "desired_capacity",
9495
"ContainerCpu": "container_cpu",
9596
"ContainerMemory": "container_memory",
9697
"ContainerGpu":"instance_gpu_num"

src/pipeline/backend/backend.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,9 +134,10 @@ def start_server(self, server_start_command):
134134
logger.info(f"Starting {self.engine_type} server with command: {server_start_command}")
135135
t = threading.Thread(target=os.system,args=(server_start_command,),daemon=True)
136136
t.start()
137-
t2 = threading.Thread(target=self.check_model_serve_ready,args=(t, "127.0.0.1", self.server_port),daemon=True)
138-
t2.start()
139-
t2.join()
137+
self.check_model_serve_ready(t, "127.0.0.1", self.server_port)
138+
logger.info(f"Server started successfully.")
139+
# t2.start()
140+
# t2.join()
140141
return
141142

142143

0 commit comments

Comments (0)