Skip to content
This repository was archived by the owner on Sep 20, 2025. It is now read-only.

Commit 232f601

Browse files
authored
fix: modify baichuan m1 model config (#40)
* Remove forced checking of AWS environment during local deployment
* Fix bug in Baichuan M1 model
* Modify deploy_time_test
1 parent 89ec61f commit 232f601

File tree

6 files changed

+137
-66
lines changed

6 files changed

+137
-66
lines changed

src/emd/models/engines.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,15 @@ class OpenAICompitableEngine(Engine):
99
default_cli_args: str = ""
1010
custom_gpu_num: Union[int,None] = None
1111

12+
1213
class VllmEngine(OpenAICompitableEngine):
1314
pass
1415

16+
1517
class LMdeployEngine(OpenAICompitableEngine):
1618
pass
1719

20+
1821
class TgiEngine(OpenAICompitableEngine):
1922
support_inf2_instance:bool = True
2023
compile_to_neuron:bool = False
@@ -286,13 +289,14 @@ class ComfyuiEngine(Engine):
286289
)
287290

288291

289-
vllm_baichuan_engine071 = VllmEngine(**{
292+
vllm_M1_14B_engine066 = VllmEngine(**{
290293
"engine_type":EngineType.VLLM,
291294
"engine_dockerfile_config": {"VERSION":"v0.6.6-baichuan-m1"},
292295
"engine_cls":"vllm.vllm_backend.VLLMBackend",
293296
"base_image_host":"public.ecr.aws",
294297
"use_public_ecr":True,
295298
"docker_login_region":"us-east-1",
299+
"custom_gpu_num":2,
296300
"default_cli_args": " --disable-log-stats --trust-remote-code"
297301
})
298302

@@ -351,16 +355,6 @@ class ComfyuiEngine(Engine):
351355
})
352356

353357

354-
# huggingface_llm_engine_4d47d0 = HuggingFaceLLMEngine(**{
355-
# "engine_type":EngineType.HUGGINGFACE,
356-
# "engine_cls":"huggingface.llm.transformer_llm_backend.TransformerLLMBackend",
357-
# "python_name":"python3",
358-
# "base_image_host":"public.ecr.aws",
359-
# "use_public_ecr":True,
360-
# "docker_login_region":"us-east-1",
361-
# "engine_dockerfile_config": {"VERSION":"4.47.0"},
362-
# })
363-
364358
comfyui_engine = ComfyuiEngine(**{
365359
"engine_type":EngineType.COMFYUI,
366360
"engine_cls":"comfyui.comfyui_backend.ComfyUIBackend",

src/emd/models/llms/baichuan.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from .. import Model
22
from ..engines import (
33
huggingface_baichuan_engine_4d41d2,
4-
vllm_baichuan_engine071
4+
vllm_M1_14B_engine066
55
)
66
from ..services import (
77
sagemaker_service,
@@ -31,7 +31,7 @@
3131
Model.register(
3232
dict(
3333
model_id = "Baichuan-M1-14B-Instruct",
34-
supported_engines=[vllm_baichuan_engine071,huggingface_baichuan_engine_4d41d2],
34+
supported_engines=[vllm_M1_14B_engine066,huggingface_baichuan_engine_4d41d2],
3535
supported_instances=[
3636
g5d12xlarge_instance,
3737
g5d24xlarge_instance,
@@ -47,7 +47,7 @@
4747
supported_frameworks=[
4848
fastapi_framework
4949
],
50-
allow_china_region=False,
50+
allow_china_region=True,
5151
huggingface_model_id="baichuan-inc/Baichuan-M1-14B-Instruct",
5252
# modelscope_model_id="Qwen/QwQ-32B-Preview",
5353
require_huggingface_token=False,

src/pipeline/deploy/prepare_model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ def run(model:Model):#, model_s3_bucket, backend_type, service_type, region,args
135135
service_type = model.executable_config.current_service.service_type
136136
engine_type = model.executable_config.current_engine.engine_type
137137
model_s3_bucket = model.executable_config.model_s3_bucket
138+
logger.info(f"need_prepare_model: {need_prepare_model}, model_files_s3_path: {model_files_s3_path}, service_type: {service_type}, engine_type: {engine_type}, model_s3_bucket: {model_s3_bucket}")
138139
# if args.service_type == ServiceType.LOCAL or (args.model.need_prepare_model and not args.skip_prepare_model):
139140
if service_type == ServiceType.LOCAL or (need_prepare_model and model_files_s3_path is None):
140141
if engine_type == EngineType.OLLAMA:

src/pipeline/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
boto3
88
huggingface_hub
9+
hf_transfer
910
openai
1011
jinja2
1112
modelscope

tests/batch_deploy_test.py

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from emd.models.utils.constants import ModelType
55
import traceback
66
from emd.utils.logger_utils import get_logger
7-
7+
import time
88

99
logger = get_logger(__name__)
1010

@@ -15,6 +15,8 @@ class DeployConfig(BaseModel):
1515
service_type:str
1616
framework_type:str
1717
model_tag:str
18+
extra_params: dict
19+
1820

1921

2022
class InvokeConfig(BaseModel):
@@ -26,6 +28,7 @@ class Task(BaseModel):
2628
invoke_config: InvokeConfig
2729

2830

31+
2932
def deploy(task:Task):
3033
model_id = task.deploy_config.model_id
3134
print("=="*50 + f"deploy: {model_id}" + "=="*50)
@@ -35,7 +38,8 @@ def deploy(task:Task):
3538
engine_type=task.deploy_config.engine_type,
3639
service_type=task.deploy_config.service_type,
3740
framework_type=task.deploy_config.framework_type,
38-
model_tag=task.deploy_config.model_tag
41+
model_tag=task.deploy_config.model_tag,
42+
extra_params=task.deploy_config.extra_params
3943
)
4044

4145
def invoke(task:Task):
@@ -87,16 +91,28 @@ def destroy(task:Task):
8791
)
8892

8993
def test_one_task(task:Task):
94+
print(f"task: \n{task.model_dump()}")
9095
model_id = task.deploy_config.model_id
9196
ret = {
9297
"code":0,
9398
"task":task,
94-
"error":0
99+
"error":"",
100+
"deploy_time":None,
101+
"invoke_time":None,
102+
"destroy_time":None
95103
}
96104
try:
105+
t0 = time.time()
97106
deploy(task)
107+
t1 = time.time()
108+
ret['deploy_time'] = t1-t0
98109
invoke(task)
110+
t2 = time.time()
111+
ret['invoke_time'] = t2-t1
99112
destroy(task)
113+
t3 = time.time()
114+
ret['destroy_time'] = t3-t2
115+
100116
logger.info(f"task: {model_id} success")
101117
except Exception as e:
102118
error = traceback.format_exc()
@@ -109,7 +125,18 @@ def test_one_task(task:Task):
109125
error = traceback.format_exc()
110126
logger.error(f"task: {model_id} destroy failed:\n{error}")
111127

112-
128+
result = f"""\
129+
<deploy_test_result>
130+
<model_id>{model_id}</model_id>
131+
<test_code>{ret['code']}</test_code>
132+
<test_error>{ret['error']}</test_error>
133+
<deploy_time>{ret['deploy_time']}</deploy_time>
134+
<invoke_time>{ret['invoke_time']}</invoke_time>
135+
<destroy_time>{ret['destroy_time']}</destroy_time>
136+
</deploy_test_result>
137+
"""
138+
logger.info(f"task: {model_id} test result:\n{result}")
139+
ret['summary'] = result
113140
return ret
114141

115142

@@ -178,7 +205,17 @@ def test_one_task(task:Task):
178205
for ret in test_ret:
179206
task = ret['task']
180207
model_id = task.deploy_config.model_id
181-
print(f"model_id: {model_id}\ntest code:{ret['code']}\nerror:{ret['error']}")
208+
result = f"""\
209+
<deploy_test_result>
210+
<model_id>{model_id}</model_id>
211+
<test_code>{ret['code']}</test_code>
212+
<test_error>{ret['error']}</test_error>
213+
<deploy_time>{ret['deploy_time']}</deploy_time>
214+
<invoke_time>{ret['invoke_time']}</invoke_time>
215+
<destroy_time>{ret['destroy_time']}</destroy_time>
216+
</deploy_test_result>
217+
"""
218+
# print(f"<model_id: {model_id}\ntest code:{ret['code']}\nerror:{ret['error']}")
182219
print("=="*50)
183220

184221
if all([ret['code'] == 0 for ret in test_ret]):

0 commit comments

Comments (0)