Skip to content
This repository was archived by the owner on Sep 20, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/emd/models/llms/deepseek.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
],
allow_china_region=True,
huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
# modelscope_model_id="Qwen/Qwen2.5-32B-Instruct",
modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
require_huggingface_token=False,
application_scenario="Agent, tool use, translation, summary",
description="The latest series of DeepSeek LLMs for reasoning",
Expand Down Expand Up @@ -85,7 +85,7 @@
],
allow_china_region=True,
huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
# modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
require_huggingface_token=False,
application_scenario="Agent, tool use, translation, summary",
description="The latest series of DeepSeek LLMs for reasoning",
Expand Down Expand Up @@ -116,7 +116,7 @@
],
allow_china_region=True,
huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
# modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
require_huggingface_token=False,
application_scenario="Agent, tool use, translation, summary",
description="The latest series of DeepSeek LLMs for reasoning",
Expand Down Expand Up @@ -147,7 +147,7 @@
],
allow_china_region=True,
huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
# modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
require_huggingface_token=False,
application_scenario="Agent, tool use, translation, summary",
description="The latest series of DeepSeek LLMs for reasoning",
Expand Down Expand Up @@ -244,7 +244,7 @@
],
allow_china_region=True,
huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
# modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
require_huggingface_token=False,
application_scenario="Agent, tool use, translation, summary",
description="The latest series of DeepSeek LLMs for reasoning",
Expand Down
115 changes: 57 additions & 58 deletions src/emd/models/llms/qwen.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,7 @@
model_id = "Qwen2.5-72B-Instruct-AWQ",
supported_engines=[
vllm_qwen2d5_engine064,
tgi_qwen2d5_72b_engine064,
tgi_qwen2d5_72b_on_inf2
tgi_qwen2d5_72b_engine064
],
supported_instances=[
g5d12xlarge_instance,
Expand Down Expand Up @@ -108,35 +107,35 @@
)
)

Model.register(
dict(
model_id = "Qwen2.5-72B-Instruct-AWQ-inf2",
supported_engines=[
tgi_qwen2d5_72b_on_inf2
],
supported_instances=[
inf2d24xlarge_instance,
local_instance
],
supported_services=[
sagemaker_service,
sagemaker_async_service,
ecs_service,
local_service
],
supported_frameworks=[
fastapi_framework
],
allow_china_region=True,
huggingface_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ",
modelscope_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ",
require_huggingface_token=False,
application_scenario="Agent, tool use, translation, summary",
description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.",
model_type=ModelType.LLM,
model_series=QWEN2D5_SERIES
)
)
# Model.register(
# dict(
# model_id = "Qwen2.5-72B-Instruct-AWQ-inf2",
# supported_engines=[
# tgi_qwen2d5_72b_on_inf2
# ],
# supported_instances=[
# inf2d24xlarge_instance,
# local_instance
# ],
# supported_services=[
# sagemaker_service,
# sagemaker_async_service,
# ecs_service,
# local_service
# ],
# supported_frameworks=[
# fastapi_framework
# ],
# allow_china_region=True,
# huggingface_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ",
# modelscope_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ",
# require_huggingface_token=False,
# application_scenario="Agent, tool use, translation, summary",
# description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.",
# model_type=ModelType.LLM,
# model_series=QWEN2D5_SERIES
# )
# )


Model.register(
Expand Down Expand Up @@ -230,33 +229,33 @@
)
)

Model.register(
dict(
model_id = "Qwen2.5-32B-Instruct-inf2",
supported_engines=[tgi_qwen2d5_72b_on_inf2],
supported_instances=[
inf2d24xlarge_instance,
local_instance
],
supported_services=[
sagemaker_service,
sagemaker_async_service,
ecs_service,
local_service
],
supported_frameworks=[
fastapi_framework
],
allow_china_region=True,
huggingface_model_id="Qwen/Qwen2.5-32B-Instruct",
modelscope_model_id="Qwen/Qwen2.5-32B-Instruct",
require_huggingface_token=False,
application_scenario="Agent, tool use, translation, summary",
description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.",
model_type=ModelType.LLM,
model_series=QWEN2D5_SERIES
)
)
# Model.register(
# dict(
# model_id = "Qwen2.5-32B-Instruct-inf2",
# supported_engines=[tgi_qwen2d5_72b_on_inf2],
# supported_instances=[
# inf2d24xlarge_instance,
# local_instance
# ],
# supported_services=[
# sagemaker_service,
# sagemaker_async_service,
# ecs_service,
# local_service
# ],
# supported_frameworks=[
# fastapi_framework
# ],
# allow_china_region=True,
# huggingface_model_id="Qwen/Qwen2.5-32B-Instruct",
# modelscope_model_id="Qwen/Qwen2.5-32B-Instruct",
# require_huggingface_token=False,
# application_scenario="Agent, tool use, translation, summary",
# description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.",
# model_type=ModelType.LLM,
# model_series=QWEN2D5_SERIES
# )
# )

Model.register(
dict(
Expand Down
2 changes: 2 additions & 0 deletions src/emd/models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
FrameworkType,
ModelType,
ModelSeriesType,
ModelFilesDownloadSource
# ModelPrepareMethod
)
import boto3
Expand Down Expand Up @@ -183,6 +184,7 @@ class Model(ModelBase,Generic[T]):
# download model files directly from s3
model_files_s3_path: Union[str,None] = None
model_files_local_path: Union[str,None] = None
model_files_download_source: ModelFilesDownloadSource = ModelFilesDownloadSource.AUTO
model_series: ModelSeries
executable_config: Union[ExecutableConfig,None] = None

Expand Down
11 changes: 4 additions & 7 deletions src/emd/models/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,10 @@ class ServiceCode(ConstantBase):
SAGEMAKER = "sagemaker"


# class ModelPrepareMethod(ConstantBase):
# UPLOAD_TO_S3 = "upload to s3"
# DOANLOWD_FROM_S3 = "download from s3"
# IGNORE = "ignore"



class ModelFilesDownloadSource(ConstantBase):
# Closed set of values naming the hub a model's files are downloaded from.
# Used as the type of `Model.model_files_download_source`, whose default is AUTO.

# Download the model files from the Hugging Face Hub.
HUGGINGFACE = "huggingface"
# Download the model files from ModelScope.
MODELSCOPE= "modelscope"
# Leave the choice to the download pipeline.
# NOTE(review): the pipeline appears to try ModelScope first in China regions
# (falling back to Hugging Face on error) and Hugging Face elsewhere — confirm
# against download_model_files in pipeline/deploy/prepare_model.py.
AUTO = "auto"

class ServiceQuotaCode(ConstantBase):
G5dXLARGE_ENDPOINT = "L-1928E07B"
Expand Down
26 changes: 17 additions & 9 deletions src/pipeline/deploy/prepare_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from huggingface_hub import snapshot_download as hf_snapshot_download
from modelscope import snapshot_download as ms_snapshot_download
from emd.models import Model
from emd.models.utils.constants import ServiceType,EngineType
from emd.models.utils.constants import ServiceType,EngineType,ModelFilesDownloadSource
from emd.utils.aws_service_utils import check_cn_region
from emd.utils.logger_utils import get_logger
from utils.common import upload_dir_to_s3_by_s5cmd,download_dir_from_s3_by_s5cmd
Expand Down Expand Up @@ -110,15 +110,23 @@ def download_model_files(model:Model,model_dir=None):
if engine_type == EngineType.COMFYUI:
download_comfyui_model(model,model_dir=model_dir)
else:
if check_cn_region(region):
try:
download_modelscope_model(model,model_dir=model_dir)
except Exception as e:
logger.error(f"Error downloading {model.model_id} model from modelscope, error: {e}")
logger.info("download from huggingface...")
download_huggingface_model(model, model_dir=model_dir)
if model.model_files_download_source == ModelFilesDownloadSource.AUTO:
if check_cn_region(region):
try:
download_modelscope_model(model,model_dir=model_dir)
except Exception as e:
logger.error(f"Error downloading {model.model_id} model from modelscope, error: {e}")
logger.info("download from huggingface...")
download_huggingface_model(model, model_dir=model_dir)
else:
download_huggingface_model(model,model_dir=model_dir)
else:
download_huggingface_model(model,model_dir=model_dir)
if model.model_files_download_source == ModelFilesDownloadSource.HUGGINGFACE:
download_huggingface_model(model, model_dir=model_dir)
elif model.model_files_download_source == ModelFilesDownloadSource.MODELSCOPE:
download_modelscope_model(model, model_dir=model_dir)
else:
raise ValueError(f"Invalid model_files_download_source: {model.model_files_download_source}")


def run(model:Model):#, model_s3_bucket, backend_type, service_type, region,args):
Expand Down
1 change: 1 addition & 0 deletions src/pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
import logging
from concurrent.futures import as_completed,ProcessPoolExecutor

from emd.models import Model
from emd.constants import MODEL_DEFAULT_TAG,LOCAL_REGION
from emd.models.utils.constants import FrameworkType,ServiceType,InstanceType
Expand Down