aws-samples · AoyuQC · Feb 21, 2025 · Feb 19, 2025 · Feb 19, 2025 · Feb 19, 2025
diff --git a/src/emd/models/llms/deepseek.py b/src/emd/models/llms/deepseek.py
@@ -55,7 +55,7 @@
         ],
         allow_china_region=True,
         huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
-        # modelscope_model_id="Qwen/Qwen2.5-32B-Instruct",
+        modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
         require_huggingface_token=False,
         application_scenario="Agent, tool use, translation, summary",
         description="The latest series of DeepSeek LLMs for reasoning",
@@ -85,7 +85,7 @@
         ],
         allow_china_region=True,
         huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
-        # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
+        modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
         require_huggingface_token=False,
         application_scenario="Agent, tool use, translation, summary",
         description="The latest series of DeepSeek LLMs for reasoning",
@@ -116,7 +116,7 @@
         ],
         allow_china_region=True,
         huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
-        # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
+        modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
         require_huggingface_token=False,
         application_scenario="Agent, tool use, translation, summary",
         description="The latest series of DeepSeek LLMs for reasoning",
@@ -147,7 +147,7 @@
         ],
         allow_china_region=True,
         huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
-        # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
+        modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
         require_huggingface_token=False,
         application_scenario="Agent, tool use, translation, summary",
         description="The latest series of DeepSeek LLMs for reasoning",
@@ -244,7 +244,7 @@
         ],
         allow_china_region=True,
         huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
-        # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
+        modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
         require_huggingface_token=False,
         application_scenario="Agent, tool use, translation, summary",
         description="The latest series of DeepSeek LLMs for reasoning",

diff --git a/src/emd/models/llms/qwen.py b/src/emd/models/llms/qwen.py
@@ -78,8 +78,7 @@
         model_id = "Qwen2.5-72B-Instruct-AWQ",
         supported_engines=[
             vllm_qwen2d5_engine064,
-            tgi_qwen2d5_72b_engine064,
-            tgi_qwen2d5_72b_on_inf2
+            tgi_qwen2d5_72b_engine064
         ],
         supported_instances=[
             g5d12xlarge_instance,
@@ -108,35 +107,35 @@
     )
 )
 
-Model.register(
-    dict(
-        model_id = "Qwen2.5-72B-Instruct-AWQ-inf2",
-        supported_engines=[
-            tgi_qwen2d5_72b_on_inf2
-        ],
-        supported_instances=[
-            inf2d24xlarge_instance,
-            local_instance
-        ],
-        supported_services=[
-            sagemaker_service,
-            sagemaker_async_service,
-            ecs_service,
-            local_service
-        ],
-        supported_frameworks=[
-            fastapi_framework
-        ],
-        allow_china_region=True,
-        huggingface_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ",
-        modelscope_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ",
-        require_huggingface_token=False,
-        application_scenario="Agent, tool use, translation, summary",
-        description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.",
-        model_type=ModelType.LLM,
-        model_series=QWEN2D5_SERIES
-    )
-)
+# Model.register(
+#     dict(
+#         model_id = "Qwen2.5-72B-Instruct-AWQ-inf2",
+#         supported_engines=[
+#             tgi_qwen2d5_72b_on_inf2
+#         ],
+#         supported_instances=[
+#             inf2d24xlarge_instance,
+#             local_instance
+#         ],
+#         supported_services=[
+#             sagemaker_service,
+#             sagemaker_async_service,
+#             ecs_service,
+#             local_service
+#         ],
+#         supported_frameworks=[
+#             fastapi_framework
+#         ],
+#         allow_china_region=True,
+#         huggingface_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ",
+#         modelscope_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ",
+#         require_huggingface_token=False,
+#         application_scenario="Agent, tool use, translation, summary",
+#         description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.",
+#         model_type=ModelType.LLM,
+#         model_series=QWEN2D5_SERIES
+#     )
+# )
 
 
 Model.register(
@@ -230,33 +229,33 @@
     )
 )
 
-Model.register(
-    dict(
-        model_id = "Qwen2.5-32B-Instruct-inf2",
-        supported_engines=[tgi_qwen2d5_72b_on_inf2],
-        supported_instances=[
-            inf2d24xlarge_instance,
-            local_instance
-        ],
-        supported_services=[
-            sagemaker_service,
-            sagemaker_async_service,
-            ecs_service,
-            local_service
-        ],
-        supported_frameworks=[
-            fastapi_framework
-        ],
-        allow_china_region=True,
-        huggingface_model_id="Qwen/Qwen2.5-32B-Instruct",
-        modelscope_model_id="Qwen/Qwen2.5-32B-Instruct",
-        require_huggingface_token=False,
-        application_scenario="Agent, tool use, translation, summary",
-        description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.",
-        model_type=ModelType.LLM,
-        model_series=QWEN2D5_SERIES
-    )
-)
+# Model.register(
+#     dict(
+#         model_id = "Qwen2.5-32B-Instruct-inf2",
+#         supported_engines=[tgi_qwen2d5_72b_on_inf2],
+#         supported_instances=[
+#             inf2d24xlarge_instance,
+#             local_instance
+#         ],
+#         supported_services=[
+#             sagemaker_service,
+#             sagemaker_async_service,
+#             ecs_service,
+#             local_service
+#         ],
+#         supported_frameworks=[
+#             fastapi_framework
+#         ],
+#         allow_china_region=True,
+#         huggingface_model_id="Qwen/Qwen2.5-32B-Instruct",
+#         modelscope_model_id="Qwen/Qwen2.5-32B-Instruct",
+#         require_huggingface_token=False,
+#         application_scenario="Agent, tool use, translation, summary",
+#         description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.",
+#         model_type=ModelType.LLM,
+#         model_series=QWEN2D5_SERIES
+#     )
+# )
 
 Model.register(
     dict(

diff --git a/src/emd/models/model.py b/src/emd/models/model.py
@@ -10,6 +10,7 @@
     FrameworkType,
     ModelType,
     ModelSeriesType,
+    ModelFilesDownloadSource
     # ModelPrepareMethod
 )
 import boto3
@@ -183,6 +184,7 @@ class Model(ModelBase,Generic[T]):
     # download model files directly from s3
     model_files_s3_path: Union[str,None] = None
     model_files_local_path: Union[str,None] = None
+    model_files_download_source: ModelFilesDownloadSource = ModelFilesDownloadSource.AUTO
     model_series: ModelSeries
     executable_config: Union[ExecutableConfig,None] = None
 

diff --git a/src/emd/models/utils/constants.py b/src/emd/models/utils/constants.py
@@ -131,13 +131,10 @@ class ServiceCode(ConstantBase):
     SAGEMAKER = "sagemaker"
 
 
-# class ModelPrepareMethod(ConstantBase):
-#     UPLOAD_TO_S3 = "upload to s3"
-#     DOANLOWD_FROM_S3 = "download from s3"
-#     IGNORE = "ignore"
-
-
-
+class ModelFilesDownloadSource(ConstantBase):  # which hub model files are downloaded from
+    HUGGINGFACE = "huggingface"  # use Hugging Face regardless of region
+    MODELSCOPE = "modelscope"  # use ModelScope regardless of region
+    AUTO = "auto"  # ModelScope in China regions (Hugging Face fallback), else Hugging Face
 
 class ServiceQuotaCode(ConstantBase):
     G5dXLARGE_ENDPOINT = "L-1928E07B"

diff --git a/src/pipeline/deploy/prepare_model.py b/src/pipeline/deploy/prepare_model.py
@@ -5,7 +5,7 @@
 from huggingface_hub import snapshot_download as hf_snapshot_download
 from modelscope import snapshot_download as ms_snapshot_download
 from emd.models import Model
-from emd.models.utils.constants import ServiceType,EngineType
+from emd.models.utils.constants import ServiceType,EngineType,ModelFilesDownloadSource
 from emd.utils.aws_service_utils import check_cn_region
 from emd.utils.logger_utils import get_logger
 from utils.common import upload_dir_to_s3_by_s5cmd,download_dir_from_s3_by_s5cmd
@@ -110,15 +110,23 @@ def download_model_files(model:Model,model_dir=None):
     if engine_type == EngineType.COMFYUI:
         download_comfyui_model(model,model_dir=model_dir)
     else:
-        if check_cn_region(region):
-            try:
-                download_modelscope_model(model,model_dir=model_dir)
-            except Exception as e:
-                logger.error(f"Error downloading {model.model_id} model from modelscope, error: {e}")
-                logger.info("download from huggingface...")
-                download_huggingface_model(model, model_dir=model_dir)
+        if model.model_files_download_source == ModelFilesDownloadSource.AUTO:
+            if check_cn_region(region):
+                try:
+                    download_modelscope_model(model,model_dir=model_dir)
+                except Exception as e:
+                    logger.error(f"Error downloading {model.model_id} model from modelscope, error: {e}")
+                    logger.info("download from huggingface...")
+                    download_huggingface_model(model, model_dir=model_dir)
+            else:
+                download_huggingface_model(model,model_dir=model_dir)
         else:
-            download_huggingface_model(model,model_dir=model_dir)
+            if model.model_files_download_source == ModelFilesDownloadSource.HUGGINGFACE:
+                download_huggingface_model(model, model_dir=model_dir)
+            elif model.model_files_download_source == ModelFilesDownloadSource.MODELSCOPE:
+                download_modelscope_model(model, model_dir=model_dir)
+            else:
+                raise ValueError(f"Invalid model_files_download_source: {model.model_files_download_source}")
 
 
 def run(model:Model):#, model_s3_bucket, backend_type, service_type, region,args):

diff --git a/src/pipeline/pipeline.py b/src/pipeline/pipeline.py
@@ -6,6 +6,7 @@
 import json
 import logging
 from concurrent.futures import as_completed,ProcessPoolExecutor
+
 from emd.models import Model
 from emd.constants import MODEL_DEFAULT_TAG,LOCAL_REGION
 from emd.models.utils.constants import FrameworkType,ServiceType,InstanceType