Skip to content
This repository was archived by the owner on Sep 20, 2025. It is now read-only.

Commit 911f24b

Browse files
zhouxuanyanbasic and Yi Yan authored
Fix: local deploy cn (#13)
* fix cn region local deploy bug
* fix bugs in download_s5cmd
* add disable_hf_transfer params to Model
* add model_files_download_source
* fix multiple engines selection
* fix: update esc template

---------

Co-authored-by: Yi Yan <[email protected]>
1 parent 01370e0 commit 911f24b

File tree

6 files changed

+30
-23
lines changed

6 files changed

+30
-23
lines changed

src/emd/models/llms/deepseek.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
],
5656
allow_china_region=True,
5757
huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
58-
# modelscope_model_id="Qwen/Qwen2.5-32B-Instruct",
58+
modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
5959
require_huggingface_token=False,
6060
application_scenario="Agent, tool use, translation, summary",
6161
description="The latest series of DeepSeek LLMs for reasoning",
@@ -85,7 +85,7 @@
8585
],
8686
allow_china_region=True,
8787
huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
88-
# modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
88+
modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
8989
require_huggingface_token=False,
9090
application_scenario="Agent, tool use, translation, summary",
9191
description="The latest series of DeepSeek LLMs for reasoning",
@@ -116,7 +116,7 @@
116116
],
117117
allow_china_region=True,
118118
huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
119-
# modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
119+
modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
120120
require_huggingface_token=False,
121121
application_scenario="Agent, tool use, translation, summary",
122122
description="The latest series of DeepSeek LLMs for reasoning",
@@ -147,7 +147,7 @@
147147
],
148148
allow_china_region=True,
149149
huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
150-
# modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
150+
modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
151151
require_huggingface_token=False,
152152
application_scenario="Agent, tool use, translation, summary",
153153
description="The latest series of DeepSeek LLMs for reasoning",
@@ -244,7 +244,7 @@
244244
],
245245
allow_china_region=True,
246246
huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
247-
# modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
247+
modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
248248
require_huggingface_token=False,
249249
application_scenario="Agent, tool use, translation, summary",
250250
description="The latest series of DeepSeek LLMs for reasoning",

src/emd/models/llms/qwen.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,7 @@
7878
model_id = "Qwen2.5-72B-Instruct-AWQ",
7979
supported_engines=[
8080
vllm_qwen2d5_engine064,
81-
tgi_qwen2d5_72b_engine064,
82-
tgi_qwen2d5_72b_on_inf2
81+
tgi_qwen2d5_72b_engine064
8382
],
8483
supported_instances=[
8584
g5d12xlarge_instance,

src/emd/models/model.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
FrameworkType,
1111
ModelType,
1212
ModelSeriesType,
13+
ModelFilesDownloadSource
1314
# ModelPrepareMethod
1415
)
1516
import boto3
@@ -183,6 +184,7 @@ class Model(ModelBase,Generic[T]):
183184
# download model files directly from s3
184185
model_files_s3_path: Union[str,None] = None
185186
model_files_local_path: Union[str,None] = None
187+
model_files_download_source: ModelFilesDownloadSource = ModelFilesDownloadSource.AUTO
186188
model_series: ModelSeries
187189
executable_config: Union[ExecutableConfig,None] = None
188190

src/emd/models/utils/constants.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,13 +131,10 @@ class ServiceCode(ConstantBase):
131131
SAGEMAKER = "sagemaker"
132132

133133

134-
# class ModelPrepareMethod(ConstantBase):
135-
# UPLOAD_TO_S3 = "upload to s3"
136-
# DOANLOWD_FROM_S3 = "download from s3"
137-
# IGNORE = "ignore"
138-
139-
140-
134+
class ModelFilesDownloadSource(ConstantBase):
135+
HUGGINGFACE = "huggingface"
136+
MODELSCOPE= "modelscope"
137+
AUTO = "auto"
141138

142139
class ServiceQuotaCode(ConstantBase):
143140
G5dXLARGE_ENDPOINT = "L-1928E07B"

src/pipeline/deploy/prepare_model.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from huggingface_hub import snapshot_download as hf_snapshot_download
66
from modelscope import snapshot_download as ms_snapshot_download
77
from emd.models import Model
8-
from emd.models.utils.constants import ServiceType,EngineType
8+
from emd.models.utils.constants import ServiceType,EngineType,ModelFilesDownloadSource
99
from emd.utils.aws_service_utils import check_cn_region
1010
from emd.utils.logger_utils import get_logger
1111
from utils.common import upload_dir_to_s3_by_s5cmd,download_dir_from_s3_by_s5cmd
@@ -110,15 +110,23 @@ def download_model_files(model:Model,model_dir=None):
110110
if engine_type == EngineType.COMFYUI:
111111
download_comfyui_model(model,model_dir=model_dir)
112112
else:
113-
if check_cn_region(region):
114-
try:
115-
download_modelscope_model(model,model_dir=model_dir)
116-
except Exception as e:
117-
logger.error(f"Error downloading {model.model_id} model from modelscope, error: {e}")
118-
logger.info("download from huggingface...")
119-
download_huggingface_model(model, model_dir=model_dir)
113+
if model.model_files_download_source == ModelFilesDownloadSource.AUTO:
114+
if check_cn_region(region):
115+
try:
116+
download_modelscope_model(model,model_dir=model_dir)
117+
except Exception as e:
118+
logger.error(f"Error downloading {model.model_id} model from modelscope, error: {e}")
119+
logger.info("download from huggingface...")
120+
download_huggingface_model(model, model_dir=model_dir)
121+
else:
122+
download_huggingface_model(model,model_dir=model_dir)
120123
else:
121-
download_huggingface_model(model,model_dir=model_dir)
124+
if model.model_files_download_source == ModelFilesDownloadSource.HUGGINGFACE:
125+
download_huggingface_model(model, model_dir=model_dir)
126+
elif model.model_files_download_source == ModelFilesDownloadSource.MODELSCOPE:
127+
download_modelscope_model(model, model_dir=model_dir)
128+
else:
129+
raise ValueError(f"Invalid model_files_download_source: {model.model_files_download_source}")
122130

123131

124132
def run(model:Model):#, model_s3_bucket, backend_type, service_type, region,args):

src/pipeline/pipeline.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import json
77
import logging
88
from concurrent.futures import as_completed,ProcessPoolExecutor
9+
910
from emd.models import Model
1011
from emd.constants import MODEL_DEFAULT_TAG,LOCAL_REGION
1112
from emd.models.utils.constants import FrameworkType,ServiceType,InstanceType

0 commit comments

Comments (0)