From afe6676112398ecb9372998b5f45f1447e536980 Mon Sep 17 00:00:00 2001 From: zhouxss Date: Wed, 19 Feb 2025 08:27:19 +0000 Subject: [PATCH 1/6] fix cn region local deploy bug --- src/dmaa/models/llms/deepseek.py | 10 +++---- src/pipeline/deploy/build_and_push_image.py | 29 ++++++++++++++++----- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/dmaa/models/llms/deepseek.py b/src/dmaa/models/llms/deepseek.py index 784c33a4..047c8fb4 100644 --- a/src/dmaa/models/llms/deepseek.py +++ b/src/dmaa/models/llms/deepseek.py @@ -53,7 +53,7 @@ supported_frameworks=[ fastapi_framework ], - allow_china_region=False, + allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", # modelscope_model_id="Qwen/Qwen2.5-32B-Instruct", require_huggingface_token=False, @@ -83,7 +83,7 @@ supported_frameworks=[ fastapi_framework ], - allow_china_region=False, + allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", require_huggingface_token=False, @@ -114,7 +114,7 @@ supported_frameworks=[ fastapi_framework ], - allow_china_region=False, + allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", require_huggingface_token=False, @@ -145,7 +145,7 @@ supported_frameworks=[ fastapi_framework ], - allow_china_region=False, + allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", require_huggingface_token=False, @@ -242,7 +242,7 @@ supported_frameworks=[ fastapi_framework ], - allow_china_region=False, + allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B", # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", require_huggingface_token=False, diff --git a/src/pipeline/deploy/build_and_push_image.py b/src/pipeline/deploy/build_and_push_image.py index 
0df89969..cf3f24a8 100644 --- a/src/pipeline/deploy/build_and_push_image.py +++ b/src/pipeline/deploy/build_and_push_image.py @@ -236,17 +236,34 @@ def run( docker_login_region = docker_login_region or region if build_image_host: - if check_cn_region(region): - build_image_script = ( + build_image_script_cn = ( f"cd {execute_dir}" f' && docker build --platform linux/amd64 -f {dockerfile_name} -t "{ecr_repo_uri}" .' ) - else: - build_image_script = ( + build_image_script_global = ( f"cd {execute_dir}" f" && aws {ecr_name} get-login-password --region {docker_login_region} | docker login --username AWS --password-stdin {build_image_host}" f' && docker build --platform linux/amd64 -f {dockerfile_name} -t "{ecr_repo_uri}" .' ) + if check_cn_region(region): + build_image_scripts = [build_image_script_cn] + else: + build_image_scripts = [build_image_script_global,build_image_script_cn] + + is_build_success = False + for build_image_script in build_image_scripts: + logger.info(f"building image: {build_image_script}") + try: + assert os.system(build_image_script) == 0 + is_build_success = True + break + except Exception as e: + logger.error(f"docker build error: {e}") + + if not is_build_success: + raise RuntimeError("docker build error") + + # build_image_script = ( # f"cd {execute_dir}" # f" && aws {ecr_name} get-login-password --region {docker_login_region} | docker login --username AWS --password-stdin {build_image_host}" # f' && docker build --platform linux/amd64 -f {dockerfile_name} -t "{ecr_repo_uri}" .' @@ -258,8 +275,8 @@ def run( f' && docker build --platform linux/amd64 -f {dockerfile_name} -t "{ecr_repo_uri}" .'
) - logger.info(f"building image: {build_image_script}") - assert os.system(build_image_script) == 0 + logger.info(f"building image: {build_image_script}") + assert os.system(build_image_script) == 0 # push image # It should not push the image to ecr when service_type is `local` From 6435664ed09a7e57f74de18364e7c3c1f16c51db Mon Sep 17 00:00:00 2001 From: zhouxss Date: Wed, 19 Feb 2025 08:45:26 +0000 Subject: [PATCH 2/6] fix bugs in download_s5cmd --- src/dmaa/constants.py | 2 ++ src/dmaa/sdk/deploy.py | 3 ++- src/pipeline/pipeline.py | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/dmaa/constants.py b/src/dmaa/constants.py index 31fc3319..35208dd8 100644 --- a/src/dmaa/constants.py +++ b/src/dmaa/constants.py @@ -21,4 +21,6 @@ DMAA_DEFAULT_PROFILE_PARH = "~/.dmaa_default_profile" MODEL_TAG_PATTERN = r'^[a-z0-9]([a-z0-9-_]{0,61}[a-z0-9])?$' + +LOCAL_REGION = "local" # DMAA_USE_NO_PROFILE_CHOICE = "Don't set" diff --git a/src/dmaa/sdk/deploy.py b/src/dmaa/sdk/deploy.py index 2c446378..a38bd0a2 100644 --- a/src/dmaa/sdk/deploy.py +++ b/src/dmaa/sdk/deploy.py @@ -11,6 +11,7 @@ ENV_STACK_NAME, MODEL_DEFAULT_TAG, VERSION, + LOCAL_REGION ) from dmaa.models import Model from dmaa.models.utils.constants import FrameworkType, ServiceType,InstanceType @@ -311,7 +312,7 @@ def deploy_local( f" --service_type {service_type}" f" --backend_type {engine_type}" f" --framework_type {framework_type}" - f" --region 'local'" + f" --region '{LOCAL_REGION}'" f" --extra_params '{extra_params}'" ) logger.info(f"pipeline cmd: {pipeline_cmd}") diff --git a/src/pipeline/pipeline.py b/src/pipeline/pipeline.py index 0735071e..9c227cb6 100644 --- a/src/pipeline/pipeline.py +++ b/src/pipeline/pipeline.py @@ -7,7 +7,7 @@ import logging from concurrent.futures import as_completed,ProcessPoolExecutor from dmaa.models import Model -from dmaa.constants import MODEL_DEFAULT_TAG +from dmaa.constants import MODEL_DEFAULT_TAG,LOCAL_REGION from dmaa.models.utils.constants import 
FrameworkType,ServiceType,InstanceType from utils.common import str2bool from dmaa.utils.aws_service_utils import check_cn_region @@ -217,7 +217,7 @@ def download_s5cmd(): t0 = time.time() start_time = time.time() args = parse_args() - if not check_cn_region(args.region): + if not (check_cn_region(args.region) or args.region == LOCAL_REGION): download_s5cmd() extra_params = args.extra_params for k,v in extra_params.items(): From 19cef26e4db8aa00448aa55c9440b7aee4c49bad Mon Sep 17 00:00:00 2001 From: zhouxss Date: Wed, 19 Feb 2025 09:23:16 +0000 Subject: [PATCH 3/6] add disable_hf_transfer params to Model --- src/dmaa/models/model.py | 2 ++ src/pipeline/deploy/prepare_model.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/dmaa/models/model.py b/src/dmaa/models/model.py index 4fcdb849..32987662 100644 --- a/src/dmaa/models/model.py +++ b/src/dmaa/models/model.py @@ -169,6 +169,8 @@ class Model(ModelBase,Generic[T]): # allow_china_region_ecs: bool = False huggingface_model_id: str = "" huggingface_endpoints: List[str] = ["https://huggingface.co","https://hf-mirror.com"] + disable_hf_transfer:bool = False + huggingface_model_download_kwargs: dict = Field(default_factory=dict) ollama_model_id:Union[str,None] = None require_huggingface_token: bool = False diff --git a/src/pipeline/deploy/prepare_model.py b/src/pipeline/deploy/prepare_model.py index 01a1e919..df1f9ee4 100644 --- a/src/pipeline/deploy/prepare_model.py +++ b/src/pipeline/deploy/prepare_model.py @@ -28,7 +28,8 @@ def enable_hf_transfer(): def download_huggingface_model(model:Model,model_dir=None): - enable_hf_transfer() + if not model.disable_hf_transfer: + enable_hf_transfer() huggingface_model_id = model.huggingface_model_id service_type = model.executable_config.current_service.service_type model_id = model.model_id From 7f79626af934f1b9e238cf5cf0fbb2e3b74d85bb Mon Sep 17 00:00:00 2001 From: zhouxss Date: Thu, 20 Feb 2025 05:31:20 +0000 Subject: [PATCH 4/6] add 
model_files_download_source --- src/emd/models/llms/deepseek.py | 10 +++++----- src/emd/models/model.py | 2 ++ src/emd/models/utils/constants.py | 11 ++++------- src/pipeline/deploy/prepare_model.py | 26 +++++++++++++++++--------- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/emd/models/llms/deepseek.py b/src/emd/models/llms/deepseek.py index d76edc1e..dc202151 100644 --- a/src/emd/models/llms/deepseek.py +++ b/src/emd/models/llms/deepseek.py @@ -55,7 +55,7 @@ ], allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - # modelscope_model_id="Qwen/Qwen2.5-32B-Instruct", + modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", require_huggingface_token=False, application_scenario="Agent, tool use, translation, summary", description="The latest series of DeepSeek LLMs for reasoning", @@ -85,7 +85,7 @@ ], allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", + modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", require_huggingface_token=False, application_scenario="Agent, tool use, translation, summary", description="The latest series of DeepSeek LLMs for reasoning", @@ -116,7 +116,7 @@ ], allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", + modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", require_huggingface_token=False, application_scenario="Agent, tool use, translation, summary", description="The latest series of DeepSeek LLMs for reasoning", @@ -147,7 +147,7 @@ ], allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", + modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", require_huggingface_token=False, application_scenario="Agent, tool use, translation, summary", description="The latest series of 
DeepSeek LLMs for reasoning", @@ -244,7 +244,7 @@ ], allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", + modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B", require_huggingface_token=False, application_scenario="Agent, tool use, translation, summary", description="The latest series of DeepSeek LLMs for reasoning", diff --git a/src/emd/models/model.py b/src/emd/models/model.py index 43fde8c7..ac29d05a 100644 --- a/src/emd/models/model.py +++ b/src/emd/models/model.py @@ -10,6 +10,7 @@ FrameworkType, ModelType, ModelSeriesType, + ModelFilesDownloadSource # ModelPrepareMethod ) import boto3 @@ -183,6 +184,7 @@ class Model(ModelBase,Generic[T]): # download model files directly from s3 model_files_s3_path: Union[str,None] = None model_files_local_path: Union[str,None] = None + model_files_download_source: ModelFilesDownloadSource = ModelFilesDownloadSource.AUTO model_series: ModelSeries executable_config: Union[ExecutableConfig,None] = None diff --git a/src/emd/models/utils/constants.py b/src/emd/models/utils/constants.py index 8427f7c7..5c50f522 100644 --- a/src/emd/models/utils/constants.py +++ b/src/emd/models/utils/constants.py @@ -131,13 +131,10 @@ class ServiceCode(ConstantBase): SAGEMAKER = "sagemaker" -# class ModelPrepareMethod(ConstantBase): -# UPLOAD_TO_S3 = "upload to s3" -# DOANLOWD_FROM_S3 = "download from s3" -# IGNORE = "ignore" - - - +class ModelFilesDownloadSource(ConstantBase): + HUGGINGFACE = "huggingface" + MODELSCOPE= "modelscope" + AUTO = "auto" class ServiceQuotaCode(ConstantBase): G5dXLARGE_ENDPOINT = "L-1928E07B" diff --git a/src/pipeline/deploy/prepare_model.py b/src/pipeline/deploy/prepare_model.py index f93f99b7..e6333943 100644 --- a/src/pipeline/deploy/prepare_model.py +++ b/src/pipeline/deploy/prepare_model.py @@ -5,7 +5,7 @@ from huggingface_hub import snapshot_download as hf_snapshot_download from modelscope import snapshot_download 
as ms_snapshot_download from emd.models import Model -from emd.models.utils.constants import ServiceType,EngineType +from emd.models.utils.constants import ServiceType,EngineType,ModelFilesDownloadSource from emd.utils.aws_service_utils import check_cn_region from emd.utils.logger_utils import get_logger from utils.common import upload_dir_to_s3_by_s5cmd,download_dir_from_s3_by_s5cmd @@ -110,15 +110,23 @@ def download_model_files(model:Model,model_dir=None): if engine_type == EngineType.COMFYUI: download_comfyui_model(model,model_dir=model_dir) else: - if check_cn_region(region): - try: - download_modelscope_model(model,model_dir=model_dir) - except Exception as e: - logger.error(f"Error downloading {model.model_id} model from modelscope, error: {e}") - logger.info("download from huggingface...") - download_huggingface_model(model, model_dir=model_dir) + if model.model_files_download_source == ModelFilesDownloadSource.AUTO: + if check_cn_region(region): + try: + download_modelscope_model(model,model_dir=model_dir) + except Exception as e: + logger.error(f"Error downloading {model.model_id} model from modelscope, error: {e}") + logger.info("download from huggingface...") + download_huggingface_model(model, model_dir=model_dir) + else: + download_huggingface_model(model,model_dir=model_dir) else: - download_huggingface_model(model,model_dir=model_dir) + if model.model_files_download_source == ModelFilesDownloadSource.HUGGINGFACE: + download_huggingface_model(model, model_dir=model_dir) + elif model.model_files_download_source == ModelFilesDownloadSource.MODELSCOPE: + download_modelscope_model(model, model_dir=model_dir) + else: + raise ValueError(f"Invalid model_files_download_source: {model.model_files_download_source}") def run(model:Model):#, model_s3_bucket, backend_type, service_type, region,args): From e8e7df78aa9e85ca242332f10e6491245e996b13 Mon Sep 17 00:00:00 2001 From: zhouxss Date: Thu, 20 Feb 2025 05:42:09 +0000 Subject: [PATCH 5/6] fix multiple engines 
selection --- src/emd/models/llms/qwen.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/emd/models/llms/qwen.py b/src/emd/models/llms/qwen.py index 2236c4ff..630f6977 100644 --- a/src/emd/models/llms/qwen.py +++ b/src/emd/models/llms/qwen.py @@ -78,8 +78,7 @@ model_id = "Qwen2.5-72B-Instruct-AWQ", supported_engines=[ vllm_qwen2d5_engine064, - tgi_qwen2d5_72b_engine064, - tgi_qwen2d5_72b_on_inf2 + tgi_qwen2d5_72b_engine064 ], supported_instances=[ g5d12xlarge_instance, From 3d44e5280aa5d2e780f1b7e366b5425d767d30ff Mon Sep 17 00:00:00 2001 From: zhouxss Date: Thu, 20 Feb 2025 07:14:03 +0000 Subject: [PATCH 6/6] modify qwen model suuporting --- src/emd/models/llms/qwen.py | 112 ++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/src/emd/models/llms/qwen.py b/src/emd/models/llms/qwen.py index 630f6977..153bf738 100644 --- a/src/emd/models/llms/qwen.py +++ b/src/emd/models/llms/qwen.py @@ -107,35 +107,35 @@ ) ) -Model.register( - dict( - model_id = "Qwen2.5-72B-Instruct-AWQ-inf2", - supported_engines=[ - tgi_qwen2d5_72b_on_inf2 - ], - supported_instances=[ - inf2d24xlarge_instance, - local_instance - ], - supported_services=[ - sagemaker_service, - sagemaker_async_service, - ecs_service, - local_service - ], - supported_frameworks=[ - fastapi_framework - ], - allow_china_region=True, - huggingface_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ", - modelscope_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ", - require_huggingface_token=False, - application_scenario="Agent, tool use, translation, summary", - description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.", - model_type=ModelType.LLM, - model_series=QWEN2D5_SERIES - ) -) +# Model.register( 
+# dict( +# model_id = "Qwen2.5-72B-Instruct-AWQ-inf2", +# supported_engines=[ +# tgi_qwen2d5_72b_on_inf2 +# ], +# supported_instances=[ +# inf2d24xlarge_instance, +# local_instance +# ], +# supported_services=[ +# sagemaker_service, +# sagemaker_async_service, +# ecs_service, +# local_service +# ], +# supported_frameworks=[ +# fastapi_framework +# ], +# allow_china_region=True, +# huggingface_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ", +# modelscope_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ", +# require_huggingface_token=False, +# application_scenario="Agent, tool use, translation, summary", +# description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.", +# model_type=ModelType.LLM, +# model_series=QWEN2D5_SERIES +# ) +# ) Model.register( @@ -229,33 +229,33 @@ ) ) -Model.register( - dict( - model_id = "Qwen2.5-32B-Instruct-inf2", - supported_engines=[tgi_qwen2d5_72b_on_inf2], - supported_instances=[ - inf2d24xlarge_instance, - local_instance - ], - supported_services=[ - sagemaker_service, - sagemaker_async_service, - ecs_service, - local_service - ], - supported_frameworks=[ - fastapi_framework - ], - allow_china_region=True, - huggingface_model_id="Qwen/Qwen2.5-32B-Instruct", - modelscope_model_id="Qwen/Qwen2.5-32B-Instruct", - require_huggingface_token=False, - application_scenario="Agent, tool use, translation, summary", - description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.", - model_type=ModelType.LLM, - 
model_series=QWEN2D5_SERIES - ) -) +# Model.register( +# dict( +# model_id = "Qwen2.5-32B-Instruct-inf2", +# supported_engines=[tgi_qwen2d5_72b_on_inf2], +# supported_instances=[ +# inf2d24xlarge_instance, +# local_instance +# ], +# supported_services=[ +# sagemaker_service, +# sagemaker_async_service, +# ecs_service, +# local_service +# ], +# supported_frameworks=[ +# fastapi_framework +# ], +# allow_china_region=True, +# huggingface_model_id="Qwen/Qwen2.5-32B-Instruct", +# modelscope_model_id="Qwen/Qwen2.5-32B-Instruct", +# require_huggingface_token=False, +# application_scenario="Agent, tool use, translation, summary", +# description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.", +# model_type=ModelType.LLM, +# model_series=QWEN2D5_SERIES +# ) +# ) Model.register( dict(