From afe6676112398ecb9372998b5f45f1447e536980 Mon Sep 17 00:00:00 2001 From: zhouxss Date: Wed, 19 Feb 2025 08:27:19 +0000 Subject: [PATCH 1/6] fix cn region local deploy bug --- src/dmaa/models/llms/deepseek.py | 10 +++---- src/pipeline/deploy/build_and_push_image.py | 29 ++++++++++++++++----- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/dmaa/models/llms/deepseek.py b/src/dmaa/models/llms/deepseek.py index 784c33a4..047c8fb4 100644 --- a/src/dmaa/models/llms/deepseek.py +++ b/src/dmaa/models/llms/deepseek.py @@ -53,7 +53,7 @@ supported_frameworks=[ fastapi_framework ], - allow_china_region=False, + allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", # modelscope_model_id="Qwen/Qwen2.5-32B-Instruct", require_huggingface_token=False, @@ -83,7 +83,7 @@ supported_frameworks=[ fastapi_framework ], - allow_china_region=False, + allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", require_huggingface_token=False, @@ -114,7 +114,7 @@ supported_frameworks=[ fastapi_framework ], - allow_china_region=False, + allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", require_huggingface_token=False, @@ -145,7 +145,7 @@ supported_frameworks=[ fastapi_framework ], - allow_china_region=False, + allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", require_huggingface_token=False, @@ -242,7 +242,7 @@ supported_frameworks=[ fastapi_framework ], - allow_china_region=False, + allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B", # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", require_huggingface_token=False, diff --git a/src/pipeline/deploy/build_and_push_image.py b/src/pipeline/deploy/build_and_push_image.py index 
0df89969..cf3f24a8 100644 --- a/src/pipeline/deploy/build_and_push_image.py +++ b/src/pipeline/deploy/build_and_push_image.py @@ -236,17 +236,34 @@ def run( docker_login_region = docker_login_region or region if build_image_host: - if check_cn_region(region): - build_image_script = ( + build_image_script_cn = ( f"cd {execute_dir}" f' && docker build --platform linux/amd64 -f {dockerfile_name} -t "{ecr_repo_uri}" .' ) - else: - build_image_script = ( + build_image_script_global = ( f"cd {execute_dir}" f" && aws {ecr_name} get-login-password --region {docker_login_region} | docker login --username AWS --password-stdin {build_image_host}" f' && docker build --platform linux/amd64 -f {dockerfile_name} -t "{ecr_repo_uri}" .' ) + if check_cn_region(region): + build_image_scripts = [build_image_script_cn] + else: + build_image_scripts = [build_image_script_global,build_image_script_cn] + + is_build_success = False + for build_image_script in build_image_scripts: + logger.info(f"building image: {build_image_script}") + try: + assert os.system(build_image_script) == 0 + is_build_success = True + break + except Exception as e: + logger.error(f"docker build error: {e}") + + if not is_build_success: + raise RuntimeError("docker build error") + + # build_image_script = ( # f"cd {execute_dir}" # f" && aws {ecr_name} get-login-password --region {docker_login_region} | docker login --username AWS --password-stdin {build_image_host}" # f' && docker build --platform linux/amd64 -f {dockerfile_name} -t "{ecr_repo_uri}" .' @@ -258,8 +275,8 @@ def run( f' && docker build --platform linux/amd64 -f {dockerfile_name} -t "{ecr_repo_uri}" .'
) - logger.info(f"building image: {build_image_script}") - assert os.system(build_image_script) == 0 + logger.info(f"building image: {build_image_script}") + assert os.system(build_image_script) == 0 # push image # It should not push the image to ecr when service_type is `local` From 6435664ed09a7e57f74de18364e7c3c1f16c51db Mon Sep 17 00:00:00 2001 From: zhouxss Date: Wed, 19 Feb 2025 08:45:26 +0000 Subject: [PATCH 2/6] fix bugs in download_s5cmd --- src/dmaa/constants.py | 2 ++ src/dmaa/sdk/deploy.py | 3 ++- src/pipeline/pipeline.py | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/dmaa/constants.py b/src/dmaa/constants.py index 31fc3319..35208dd8 100644 --- a/src/dmaa/constants.py +++ b/src/dmaa/constants.py @@ -21,4 +21,6 @@ DMAA_DEFAULT_PROFILE_PARH = "~/.dmaa_default_profile" MODEL_TAG_PATTERN = r'^[a-z0-9]([a-z0-9-_]{0,61}[a-z0-9])?$' + +LOCAL_REGION = "local" # DMAA_USE_NO_PROFILE_CHOICE = "Don't set" diff --git a/src/dmaa/sdk/deploy.py b/src/dmaa/sdk/deploy.py index 2c446378..a38bd0a2 100644 --- a/src/dmaa/sdk/deploy.py +++ b/src/dmaa/sdk/deploy.py @@ -11,6 +11,7 @@ ENV_STACK_NAME, MODEL_DEFAULT_TAG, VERSION, + LOCAL_REGION ) from dmaa.models import Model from dmaa.models.utils.constants import FrameworkType, ServiceType,InstanceType @@ -311,7 +312,7 @@ def deploy_local( f" --service_type {service_type}" f" --backend_type {engine_type}" f" --framework_type {framework_type}" - f" --region 'local'" + f" --region '{LOCAL_REGION}'" f" --extra_params '{extra_params}'" ) logger.info(f"pipeline cmd: {pipeline_cmd}") diff --git a/src/pipeline/pipeline.py b/src/pipeline/pipeline.py index 0735071e..9c227cb6 100644 --- a/src/pipeline/pipeline.py +++ b/src/pipeline/pipeline.py @@ -7,7 +7,7 @@ import logging from concurrent.futures import as_completed,ProcessPoolExecutor from dmaa.models import Model -from dmaa.constants import MODEL_DEFAULT_TAG +from dmaa.constants import MODEL_DEFAULT_TAG,LOCAL_REGION from dmaa.models.utils.constants import 
FrameworkType,ServiceType,InstanceType from utils.common import str2bool from dmaa.utils.aws_service_utils import check_cn_region @@ -217,7 +217,7 @@ def download_s5cmd(): t0 = time.time() start_time = time.time() args = parse_args() - if not check_cn_region(args.region): + if not (check_cn_region(args.region) or args.region == LOCAL_REGION): download_s5cmd() extra_params = args.extra_params for k,v in extra_params.items(): From 19cef26e4db8aa00448aa55c9440b7aee4c49bad Mon Sep 17 00:00:00 2001 From: zhouxss Date: Wed, 19 Feb 2025 09:23:16 +0000 Subject: [PATCH 3/6] add disable_hf_transfer params to Model --- src/dmaa/models/model.py | 2 ++ src/pipeline/deploy/prepare_model.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/dmaa/models/model.py b/src/dmaa/models/model.py index 4fcdb849..32987662 100644 --- a/src/dmaa/models/model.py +++ b/src/dmaa/models/model.py @@ -169,6 +169,8 @@ class Model(ModelBase,Generic[T]): # allow_china_region_ecs: bool = False huggingface_model_id: str = "" huggingface_endpoints: List[str] = ["https://huggingface.co","https://hf-mirror.com"] + disable_hf_transfer:bool = False + huggingface_model_download_kwargs: dict = Field(default_factory=dict) ollama_model_id:Union[str,None] = None require_huggingface_token: bool = False diff --git a/src/pipeline/deploy/prepare_model.py b/src/pipeline/deploy/prepare_model.py index 01a1e919..df1f9ee4 100644 --- a/src/pipeline/deploy/prepare_model.py +++ b/src/pipeline/deploy/prepare_model.py @@ -28,7 +28,8 @@ def enable_hf_transfer(): def download_huggingface_model(model:Model,model_dir=None): - enable_hf_transfer() + if not model.disable_hf_transfer: + enable_hf_transfer() huggingface_model_id = model.huggingface_model_id service_type = model.executable_config.current_service.service_type model_id = model.model_id From 7f79626af934f1b9e238cf5cf0fbb2e3b74d85bb Mon Sep 17 00:00:00 2001 From: zhouxss Date: Thu, 20 Feb 2025 05:31:20 +0000 Subject: [PATCH 4/6] add 
model_files_download_source --- src/emd/models/llms/deepseek.py | 10 +++++----- src/emd/models/model.py | 2 ++ src/emd/models/utils/constants.py | 11 ++++------- src/pipeline/deploy/prepare_model.py | 26 +++++++++++++++++--------- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/emd/models/llms/deepseek.py b/src/emd/models/llms/deepseek.py index d76edc1e..dc202151 100644 --- a/src/emd/models/llms/deepseek.py +++ b/src/emd/models/llms/deepseek.py @@ -55,7 +55,7 @@ ], allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - # modelscope_model_id="Qwen/Qwen2.5-32B-Instruct", + modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", require_huggingface_token=False, application_scenario="Agent, tool use, translation, summary", description="The latest series of DeepSeek LLMs for reasoning", @@ -85,7 +85,7 @@ ], allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", + modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", require_huggingface_token=False, application_scenario="Agent, tool use, translation, summary", description="The latest series of DeepSeek LLMs for reasoning", @@ -116,7 +116,7 @@ ], allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", + modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", require_huggingface_token=False, application_scenario="Agent, tool use, translation, summary", description="The latest series of DeepSeek LLMs for reasoning", @@ -147,7 +147,7 @@ ], allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", + modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", require_huggingface_token=False, application_scenario="Agent, tool use, translation, summary", description="The latest series of 
DeepSeek LLMs for reasoning", @@ -244,7 +244,7 @@ ], allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", + modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B", require_huggingface_token=False, application_scenario="Agent, tool use, translation, summary", description="The latest series of DeepSeek LLMs for reasoning", diff --git a/src/emd/models/model.py b/src/emd/models/model.py index 43fde8c7..ac29d05a 100644 --- a/src/emd/models/model.py +++ b/src/emd/models/model.py @@ -10,6 +10,7 @@ FrameworkType, ModelType, ModelSeriesType, + ModelFilesDownloadSource # ModelPrepareMethod ) import boto3 @@ -183,6 +184,7 @@ class Model(ModelBase,Generic[T]): # download model files directly from s3 model_files_s3_path: Union[str,None] = None model_files_local_path: Union[str,None] = None + model_files_download_source: ModelFilesDownloadSource = ModelFilesDownloadSource.AUTO model_series: ModelSeries executable_config: Union[ExecutableConfig,None] = None diff --git a/src/emd/models/utils/constants.py b/src/emd/models/utils/constants.py index 8427f7c7..5c50f522 100644 --- a/src/emd/models/utils/constants.py +++ b/src/emd/models/utils/constants.py @@ -131,13 +131,10 @@ class ServiceCode(ConstantBase): SAGEMAKER = "sagemaker" -# class ModelPrepareMethod(ConstantBase): -# UPLOAD_TO_S3 = "upload to s3" -# DOANLOWD_FROM_S3 = "download from s3" -# IGNORE = "ignore" - - - +class ModelFilesDownloadSource(ConstantBase): + HUGGINGFACE = "huggingface" + MODELSCOPE= "modelscope" + AUTO = "auto" class ServiceQuotaCode(ConstantBase): G5dXLARGE_ENDPOINT = "L-1928E07B" diff --git a/src/pipeline/deploy/prepare_model.py b/src/pipeline/deploy/prepare_model.py index f93f99b7..e6333943 100644 --- a/src/pipeline/deploy/prepare_model.py +++ b/src/pipeline/deploy/prepare_model.py @@ -5,7 +5,7 @@ from huggingface_hub import snapshot_download as hf_snapshot_download from modelscope import snapshot_download 
as ms_snapshot_download from emd.models import Model -from emd.models.utils.constants import ServiceType,EngineType +from emd.models.utils.constants import ServiceType,EngineType,ModelFilesDownloadSource from emd.utils.aws_service_utils import check_cn_region from emd.utils.logger_utils import get_logger from utils.common import upload_dir_to_s3_by_s5cmd,download_dir_from_s3_by_s5cmd @@ -110,15 +110,23 @@ def download_model_files(model:Model,model_dir=None): if engine_type == EngineType.COMFYUI: download_comfyui_model(model,model_dir=model_dir) else: - if check_cn_region(region): - try: - download_modelscope_model(model,model_dir=model_dir) - except Exception as e: - logger.error(f"Error downloading {model.model_id} model from modelscope, error: {e}") - logger.info("download from huggingface...") - download_huggingface_model(model, model_dir=model_dir) + if model.model_files_download_source == ModelFilesDownloadSource.AUTO: + if check_cn_region(region): + try: + download_modelscope_model(model,model_dir=model_dir) + except Exception as e: + logger.error(f"Error downloading {model.model_id} model from modelscope, error: {e}") + logger.info("download from huggingface...") + download_huggingface_model(model, model_dir=model_dir) + else: + download_huggingface_model(model,model_dir=model_dir) else: - download_huggingface_model(model,model_dir=model_dir) + if model.model_files_download_source == ModelFilesDownloadSource.HUGGINGFACE: + download_huggingface_model(model, model_dir=model_dir) + elif model.model_files_download_source == ModelFilesDownloadSource.MODELSCOPE: + download_modelscope_model(model, model_dir=model_dir) + else: + raise ValueError(f"Invalid model_files_download_source: {model.model_files_download_source}") def run(model:Model):#, model_s3_bucket, backend_type, service_type, region,args): From e8e7df78aa9e85ca242332f10e6491245e996b13 Mon Sep 17 00:00:00 2001 From: zhouxss Date: Thu, 20 Feb 2025 05:42:09 +0000 Subject: [PATCH 5/6] fix multiple engines 
selection --- src/emd/models/llms/qwen.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/emd/models/llms/qwen.py b/src/emd/models/llms/qwen.py index 2236c4ff..630f6977 100644 --- a/src/emd/models/llms/qwen.py +++ b/src/emd/models/llms/qwen.py @@ -78,8 +78,7 @@ model_id = "Qwen2.5-72B-Instruct-AWQ", supported_engines=[ vllm_qwen2d5_engine064, - tgi_qwen2d5_72b_engine064, - tgi_qwen2d5_72b_on_inf2 + tgi_qwen2d5_72b_engine064 ], supported_instances=[ g5d12xlarge_instance, From 3d44e5280aa5d2e780f1b7e366b5425d767d30ff Mon Sep 17 00:00:00 2001 From: zhouxss Date: Thu, 20 Feb 2025 07:14:03 +0000 Subject: [PATCH 6/6] modify qwen model suuporting --- src/emd/models/llms/qwen.py | 112 ++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/src/emd/models/llms/qwen.py b/src/emd/models/llms/qwen.py index 630f6977..153bf738 100644 --- a/src/emd/models/llms/qwen.py +++ b/src/emd/models/llms/qwen.py @@ -107,35 +107,35 @@ ) ) -Model.register( - dict( - model_id = "Qwen2.5-72B-Instruct-AWQ-inf2", - supported_engines=[ - tgi_qwen2d5_72b_on_inf2 - ], - supported_instances=[ - inf2d24xlarge_instance, - local_instance - ], - supported_services=[ - sagemaker_service, - sagemaker_async_service, - ecs_service, - local_service - ], - supported_frameworks=[ - fastapi_framework - ], - allow_china_region=True, - huggingface_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ", - modelscope_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ", - require_huggingface_token=False, - application_scenario="Agent, tool use, translation, summary", - description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.", - model_type=ModelType.LLM, - model_series=QWEN2D5_SERIES - ) -) +# Model.register( 
+# dict( +# model_id = "Qwen2.5-72B-Instruct-AWQ-inf2", +# supported_engines=[ +# tgi_qwen2d5_72b_on_inf2 +# ], +# supported_instances=[ +# inf2d24xlarge_instance, +# local_instance +# ], +# supported_services=[ +# sagemaker_service, +# sagemaker_async_service, +# ecs_service, +# local_service +# ], +# supported_frameworks=[ +# fastapi_framework +# ], +# allow_china_region=True, +# huggingface_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ", +# modelscope_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ", +# require_huggingface_token=False, +# application_scenario="Agent, tool use, translation, summary", +# description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.", +# model_type=ModelType.LLM, +# model_series=QWEN2D5_SERIES +# ) +# ) Model.register( @@ -229,33 +229,33 @@ ) ) -Model.register( - dict( - model_id = "Qwen2.5-32B-Instruct-inf2", - supported_engines=[tgi_qwen2d5_72b_on_inf2], - supported_instances=[ - inf2d24xlarge_instance, - local_instance - ], - supported_services=[ - sagemaker_service, - sagemaker_async_service, - ecs_service, - local_service - ], - supported_frameworks=[ - fastapi_framework - ], - allow_china_region=True, - huggingface_model_id="Qwen/Qwen2.5-32B-Instruct", - modelscope_model_id="Qwen/Qwen2.5-32B-Instruct", - require_huggingface_token=False, - application_scenario="Agent, tool use, translation, summary", - description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.", - model_type=ModelType.LLM, - 
model_series=QWEN2D5_SERIES - ) -) +# Model.register( +# dict( +# model_id = "Qwen2.5-32B-Instruct-inf2", +# supported_engines=[tgi_qwen2d5_72b_on_inf2], +# supported_instances=[ +# inf2d24xlarge_instance, +# local_instance +# ], +# supported_services=[ +# sagemaker_service, +# sagemaker_async_service, +# ecs_service, +# local_service +# ], +# supported_frameworks=[ +# fastapi_framework +# ], +# allow_china_region=True, +# huggingface_model_id="Qwen/Qwen2.5-32B-Instruct", +# modelscope_model_id="Qwen/Qwen2.5-32B-Instruct", +# require_huggingface_token=False, +# application_scenario="Agent, tool use, translation, summary", +# description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.", +# model_type=ModelType.LLM, +# model_series=QWEN2D5_SERIES +# ) +# ) Model.register( dict(