From 8d1b0c2b2224614c983fb02ec0348db9e685bfe7 Mon Sep 17 00:00:00 2001 From: zhouxss Date: Tue, 25 Mar 2025 03:47:27 +0000 Subject: [PATCH 1/5] merge --- docs/en/supported_models.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/supported_models.md b/docs/en/supported_models.md index e8749d1d..a1d94a8a 100644 --- a/docs/en/supported_models.md +++ b/docs/en/supported_models.md @@ -52,4 +52,4 @@ | jina-embeddings-v3 | jina | embedding | huggingface | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | | bge-reranker-v2-m3 | bge | rerank | vllm | g4dn.2xlarge,g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | | bge-reranker-large | bge | rerank | vllm | g4dn.2xlarge,g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | -| jina-reranker-v2-base-multilingual | jina | rerank | huggingface | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | +| jina-reranker-v2-base-multilingual | jina | rerank | huggingface | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | \ No newline at end of file From ef01e39d77fd3e9fdba4bd89943244f01729d989 Mon Sep 17 00:00:00 2001 From: zhouxss Date: Tue, 25 Mar 2025 03:52:23 +0000 Subject: [PATCH 2/5] merge --- docs/en/supported_models.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/supported_models.md b/docs/en/supported_models.md index a1d94a8a..e8749d1d 100644 --- a/docs/en/supported_models.md +++ b/docs/en/supported_models.md @@ -52,4 +52,4 @@ | jina-embeddings-v3 | jina | embedding | huggingface | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | | bge-reranker-v2-m3 | bge | rerank | vllm | g4dn.2xlarge,g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | | bge-reranker-large | bge | rerank | vllm | g4dn.2xlarge,g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | -| jina-reranker-v2-base-multilingual | jina | rerank | huggingface | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | \ No newline at end of file +| jina-reranker-v2-base-multilingual | jina | rerank | huggingface | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | From 2845bc357fdca586143bf1738c432501b754a4e2 Mon Sep 17 00:00:00 2001 From: zhouxss Date: Thu, 27 Mar 2025 08:14:39 +0000 Subject: [PATCH 3/5] add Mistral-Small-3.1-24B-Instruct-2503 --- docs/en/best_deployment_practices.md | 5 +++ docs/en/supported_models.md | 1 + src/emd/models/engines.py | 11 ++++++ src/emd/models/model_series.py | 6 ++++ src/emd/models/utils/constants.py | 1 + src/emd/models/vlms/__init__.py | 1 + src/emd/models/vlms/mistral.py | 54 ++++++++++++++++++++++++++++ 7 files changed, 79 insertions(+) create mode 100644 src/emd/models/vlms/mistral.py diff --git a/docs/en/best_deployment_practices.md b/docs/en/best_deployment_practices.md index 3a66bfb8..26fae4d3 100644 --- a/docs/en/best_deployment_practices.md +++ b/docs/en/best_deployment_practices.md @@ -4,6 +4,11 @@ This document provides examples of best practices for deploying models using EMD ## Famous Models +### Mistral Small Series +``` +emd deploy --model-id Mistral-Small-3.1-24B-Instruct-2503 --instance-type g5.12xlarge --engine-type vllm --service-type sagemaker_realtime +``` + ### Gemma 3 Series ``` diff --git a/docs/en/supported_models.md b/docs/en/supported_models.md 
index e8749d1d..0ecfeca7 100644 --- a/docs/en/supported_models.md +++ b/docs/en/supported_models.md @@ -44,6 +44,7 @@ | gemma-3-4b-it | gemma3 | vlm | vllm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,sagemaker_async,ecs | ❎ | | gemma-3-12b-it | gemma3 | vlm | vllm | g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,sagemaker_async,ecs | ❎ | | gemma-3-27b-it | gemma3 | vlm | vllm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ❎ | +| Mistral-Small-3.1-24B-Instruct-2503 | mistral | vlm | vllm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs | ❎ | | txt2video-LTX | comfyui | video | comfyui | g5.4xlarge,g5.8xlarge,g6e.2xlarge | sagemaker_async | ❎ | | whisper | whisper | whisper | huggingface | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_async | ❎ | | bce-embedding-base_v1 | bce | embedding | vllm | g4dn.2xlarge,g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs | ✅ | diff --git a/src/emd/models/engines.py b/src/emd/models/engines.py index f251071a..e748ab83 100644 --- a/src/emd/models/engines.py +++ b/src/emd/models/engines.py @@ -77,6 +77,17 @@ class KtransformersEngine(OpenAICompitableEngine): } ) + +vllm_mistral_small_engine082 = VllmEngine( + **{ + **vllm_engine064.model_dump(), + "engine_dockerfile_config": {"VERSION":"v0.8.2"}, + "dockerfile_name":"Dockerfile", + "default_cli_args": " --tokenizer-mode mistral --config-format mistral --load-format mistral --limit-mm-per-prompt 'image=4' --max-model-len 16384", + "environment_variables": "" + } +) + vllm_deepseek_r1_distill_qwen_engine071 = VllmEngine(**{ **vllm_engine064.model_dump(), "engine_dockerfile_config": {"VERSION":"v0.7.1"}, diff --git a/src/emd/models/model_series.py b/src/emd/models/model_series.py index 09062998..ceae13ad 100644 --- a/src/emd/models/model_series.py +++ b/src/emd/models/model_series.py @@ -97,6 +97,12 @@ reference_link="https://blog.google/technology/developers/gemma-3/" ) +MISTRAL_SERIES = ModelSeries( + model_series_name=ModelSeriesType.MISTRAL, + description="LLMs and VLMs provided by MISTRAL AI.", + reference_link="https://huggingface.co/mistralai" +) + DEEPSEEK_REASONING_MODEL = ModelSeries( model_series_name=ModelSeriesType.DEEPSEEK_REASONING_MODEL, description="DeepSeek-R1-Zero and DeepSeek-R1 are innovative reasoning models, with the former showcasing strong performance through reinforcement learning alone, while the latter enhances reasoning capabilities by incorporating cold-start data, achieving results comparable to OpenAI-o1 and setting new benchmarks with its distilled versions.", diff --git a/src/emd/models/utils/constants.py b/src/emd/models/utils/constants.py index b9c21b49..d78414e6 100644 --- a/src/emd/models/utils/constants.py +++ b/src/emd/models/utils/constants.py @@ -214,6 +214,7 @@ def get_service_quota_code(cls, instance_type: str): class ModelSeriesType(ConstantBase): GEMMA3 = "gemma3" + MISTRAL = "mistral" QWEN2D5 = "qwen2.5" GLM4 = "glm4" INTERLM2d5 = "internlm2.5" diff --git a/src/emd/models/vlms/__init__.py b/src/emd/models/vlms/__init__.py index bf74f45c..4440a29e 100644 --- a/src/emd/models/vlms/__init__.py +++ b/src/emd/models/vlms/__init__.py @@ -1,3 +1,4 @@ from . import qwen from . import internvl from . import gemma3 +from . 
import mistral diff --git a/src/emd/models/vlms/mistral.py b/src/emd/models/vlms/mistral.py new file mode 100644 index 00000000..fc597105 --- /dev/null +++ b/src/emd/models/vlms/mistral.py @@ -0,0 +1,54 @@ +from ..engines import vllm_mistral_small_engine082 +from .. import Model +from ..frameworks import fastapi_framework +from ..services import ( + sagemaker_service, + sagemaker_async_service, + ecs_service, + local_service +) +from emd.models.utils.constants import ModelType +from ..model_series import MISTRAL_SERIES +from ..instances import ( + g5d2xlarge_instance, + g5d4xlarge_instance, + g5d8xlarge_instance, + g5d12xlarge_instance, + g5d16xlarge_instance, + g5d24xlarge_instance, + g5d48xlarge_instance, + g6e2xlarge_instance, + local_instance +) +from ..utils.constants import ModelFilesDownloadSource + + +Model.register( + dict( + model_id = "Mistral-Small-3.1-24B-Instruct-2503", + supported_engines=[vllm_mistral_small_engine082], + supported_instances=[ + g5d12xlarge_instance, + g5d24xlarge_instance, + g5d48xlarge_instance, + local_instance + ], + supported_services=[ + sagemaker_service, + sagemaker_async_service, + ecs_service, + local_service + ], + supported_frameworks=[ + fastapi_framework + ], + huggingface_model_id="unsloth/Mistral-Small-3.1-24B-Instruct-2503", + # require_huggingface_token=False, + modelscope_model_id="mistralai/Mistral-Small-3.1-24B-Instruct-2503", + # model_files_download_source=ModelFilesDownloadSource.MODELSCOPE, + application_scenario="vision llms for image understanding", + description="The latest series of mistral small", + model_type=ModelType.VLM, + model_series=MISTRAL_SERIES, + ) +) From d72b12b22282e6e67dc069f99fdd3ee59e25bcbd Mon Sep 17 00:00:00 2001 From: zhouxss Date: Tue, 1 Apr 2025 08:37:49 +0000 Subject: [PATCH 4/5] modify qwq-32b deploy --- src/emd/models/engines.py | 7 +++++++ src/emd/models/llms/qwen.py | 5 +++-- src/emd/models/services.py | 1 + src/pipeline/backend/backend.py | 7 ++++--- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/emd/models/engines.py b/src/emd/models/engines.py index e748ab83..0dc243f7 100644 --- a/src/emd/models/engines.py +++ b/src/emd/models/engines.py @@ -135,6 +135,13 @@ class KtransformersEngine(OpenAICompitableEngine): "default_cli_args": " --chat-template emd/models/chat_templates/qwq_32b_add_prefill_chat_template.jinja --max_model_len 16000 --max_num_seq 10 --disable-log-stats --enable-auto-tool-choice --tool-call-parser hermes" }) +vllm_qwq_engine082 = VllmEngine(**{ + **vllm_qwen25vl72b_engine073.model_dump(), + "engine_dockerfile_config": {"VERSION":"v0.8.2"}, + "environment_variables": "export VLLM_ATTENTION_BACKEND=FLASHINFER && export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True", + "default_cli_args": " --chat-template emd/models/chat_templates/qwq_32b_add_prefill_chat_template.jinja --max_model_len 16000 --max_num_seq 10 --disable-log-stats --enable-auto-tool-choice --tool-call-parser hermes --enable-reasoning --reasoning-parser deepseek_r1" +}) + vllm_internvl2d5_76b_engine064 = VllmEngine(**{ **vllm_engine064.model_dump(), diff --git a/src/emd/models/llms/qwen.py b/src/emd/models/llms/qwen.py index fa48ce79..35a2cc1f 100644 --- a/src/emd/models/llms/qwen.py +++ b/src/emd/models/llms/qwen.py @@ -7,7 +7,8 @@ tgi_qwen2d5_on_inf2, tgi_qwen2d5_72b_on_inf2, vllm_qwen2d5_72b_engine064, - vllm_qwq_engine073 + vllm_qwq_engine073, + vllm_qwq_engine082 ) from ..services import ( sagemaker_service, @@ -471,7 +472,7 @@ Model.register( dict( model_id = "QwQ-32B", - 
supported_engines=[vllm_qwq_engine073], + supported_engines=[vllm_qwq_engine082], supported_instances=[ g5d12xlarge_instance, g5d24xlarge_instance, diff --git a/src/emd/models/services.py b/src/emd/models/services.py index 05737773..859a12c9 100644 --- a/src/emd/models/services.py +++ b/src/emd/models/services.py @@ -91,6 +91,7 @@ "ServiceType":"service_type", "EngineType":"engine_type", "Region": "region", + "DesiredCapacity": "desired_capacity", "ContainerCpu": "container_cpu", "ContainerMemory": "container_memory", "ContainerGpu":"instance_gpu_num" diff --git a/src/pipeline/backend/backend.py b/src/pipeline/backend/backend.py index fc46d39d..4996c97a 100644 --- a/src/pipeline/backend/backend.py +++ b/src/pipeline/backend/backend.py @@ -134,9 +134,10 @@ def start_server(self, server_start_command): logger.info(f"Starting {self.engine_type} server with command: {server_start_command}") t = threading.Thread(target=os.system,args=(server_start_command,),daemon=True) t.start() - t2 = threading.Thread(target=self.check_model_serve_ready,args=(t, "127.0.0.1", self.server_port),daemon=True) - t2.start() - t2.join() + self.check_model_serve_ready(t, "127.0.0.1", self.server_port) + logger.info(f"Server started successfully.") + # t2.start() + # t2.join() return From 48b97c44c487ca29336f94ef69eb0d8741d7a0fd Mon Sep 17 00:00:00 2001 From: zhouxss Date: Mon, 7 Apr 2025 08:43:51 +0000 Subject: [PATCH 5/5] add txgemma model; --- docs/en/best_deployment_practices.md | 42 +++++++++++++ src/emd/constants.py | 6 ++ src/emd/models/engines.py | 12 ++++ src/emd/models/llms/__init__.py | 3 +- src/emd/models/llms/deepseek.py | 54 ++++++++--------- src/emd/models/llms/txgemma.py | 91 ++++++++++++++++++++++++++++ src/emd/models/model_series.py | 7 +++ src/emd/models/services.py | 2 +- src/emd/models/utils/constants.py | 1 + src/emd/models/vlms/gemma3.py | 4 ++ src/emd/sdk/deploy.py | 13 +++- src/emd/utils/file_utils.py | 6 ++ 12 files changed, 209 insertions(+), 32 deletions(-) create mode 100644 src/emd/models/llms/txgemma.py create mode 100644 src/emd/utils/file_utils.py diff --git a/docs/en/best_deployment_practices.md b/docs/en/best_deployment_practices.md index 26fae4d3..d4747e57 100644 --- a/docs/en/best_deployment_practices.md +++ b/docs/en/best_deployment_practices.md @@ -64,6 +64,48 @@ emd deploy --model-id Qwen2.5-14B-Instruct-AWQ --instance-type g4dn.2xlarge --en }' ``` +### Example: Customize model download methods +- You can load models from different locations by adding appropriate values in the extra-params parameter +1. Load model from S3 +```json +{ + "model_params":{ + "model_files_s3_path":"" + } +} +``` +2. Load model from local path (only applicable for local deployment) +```json +{ + "model_params": { "model_files_local_path":"" + } +} +``` +3. Skip downloading and uploading model files in codebuild, which will significantly reduce deployment time +```json +{ + "model_params": { + "need_prepare_model":false + } +} +``` +4. Specify the download source for model files +```json +{ + "model_params":{ + "model_files_download_source":"huggingface|modelscope|auto(default)" + } +} +``` +5.
Specify the model ID on huggingface or modelscope +```json +{ + "model_params": { + "huggingface_model_id":"model id on huggingface","modelscope_model_id":"model id on modelscope" + } +} +``` + ## Environmental variables - `LOCAL_DEPLOY_PORT: ` Local deployment port, default: `8080` diff --git a/src/emd/constants.py b/src/emd/constants.py index a3f949b1..53dd60c8 100644 --- a/src/emd/constants.py +++ b/src/emd/constants.py @@ -1,4 +1,5 @@ from .revision import VERSION, convert_version_name_to_stack_name +import os ENV_STACK_NAME = f'EMD-Env' MODEL_STACK_NAME_PREFIX = f"EMD-Model" ENV_BUCKET_NAME_PREFIX = "emd-env-artifactbucket" @@ -25,3 +26,8 @@ LOCAL_REGION = "local" # EMD_USE_NO_PROFILE_CHOICE = "Don't set" + +LOCAL_DEPLOY_PIPELINE_ZIP_DIR = os.path.join( + os.path.expanduser("~"), + f"emd_{VERSION}" +) diff --git a/src/emd/models/engines.py b/src/emd/models/engines.py index 0dc243f7..0bc13595 100644 --- a/src/emd/models/engines.py +++ b/src/emd/models/engines.py @@ -78,6 +78,18 @@ class KtransformersEngine(OpenAICompitableEngine): ) +vllm_texgemma082 = VllmEngine(**{ + "engine_type":EngineType.VLLM, + "engine_dockerfile_config": {"VERSION":"v0.8.2"}, + "engine_cls":"vllm.vllm_backend.VLLMBackend", + "base_image_host":"public.ecr.aws", + "use_public_ecr":True, + "docker_login_region":"us-east-1", + "default_cli_args": " --max_num_seq 10 --disable-log-stats" +} +) + + vllm_mistral_small_engine082 = VllmEngine( **{ **vllm_engine064.model_dump(), diff --git a/src/emd/models/llms/__init__.py b/src/emd/models/llms/__init__.py index e823ab18..2750df0c 100644 --- a/src/emd/models/llms/__init__.py +++ b/src/emd/models/llms/__init__.py @@ -5,5 +5,6 @@ llama, deepseek, baichuan, - jina + jina, + txgemma ) diff --git a/src/emd/models/llms/deepseek.py b/src/emd/models/llms/deepseek.py index df61526b..1359ac33 100644 --- a/src/emd/models/llms/deepseek.py +++ b/src/emd/models/llms/deepseek.py @@ -334,33 +334,33 @@ ) ) -Model.register( - dict( - model_id = "deepseek-r1-671b-1.58bit_ollama", - supported_engines=[ollama_deepseek_r1_qwen2d5_1d5b_engine057], - supported_instances=[ - g5d48xlarge_instance, - local_instance - ], - supported_services=[ - sagemaker_service, - sagemaker_async_service, - ecs_service, - local_service - ], - supported_frameworks=[ - fastapi_framework - ], - allow_china_region=False, - ollama_model_id="SIGJNF/deepseek-r1-671b-1.58bit", - # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", - require_huggingface_token=False, - application_scenario="Agent, tool use, translation, summary", - description="The latest series of DeepSeek LLMs for reasoning", - model_type=ModelType.LLM, - model_series=DEEPSEEK_REASONING_MODEL - ) -) +# Model.register( +# dict( +# model_id = "deepseek-r1-671b-1.58bit_ollama", +# supported_engines=[ollama_deepseek_r1_qwen2d5_1d5b_engine057], +# supported_instances=[ +# g5d48xlarge_instance, +# local_instance +# ], +# supported_services=[ +# sagemaker_service, +# sagemaker_async_service, +# ecs_service, +# local_service +# ], +# supported_frameworks=[ +# fastapi_framework +# ], +# allow_china_region=False, +# ollama_model_id="SIGJNF/deepseek-r1-671b-1.58bit", +# # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", +# require_huggingface_token=False, +# application_scenario="Agent, tool use, translation, summary", +# description="The latest series of DeepSeek LLMs for reasoning", +# model_type=ModelType.LLM, +# model_series=DEEPSEEK_REASONING_MODEL +# ) +# ) Model.register( diff --git a/src/emd/models/llms/txgemma.py b/src/emd/models/llms/txgemma.py new file mode 
100644 index 00000000..0c4e29ff --- /dev/null +++ b/src/emd/models/llms/txgemma.py @@ -0,0 +1,91 @@ +from ..engines import vllm_texgemma082 +from .. import Model +from ..frameworks import fastapi_framework +from ..services import ( + sagemaker_service, + sagemaker_async_service, + ecs_service, + local_service +) +from emd.models.utils.constants import ModelType +from ..model_series import TXGEMMA_SERIES +from ..instances import ( + g5d2xlarge_instance, + g5d4xlarge_instance, + g5d8xlarge_instance, + g5d12xlarge_instance, + g5d16xlarge_instance, + g5d24xlarge_instance, + g5d48xlarge_instance, + g6e2xlarge_instance, + local_instance +) +from ..utils.constants import ModelFilesDownloadSource + + +Model.register( + dict( + model_id = "txgemma-9b-chat", + supported_engines=[vllm_texgemma082], + supported_instances=[ + g5d12xlarge_instance, + g5d24xlarge_instance, + g5d48xlarge_instance, + g5d2xlarge_instance, + g5d4xlarge_instance, + g5d8xlarge_instance, + g5d16xlarge_instance, + local_instance + ], + disable_hf_transfer=True, + supported_services=[ + sagemaker_service, + sagemaker_async_service, + ecs_service, + local_service + ], + supported_frameworks=[ + fastapi_framework + ], + huggingface_model_id="google/txgemma-9b-chat", + modelscope_model_id="AI-ModelScope/txgemma-9b-chat", + model_files_download_source=ModelFilesDownloadSource.MODELSCOPE, + # require_huggingface_token=True, + application_scenario="llms for the development of therapeutics.", + description="The latest series of txgemma", + model_type=ModelType.LLM, + model_series=TXGEMMA_SERIES, + ) +) + + +Model.register( + dict( + model_id = "txgemma-27b-chat", + supported_engines=[vllm_texgemma082], + supported_instances=[ + g5d12xlarge_instance, + g5d24xlarge_instance, + g5d48xlarge_instance, + local_instance + ], + disable_hf_transfer=True, + supported_services=[ + sagemaker_service, + sagemaker_async_service, + ecs_service, + local_service + ], + supported_frameworks=[ + fastapi_framework + ], + huggingface_model_id="google/txgemma-27b-chat", + modelscope_model_id="AI-ModelScope/txgemma-27b-chat", + model_files_download_source=ModelFilesDownloadSource.MODELSCOPE, + # require_huggingface_token=True, + application_scenario="llms for the development of therapeutics.", + description="The latest series of txgemma", + model_type=ModelType.LLM, + model_series=TXGEMMA_SERIES, + ) +) diff --git a/src/emd/models/model_series.py b/src/emd/models/model_series.py index ceae13ad..896fa512 100644 --- a/src/emd/models/model_series.py +++ b/src/emd/models/model_series.py @@ -97,6 +97,13 @@ reference_link="https://blog.google/technology/developers/gemma-3/" ) +TXGEMMA_SERIES = ModelSeries( + model_series_name=ModelSeriesType.TXGEMMA, + description="TXGemma is a series of open models to accelerate the development of therapeutics.", + reference_link="https://huggingface.co/collections/google/txgemma-release-67dd92e931c857d15e4d1e87" +) + + MISTRAL_SERIES = ModelSeries( model_series_name=ModelSeriesType.MISTRAL, description="LLMs and VLMs provided by MISTRAL AI.", diff --git a/src/emd/models/services.py b/src/emd/models/services.py index 859a12c9..824792f0 100644 --- a/src/emd/models/services.py +++ b/src/emd/models/services.py @@ -91,7 +91,7 @@ "ServiceType":"service_type", "EngineType":"engine_type", "Region": "region", - "DesiredCapacity": "desired_capacity", + "DesiredCapacity": ValueWithDefault(name="desired_capacity",default=1), "ContainerCpu": "container_cpu", "ContainerMemory": "container_memory", "ContainerGpu":"instance_gpu_num" diff --git 
a/src/emd/models/utils/constants.py b/src/emd/models/utils/constants.py index d78414e6..27311173 100644 --- a/src/emd/models/utils/constants.py +++ b/src/emd/models/utils/constants.py @@ -214,6 +214,7 @@ def get_service_quota_code(cls, instance_type: str): class ModelSeriesType(ConstantBase): GEMMA3 = "gemma3" + TXGEMMA = "txgemma" MISTRAL = "mistral" QWEN2D5 = "qwen2.5" GLM4 = "glm4" diff --git a/src/emd/models/vlms/gemma3.py b/src/emd/models/vlms/gemma3.py index 4f049787..822cddd6 100644 --- a/src/emd/models/vlms/gemma3.py +++ b/src/emd/models/vlms/gemma3.py @@ -10,6 +10,7 @@ from emd.models.utils.constants import ModelType from ..model_series import Gemma3_SERIES from ..instances import ( + g4dn12xlarge_instance, g5d2xlarge_instance, g5d4xlarge_instance, g5d8xlarge_instance, @@ -43,6 +44,7 @@ supported_frameworks=[ fastapi_framework ], + allow_china_region = True, modelscope_model_id="LLM-Research/gemma-3-4b-it", model_files_download_source=ModelFilesDownloadSource.MODELSCOPE, # require_huggingface_token=False, @@ -74,6 +76,7 @@ supported_frameworks=[ fastapi_framework ], + allow_china_region = True, # huggingface_model_id="google/gemma-3-12b-it", # require_huggingface_token=False, modelscope_model_id="LLM-Research/gemma-3-12b-it", @@ -106,6 +109,7 @@ supported_frameworks=[ fastapi_framework ], + allow_china_region = True, # huggingface_model_id="unsloth/gemma-3-27b-it", modelscope_model_id="LLM-Research/gemma-3-27b-it", model_files_download_source=ModelFilesDownloadSource.MODELSCOPE, diff --git a/src/emd/sdk/deploy.py b/src/emd/sdk/deploy.py index 6426215d..03443ebf 100644 --- a/src/emd/sdk/deploy.py +++ b/src/emd/sdk/deploy.py @@ -14,8 +14,10 @@ MODEL_DEFAULT_TAG, MODEL_STACK_NAME_PREFIX, VERSION, - LOCAL_REGION + LOCAL_REGION, + LOCAL_DEPLOY_PIPELINE_ZIP_DIR ) +from emd.utils.file_utils import mkdir_with_mode from emd.models import Model from emd.models.utils.constants import FrameworkType, ServiceType,InstanceType from emd.models.utils.serialize_utils import dump_extra_params @@ -318,7 +320,10 @@ def deploy_local( # region: Optional[str] = None, # model_stack_name=None, extra_params=None, - pipeline_zip_local_path=f"/tmp/emd_{VERSION}/pipeline.zip", + pipeline_zip_local_path=os.path.join( + LOCAL_DEPLOY_PIPELINE_ZIP_DIR, + "pipeline.zip" + ), # env_stack_on_failure = "ROLLBACK", # force_env_stack_update = False, # waiting_until_deploy_complete = True @@ -328,7 +333,9 @@ def deploy_local( logger.info(f"parsed extra_params: {extra_params}") extra_params = dump_extra_params(extra_params or {}) dir = os.path.dirname(pipeline_zip_local_path) - os.makedirs(dir, exist_ok=True) + + mkdir_with_mode(dir, exist_ok=True,mode=0o777) + # os.makedirs(dir, exist_ok=True,mode=0o777) with open(pipeline_zip_local_path, "wb") as f: buffer = ziped_pipeline() f.write(buffer.read()) diff --git a/src/emd/utils/file_utils.py b/src/emd/utils/file_utils.py new file mode 100644 index 00000000..11599dc1 --- /dev/null +++ b/src/emd/utils/file_utils.py @@ -0,0 +1,6 @@ +import os + +def mkdir_with_mode(directory,exist_ok=True,mode=0o777): + oldmask = os.umask(0) + os.makedirs(directory, mode=mode,exist_ok=exist_ok) + os.umask(oldmask)
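Note on the `mkdir_with_mode` helper introduced above: a plain `os.makedirs(path, mode=0o777)` does not guarantee a world-writable directory, because the process umask is subtracted from the requested mode (with the common default umask of `0o022` the directory ends up `0o755`). Temporarily clearing the umask, as the helper does, lets the full `0o777` mode take effect for the local pipeline zip directory used by `deploy_local`. A minimal sketch of the difference, using an illustrative scratch path that is not part of the patch:

```python
import os
import stat
import tempfile

base = tempfile.mkdtemp()                    # illustrative scratch location
plain = os.path.join(base, "plain")
cleared = os.path.join(base, "cleared")

os.umask(0o022)                              # typical default umask
os.makedirs(plain, mode=0o777)               # umask is applied -> usually 0o755

oldmask = os.umask(0)                        # same pattern as mkdir_with_mode
os.makedirs(cleared, mode=0o777)             # mode takes effect verbatim -> 0o777
os.umask(oldmask)                            # restore the previous umask

print(oct(stat.S_IMODE(os.stat(plain).st_mode)))    # e.g. '0o755'
print(oct(stat.S_IMODE(os.stat(cleared).st_mode)))  # '0o777'
```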