42 changes: 42 additions & 0 deletions docs/en/best_deployment_practices.md
@@ -64,6 +64,48 @@ emd deploy --model-id Qwen2.5-14B-Instruct-AWQ --instance-type g4dn.2xlarge --en
}'
```

### Example: Customize model download methods
- You can load models from different locations by adding the appropriate values to the `extra-params` parameter; a combined deploy command sketch follows this list.
1. Load model from S3
```json
{
    "model_params": {
        "model_files_s3_path": "<S3_PATH>"
    }
}
```
2. Load model from local path (only applicable for local deployment)
```json
{
    "model_params": {
        "model_files_local_path": "<LOCAL_PATH>"
    }
}
```
3. Skip downloading and uploading model files in CodeBuild, which significantly reduces deployment time
```json
{
    "model_params": {
        "need_prepare_model": false
    }
}
```
4. Specify the download source for model files
```json
{
    "model_params": {
        "model_files_download_source": "huggingface|modelscope|auto(default)"
    }
}
```
5. Specify the model ID on Hugging Face or ModelScope
```json
{
    "model_params": {
        "huggingface_model_id": "model id on huggingface",
        "modelscope_model_id": "model id on modelscope"
    }
}
```
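
These options are passed through the same `--extra-params` flag used in the deployment example above. A minimal sketch, reusing the model ID and instance type from that example (the S3 path is a hypothetical placeholder):
```bash
emd deploy --model-id Qwen2.5-14B-Instruct-AWQ --instance-type g4dn.2xlarge --extra-params '{
    "model_params": {
        "model_files_s3_path": "s3://<YOUR_BUCKET>/models/Qwen2.5-14B-Instruct-AWQ/"
    }
}'
```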

## Environment variables
- `LOCAL_DEPLOY_PORT`: Local deployment port, default: `8080`
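
For example, to run a local deployment on a different port, export the variable before deploying (a sketch; the port value is illustrative):
```bash
export LOCAL_DEPLOY_PORT=9090
```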

6 changes: 6 additions & 0 deletions src/emd/constants.py
@@ -1,4 +1,5 @@
from .revision import VERSION, convert_version_name_to_stack_name
import os
ENV_STACK_NAME = f'EMD-Env'
MODEL_STACK_NAME_PREFIX = f"EMD-Model"
ENV_BUCKET_NAME_PREFIX = "emd-env-artifactbucket"
@@ -25,3 +26,8 @@

LOCAL_REGION = "local"
# EMD_USE_NO_PROFILE_CHOICE = "Don't set"

LOCAL_DEPLOY_PIPELINE_ZIP_DIR = os.path.join(
os.path.expanduser("~"),
f"emd_{VERSION}"
)
12 changes: 12 additions & 0 deletions src/emd/models/engines.py
@@ -78,6 +78,18 @@ class KtransformersEngine(OpenAICompitableEngine):
)


vllm_texgemma082 = VllmEngine(**{
"engine_type":EngineType.VLLM,
"engine_dockerfile_config": {"VERSION":"v0.8.2"},
"engine_cls":"vllm.vllm_backend.VLLMBackend",
"base_image_host":"public.ecr.aws",
"use_public_ecr":True,
"docker_login_region":"us-east-1",
"default_cli_args": " --max_num_seq 10 --disable-log-stats"
}
)


vllm_mistral_small_engine082 = VllmEngine(
**{
**vllm_engine064.model_dump(),
3 changes: 2 additions & 1 deletion src/emd/models/llms/__init__.py
@@ -5,5 +5,6 @@
llama,
deepseek,
baichuan,
jina
jina,
txgemma
)
54 changes: 27 additions & 27 deletions src/emd/models/llms/deepseek.py
@@ -334,33 +334,33 @@
)
)

Model.register(
dict(
model_id = "deepseek-r1-671b-1.58bit_ollama",
supported_engines=[ollama_deepseek_r1_qwen2d5_1d5b_engine057],
supported_instances=[
g5d48xlarge_instance,
local_instance
],
supported_services=[
sagemaker_service,
sagemaker_async_service,
ecs_service,
local_service
],
supported_frameworks=[
fastapi_framework
],
allow_china_region=False,
ollama_model_id="SIGJNF/deepseek-r1-671b-1.58bit",
# modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
require_huggingface_token=False,
application_scenario="Agent, tool use, translation, summary",
description="The latest series of DeepSeek LLMs for reasoning",
model_type=ModelType.LLM,
model_series=DEEPSEEK_REASONING_MODEL
)
)
# Model.register(
# dict(
# model_id = "deepseek-r1-671b-1.58bit_ollama",
# supported_engines=[ollama_deepseek_r1_qwen2d5_1d5b_engine057],
# supported_instances=[
# g5d48xlarge_instance,
# local_instance
# ],
# supported_services=[
# sagemaker_service,
# sagemaker_async_service,
# ecs_service,
# local_service
# ],
# supported_frameworks=[
# fastapi_framework
# ],
# allow_china_region=False,
# ollama_model_id="SIGJNF/deepseek-r1-671b-1.58bit",
# # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
# require_huggingface_token=False,
# application_scenario="Agent, tool use, translation, summary",
# description="The latest series of DeepSeek LLMs for reasoning",
# model_type=ModelType.LLM,
# model_series=DEEPSEEK_REASONING_MODEL
# )
# )


Model.register(
91 changes: 91 additions & 0 deletions src/emd/models/llms/txgemma.py
@@ -0,0 +1,91 @@
from ..engines import vllm_texgemma082
from .. import Model
from ..frameworks import fastapi_framework
from ..services import (
sagemaker_service,
sagemaker_async_service,
ecs_service,
local_service
)
from emd.models.utils.constants import ModelType
from ..model_series import TXGEMMA_SERIES
from ..instances import (
g5d2xlarge_instance,
g5d4xlarge_instance,
g5d8xlarge_instance,
g5d12xlarge_instance,
g5d16xlarge_instance,
g5d24xlarge_instance,
g5d48xlarge_instance,
g6e2xlarge_instance,
local_instance
)
from ..utils.constants import ModelFilesDownloadSource


Model.register(
dict(
model_id = "txgemma-9b-chat",
supported_engines=[vllm_texgemma082],
supported_instances=[
g5d12xlarge_instance,
g5d24xlarge_instance,
g5d48xlarge_instance,
g5d2xlarge_instance,
g5d4xlarge_instance,
g5d8xlarge_instance,
g5d16xlarge_instance,
local_instance
],
disable_hf_transfer=True,
supported_services=[
sagemaker_service,
sagemaker_async_service,
ecs_service,
local_service
],
supported_frameworks=[
fastapi_framework
],
huggingface_model_id="google/txgemma-9b-chat",
modelscope_model_id="AI-ModelScope/txgemma-9b-chat",
model_files_download_source=ModelFilesDownloadSource.MODELSCOPE,
# require_huggingface_token=True,
application_scenario="LLMs for the development of therapeutics.",
description="The latest series of TxGemma",
model_type=ModelType.LLM,
model_series=TXGEMMA_SERIES,
)
)


Model.register(
dict(
model_id = "txgemma-27b-chat",
supported_engines=[vllm_texgemma082],
supported_instances=[
g5d12xlarge_instance,
g5d24xlarge_instance,
g5d48xlarge_instance,
local_instance
],
disable_hf_transfer=True,
supported_services=[
sagemaker_service,
sagemaker_async_service,
ecs_service,
local_service
],
supported_frameworks=[
fastapi_framework
],
huggingface_model_id="google/txgemma-27b-chat",
modelscope_model_id="AI-ModelScope/txgemma-27b-chat",
model_files_download_source=ModelFilesDownloadSource.MODELSCOPE,
# require_huggingface_token=True,
application_scenario="LLMs for the development of therapeutics.",
description="The latest series of TxGemma",
model_type=ModelType.LLM,
model_series=TXGEMMA_SERIES,
)
)
7 changes: 7 additions & 0 deletions src/emd/models/model_series.py
@@ -97,6 +97,13 @@
reference_link="https://blog.google/technology/developers/gemma-3/"
)

TXGEMMA_SERIES = ModelSeries(
model_series_name=ModelSeriesType.TXGEMMA,
description="TXGemma is a series of open models to accelerate the development of therapeutics.",
reference_link="https://huggingface.co/collections/google/txgemma-release-67dd92e931c857d15e4d1e87"
)


MISTRAL_SERIES = ModelSeries(
model_series_name=ModelSeriesType.MISTRAL,
description="LLMs and VLMs provided by MISTRAL AI.",
2 changes: 1 addition & 1 deletion src/emd/models/services.py
@@ -91,7 +91,7 @@
"ServiceType":"service_type",
"EngineType":"engine_type",
"Region": "region",
"DesiredCapacity": "desired_capacity",
"DesiredCapacity": ValueWithDefault(name="desired_capacity",default=1),
"ContainerCpu": "container_cpu",
"ContainerMemory": "container_memory",
"ContainerGpu":"instance_gpu_num"
1 change: 1 addition & 0 deletions src/emd/models/utils/constants.py
@@ -214,6 +214,7 @@ def get_service_quota_code(cls, instance_type: str):

class ModelSeriesType(ConstantBase):
GEMMA3 = "gemma3"
TXGEMMA = "txgemma"
MISTRAL = "mistral"
QWEN2D5 = "qwen2.5"
GLM4 = "glm4"
4 changes: 4 additions & 0 deletions src/emd/models/vlms/gemma3.py
@@ -10,6 +10,7 @@
from emd.models.utils.constants import ModelType
from ..model_series import Gemma3_SERIES
from ..instances import (
g4dn12xlarge_instance,
g5d2xlarge_instance,
g5d4xlarge_instance,
g5d8xlarge_instance,
@@ -43,6 +44,7 @@
supported_frameworks=[
fastapi_framework
],
allow_china_region = True,
modelscope_model_id="LLM-Research/gemma-3-4b-it",
model_files_download_source=ModelFilesDownloadSource.MODELSCOPE,
# require_huggingface_token=False,
@@ -74,6 +76,7 @@
supported_frameworks=[
fastapi_framework
],
allow_china_region = True,
# huggingface_model_id="google/gemma-3-12b-it",
# require_huggingface_token=False,
modelscope_model_id="LLM-Research/gemma-3-12b-it",
@@ -106,6 +109,7 @@
supported_frameworks=[
fastapi_framework
],
allow_china_region = True,
# huggingface_model_id="unsloth/gemma-3-27b-it",
modelscope_model_id="LLM-Research/gemma-3-27b-it",
model_files_download_source=ModelFilesDownloadSource.MODELSCOPE,
13 changes: 10 additions & 3 deletions src/emd/sdk/deploy.py
@@ -14,8 +14,10 @@
MODEL_DEFAULT_TAG,
MODEL_STACK_NAME_PREFIX,
VERSION,
LOCAL_REGION
LOCAL_REGION,
LOCAL_DEPLOY_PIPELINE_ZIP_DIR
)
from emd.utils.file_utils import mkdir_with_mode
from emd.models import Model
from emd.models.utils.constants import FrameworkType, ServiceType,InstanceType
from emd.models.utils.serialize_utils import dump_extra_params
@@ -318,7 +320,10 @@ def deploy_local(
# region: Optional[str] = None,
# model_stack_name=None,
extra_params=None,
pipeline_zip_local_path=f"/tmp/emd_{VERSION}/pipeline.zip",
pipeline_zip_local_path=os.path.join(
LOCAL_DEPLOY_PIPELINE_ZIP_DIR,
"pipeline.zip"
),
# env_stack_on_failure = "ROLLBACK",
# force_env_stack_update = False,
# waiting_until_deploy_complete = True
@@ -328,7 +333,9 @@
logger.info(f"parsed extra_params: {extra_params}")
extra_params = dump_extra_params(extra_params or {})
dir = os.path.dirname(pipeline_zip_local_path)
os.makedirs(dir, exist_ok=True)

mkdir_with_mode(dir, exist_ok=True,mode=0o777)
# os.makedirs(dir, exist_ok=True,mode=0o777)
with open(pipeline_zip_local_path, "wb") as f:
buffer = ziped_pipeline()
f.write(buffer.read())
6 changes: 6 additions & 0 deletions src/emd/utils/file_utils.py
@@ -0,0 +1,6 @@
import os

def mkdir_with_mode(directory, exist_ok=True, mode=0o777):
    # Temporarily clear the process umask so the requested mode is applied
    # exactly, then restore the previous umask.
    oldmask = os.umask(0)
    os.makedirs(directory, mode=mode, exist_ok=exist_ok)
    os.umask(oldmask)