42 changes: 42 additions & 0 deletions docs/en/best_deployment_practices.md
@@ -64,6 +64,48 @@ emd deploy --model-id Qwen2.5-14B-Instruct-AWQ --instance-type g4dn.2xlarge --en
}'
```

### Example: Customize model download methods
- You can load models from different locations by adding the appropriate values to the `extra-params` parameter; a combined deploy command sketch follows this list.
1. Load model from S3
```json
{
    "model_params": {
        "model_files_s3_path": "<S3_PATH>"
    }
}
```
2. Load model from local path (only applicable for local deployment)
```json
{
    "model_params": {
        "model_files_local_path": "<LOCAL_PATH>"
    }
}
```
3. Skip downloading and uploading model files in CodeBuild, which significantly reduces deployment time
```json
{
    "model_params": {
        "need_prepare_model": false
    }
}
```
4. Specify the download source for model files
```json
{
    "model_params": {
        "model_files_download_source": "huggingface|modelscope|auto(default)"
    }
}
```
5. Specify the model ID on Hugging Face or ModelScope
```json
{
    "model_params": {
        "huggingface_model_id": "model id on huggingface",
        "modelscope_model_id": "model id on modelscope"
    }
}
```
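
These options are passed through the same `--extra-params` flag used in the deployment example above. A minimal sketch, reusing the model ID and instance type from that example (the S3 path is a hypothetical placeholder):
```bash
emd deploy --model-id Qwen2.5-14B-Instruct-AWQ --instance-type g4dn.2xlarge --extra-params '{
    "model_params": {
        "model_files_s3_path": "s3://<YOUR_BUCKET>/models/Qwen2.5-14B-Instruct-AWQ/"
    }
}'
```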

## Environment variables
- `LOCAL_DEPLOY_PORT`: Local deployment port, default: `8080`
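
For example, to run a local deployment on a different port, export the variable before deploying (a sketch; the port value is illustrative):
```bash
export LOCAL_DEPLOY_PORT=9090
```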

6 changes: 6 additions & 0 deletions src/emd/constants.py
@@ -1,4 +1,5 @@
from .revision import VERSION, convert_version_name_to_stack_name
import os
ENV_STACK_NAME = f'EMD-Env'
MODEL_STACK_NAME_PREFIX = f"EMD-Model"
ENV_BUCKET_NAME_PREFIX = "emd-env-artifactbucket"
@@ -25,3 +26,8 @@

LOCAL_REGION = "local"
# EMD_USE_NO_PROFILE_CHOICE = "Don't set"

LOCAL_DEPLOY_PIPELINE_ZIP_DIR = os.path.join(
os.path.expanduser("~"),
f"emd_{VERSION}"
)
12 changes: 12 additions & 0 deletions src/emd/models/engines.py
@@ -78,6 +78,18 @@ class KtransformersEngine(OpenAICompitableEngine):
)


vllm_texgemma082 = VllmEngine(**{
"engine_type":EngineType.VLLM,
"engine_dockerfile_config": {"VERSION":"v0.8.2"},
"engine_cls":"vllm.vllm_backend.VLLMBackend",
"base_image_host":"public.ecr.aws",
"use_public_ecr":True,
"docker_login_region":"us-east-1",
"default_cli_args": " --max_num_seq 10 --disable-log-stats"
}
)


vllm_mistral_small_engine082 = VllmEngine(
**{
**vllm_engine064.model_dump(),
3 changes: 2 additions & 1 deletion src/emd/models/llms/__init__.py
@@ -5,5 +5,6 @@
llama,
deepseek,
baichuan,
jina
jina,
txgemma
)
54 changes: 27 additions & 27 deletions src/emd/models/llms/deepseek.py
@@ -334,33 +334,33 @@
)
)

Model.register(
dict(
model_id = "deepseek-r1-671b-1.58bit_ollama",
supported_engines=[ollama_deepseek_r1_qwen2d5_1d5b_engine057],
supported_instances=[
g5d48xlarge_instance,
local_instance
],
supported_services=[
sagemaker_service,
sagemaker_async_service,
ecs_service,
local_service
],
supported_frameworks=[
fastapi_framework
],
allow_china_region=False,
ollama_model_id="SIGJNF/deepseek-r1-671b-1.58bit",
# modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
require_huggingface_token=False,
application_scenario="Agent, tool use, translation, summary",
description="The latest series of DeepSeek LLMs for reasoning",
model_type=ModelType.LLM,
model_series=DEEPSEEK_REASONING_MODEL
)
)
# Model.register(
# dict(
# model_id = "deepseek-r1-671b-1.58bit_ollama",
# supported_engines=[ollama_deepseek_r1_qwen2d5_1d5b_engine057],
# supported_instances=[
# g5d48xlarge_instance,
# local_instance
# ],
# supported_services=[
# sagemaker_service,
# sagemaker_async_service,
# ecs_service,
# local_service
# ],
# supported_frameworks=[
# fastapi_framework
# ],
# allow_china_region=False,
# ollama_model_id="SIGJNF/deepseek-r1-671b-1.58bit",
# # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
# require_huggingface_token=False,
# application_scenario="Agent, tool use, translation, summary",
# description="The latest series of DeepSeek LLMs for reasoning",
# model_type=ModelType.LLM,
# model_series=DEEPSEEK_REASONING_MODEL
# )
# )


Model.register(
91 changes: 91 additions & 0 deletions src/emd/models/llms/txgemma.py
@@ -0,0 +1,91 @@
from ..engines import vllm_texgemma082
from .. import Model
from ..frameworks import fastapi_framework
from ..services import (
sagemaker_service,
sagemaker_async_service,
ecs_service,
local_service
)
from emd.models.utils.constants import ModelType
from ..model_series import TXGEMMA_SERIES
from ..instances import (
g5d2xlarge_instance,
g5d4xlarge_instance,
g5d8xlarge_instance,
g5d12xlarge_instance,
g5d16xlarge_instance,
g5d24xlarge_instance,
g5d48xlarge_instance,
g6e2xlarge_instance,
local_instance
)
from ..utils.constants import ModelFilesDownloadSource


Model.register(
dict(
model_id = "txgemma-9b-chat",
supported_engines=[vllm_texgemma082],
supported_instances=[
g5d12xlarge_instance,
g5d24xlarge_instance,
g5d48xlarge_instance,
g5d2xlarge_instance,
g5d4xlarge_instance,
g5d8xlarge_instance,
g5d16xlarge_instance,
local_instance
],
disable_hf_transfer=True,
supported_services=[
sagemaker_service,
sagemaker_async_service,
ecs_service,
local_service
],
supported_frameworks=[
fastapi_framework
],
huggingface_model_id="google/txgemma-9b-chat",
modelscope_model_id="AI-ModelScope/txgemma-9b-chat",
model_files_download_source=ModelFilesDownloadSource.MODELSCOPE,
# require_huggingface_token=True,
application_scenario="LLMs for the development of therapeutics.",
description="The latest series of TxGemma",
model_type=ModelType.LLM,
model_series=TXGEMMA_SERIES,
)
)


Model.register(
dict(
model_id = "txgemma-27b-chat",
supported_engines=[vllm_texgemma082],
supported_instances=[
g5d12xlarge_instance,
g5d24xlarge_instance,
g5d48xlarge_instance,
local_instance
],
disable_hf_transfer=True,
supported_services=[
sagemaker_service,
sagemaker_async_service,
ecs_service,
local_service
],
supported_frameworks=[
fastapi_framework
],
huggingface_model_id="google/txgemma-27b-chat",
modelscope_model_id="AI-ModelScope/txgemma-27b-chat",
model_files_download_source=ModelFilesDownloadSource.MODELSCOPE,
# require_huggingface_token=True,
application_scenario="LLMs for the development of therapeutics.",
description="The latest series of TxGemma",
model_type=ModelType.LLM,
model_series=TXGEMMA_SERIES,
)
)
7 changes: 7 additions & 0 deletions src/emd/models/model_series.py
@@ -97,6 +97,13 @@
reference_link="https://blog.google/technology/developers/gemma-3/"
)

TXGEMMA_SERIES = ModelSeries(
model_series_name=ModelSeriesType.TXGEMMA,
description="TXGemma is a series of open models to accelerate the development of therapeutics.",
reference_link="https://huggingface.co/collections/google/txgemma-release-67dd92e931c857d15e4d1e87"
)


MISTRAL_SERIES = ModelSeries(
model_series_name=ModelSeriesType.MISTRAL,
description="LLMs and VLMs provided by MISTRAL AI.",
2 changes: 1 addition & 1 deletion src/emd/models/services.py
@@ -91,7 +91,7 @@
"ServiceType":"service_type",
"EngineType":"engine_type",
"Region": "region",
"DesiredCapacity": "desired_capacity",
"DesiredCapacity": ValueWithDefault(name="desired_capacity",default=1),
"ContainerCpu": "container_cpu",
"ContainerMemory": "container_memory",
"ContainerGpu":"instance_gpu_num"
1 change: 1 addition & 0 deletions src/emd/models/utils/constants.py
@@ -214,6 +214,7 @@ def get_service_quota_code(cls, instance_type: str):

class ModelSeriesType(ConstantBase):
GEMMA3 = "gemma3"
TXGEMMA = "txgemma"
MISTRAL = "mistral"
QWEN2D5 = "qwen2.5"
GLM4 = "glm4"
4 changes: 4 additions & 0 deletions src/emd/models/vlms/gemma3.py
@@ -10,6 +10,7 @@
from emd.models.utils.constants import ModelType
from ..model_series import Gemma3_SERIES
from ..instances import (
g4dn12xlarge_instance,
g5d2xlarge_instance,
g5d4xlarge_instance,
g5d8xlarge_instance,
@@ -43,6 +44,7 @@
supported_frameworks=[
fastapi_framework
],
allow_china_region = True,
modelscope_model_id="LLM-Research/gemma-3-4b-it",
model_files_download_source=ModelFilesDownloadSource.MODELSCOPE,
# require_huggingface_token=False,
@@ -74,6 +76,7 @@
supported_frameworks=[
fastapi_framework
],
allow_china_region = True,
# huggingface_model_id="google/gemma-3-12b-it",
# require_huggingface_token=False,
modelscope_model_id="LLM-Research/gemma-3-12b-it",
@@ -106,6 +109,7 @@
supported_frameworks=[
fastapi_framework
],
allow_china_region = True,
# huggingface_model_id="unsloth/gemma-3-27b-it",
modelscope_model_id="LLM-Research/gemma-3-27b-it",
model_files_download_source=ModelFilesDownloadSource.MODELSCOPE,
13 changes: 10 additions & 3 deletions src/emd/sdk/deploy.py
@@ -14,8 +14,10 @@
MODEL_DEFAULT_TAG,
MODEL_STACK_NAME_PREFIX,
VERSION,
LOCAL_REGION
LOCAL_REGION,
LOCAL_DEPLOY_PIPELINE_ZIP_DIR
)
from emd.utils.file_utils import mkdir_with_mode
from emd.models import Model
from emd.models.utils.constants import FrameworkType, ServiceType,InstanceType
from emd.models.utils.serialize_utils import dump_extra_params
@@ -318,7 +320,10 @@ def deploy_local(
# region: Optional[str] = None,
# model_stack_name=None,
extra_params=None,
pipeline_zip_local_path=f"/tmp/emd_{VERSION}/pipeline.zip",
pipeline_zip_local_path=os.path.join(
LOCAL_DEPLOY_PIPELINE_ZIP_DIR,
"pipeline.zip"
),
# env_stack_on_failure = "ROLLBACK",
# force_env_stack_update = False,
# waiting_until_deploy_complete = True
@@ -328,7 +333,9 @@
logger.info(f"parsed extra_params: {extra_params}")
extra_params = dump_extra_params(extra_params or {})
dir = os.path.dirname(pipeline_zip_local_path)
os.makedirs(dir, exist_ok=True)

mkdir_with_mode(dir, exist_ok=True,mode=0o777)
# os.makedirs(dir, exist_ok=True,mode=0o777)
with open(pipeline_zip_local_path, "wb") as f:
buffer = ziped_pipeline()
f.write(buffer.read())
6 changes: 6 additions & 0 deletions src/emd/utils/file_utils.py
@@ -0,0 +1,6 @@
import os

def mkdir_with_mode(directory, exist_ok=True, mode=0o777):
    # Temporarily clear the process umask so the requested mode is applied
    # exactly, then restore the previous umask.
    oldmask = os.umask(0)
    os.makedirs(directory, mode=mode, exist_ok=exist_ok)
    os.umask(oldmask)