Skip to content
This repository was archived by the owner on Sep 20, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion src/emd/cfn/sagemaker_realtime/template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ Parameters:
Type: Number
Description: The target value for the autoscaling
Default: 10
SageMakerEndpointName:
Type: String
Description: The name of the SageMaker Endpoint
Default: "noname"

Conditions:
UseDefaultEndpointName: !Equals [!Ref SageMakerEndpointName, "noname"]

Resources:
ExecutionRole:
Expand Down Expand Up @@ -90,7 +97,10 @@ Resources:
SageMakerEndpoint:
Type: AWS::SageMaker::Endpoint
Properties:
EndpointName: !Sub '${AWS::StackName}-endpoint'
EndpointName: !If
- UseDefaultEndpointName
- !Sub '${AWS::StackName}-endpoint'
- !Ref SageMakerEndpointName
EndpointConfigName: !GetAtt SageMakerEndpointConfig.EndpointConfigName


Expand Down
23 changes: 22 additions & 1 deletion src/emd/commands/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,13 +234,34 @@ def deploy(
] = False,
only_allow_local_deploy: Annotated[
Optional[bool], typer.Option("--only-allow-local-deploy", help="only allow local instance")
] = False
] = False,
dockerfile_local_path: Annotated[
str, typer.Option("--dockerfile-local-path", help="Your custom Dockerfile path for building the model image, all files must be in the same directory")
] = None,
):
if only_allow_local_deploy:
allow_local_deploy = True
region = LOCAL_REGION
else:
region = get_current_region()

if dockerfile_local_path:
response = sdk_deploy(
model_id='custom-docker',
model_tag=f"{model_id}-{model_tag}",
instance_type=instance_type,
engine_type='custom',
framework_type='custom',
service_type='sagemaker_realtime',
region=region,
extra_params = extra_params,
env_stack_on_failure = "ROLLBACK",
force_env_stack_update = force_update_env_stack,
waiting_until_deploy_complete=True,
dockerfile_local_path=dockerfile_local_path,
)
return response

vpc_id = None
# ask model id
model_id = ask_model_id(region,model_id=model_id)
Expand Down
2 changes: 2 additions & 0 deletions src/emd/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
asr,
embeddings,
reranks,
custom,
)
# text-2-image,text-2-video

Expand All @@ -27,3 +28,4 @@
from . import services
from . import model_series
from . import frameworks

6 changes: 3 additions & 3 deletions src/emd/models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def check_inf2_instance(cls,instance_type:str):
class Engine(ModelBase):
engine_type: EngineType
engine_dockerfile_config: Union[dict,None] = Field(default_factory=dict)
engine_cls: str
engine_cls: Union[str,None] = None
dockerfile_name: str = "Dockerfile"
base_image_account_id: Union[str,None] = None
base_image_host: Union[str,None] = None
Expand Down Expand Up @@ -181,15 +181,15 @@ class Model(ModelBase,Generic[T]):
require_huggingface_token: bool = False
modelscope_model_id: str = ""
require_modelscope_token: bool = False
application_scenario: str
application_scenario: str = ""
description: str = ""
model_type: ModelType = ModelType.LLM
need_prepare_model: bool = True
# download model files directly from s3
model_files_s3_path: Union[str,None] = None
model_files_local_path: Union[str,None] = None
model_files_download_source: ModelFilesDownloadSource = ModelFilesDownloadSource.AUTO
model_series: ModelSeries
model_series: ModelSeries = None
executable_config: Union[ExecutableConfig,None] = None

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion src/emd/models/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
"EngineType":"engine_type",
"Region":"region",
"MaxCapacity": ValueWithDefault(name="max_capacity",default=1),
"AutoScalingTargetValue": ValueWithDefault(name="auto_scaling_target_value",default=10)
"AutoScalingTargetValue": ValueWithDefault(name="auto_scaling_target_value",default=10),
"SageMakerEndpointName": ValueWithDefault(name="sagemaker_endpoint_name",default="noname")
},
name = "Amazon SageMaker AI Real-time inference",
service_type=ServiceType.SAGEMAKER,
Expand Down
1 change: 1 addition & 0 deletions src/emd/models/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class EngineType(ConstantBase):
LLAMA_CPP = "llama.cpp"
TGI = "tgi"
LMDEPLOY = 'lmdeploy'
CUSTOM = "custom"
KTRANFORMERS = 'ktransformers'

# @classmethod
Expand Down
83 changes: 61 additions & 22 deletions src/emd/sdk/deploy.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import json
import os
import io
import time
from typing import Optional

import boto3
import zipfile
import sys

from emd.constants import (
CODEPIPELINE_NAME,
ENV_STACK_NAME,
MODEL_DEFAULT_TAG,
MODEL_STACK_NAME_PREFIX,
VERSION,
LOCAL_REGION
)
Expand Down Expand Up @@ -57,11 +60,15 @@ def prepare_deploy(
service_type=None,
instance_type=None,
region=None,
dockerfile_local_path=None
):
model: Model = Model.get_model(model_id)
model_stack_name = model.get_model_stack_name_prefix(
model_id, model_tag=model_tag
)
if dockerfile_local_path:
model_stack_name = f"{MODEL_STACK_NAME_PREFIX}-{model_id}-{model_tag}"
else:
model: Model = Model.get_model(model_id)
model_stack_name = model.get_model_stack_name_prefix(
model_id, model_tag=model_tag
)
# check if model_id is inprogress in pipeline execution
if check_stack_exists(model_stack_name):
raise RuntimeError(
Expand Down Expand Up @@ -113,6 +120,7 @@ def deploy(
env_stack_on_failure="ROLLBACK",
force_env_stack_update=False,
waiting_until_deploy_complete=True,
dockerfile_local_path=None,
) -> dict:
# Check if AWS environment is properly configured
if service_type == ServiceType.SAGEMAKER_OLDER:
Expand All @@ -132,9 +140,13 @@ def deploy(
service_type=service_type,
instance_type=instance_type,
region=region,
dockerfile_local_path=dockerfile_local_path
)
# logger.info("Checking AWS environment...")
extra_params = extra_params or {}
if isinstance(extra_params, str):
extra_params = json.loads(extra_params)
else:
extra_params = extra_params or {}
if model_stack_name is None:
# stack_name_suffix = random_suffix()
model_stack_name = (
Expand Down Expand Up @@ -168,23 +180,38 @@ def deploy(
pipeline_name = pipeline_resources[0]["PhysicalResourceId"]
logger.info("AWS environment is properly configured.")

model = Model.get_model(model_id)

# check instance,service,engine
supported_instances = model.supported_instance_types
assert (
instance_type in supported_instances
), f"Instance type {instance_type} is not supported for model {model_id}"

supported_engines = model.supported_engine_types
assert (
engine_type in supported_engines
), f"Engine type {engine_type} is not supported for model {model_id}"

supported_services = model.supported_service_types
assert (
service_type in supported_services
), f"Service type {service_type} is not supported for model {model_id}"
if dockerfile_local_path:
if not os.path.exists(dockerfile_local_path):
raise FileNotFoundError(f"Dockerfile path {dockerfile_local_path} does not exist.")

# Create a zip file of the dockerfile directory
zip_buffer = zipped_dockerfile(dockerfile_local_path)

# Upload the zip file to S3
s3 = boto3.client('s3', region_name=region)
s3_key = f"emd_models/{model_id}-{model_tag}.zip"
s3.upload_fileobj(zip_buffer, bucket_name, s3_key)
extra_params["model_params"] = extra_params.get("model_params", {})
extra_params["model_params"]["custom_dockerfile_path"] = f"s3://{bucket_name}/{s3_key}"
logger.info(f"extra_params: {extra_params}")
else:
model = Model.get_model(model_id)

# check instance,service,engine
supported_instances = model.supported_instance_types
assert (
instance_type in supported_instances
), f"Instance type {instance_type} is not supported for model {model_id}"

supported_engines = model.supported_engine_types
assert (
engine_type in supported_engines
), f"Engine type {engine_type} is not supported for model {model_id}"

supported_services = model.supported_service_types
assert (
service_type in supported_services
), f"Service type {service_type} is not supported for model {model_id}"

# Start pipeline execution
codepipeline = boto3.client("codepipeline", region_name=region)
Expand Down Expand Up @@ -329,3 +356,15 @@ def deploy_local(
assert (
os.system(pipeline_cmd) == 0
), f"run pipeline cmd failed: {pipeline_cmd}"

def zipped_dockerfile(dockerfile_local_path):
    """Zip the directory containing *dockerfile_local_path* into an in-memory buffer.

    Every file under the Dockerfile's directory (recursively) is added to the
    archive with paths relative to that directory, so the Dockerfile itself
    appears at the archive root.

    Args:
        dockerfile_local_path: Path to the user's Dockerfile. All files in the
            same directory (and subdirectories) are included in the archive.

    Returns:
        io.BytesIO: An in-memory zip archive, rewound to position 0 so it can
        be passed directly to e.g. ``s3.upload_fileobj``.
    """
    zip_buffer = io.BytesIO()
    # `or "."` handles a bare filename such as "Dockerfile": its dirname is "",
    # and os.walk("") silently yields nothing (walk errors are ignored by
    # default), which would produce a valid but empty archive.
    dockerfile_dir = os.path.dirname(dockerfile_local_path) or "."
    # 'w' (not 'a'): we are always creating a fresh archive in a new buffer.
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(dockerfile_dir):
            for file_name in files:
                file_path = os.path.join(root, file_name)
                # Store paths relative to the Dockerfile's directory.
                arcname = os.path.relpath(file_path, dockerfile_dir)
                zipf.write(file_path, arcname)
    zip_buffer.seek(0)
    return zip_buffer
Loading
Loading