merge

11zhouxuan · 11zhouxuan · commit 0a5e2f3dbf24 · 2025-08-18T05:24:39.000Z
diff --git a/src/emd/commands/deploy.py b/src/emd/commands/deploy.py
@@ -253,8 +253,9 @@ def deploy(
     else:
         region = get_current_region()
 
-    if region != LOCAL_REGION:
-        smart_bootstrap_manager.auto_bootstrap_if_needed(region)
+    # Bootstrap only when targeting a non-local region and local-only deployment is not enforced
+    if region != LOCAL_REGION and not only_allow_local_deploy:
+        smart_bootstrap_manager.auto_bootstrap_if_needed(region, skip_confirm)
 
     if dockerfile_local_path:
         response = sdk_deploy(
@@ -417,7 +418,7 @@ def deploy(
                 support_gpu_num = support_gpu_num or gpu_num
                 default_gpus_str = ",".join([str(i) for i in range(min(gpu_num,support_gpu_num))])
                 gpus_to_deploy = questionary.text(
-                        "input the local gpu ids to deploy the model (e.g. 0,1,2):",
+                        "Please specify the local GPU IDs for model deployment (e.g., 0,1,2):",
                         default=f"{default_gpus_str}"
                     ).ask()
                 os.environ['CUDA_VISIBLE_DEVICES']=gpus_to_deploy
diff --git a/src/emd/models/engines.py b/src/emd/models/engines.py
@@ -558,6 +558,16 @@ class KtransformersEngine(OpenAICompitableEngine):
             "default_cli_args": " --max_new_tokens 2048",
 })
 
+# vLLM v0.9.1 engine for dots.ocr (inherits vllm_engine064 settings; uses Dockerfile_dots_ocr)
+vllm_dots_ocr_engine091 = VllmEngine(**{
+    **vllm_engine064.model_dump(),
+    "engine_dockerfile_config": {"VERSION":"v0.9.1"},
+    "dockerfile_name": "Dockerfile_dots_ocr",
+    "environment_variables": "export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True",
+    "default_cli_args": " --trust-remote-code --chat-template-content-format string --gpu-memory-utilization 0.95 --max_model_len 8192 --disable-log-stats --max_num_seq 5 --enforce-eager",
+    "description": "VLLM v0.9.1 engine for dots.ocr multilingual document parsing model with flash-attn support and eager execution for custom models"
+})
+
 custom_engine = Engine(**{
             "engine_type":EngineType.CUSTOM,
 })
diff --git a/src/emd/models/model_series.py b/src/emd/models/model_series.py
@@ -163,3 +163,9 @@
     description="GPT-OSS (GPT Open Source Software) is OpenAI's initiative to provide open-source AI models, making advanced language models accessible to developers, researchers, and organizations for building, experimenting, and scaling generative AI applications. These models are designed to foster innovation and collaboration in the open-source AI community.",
     reference_link="https://openai.com/index/introducing-gpt-oss/"
 )
+
+DOTS_OCR_SERIES = ModelSeries(
+    model_series_name=ModelSeriesType.DOTS_OCR,
+    description="dots.ocr is a powerful, multilingual document parser that unifies layout detection and content recognition within a single vision-language model while maintaining good reading order. Despite its compact 1.7B-parameter LLM foundation, it achieves state-of-the-art(SOTA) performance on text, tables, and reading order tasks with multilingual support for over 100 languages.",
+    reference_link="https://github.com/rednote-hilab/dots.ocr"
+)
diff --git a/src/emd/models/utils/constants.py b/src/emd/models/utils/constants.py
@@ -236,3 +236,4 @@ class ModelSeriesType(ConstantBase):
     DEEPSEEK_v3 = "deepseek v3"
     BAICHUAN = "baichuan"
     GPTOSS = "gptoss"
+    DOTS_OCR = "dots_ocr"
diff --git a/src/emd/models/vlms/__init__.py b/src/emd/models/vlms/__init__.py
@@ -2,3 +2,4 @@
 from . import internvl
 from . import gemma3
 from . import mistral
+from . import dots_ocr
diff --git a/src/emd/models/vlms/dots_ocr.py b/src/emd/models/vlms/dots_ocr.py
@@ -0,0 +1,42 @@
+from .. import Model
+from ..model_series import DOTS_OCR_SERIES
+from ..engines import vllm_dots_ocr_engine091, huggingface_llm_engine_4d41d2
+from ..instances import (
+    g5dxlarge_instance,
+    g5d2xlarge_instance,
+    g5d4xlarge_instance,
+    g5d8xlarge_instance,
+    local_instance
+)
+from ..services import (
+    sagemaker_service,
+    sagemaker_async_service,
+    ecs_service,
+    local_service
+)
+from ..frameworks import fastapi_framework
+from emd.models.utils.constants import ModelType
+
+Model.register(
+    dict(
+        model_id="dotsocr",
+        model_type=ModelType.VLM,
+        description="dots.ocr is a powerful, multilingual document parser that unifies layout detection and content recognition within a single vision-language model. Built on a compact 1.7B-parameter LLM foundation, it achieves state-of-the-art performance on text, tables, and reading order tasks with support for over 100 languages including English, Chinese, and many others.",
+        application_scenario="multilingual document layout parsing, OCR, document understanding, table extraction, formula recognition, reading order detection",
+        supported_engines=[vllm_dots_ocr_engine091],
+        supported_instances=[
+            g5dxlarge_instance, g5d2xlarge_instance, g5d4xlarge_instance, g5d8xlarge_instance, local_instance
+        ],
+        supported_services=[
+            sagemaker_service, sagemaker_async_service, ecs_service, local_service
+        ],
+        supported_frameworks=[
+            fastapi_framework
+        ],
+        allow_china_region=True,
+        huggingface_model_id="rednote-hilab/dots.ocr",
+        modelscope_model_id="rednote-hilab/dots.ocr",
+        require_huggingface_token=False,
+        model_series=DOTS_OCR_SERIES,
+    )
+)
diff --git a/src/emd/utils/smart_bootstrap.py b/src/emd/utils/smart_bootstrap.py
@@ -121,7 +121,7 @@ def show_version_mismatch_warning(self, current_version: str, deployed_version:
         self.console.print()  # Empty line for spacing
 
 
-    def auto_bootstrap_if_needed(self, region: str) -> bool:
+    def auto_bootstrap_if_needed(self, region: str, skip_confirm: bool = False) -> bool:
         """
         Automatically run bootstrap if needed based on comprehensive infrastructure check
         Returns: True if bootstrap was run, False otherwise
@@ -145,18 +145,19 @@ def auto_bootstrap_if_needed(self, region: str) -> bool:
             # Infrastructure missing/incomplete OR version mismatch - ask for confirmation
             self.show_bootstrap_notification(current_version, deployed_version)
 
-            # Ask for user confirmation
-            if deployed_version:
-                # Update scenario
-                confirm_msg = f"Update infrastructure from {deployed_version} to {current_version}?"
-            else:
-                # Initialize scenario
-                confirm_msg = f"Initialize EMD infrastructure for version {current_version}?"
-
-            if not typer.confirm(confirm_msg, default=False):
-                self.console.print("[yellow]Bootstrap cancelled. Infrastructure will not be updated.[/yellow]")
-                self.console.print("[red]Deployment cannot proceed without compatible infrastructure.[/red]")
-                raise typer.Exit(1)
+            # Ask for user confirmation unless skip_confirm is True
+            if not skip_confirm:
+                if deployed_version:
+                    # Update scenario
+                    confirm_msg = f"Update infrastructure from {deployed_version} to {current_version}?"
+                else:
+                    # Initialize scenario
+                    confirm_msg = f"Initialize EMD infrastructure for version {current_version}?"
+
+                if not typer.confirm(confirm_msg, default=False):
+                    self.console.print("[yellow]Bootstrap cancelled. Infrastructure will not be updated.[/yellow]")
+                    self.console.print("[red]Deployment cannot proceed without compatible infrastructure.[/red]")
+                    raise typer.Exit(1)
 
             # User confirmed - proceed with bootstrap
             try:
diff --git a/src/pipeline/backend/comfyui/build_and_push_image.sh b/src/pipeline/backend/comfyui/build_and_push_image.sh
@@ -49,6 +49,9 @@ function build_and_push_image() {
     aws ecr get-login-password --region "${region}" | \
 	docker login --username AWS --password-stdin "${ecr_repo_uri}"
 
+    # Replace the {{ACCOUNT_ID}} placeholder in the ECR policy file (edited in place) with the current account ID
+    sed -i "s/{{ACCOUNT_ID}}/${account}/g" "${policy_file}"
+
     aws ecr set-repository-policy \
         --repository-name "${image_name}" \
         --policy-text "file://${policy_file}" \
diff --git a/src/pipeline/backend/comfyui/ecr-policy.json b/src/pipeline/backend/comfyui/ecr-policy.json
@@ -1,20 +1,31 @@
 {
     "Version": "2008-10-17",
     "Statement": [
-      {
-        "Sid": "new statement",
-        "Effect": "Allow",
-        "Principal": "*",
-        "Action": [
-          "ecr: CompleteLayerUpload",
-          "ecr: InitiateLayerUpload",
-          "ecr: ListImages",
-          "ecr:BatchCheckLayerAvailability",
-          "ecr:BatchGetImage",
-          "ecr:DescribeImages",
-          "ecr:DescribeRepositories",
-          "ecr:GetDownloadUrlForLayer"
-        ]
-      }
+        {
+            "Sid": "AllowAccountUserAccess",
+            "Effect": "Allow",
+            "Principal": {
+                "AWS": "arn:aws:iam::{{ACCOUNT_ID}}:root"
+            },
+            "Action": [
+                "ecr:BatchCheckLayerAvailability",
+                "ecr:BatchGetImage",
+                "ecr:GetDownloadUrlForLayer",
+                "ecr:DescribeImages",
+                "ecr:DescribeRepositories"
+            ]
+        },
+        {
+            "Sid": "AllowSageMakerService",
+            "Effect": "Allow",
+            "Principal": {
+                "Service": "sagemaker.amazonaws.com"
+            },
+            "Action": [
+                "ecr:BatchCheckLayerAvailability",
+                "ecr:BatchGetImage",
+                "ecr:GetDownloadUrlForLayer"
+            ]
+        }
     ]
 }
diff --git a/src/pipeline/backend/vllm/Dockerfile_dots_ocr b/src/pipeline/backend/vllm/Dockerfile_dots_ocr
@@ -0,0 +1,18 @@
+FROM public.ecr.aws/aws-gcr-solutions/dmaa-vllm/vllm-openai:{{VERSION}} AS vllm-base
+
+WORKDIR /opt/ml/code
+
+COPY ./backend/vllm/requirements_dots_ocr.txt /opt/ml/code/
+
+RUN python3 -m pip install -r /opt/ml/code/requirements_dots_ocr.txt
+
+ENV PYTHONPATH="./emd_models:${PYTHONPATH}"
+
+# Patch the vllm launcher script to import dotsocr.modeling_dots_ocr_vllm; see https://github.com/rednote-hilab/dots.ocr/blob/master/README.md#vllm-inference
+RUN sed -i '/^from vllm\.entrypoints\.cli\.main import main$/a\
+import dotsocr.modeling_dots_ocr_vllm' `which vllm`
+
+EXPOSE 8080
+WORKDIR /opt/ml/code
+
+ENTRYPOINT ["/usr/bin/serve"]
diff --git a/src/pipeline/backend/vllm/requirements_dots_ocr.txt b/src/pipeline/backend/vllm/requirements_dots_ocr.txt
@@ -0,0 +1,10 @@
+gradio
+gradio_image_annotation
+PyMuPDF
+openai
+qwen_vl_utils
+transformers==4.51.3
+huggingface_hub
+modelscope
+flash-attn==2.8.0.post2
+accelerate
diff --git a/src/pipeline/deploy/build_and_push_image.py b/src/pipeline/deploy/build_and_push_image.py
@@ -262,10 +262,10 @@ def run(
                 is_build_success = True
                 break
             except Exception as e:
-                logger.error(f"docker build errorr: {e}")
+                logger.error(f"Docker build failed: {e}")
 
         if not is_build_success:
-            raise RuntimeError("docker build errorr")
+            raise RuntimeError("Docker build failed")
 
 
         # build_image_script = (
@@ -299,19 +299,30 @@ def run(
             "Version": "2008-10-17",
             "Statement": [
                 {
-                    "Sid": "new statement",
+                    "Sid": "AllowAccountUserAccess",
                     "Effect": "Allow",
-                    "Principal": "*",
+                    "Principal": {
+                        "AWS": f"arn:aws:iam::{push_image_account_id}:root"
+                    },
                     "Action": [
-                        "ecr: CompleteLayerUpload",
-                        "ecr: InitiateLayerUpload",
-                        "ecr: ListImages",
                         "ecr:BatchCheckLayerAvailability",
                         "ecr:BatchGetImage",
-                        "ecr:DescribeImages",
-                        "ecr:DescribeRepositories",
                         "ecr:GetDownloadUrlForLayer",
-                    ],
+                        "ecr:DescribeImages",
+                        "ecr:DescribeRepositories"
+                    ]
+                },
+                {
+                    "Sid": "AllowSageMakerService",
+                    "Effect": "Allow",
+                    "Principal": {
+                        "Service": "sagemaker.amazonaws.com"
+                    },
+                    "Action": [
+                        "ecr:BatchCheckLayerAvailability",
+                        "ecr:BatchGetImage",
+                        "ecr:GetDownloadUrlForLayer"
+                    ]
                 }
             ],
         }
@@ -437,10 +448,10 @@ def run_custom(
                 is_build_success = True
                 break
             except Exception as e:
-                logger.error(f"docker build errorr: {e}")
+                logger.error(f"Docker build failed: {e}")
 
         if not is_build_success:
-            raise RuntimeError("docker build errorr")
+            raise RuntimeError("Docker build failed")
 
 
         # build_image_script = (
@@ -474,19 +485,30 @@ def run_custom(
             "Version": "2008-10-17",
             "Statement": [
                 {
-                    "Sid": "new statement",
+                    "Sid": "AllowAccountUserAccess",
                     "Effect": "Allow",
-                    "Principal": "*",
+                    "Principal": {
+                        "AWS": f"arn:aws:iam::{push_image_account_id}:root"
+                    },
                     "Action": [
-                        "ecr: CompleteLayerUpload",
-                        "ecr: InitiateLayerUpload",
-                        "ecr: ListImages",
                         "ecr:BatchCheckLayerAvailability",
                         "ecr:BatchGetImage",
-                        "ecr:DescribeImages",
-                        "ecr:DescribeRepositories",
                         "ecr:GetDownloadUrlForLayer",
-                    ],
+                        "ecr:DescribeImages",
+                        "ecr:DescribeRepositories"
+                    ]
+                },
+                {
+                    "Sid": "AllowSageMakerService",
+                    "Effect": "Allow",
+                    "Principal": {
+                        "Service": "sagemaker.amazonaws.com"
+                    },
+                    "Action": [
+                        "ecr:BatchCheckLayerAvailability",
+                        "ecr:BatchGetImage",
+                        "ecr:GetDownloadUrlForLayer"
+                    ]
                 }
             ],
         }
diff --git a/src/pipeline/pipeline.py b/src/pipeline/pipeline.py