Skip to content
This repository was archived by the owner on Sep 20, 2025. It is now read-only.

Commit 064952c

Browse files
authored
feat: optimize local deploy (#87)
* merge * merge * add ReaderLM-v2 * add embedding and rerank models provided by jinaai * modify custom docker model
1 parent 981f9ad commit 064952c

26 files changed

+611
-107
lines changed

docs/en/best_deployment_practices.md

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,12 @@ emd deploy --model-id Qwen2.5-14B-Instruct-AWQ --instance-type g4dn.2xlarge --en
5959
}'
6060
```
6161

62-
## Common Troubleshooting
62+
## Environment variables
63+
- `LOCAL_DEPLOY_PORT`: Local deployment port (default: `8080`)
6364

64-
If your deployment fails due to out-of-memory issues, try:
65+
## Common Troubleshooting
6566

66-
- Using a larger instance type
67-
- Reducing max_model_len and max_num_seqs in the engine parameters
68-
- Setting a lower gpu_memory_utilization value (e.g., 0.8 instead of the default)
67+
- If your deployment fails due to out-of-memory issues, try:
68+
- Using a larger instance type
69+
- Reducing max_model_len and max_num_seqs in the engine parameters
70+
- Setting a lower gpu_memory_utilization value (e.g., 0.8 instead of the default)

docs/en/supported_models.md

Lines changed: 55 additions & 47 deletions
Large diffs are not rendered by default.

src/emd/models/custom/custom_docker.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,11 @@
1313
local_instance
1414
)
1515
from ..engines import custom_engine
16+
from ..utils.constants import CUSTOM_DOCKER_MODEL_ID
1617

1718
Model.register(
1819
dict(
19-
model_id = "custom-docker",
20+
model_id = CUSTOM_DOCKER_MODEL_ID,
2021
supported_engines=[custom_engine],
2122
supported_instances=[
2223
g5dxlarge_instance,
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
from .bert_embedding import *
1+
from . import bert_embedding
2+
from . import jina

src/emd/models/embeddings/bert_embedding.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020

2121

2222

23-
2423
Model.register(
2524
dict(
2625
model_id = "bce-embedding-base_v1",

src/emd/models/embeddings/jina.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from .. import Model
2+
from ..engines import huggingface_embedding_engine449
3+
from ..services import sagemaker_service,local_service,ecs_service
4+
from ..frameworks import fastapi_framework
5+
from ..instances import (
6+
g4dn2xlarge_instance,
7+
g5dxlarge_instance,
8+
g5d2xlarge_instance,
9+
g5d4xlarge_instance,
10+
g5d8xlarge_instance,
11+
g5d12xlarge_instance,
12+
g5d16xlarge_instance,
13+
g5d24xlarge_instance,
14+
g5d48xlarge_instance,
15+
local_instance
16+
)
17+
from emd.models.utils.constants import ModelType
18+
from emd.models import ModelSeries
19+
from ..model_series import JINA_SERIES
20+
21+
22+
23+
Model.register(
24+
dict(
25+
model_id = "jina-embeddings-v3",
26+
supported_engines=[huggingface_embedding_engine449],
27+
supported_instances=[
28+
# g4dn2xlarge_instance,
29+
g5dxlarge_instance,
30+
g5d2xlarge_instance,
31+
g5d4xlarge_instance,
32+
g5d8xlarge_instance,
33+
# g5d12xlarge_instance,
34+
g5d16xlarge_instance,
35+
local_instance,
36+
# g5d24xlarge_instance,
37+
# g5d48xlarge_instance,
38+
],
39+
supported_services=[
40+
sagemaker_service,
41+
ecs_service,
42+
local_service
43+
],
44+
supported_frameworks=[
45+
fastapi_framework
46+
],
47+
allow_china_region=True,
48+
huggingface_model_id="jinaai/jina-embeddings-v3",
49+
modelscope_model_id="jinaai/jina-embeddings-v3",
50+
require_huggingface_token=False,
51+
application_scenario="RAG",
52+
model_type=ModelType.EMBEDDING,
53+
model_series=JINA_SERIES
54+
)
55+
)

src/emd/models/engines.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,27 @@ class KtransformersEngine(OpenAICompitableEngine):
377377
"pretrained_tokenizer_init_kwargs":{"trust_remote_code":True}
378378
})
379379

380+
huggingface_embedding_engine449 = HuggingFaceLLMEngine(**{
381+
"engine_type":EngineType.HUGGINGFACE,
382+
"engine_cls":"huggingface.embedding.transformers_embedding_backend.TransformerEmbeddingBackend",
383+
"python_name":"python3",
384+
"base_image_host":"public.ecr.aws",
385+
"use_public_ecr":True,
386+
"docker_login_region":"us-east-1",
387+
"engine_dockerfile_config": {"VERSION":"4.49.0"},
388+
"pretrained_model_init_kwargs":{"trust_remote_code":True,"torch_dtype":"float16"},
389+
})
390+
391+
huggingface_rerank_engine449 = HuggingFaceLLMEngine(**{
392+
"engine_type":EngineType.HUGGINGFACE,
393+
"engine_cls":"huggingface.rerank.transformers_rerank_backend.TransformerRerankBackend",
394+
"python_name":"python3",
395+
"base_image_host":"public.ecr.aws",
396+
"use_public_ecr":True,
397+
"docker_login_region":"us-east-1",
398+
"engine_dockerfile_config": {"VERSION":"4.49.0"},
399+
"pretrained_model_init_kwargs":{"trust_remote_code":True,"torch_dtype":"float16"},
400+
})
380401

381402
comfyui_engine = ComfyuiEngine(**{
382403
"engine_type":EngineType.COMFYUI,

src/emd/models/llms/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@
44
qwen,
55
llama,
66
deepseek,
7-
baichuan
7+
baichuan,
8+
jina
89
)

src/emd/models/llms/jina.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
from .. import Model
2+
from ..engines import (
3+
vllm_qwen2d5_engine064,
4+
vllm_qwen2d5_128k_engine064,
5+
huggingface_llm_engine_4d41d2,
6+
tgi_qwen2d5_72b_engine064,
7+
tgi_qwen2d5_on_inf2,
8+
tgi_qwen2d5_72b_on_inf2,
9+
vllm_qwen2d5_72b_engine064,
10+
vllm_qwq_engine073
11+
)
12+
from ..services import (
13+
sagemaker_service,
14+
sagemaker_async_service,
15+
ecs_service,
16+
local_service
17+
)
18+
from ..frameworks import fastapi_framework
19+
from ..instances import (
20+
g5d2xlarge_instance,
21+
g5d4xlarge_instance,
22+
g5d8xlarge_instance,
23+
g5d12xlarge_instance,
24+
g5d16xlarge_instance,
25+
g5d24xlarge_instance,
26+
g5d48xlarge_instance,
27+
g4dn2xlarge_instance,
28+
g6e2xlarge_instance,
29+
inf2d8xlarge_instance,
30+
inf2d24xlarge_instance,
31+
local_instance
32+
)
33+
from emd.models.utils.constants import ModelType
34+
from emd.models.utils.constants import ModelType
35+
from emd.models import ModelSeries
36+
from ..model_series import JINA_SERIES
37+
38+
39+
Model.register(
40+
dict(
41+
model_id = "ReaderLM-v2",
42+
supported_engines=[
43+
vllm_qwen2d5_engine064,
44+
tgi_qwen2d5_on_inf2
45+
],
46+
supported_instances=[
47+
g4dn2xlarge_instance,
48+
g5d2xlarge_instance,
49+
g5d4xlarge_instance,
50+
g5d8xlarge_instance,
51+
# g5d12xlarge_instance,
52+
g5d16xlarge_instance,
53+
# g5d24xlarge_instance,
54+
# g5d48xlarge_instance,
55+
inf2d8xlarge_instance,
56+
local_instance
57+
],
58+
supported_services=[
59+
sagemaker_service,
60+
sagemaker_async_service,
61+
ecs_service,
62+
local_service
63+
],
64+
supported_frameworks=[
65+
fastapi_framework
66+
],
67+
allow_china_region=True,
68+
huggingface_model_id="jinaai/ReaderLM-v2",
69+
modelscope_model_id="jinaai/ReaderLM-v2",
70+
require_huggingface_token=False,
71+
application_scenario="Html information extraction",
72+
description="ReaderLM-v2 is a 1.5B parameter language model that converts raw HTML into beautifully formatted markdown or JSON with superior accuracy and improved longer context handling. Supporting multiple languages (29 in total), ReaderLM-v2 is specialized for tasks involving HTML parsing, transformation, and text extraction.",
73+
model_type=ModelType.LLM,
74+
model_series=JINA_SERIES,
75+
)
76+
)

src/emd/models/model_series.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,12 @@
4040
reference_link="https://huggingface.co/maidalun1020/bce-embedding-base_v1"
4141
)
4242

43+
JINA_SERIES = ModelSeries(
44+
model_series_name=ModelSeriesType.JINA,
45+
description="Search foundation models: embeddings, rerankers, small LMs for better search",
46+
reference_link="https://huggingface.co/jinaai"
47+
)
48+
4349

4450
QWEN2VL_SERIES = ModelSeries(
4551
model_series_name=ModelSeriesType.QWEN2VL,

0 commit comments

Comments
 (0)