"""Register the ``dotsocr`` model entry with the ``Model`` registry.

Importing this module has one side effect: a single ``Model.register``
call describing the dots.ocr vision-language model (engine, instance
types, services, framework, and model-hub identifiers).
"""

from .. import Model
from ..model_series import DOTS_OCR_SERIES
# NOTE(review): ``huggingface_llm_engine_4d41d2`` is imported but is not listed
# in ``supported_engines`` below. It is kept so the module's import surface is
# unchanged -- confirm whether it should be offered as an engine or dropped.
from ..engines import vllm_dots_ocr_engine091, huggingface_llm_engine_4d41d2
from ..instances import (
    g5dxlarge_instance,
    g5d2xlarge_instance,
    g5d4xlarge_instance,
    g5d8xlarge_instance,
    local_instance,
)
from ..services import (
    sagemaker_service,
    sagemaker_async_service,
    ecs_service,
    local_service,
)
from ..frameworks import fastapi_framework
from emd.models.utils.constants import ModelType

Model.register(
    dict(
        model_id="dotsocr",
        model_type=ModelType.VLM,
        description="dots.ocr is a powerful, multilingual document parser that unifies layout detection and content recognition within a single vision-language model. Built on a compact 1.7B-parameter LLM foundation, it achieves state-of-the-art performance on text, tables, and reading order tasks with support for over 100 languages including English, Chinese, and many others.",
        application_scenario="multilingual document layout parsing, OCR, document understanding, table extraction, formula recognition, reading order detection",
        # Only the vLLM-based engine is offered for this model.
        supported_engines=[vllm_dots_ocr_engine091],
        supported_instances=[
            g5dxlarge_instance,
            g5d2xlarge_instance,
            g5d4xlarge_instance,
            g5d8xlarge_instance,
            local_instance,
        ],
        supported_services=[
            sagemaker_service,
            sagemaker_async_service,
            ecs_service,
            local_service,
        ],
        supported_frameworks=[
            fastapi_framework,
        ],
        allow_china_region=True,
        # The same repository id is used for both model hubs.
        huggingface_model_id="rednote-hilab/dots.ocr",
        modelscope_model_id="rednote-hilab/dots.ocr",
        require_huggingface_token=False,
        model_series=DOTS_OCR_SERIES,
    )
)