
Commit 4045ba8

Disable prefix caching when serving a VLM model (InternLM#3990)
* Warn that VLM deployment does not support prefix caching
* Remove the restriction on the NumPy version
* Fix according to reviewer comments
1 parent: a96391b

7 files changed (+11, -13 lines)

lmdeploy/api.py

Lines changed: 3 additions & 8 deletions
@@ -68,16 +68,11 @@ def pipeline(model_path: str,
         if backend_config is not None else None
     model_path = get_model(model_path, download_dir, revision)

-    task, pipeline_class = get_task(model_path)
-    if task == 'vlm':
-        if backend_config and backend_config.enable_prefix_caching:
-            backend_config.enable_prefix_caching = False
-            logger.warning('VLM does not support prefix caching.')
-
-    if type(backend_config) is not PytorchEngineConfig:
+    _, pipeline_class = get_task(model_path)
+    if not isinstance(backend_config, PytorchEngineConfig):
         # set auto backend mode
         backend_config = autoget_backend_config(model_path, backend_config)
-    backend = 'pytorch' if type(backend_config) is PytorchEngineConfig else 'turbomind'
+    backend = 'pytorch' if isinstance(backend_config, PytorchEngineConfig) else 'turbomind'
     logger.info(f'Using {backend} engine')

     return pipeline_class(model_path,
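
The switch from an exact type() check to isinstance() matters when backend_config is an instance of a subclass of PytorchEngineConfig. Below is a minimal sketch, not part of the commit, using hypothetical stand-in classes to show the difference:

class PytorchEngineConfig:                              # stand-in for lmdeploy's config class
    pass

class NpuPytorchEngineConfig(PytorchEngineConfig):      # hypothetical subclass
    pass

cfg = NpuPytorchEngineConfig()
print(type(cfg) is PytorchEngineConfig)       # False: the old check would fall through to backend auto-detection
print(isinstance(cfg, PytorchEngineConfig))   # True:  the new check treats the subclass as a PyTorch config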

lmdeploy/serve/vl_async_engine.py

Lines changed: 3 additions & 0 deletions
@@ -28,6 +28,9 @@ def __init__(self,
                  **kwargs) -> None:
         if backend == 'pytorch':
             try_import_deeplink(backend_config.device_type)
+        if backend_config and backend_config.enable_prefix_caching:
+            backend_config.enable_prefix_caching = False
+            logger.warning('Prefix caching is disabled since LMDeploy hasn\'t supported it on VL models yet')
         self.vl_encoder = ImageEncoder(model_path, backend, vision_config, backend_config=backend_config)
         super().__init__(model_path, backend=backend, backend_config=backend_config, **kwargs)
         if self.model_name == 'base':
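
With the check moved into VLAsyncEngine, a caller no longer has to strip the flag before building a VLM pipeline; the engine resets it and logs a warning. A hedged usage sketch, assuming the public lmdeploy API and an example VLM model id:

from lmdeploy import pipeline, PytorchEngineConfig

# Example only: any VLM supported by LMDeploy would behave the same way.
config = PytorchEngineConfig(enable_prefix_caching=True)
pipe = pipeline('OpenGVLab/InternVL2-8B', backend_config=config)
# VLAsyncEngine emits the warning above and config.enable_prefix_caching is now False.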

requirements/runtime_ascend.txt

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ einops
 fastapi
 fire
 mmengine-lite
-numpy<2.0.0
+numpy
 openai
 outlines<0.1.0
 partial_json_parser

requirements/runtime_camb.txt

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ einops
 fastapi
 fire
 mmengine-lite
-numpy<2.0.0
+numpy
 openai
 outlines<0.1.0
 partial_json_parser

requirements/runtime_cuda.txt

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ einops
 fastapi
 fire
 mmengine-lite
-numpy<2.0.0
+numpy
 openai
 outlines
 partial_json_parser

requirements/runtime_maca.txt

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ einops
 fastapi
 fire
 mmengine-lite
-numpy<2.0.0
+numpy
 openai
 outlines<0.1.0
 partial_json_parser

requirements/runtime_rocm.txt

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ einops
 fastapi
 fire
 mmengine-lite
-numpy<2.0.0
+numpy
 openai
 outlines
 partial_json_parser
