Skip to content

Commit 06e2de2

Browse files
authored
Merge branch 'main' into batch_invariant_b200
2 parents 6ad960b + 1fb4217 commit 06e2de2

File tree

10 files changed

+667
-330
lines changed

10 files changed

+667
-330
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,3 +221,6 @@ csrc/moe/marlin_moe_wna16/kernel_*
221221

222222
# Ignore ep_kernels_workspace folder
223223
ep_kernels_workspace/
224+
225+
# Allow tracked library source folders under submodules (e.g., benchmarks/lib)
226+
!vllm/benchmarks/lib/

vllm/config/model.py

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -168,12 +168,6 @@ class ModelConfig:
168168
"""The specific revision to use for the model code on the Hugging Face Hub.
169169
It can be a branch name, a tag name, or a commit id. If unspecified, will
170170
use the default version."""
171-
rope_scaling: dict[str, Any] = field(default_factory=dict)
172-
"""RoPE scaling configuration. For example,
173-
`{"rope_type":"dynamic","factor":2.0}`."""
174-
rope_theta: float | None = None
175-
"""RoPE theta. Use with `rope_scaling`. In some cases, changing the RoPE
176-
theta improves the performance of the scaled model."""
177171
tokenizer_revision: str | None = None
178172
"""The specific revision to use for the tokenizer on the Hugging Face Hub.
179173
It can be a branch name, a tag name, or a commit id. If unspecified, will
@@ -338,8 +332,6 @@ def compute_hash(self) -> str:
338332
factors.append(self.generation_config)
339333
factors.append(self.model_impl)
340334
factors.append(self.override_generation_config)
341-
factors.append(self.rope_scaling)
342-
factors.append(self.rope_theta)
343335
factors.append(self.video_pruning_rate)
344336
factors.append(self.enable_prompt_embeds)
345337

@@ -481,25 +473,6 @@ def __post_init__(
481473
hf_overrides_kw[key] = value
482474
hf_overrides_fn = None
483475

484-
if self.rope_scaling:
485-
hf_override: dict[str, Any] = {"rope_scaling": self.rope_scaling}
486-
hf_overrides_kw.update(hf_override)
487-
hf_overrides_str = json.dumps(hf_overrides_kw)
488-
msg = (
489-
"`--rope-scaling` will be removed in a future release. "
490-
f"'Please instead use `--hf-overrides '{hf_overrides_str}'`"
491-
)
492-
warnings.warn(DeprecationWarning(msg), stacklevel=2)
493-
if self.rope_theta is not None:
494-
hf_override = {"rope_theta": self.rope_theta}
495-
hf_overrides_kw.update(hf_override)
496-
hf_overrides_str = json.dumps(hf_overrides_kw)
497-
msg = (
498-
"`--rope-theta` will be removed in a future release. "
499-
f"'Please instead use `--hf-overrides '{hf_overrides_str}'`"
500-
)
501-
warnings.warn(DeprecationWarning(msg), stacklevel=2)
502-
503476
self.maybe_pull_model_tokenizer_for_runai(self.model, self.tokenizer)
504477

505478
if (

vllm/engine/arg_utils.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -438,8 +438,6 @@ class EngineArgs:
438438
aggregate_engine_logging: bool = False
439439
revision: str | None = ModelConfig.revision
440440
code_revision: str | None = ModelConfig.code_revision
441-
rope_scaling: dict[str, Any] = get_field(ModelConfig, "rope_scaling")
442-
rope_theta: float | None = ModelConfig.rope_theta
443441
hf_token: bool | str | None = ModelConfig.hf_token
444442
hf_overrides: HfOverrides = get_field(ModelConfig, "hf_overrides")
445443
tokenizer_revision: str | None = ModelConfig.tokenizer_revision
@@ -617,8 +615,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
617615
)
618616
model_group.add_argument("--revision", **model_kwargs["revision"])
619617
model_group.add_argument("--code-revision", **model_kwargs["code_revision"])
620-
model_group.add_argument("--rope-scaling", **model_kwargs["rope_scaling"])
621-
model_group.add_argument("--rope-theta", **model_kwargs["rope_theta"])
622618
model_group.add_argument(
623619
"--tokenizer-revision", **model_kwargs["tokenizer_revision"]
624620
)
@@ -1184,8 +1180,6 @@ def create_model_config(self) -> ModelConfig:
11841180
seed=self.seed,
11851181
revision=self.revision,
11861182
code_revision=self.code_revision,
1187-
rope_scaling=self.rope_scaling,
1188-
rope_theta=self.rope_theta,
11891183
hf_token=self.hf_token,
11901184
hf_overrides=self.hf_overrides,
11911185
tokenizer_revision=self.tokenizer_revision,

vllm/entrypoints/chat_utils.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,12 @@
4343
# pydantic needs the TypedDict from typing_extensions
4444
from typing_extensions import Required, TypedDict
4545

46+
from vllm import envs
4647
from vllm.config import ModelConfig
4748
from vllm.logger import init_logger
4849
from vllm.model_executor.models import SupportsMultiModal
4950
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict, MultiModalUUIDDict
50-
from vllm.multimodal.utils import MediaConnector
51+
from vllm.multimodal.utils import MEDIA_CONNECTOR_REGISTRY, MediaConnector
5152
from vllm.transformers_utils.chat_templates import get_chat_template_fallback_path
5253
from vllm.transformers_utils.processor import cached_get_processor
5354
from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
@@ -806,7 +807,9 @@ def __init__(self, tracker: MultiModalItemTracker) -> None:
806807
self._tracker = tracker
807808
multimodal_config = self._tracker.model_config.multimodal_config
808809
media_io_kwargs = getattr(multimodal_config, "media_io_kwargs", None)
809-
self._connector = MediaConnector(
810+
811+
self._connector: MediaConnector = MEDIA_CONNECTOR_REGISTRY.load(
812+
envs.VLLM_MEDIA_CONNECTOR,
810813
media_io_kwargs=media_io_kwargs,
811814
allowed_local_media_path=tracker.allowed_local_media_path,
812815
allowed_media_domains=tracker.allowed_media_domains,
@@ -891,7 +894,8 @@ def __init__(self, tracker: AsyncMultiModalItemTracker) -> None:
891894
self._tracker = tracker
892895
multimodal_config = self._tracker.model_config.multimodal_config
893896
media_io_kwargs = getattr(multimodal_config, "media_io_kwargs", None)
894-
self._connector = MediaConnector(
897+
self._connector: MediaConnector = MEDIA_CONNECTOR_REGISTRY.load(
898+
envs.VLLM_MEDIA_CONNECTOR,
895899
media_io_kwargs=media_io_kwargs,
896900
allowed_local_media_path=tracker.allowed_local_media_path,
897901
allowed_media_domains=tracker.allowed_media_domains,

vllm/envs.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8
7171
VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25
7272
VLLM_VIDEO_LOADER_BACKEND: str = "opencv"
73+
VLLM_MEDIA_CONNECTOR: str = "http"
7374
VLLM_MM_INPUT_CACHE_GIB: int = 4
7475
VLLM_TARGET_DEVICE: str = "cuda"
7576
VLLM_MAIN_CUDA_VERSION: str = "12.8"
@@ -738,6 +739,14 @@ def get_vllm_port() -> int | None:
738739
"VLLM_VIDEO_LOADER_BACKEND": lambda: os.getenv(
739740
"VLLM_VIDEO_LOADER_BACKEND", "opencv"
740741
),
742+
# Media connector implementation.
743+
# - "http": Default connector that supports fetching media via HTTP.
744+
#
745+
# Custom implementations can be registered
746+
# via `@MEDIA_CONNECTOR_REGISTRY.register("my_custom_media_connector")` and
747+
# imported at runtime.
748+
# If a non-existing backend is used, an AssertionError will be raised.
749+
"VLLM_MEDIA_CONNECTOR": lambda: os.getenv("VLLM_MEDIA_CONNECTOR", "http"),
741750
# [DEPRECATED] Cache size (in GiB per process) for multimodal input cache
742751
# Default is 4 GiB per API process + 4 GiB per engine core process
743752
"VLLM_MM_INPUT_CACHE_GIB": lambda: int(os.getenv("VLLM_MM_INPUT_CACHE_GIB", "4")),

vllm/multimodal/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from vllm.connections import HTTPConnection, global_http_connection
2121
from vllm.logger import init_logger
2222
from vllm.utils.jsontree import json_map_leaves
23+
from vllm.utils.registry import ExtensionManager
2324

2425
from .audio import AudioMediaIO
2526
from .base import MediaIO
@@ -46,7 +47,10 @@
4647

4748
_M = TypeVar("_M")
4849

50+
MEDIA_CONNECTOR_REGISTRY = ExtensionManager()
4951

52+
53+
@MEDIA_CONNECTOR_REGISTRY.register("http")
5054
class MediaConnector:
5155
def __init__(
5256
self,

vllm/multimodal/video.py

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from vllm import envs
1616
from vllm.logger import init_logger
17+
from vllm.utils.registry import ExtensionManager
1718

1819
from .base import MediaIO
1920
from .image import ImageMediaIO
@@ -63,25 +64,7 @@ def load_bytes(
6364
raise NotImplementedError
6465

6566

66-
class VideoLoaderRegistry:
67-
def __init__(self) -> None:
68-
self.name2class: dict[str, type] = {}
69-
70-
def register(self, name: str):
71-
def wrap(cls_to_register):
72-
self.name2class[name] = cls_to_register
73-
return cls_to_register
74-
75-
return wrap
76-
77-
@staticmethod
78-
def load(cls_name: str) -> VideoLoader:
79-
cls = VIDEO_LOADER_REGISTRY.name2class.get(cls_name)
80-
assert cls is not None, f"VideoLoader class {cls_name} not found"
81-
return cls()
82-
83-
84-
VIDEO_LOADER_REGISTRY = VideoLoaderRegistry()
67+
VIDEO_LOADER_REGISTRY = ExtensionManager()
8568

8669

8770
@VIDEO_LOADER_REGISTRY.register("opencv")

vllm/utils/registry.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
from collections.abc import Callable
from typing import Any


class ExtensionManager:
    """
    A registry for managing pluggable extension classes.

    This class provides a simple mechanism to register and instantiate
    extension classes by name. It is commonly used to implement plugin
    systems where different implementations can be swapped at runtime.

    Examples:
        Basic usage with a registry instance:

        >>> FOO_REGISTRY = ExtensionManager()
        >>> @FOO_REGISTRY.register("my_foo_impl")
        ... class MyFooImpl(Foo):
        ...     def __init__(self, value):
        ...         self.value = value
        >>> foo_impl = FOO_REGISTRY.load("my_foo_impl", value=123)

    """

    def __init__(self) -> None:
        """
        Initialize an empty extension registry.
        """
        # Maps a registered extension name to its class object.
        self.name2class: dict[str, type] = {}

    def register(self, name: str) -> Callable[[type], type]:
        """
        Decorator to register a class with the given name.

        Args:
            name: The lookup key under which the class is registered.
                Registering the same name twice overwrites the earlier entry.

        Returns:
            A decorator that records the class and returns it unchanged.
        """

        def wrap(cls_to_register: type) -> type:
            self.name2class[name] = cls_to_register
            return cls_to_register

        return wrap

    def load(self, cls_name: str, *args: Any, **kwargs: Any) -> Any:
        """
        Instantiate and return a registered extension class by name.

        Args:
            cls_name: The name the class was registered under.
            *args: Positional arguments forwarded to the class constructor.
            **kwargs: Keyword arguments forwarded to the class constructor.

        Returns:
            A new instance of the registered class.

        Raises:
            AssertionError: If no class is registered under ``cls_name``.
        """
        cls = self.name2class.get(cls_name)
        # Raise explicitly rather than using a bare `assert` statement so
        # the documented AssertionError is still raised when Python runs
        # with optimizations enabled (`python -O` strips asserts).
        if cls is None:
            raise AssertionError(f"Extension class {cls_name} not found")
        return cls(*args, **kwargs)

0 commit comments

Comments
 (0)