From efedbfcc6088a5bd9f2b1c7b0300d43d9a3983ab Mon Sep 17 00:00:00 2001
From: 0xrushi <6279035+0xrushi@users.noreply.github.com>
Date: Sat, 25 Oct 2025 22:57:08 -0400
Subject: [PATCH 1/4] add omnivinci

Signed-off-by: 0xrushi <6279035+0xrushi@users.noreply.github.com>
---
 .../model_loader/test_bitsandbytes_loader.py  | 47 ++++++++++++
 tests/model_executor/test_weight_utils.py     | 24 ++++++
 .../test_tokenizer_llm_subfolder.py           | 53 +++++++++++++
 .../test_hf_config_parser_subfolder.py        | 74 +++++++++++++++++++
 .../model_loader/bitsandbytes_loader.py       | 56 ++++++++++----
 .../model_loader/weight_utils.py              |  6 +-
 vllm/model_executor/models/omnivinci.py       | 23 ++++++
 vllm/model_executor/models/registry.py        |  4 +
 vllm/transformers_utils/config.py             | 40 ++++++++++
 vllm/transformers_utils/tokenizer.py          | 20 ++++-
 10 files changed, 329 insertions(+), 18 deletions(-)
 create mode 100644 tests/model_executor/model_loader/test_bitsandbytes_loader.py
 create mode 100644 tests/tokenization/test_tokenizer_llm_subfolder.py
 create mode 100644 tests/transformers_utils/test_hf_config_parser_subfolder.py
 create mode 100644 vllm/model_executor/models/omnivinci.py

diff --git a/tests/model_executor/model_loader/test_bitsandbytes_loader.py b/tests/model_executor/model_loader/test_bitsandbytes_loader.py
new file mode 100644
index 000000000000..0d67f881075d
--- /dev/null
+++ b/tests/model_executor/model_loader/test_bitsandbytes_loader.py
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import Optional
+
+from vllm.config.load import LoadConfig
+from vllm.model_executor.model_loader.bitsandbytes_loader import (
+    BitsAndBytesModelLoader,
+)
+
+
+class _DummyBitsAndBytesLoader(BitsAndBytesModelLoader):
+    """Test helper that bypasses any real HF interactions."""
+
+    def __init__(
+        self, load_config: LoadConfig, mock_result: tuple[str, list[str], str]
+    ):
+        super().__init__(load_config)
+        self._mock_result = mock_result
+
+    def _get_weight_files(  # type: ignore[override]
+        self,
+        model_name_or_path: str,
+        allowed_patterns: list[str],
+        revision: Optional[str] = None,
+    ) -> tuple[str, list[str], str]:
+        return self._mock_result
+
+
+def test_bitsandbytes_loader_detects_safetensors_from_files(tmp_path):
+    """Even if the allow-pattern looks like *.bin, safetensors files are detected."""
+
+    llm_dir = tmp_path / "llm"
+    llm_dir.mkdir()
+    safetensor = llm_dir / "model-00001-of-00002.safetensors"
+    safetensor.write_bytes(b"test")
+
+    load_config = LoadConfig()
+    loader = _DummyBitsAndBytesLoader(
+        load_config,
+        mock_result=(str(tmp_path), [str(safetensor)], "*.bin"),
+    )
+
+    files, use_safetensors = loader._prepare_weights(str(tmp_path), revision=None)
+
+    assert use_safetensors is True
+    assert files == [str(safetensor)]
diff --git a/tests/model_executor/test_weight_utils.py b/tests/model_executor/test_weight_utils.py
index 6dc120ddbac9..b6e7d022648d 100644
--- a/tests/model_executor/test_weight_utils.py
+++ b/tests/model_executor/test_weight_utils.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import json
 import os
 import tempfile
 
@@ -11,6 +12,7 @@
 from vllm.model_executor.model_loader.weight_utils import (
     download_weights_from_hf,
     enable_hf_transfer,
+    filter_duplicate_safetensors_files,
 )
 
 
@@ -61,6 +63,28 @@ def test_download_weights_from_hf():
     )
 
 
+def test_filter_duplicate_safetensors_files_with_subfolder(tmp_path):
+    llm_dir = tmp_path / "llm"
+    llm_dir.mkdir()
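+    # Layout under test: the shard referenced by the index lives in llm/,
+    # while a stray root-level safetensors file should be filtered out.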
+ kept_file = llm_dir / "model-00001-of-00002.safetensors" + kept_file.write_bytes(b"0") + dropped_file = tmp_path / "other.safetensors" + dropped_file.write_bytes(b"0") + + index_path = llm_dir / "model.safetensors.index.json" + index_path.write_text( + json.dumps({"weight_map": {"w": "model-00001-of-00002.safetensors"}}) + ) + + filtered = filter_duplicate_safetensors_files( + [str(kept_file), str(dropped_file)], + str(tmp_path), + "llm/model.safetensors.index.json", + ) + + assert filtered == [str(kept_file)] + + if __name__ == "__main__": test_hf_transfer_auto_activation() test_download_weights_from_hf() diff --git a/tests/tokenization/test_tokenizer_llm_subfolder.py b/tests/tokenization/test_tokenizer_llm_subfolder.py new file mode 100644 index 000000000000..14756e851344 --- /dev/null +++ b/tests/tokenization/test_tokenizer_llm_subfolder.py @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from typing import Any + +from vllm.transformers_utils import tokenizer as tokenizer_module +from vllm.transformers_utils.tokenizer import get_tokenizer + + +class _DummyTokenizer: + def __init__(self): + self.all_special_ids: list[int] = [] + self.all_special_tokens: list[str] = [] + self.all_special_tokens_extended: list[str] = [] + self.special_tokens_map: dict[str, str] = {} + self.vocab_size = 1 + + def get_vocab(self) -> dict[str, int]: + return {"a": 0} + + def __len__(self) -> int: # pragma: no cover - trivial + return 1 + + def decode(self, *args: Any, **kwargs: Any) -> str: + return "" + + def encode(self, *args: Any, **kwargs: Any) -> list[int]: + return [] + + +def test_tokenizer_prefers_llm_subfolder(monkeypatch): + captured = {} + + def fake_file_exists(repo_id: str, file_name: str, **kwargs: Any) -> bool: + return file_name == "llm/tokenizer.json" + + def fake_auto_from_pretrained(*args: Any, **kwargs: Any): + captured["subfolder"] = kwargs.get("subfolder") + return _DummyTokenizer() + + monkeypatch.setattr(tokenizer_module, "file_exists", fake_file_exists) + monkeypatch.setattr( + tokenizer_module.AutoTokenizer, + "from_pretrained", + classmethod( + lambda cls, *args, **kwargs: fake_auto_from_pretrained(*args, **kwargs) + ), + ) + + tokenizer = get_tokenizer("fake/model") + + assert tokenizer is not None + assert captured["subfolder"] == "llm" diff --git a/tests/transformers_utils/test_hf_config_parser_subfolder.py b/tests/transformers_utils/test_hf_config_parser_subfolder.py new file mode 100644 index 000000000000..c03fdf16f6d5 --- /dev/null +++ b/tests/transformers_utils/test_hf_config_parser_subfolder.py @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from typing import Optional, Union + +from transformers import GenerationConfig, PretrainedConfig + +from vllm.transformers_utils import config as config_module +from vllm.transformers_utils.config import HFConfigParser, try_get_generation_config + + +def test_hf_config_parser_uses_llm_subfolder(monkeypatch): + parser = HFConfigParser() + base_config = PretrainedConfig() + subfolder_config = PretrainedConfig() + + def fake_get_config_dict( + cls, + model: Union[str, bytes], + revision: Optional[str] = None, + code_revision: Optional[str] = None, + **kwargs, + ): + return {"llm_cfg": {}}, base_config + + def fake_file_exists( + model: Union[str, bytes], config_name: str, revision: Optional[str] + ): + return config_name == "llm/config.json" + + auto_called = {} + + def 
fake_auto_from_pretrained(cls, *args, **kwargs): + auto_called["subfolder"] = kwargs.get("subfolder") + return subfolder_config + + monkeypatch.setattr( + PretrainedConfig, + "get_config_dict", + classmethod(fake_get_config_dict), + ) + monkeypatch.setattr(config_module, "file_or_path_exists", fake_file_exists) + monkeypatch.setattr( + config_module.AutoConfig, + "from_pretrained", + classmethod(fake_auto_from_pretrained), + ) + + returned_dict, returned_config = parser.parse("fake/model", trust_remote_code=False) + + assert returned_dict == {"llm_cfg": {}} + assert returned_config is subfolder_config + assert auto_called["subfolder"] == "llm" + + +def test_try_get_generation_config_llm_subfolder(monkeypatch): + calls = [] + + def fake_from_pretrained(cls, model: str, **kwargs): + calls.append(kwargs.get("subfolder")) + if len(calls) == 1: + raise OSError("missing") + return GenerationConfig() + + monkeypatch.setattr( + config_module.GenerationConfig, + "from_pretrained", + classmethod(fake_from_pretrained), + ) + + result = try_get_generation_config("fake/model", trust_remote_code=False) + + assert isinstance(result, GenerationConfig) + assert calls == [None, "llm"] diff --git a/vllm/model_executor/model_loader/bitsandbytes_loader.py b/vllm/model_executor/model_loader/bitsandbytes_loader.py index 8c1ff0300b24..0ca5140954ee 100644 --- a/vllm/model_executor/model_loader/bitsandbytes_loader.py +++ b/vllm/model_executor/model_loader/bitsandbytes_loader.py @@ -96,14 +96,27 @@ def _get_weight_files( is_local = os.path.isdir(model_name_or_path) if is_local: - for pattern in allowed_patterns: + patterns = list(allowed_patterns) + # Prefer subfolder patterns if common subfolder exists locally. + if os.path.isdir(os.path.join(model_name_or_path, "llm")): + patterns = [f"llm/{p}" for p in allowed_patterns] + patterns + for pattern in patterns: weight_files = glob.glob(os.path.join(model_name_or_path, pattern)) if weight_files: return model_name_or_path, weight_files, pattern else: hf_api = HfApi() repo_files = hf_api.list_repo_files(repo_id=model_name_or_path) - for pattern in allowed_patterns: + search_patterns = list(allowed_patterns) + # Prefer 'llm/' weights when present in the repo. + if any( + f.startswith("llm/") and f.endswith((".safetensors", ".bin", ".pt")) + for f in repo_files + ): + search_patterns = [ + f"llm/{p}" for p in allowed_patterns + ] + search_patterns + for pattern in search_patterns: matching_files = fnmatch.filter(repo_files, pattern) if matching_files: hf_folder = download_weights_from_hf( @@ -128,26 +141,35 @@ def _prepare_weights( allowed_patterns = ["*.safetensors", "*.bin", "*.pt"] + if getattr(self, "allow_patterns_overrides", None): + allowed_patterns = list(self.allow_patterns_overrides) + hf_folder, hf_weights_files, matched_pattern = self._get_weight_files( model_name_or_path, allowed_patterns, revision ) - use_safetensors = matched_pattern == "*.safetensors" + # Detect safetensors robustly (pattern may include subfolder) + use_safetensors = matched_pattern.endswith(".safetensors") + # Additionally guard by checking actual files + if not use_safetensors: + use_safetensors = any(f.endswith(".safetensors") for f in hf_weights_files) is_local = os.path.isdir(model_name_or_path) - index_file = SAFE_WEIGHTS_INDEX_NAME + # If weights live under a subfolder (e.g., 'llm/*.safetensors'), + # the index file will also live there. 
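+        # e.g. matched_pattern 'llm/*.safetensors' gives the index file
+        # 'llm/model.safetensors.index.json'.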
+ if "/" in matched_pattern: + folder_prefix = matched_pattern.rsplit("/", 1)[0] + "/" + else: + folder_prefix = "" + index_file = folder_prefix + SAFE_WEIGHTS_INDEX_NAME + if use_safetensors and not is_local: + # Download index for safetensors to select correct shards. + download_safetensors_index_file_from_hf( + model_name_or_path, + index_file, + self.load_config.download_dir, + revision, + ) if use_safetensors: - # For models like Mistral-7B-Instruct-v0.3 - # there are both sharded safetensors files and a consolidated - # safetensors file. Using both breaks. - # Here, we download the `model.safetensors.index.json` and filter - # any files not found in the index. - if not is_local: - download_safetensors_index_file_from_hf( - model_name_or_path, - index_file, - self.load_config.download_dir, - revision, - ) hf_weights_files = filter_duplicate_safetensors_files( hf_weights_files, hf_folder, index_file ) @@ -587,6 +609,8 @@ def _initialize_loader_state( self._get_bnb_target_modules(model) self._classify_module_sharding(model) + self.allow_patterns_overrides = getattr(model, "allow_patterns_overrides", None) + def _dequantize_dq(self, quant_states: Any): """ When BNB employs Double Quantization, we perform the dequantization of diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index 5f83482bec3a..1d383e6610e9 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -499,8 +499,12 @@ def filter_duplicate_safetensors_files( with open(index_file_name) as f: weight_map = json.load(f)["weight_map"] weight_files_in_index = set() + # If the index file is inside a subfolder (e.g., 'llm/model.safetensors.index.json'), + # the shard paths in `weight_map` are relative to that subfolder. Use the + # index file's directory as the base for joining shard filenames. + base_dir = os.path.dirname(index_file_name) for weight_name in weight_map: - weight_files_in_index.add(os.path.join(hf_folder, weight_map[weight_name])) + weight_files_in_index.add(os.path.join(base_dir, weight_map[weight_name])) # Filter out any fields that are not found in the index file. hf_weights_files = [f for f in hf_weights_files if f in weight_files_in_index] return hf_weights_files diff --git a/vllm/model_executor/models/omnivinci.py b/vllm/model_executor/models/omnivinci.py new file mode 100644 index 000000000000..23a8ea0c6a66 --- /dev/null +++ b/vllm/model_executor/models/omnivinci.py @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +"""Thin wrapper to support nvidia/omnivinci LLM weights stored under llm/. + +This model maps the root architecture (VILAForCausalLM) to the text-only +Qwen2 architecture by reusing vLLM's Qwen2ForCausalLM and ensures the weight +loader searches in the `llm/` subfolder of the repository. 
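+
+The `allow_patterns_overrides` attribute set in the constructor below is
+picked up by vLLM's weight loaders, restricting the weight search to the
+`llm/` subfolder.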
+""" + +from vllm.config import VllmConfig +from vllm.model_executor.models.qwen2 import Qwen2ForCausalLM + + +class OmniVinciForCausalLM(Qwen2ForCausalLM): + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super().__init__(vllm_config=vllm_config, prefix=prefix) + # direct the default loader to read weights from the llm/ subfolder + self.allow_patterns_overrides = [ + "llm/*.safetensors", + "llm/consolidated*.safetensors", + "llm/*.pt", + ] diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index 32e50f9a8e48..6f6032bed901 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -166,6 +166,10 @@ "TeleFLMForCausalLM": ("teleflm", "TeleFLMForCausalLM"), "XverseForCausalLM": ("llama", "LlamaForCausalLM"), "Zamba2ForCausalLM": ("zamba2", "Zamba2ForCausalLM"), + # nvidia/omnivinci root config advertises VILAForCausalLM but the LLM + # component is Qwen2 with weights/config under the llm/ subfolder. + # Map it to a thin wrapper that reuses Qwen2 implementation. + "VILAForCausalLM": ("omnivinci", "OmniVinciForCausalLM"), } _EMBEDDING_MODELS = { diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 87bbe73d834a..f30f3d4f9517 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -96,6 +96,8 @@ def __getitem__(self, key): _CONFIG_ATTRS_MAPPING: dict[str, str] = { "llm_config": "text_config", + # Some repos (e.g., nvidia/omnivinci) use `llm_cfg` for nested text config + "llm_cfg": "text_config", } _AUTO_CONFIG_KWARGS_OVERRIDES: dict[str, dict[str, Any]] = { @@ -121,6 +123,36 @@ def parse( token=_get_hf_token(), **kwargs, ) + # repos like nvidia/omnivinci keep the LLM config under a subfolder (e.g., `llm/`) + if ( + "llm_cfg" in config_dict or "llm_config" in config_dict + ) and file_or_path_exists(model, "llm/config.json", revision): + try: + sub_cfg = AutoConfig.from_pretrained( + model, + trust_remote_code=trust_remote_code, + revision=revision, + code_revision=code_revision, + token=_get_hf_token(), + subfolder="llm", + **{k: v for k, v in kwargs.items() if k != "local_files_only"}, + ) + return config_dict, sub_cfg + except ValueError as e: + if ( + not trust_remote_code + and "requires you to execute the configuration file" in str(e) + ): + err_msg = ( + "Failed to load the model config. If the model " + "is a custom model not yet available in the " + "HuggingFace transformers library, consider setting " + "`trust_remote_code=True` in LLM or using the " + "`--trust-remote-code` flag in the CLI." 
+                    )
+                    raise RuntimeError(err_msg) from e
+                else:
+                    raise e
         # Use custom model class if it's in our registry
         model_type = config_dict.get("model_type")
         if model_type is None:
@@ -1000,6 +1032,14 @@ def try_get_generation_config(
             revision=revision,
         )
     except OSError:  # Not found
+        try:
+            return GenerationConfig.from_pretrained(
+                model,
+                revision=revision,
+                subfolder="llm",
+            )
+        except OSError:
+            pass
         try:
             config = get_config(
                 model,
diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 9537295c6dcd..fafd62965328 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -15,7 +15,10 @@
 
 from vllm import envs
 from vllm.logger import init_logger
-from vllm.transformers_utils.config import get_sentence_transformer_tokenizer_config
+from vllm.transformers_utils.config import (
+    file_exists,
+    get_sentence_transformer_tokenizer_config,
+)
 from vllm.transformers_utils.tokenizers import MistralTokenizer
 from vllm.transformers_utils.utils import check_gguf_file
 
@@ -212,6 +215,21 @@ def get_tokenizer(
         )
     else:
         try:
+            needs_subfolder_probe = (
+                isinstance(tokenizer_name, (str, os.PathLike))
+                and not Path(str(tokenizer_name)).exists()
+                and "subfolder" not in kwargs
+            )
+            if needs_subfolder_probe:
+                root_has = file_exists(
+                    str(tokenizer_name), "tokenizer.json", revision=revision
+                )
+                llm_has = file_exists(
+                    str(tokenizer_name), "llm/tokenizer.json", revision=revision
+                )
+                if not root_has and llm_has:
+                    kwargs["subfolder"] = "llm"
+
             tokenizer = AutoTokenizer.from_pretrained(
                 tokenizer_name,
                 *args,

From a309aa9ad8ca2210c6101796c1bab00e72caa306 Mon Sep 17 00:00:00 2001
From: 0xrushi <6279035+0xrushi@users.noreply.github.com>
Date: Sat, 25 Oct 2025 23:26:09 -0400
Subject: [PATCH 2/4] default loader fix

Signed-off-by: 0xrushi <6279035+0xrushi@users.noreply.github.com>
---
 .../model_loader/default_loader.py | 22 +++++++++++++++----
 vllm/transformers_utils/config.py  |  4 +++-
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/vllm/model_executor/model_loader/default_loader.py b/vllm/model_executor/model_loader/default_loader.py
index 00944989a002..3a1a6c93b2da 100644
--- a/vllm/model_executor/model_loader/default_loader.py
+++ b/vllm/model_executor/model_loader/default_loader.py
@@ -130,14 +130,28 @@ def _prepare_weights(
             hf_folder = model_name_or_path
 
         hf_weights_files: list[str] = []
+        matched_pattern: Optional[str] = None
         for pattern in allow_patterns:
-            hf_weights_files += glob.glob(os.path.join(hf_folder, pattern))
-            if len(hf_weights_files) > 0:
-                if pattern == "*.safetensors":
-                    use_safetensors = True
+            files = glob.glob(os.path.join(hf_folder, pattern))
+            if files:
+                hf_weights_files = files
+                matched_pattern = pattern
                 break
 
+        if hf_weights_files:
+            use_safetensors = any(f.endswith(".safetensors") for f in hf_weights_files)
         if use_safetensors:
+            # If weights live under a subfolder (e.g., 'llm/*.safetensors'),
+            # the index file will also be under that subfolder. Derive the
+            # prefix from the matched pattern or the first file's directory.
+            if matched_pattern and "/" in matched_pattern:
+                folder_prefix = matched_pattern.rsplit("/", 1)[0] + "/"
+            else:
+                first_dir_rel = os.path.relpath(
+                    os.path.dirname(hf_weights_files[0]), hf_folder
+                )
+                folder_prefix = "" if first_dir_rel in ("", ".") else first_dir_rel.rstrip("/") + "/"
+            index_file = folder_prefix + index_file
             # For models like Mistral-7B-Instruct-v0.3
             # there are both sharded safetensors files and a consolidated
             # safetensors file. Using both breaks.
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index f30f3d4f9517..fd34302b2777 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -128,6 +128,8 @@ def parse( "llm_cfg" in config_dict or "llm_config" in config_dict ) and file_or_path_exists(model, "llm/config.json", revision): try: + # Respect offline mode by passing through all kwargs, including + # local_files_only, rather than stripping it. sub_cfg = AutoConfig.from_pretrained( model, trust_remote_code=trust_remote_code, @@ -135,7 +137,7 @@ def parse( code_revision=code_revision, token=_get_hf_token(), subfolder="llm", - **{k: v for k, v in kwargs.items() if k != "local_files_only"}, + **kwargs, ) return config_dict, sub_cfg except ValueError as e: From 8ac923d1d8672bfa3afc28248a643d769798020e Mon Sep 17 00:00:00 2001 From: 0xrushi <6279035+0xrushi@users.noreply.github.com> Date: Sun, 26 Oct 2025 00:23:04 -0400 Subject: [PATCH 3/4] default loader fix Signed-off-by: 0xrushi <6279035+0xrushi@users.noreply.github.com> --- .../test_default_loader_subfolder.py | 33 +++++++++++++++++++ .../model_loader/default_loader.py | 7 ++++ 2 files changed, 40 insertions(+) create mode 100644 tests/model_executor/model_loader/test_default_loader_subfolder.py diff --git a/tests/model_executor/model_loader/test_default_loader_subfolder.py b/tests/model_executor/model_loader/test_default_loader_subfolder.py new file mode 100644 index 000000000000..2d25a37dc0ef --- /dev/null +++ b/tests/model_executor/model_loader/test_default_loader_subfolder.py @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import json + +from vllm.config.load import LoadConfig +from vllm.model_executor.model_loader.default_loader import DefaultModelLoader + + +def test_default_loader_prefers_llm_subfolder_and_filters_with_index(tmp_path): + # Create local repo layout with llm/ subfolder + llm_dir = tmp_path / "llm" + llm_dir.mkdir() + + keep = llm_dir / "model-00001-of-00002.safetensors" + drop = llm_dir / "model-00002-of-00002.safetensors" + keep.write_bytes(b"0") + drop.write_bytes(b"0") + + # Create index file within llm/ that only references the first shard + index = llm_dir / "model.safetensors.index.json" + index.write_text(json.dumps({"weight_map": {"w": keep.name}})) + + # Default loader in auto format should find llm/*.safetensors and use the subfolder index + loader = DefaultModelLoader(LoadConfig(load_format="auto")) + hf_folder, files, use_safetensors = loader._prepare_weights( + str(tmp_path), revision=None, fall_back_to_pt=True, allow_patterns_overrides=None + ) + + assert hf_folder == str(tmp_path) + assert use_safetensors is True + assert files == [str(keep)] + diff --git a/vllm/model_executor/model_loader/default_loader.py b/vllm/model_executor/model_loader/default_loader.py index 3a1a6c93b2da..18b8262b7442 100644 --- a/vllm/model_executor/model_loader/default_loader.py +++ b/vllm/model_executor/model_loader/default_loader.py @@ -118,6 +118,13 @@ def _prepare_weights( if allow_patterns_overrides is not None: allow_patterns = allow_patterns_overrides + # Prefer common subfolder variants (e.g., 'llm/') when available. 
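+        # The 'llm/' variants are prepended, so they are tried first while the
+        # original patterns remain as a fallback for repos without a subfolder.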
+ if is_local: + if os.path.isdir(os.path.join(model_name_or_path, "llm")): + allow_patterns = [f"llm/{p}" for p in allow_patterns] + allow_patterns + else: + allow_patterns = [f"llm/{p}" for p in allow_patterns] + allow_patterns + if not is_local: hf_folder = download_weights_from_hf( model_name_or_path, From 88a39f667af1e7c193a77fa0fc4933d943cbcedd Mon Sep 17 00:00:00 2001 From: 0xrushi <6279035+0xrushi@users.noreply.github.com> Date: Sun, 26 Oct 2025 08:00:27 -0400 Subject: [PATCH 4/4] precommit Signed-off-by: 0xrushi <6279035+0xrushi@users.noreply.github.com> --- .../model_loader/test_bitsandbytes_loader.py | 3 +-- .../model_loader/test_default_loader_subfolder.py | 6 ++++-- .../test_hf_config_parser_subfolder.py | 11 ++++------- .../model_loader/bitsandbytes_loader.py | 6 ++++-- vllm/model_executor/model_loader/default_loader.py | 8 ++++++-- 5 files changed, 19 insertions(+), 15 deletions(-) diff --git a/tests/model_executor/model_loader/test_bitsandbytes_loader.py b/tests/model_executor/model_loader/test_bitsandbytes_loader.py index 0d67f881075d..bae67437b823 100644 --- a/tests/model_executor/model_loader/test_bitsandbytes_loader.py +++ b/tests/model_executor/model_loader/test_bitsandbytes_loader.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import Optional from vllm.config.load import LoadConfig from vllm.model_executor.model_loader.bitsandbytes_loader import ( @@ -22,7 +21,7 @@ def _get_weight_files( # type: ignore[override] self, model_name_or_path: str, allowed_patterns: list[str], - revision: Optional[str] = None, + revision: str | None = None, ) -> tuple[str, list[str], str]: return self._mock_result diff --git a/tests/model_executor/model_loader/test_default_loader_subfolder.py b/tests/model_executor/model_loader/test_default_loader_subfolder.py index 2d25a37dc0ef..64b953812247 100644 --- a/tests/model_executor/model_loader/test_default_loader_subfolder.py +++ b/tests/model_executor/model_loader/test_default_loader_subfolder.py @@ -24,10 +24,12 @@ def test_default_loader_prefers_llm_subfolder_and_filters_with_index(tmp_path): # Default loader in auto format should find llm/*.safetensors and use the subfolder index loader = DefaultModelLoader(LoadConfig(load_format="auto")) hf_folder, files, use_safetensors = loader._prepare_weights( - str(tmp_path), revision=None, fall_back_to_pt=True, allow_patterns_overrides=None + str(tmp_path), + revision=None, + fall_back_to_pt=True, + allow_patterns_overrides=None, ) assert hf_folder == str(tmp_path) assert use_safetensors is True assert files == [str(keep)] - diff --git a/tests/transformers_utils/test_hf_config_parser_subfolder.py b/tests/transformers_utils/test_hf_config_parser_subfolder.py index c03fdf16f6d5..9b162637a7da 100644 --- a/tests/transformers_utils/test_hf_config_parser_subfolder.py +++ b/tests/transformers_utils/test_hf_config_parser_subfolder.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import Optional, Union from transformers import GenerationConfig, PretrainedConfig @@ -16,16 +15,14 @@ def test_hf_config_parser_uses_llm_subfolder(monkeypatch): def fake_get_config_dict( cls, - model: Union[str, bytes], - revision: Optional[str] = None, - code_revision: Optional[str] = None, + model: str | bytes, + revision: str | None = None, + code_revision: str | None = None, **kwargs, ): return {"llm_cfg": {}}, base_config - def fake_file_exists( - 
model: Union[str, bytes], config_name: str, revision: Optional[str] - ): + def fake_file_exists(model: str | bytes, config_name: str, revision: str | None): return config_name == "llm/config.json" auto_called = {} diff --git a/vllm/model_executor/model_loader/bitsandbytes_loader.py b/vllm/model_executor/model_loader/bitsandbytes_loader.py index 6921441bbf79..b1ad99a2c858 100644 --- a/vllm/model_executor/model_loader/bitsandbytes_loader.py +++ b/vllm/model_executor/model_loader/bitsandbytes_loader.py @@ -84,6 +84,7 @@ def __init__(self, load_config: LoadConfig): self.pre_quant: bool = False self.load_8bit: bool = False self.is_pool_model: bool = False + self.allow_patterns_overrides: list[str] | None = None def _get_weight_files( self, @@ -142,8 +143,9 @@ def _prepare_weights( allowed_patterns = ["*.safetensors", "*.bin", "*.pt"] - if getattr(self, "allow_patterns_overrides", None): - allowed_patterns = list(self.allow_patterns_overrides) + allow_patterns_overrides = getattr(self, "allow_patterns_overrides", None) + if allow_patterns_overrides is not None: + allowed_patterns = list(allow_patterns_overrides) hf_folder, hf_weights_files, matched_pattern = self._get_weight_files( model_name_or_path, allowed_patterns, revision diff --git a/vllm/model_executor/model_loader/default_loader.py b/vllm/model_executor/model_loader/default_loader.py index 2ff2c58f8577..249bbf966921 100644 --- a/vllm/model_executor/model_loader/default_loader.py +++ b/vllm/model_executor/model_loader/default_loader.py @@ -137,7 +137,7 @@ def _prepare_weights( hf_folder = model_name_or_path hf_weights_files: list[str] = [] - matched_pattern: Optional[str] = None + matched_pattern: str | None = None for pattern in allow_patterns: files = glob.glob(os.path.join(hf_folder, pattern)) if files: @@ -157,7 +157,11 @@ def _prepare_weights( first_dir_rel = os.path.relpath( os.path.dirname(hf_weights_files[0]), hf_folder ) - folder_prefix = "" if first_dir_rel in ("", ".") else first_dir_rel.rstrip("/") + "/" + folder_prefix = ( + "" + if first_dir_rel in ("", ".") + else first_dir_rel.rstrip("/") + "/" + ) index_file = folder_prefix + index_file # For models like Mistral-7B-Instruct-v0.3 # there are both sharded safetensors files and a consolidated