Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
daa305a
FEAT: add engine ability display
OliverBryant Oct 13, 2025
5347c4b
feat: frontend supports engine ability display
yiboyasss Oct 13, 2025
2466777
FEAT: add engine ability display
OliverBryant Oct 14, 2025
8e1fa20
FEAT: add engine ability display
OliverBryant Oct 14, 2025
da58bf4
FEAT: add engine ability display
OliverBryant Oct 14, 2025
38aad40
FEAT: add engine ability display
OliverBryant Oct 14, 2025
a679c3b
FEAT: add engine ability display
OliverBryant Oct 14, 2025
340ff70
FEAT: add engine ability display
OliverBryant Oct 14, 2025
19e1e2a
FEAT: add engine ability display
OliverBryant Oct 14, 2025
cc84a84
FEAT: add engine ability display
OliverBryant Oct 14, 2025
d9b3a43
FEAT: add engine ability display
OliverBryant Oct 14, 2025
d9d3136
modify accomplishment measure
OliverBryant Oct 21, 2025
08450ac
modify accomplishment measure
OliverBryant Oct 21, 2025
e793cd4
modify accomplishment measure
OliverBryant Oct 21, 2025
27ea341
modify accomplishment measure
OliverBryant Oct 21, 2025
114ec63
modify accomplishment measure
OliverBryant Oct 21, 2025
c17b78e
mypy test
OliverBryant Oct 21, 2025
b194751
mypy test
OliverBryant Oct 21, 2025
2aa43d7
mypy test
OliverBryant Oct 21, 2025
173e494
mypy test
OliverBryant Oct 21, 2025
bc41700
mypy test
OliverBryant Oct 21, 2025
fc9b422
mypy test
OliverBryant Oct 21, 2025
5030b26
mypy fix
OliverBryant Oct 21, 2025
cf51732
mypy fix
OliverBryant Oct 21, 2025
0660aab
mypy fix
OliverBryant Oct 21, 2025
996f3cd
mypy fix
OliverBryant Oct 21, 2025
41b0735
mypy fix
OliverBryant Oct 22, 2025
c760a58
Modify class name
OliverBryant Oct 22, 2025
6615014
Modify class name
OliverBryant Oct 22, 2025
2105c83
commit
OliverBryant Oct 22, 2025
eb1bb43
new engine ability display
OliverBryant Oct 29, 2025
26ca06f
pre-commit
OliverBryant Oct 29, 2025
48a272d
mypy-error
OliverBryant Oct 29, 2025
0acb471
fix mlx CI bug
OliverBryant Oct 29, 2025
1b973b4
fix CI bug
OliverBryant Oct 30, 2025
f52824a
modify embedding sentence_transformers
OliverBryant Nov 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions xinference/model/embedding/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def __init__(

@classmethod
@abstractmethod
def check_lib(cls) -> bool:
def check_lib(cls) -> Union[bool, str]:
pass

@classmethod
Expand All @@ -168,7 +168,7 @@ def match_json(
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
) -> Union[bool, str]:
pass

@classmethod
Expand All @@ -177,13 +177,15 @@ def match(
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
):
) -> bool:
"""
Return if the model_spec can be matched.
"""
if not cls.check_lib():
lib_result = cls.check_lib()
if lib_result != True:
return False
return cls.match_json(model_family, model_spec, quantization)
match_result = cls.match_json(model_family, model_spec, quantization)
return match_result == True

@abstractmethod
def load(self):
Expand Down
17 changes: 13 additions & 4 deletions xinference/model/embedding/flag/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,19 +285,28 @@ def encode(
return result

@classmethod
def check_lib(cls) -> bool:
return importlib.util.find_spec("FlagEmbedding") is not None
def check_lib(cls) -> Union[bool, str]:
return (
True
if importlib.util.find_spec("FlagEmbedding") is not None
else "FlagEmbedding library is not installed"
)

@classmethod
def match_json(
cls,
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
) -> Union[bool, str]:
# Check library availability first
lib_result = cls.check_lib()
if lib_result != True:
return lib_result

if (
model_spec.model_format in ["pytorch"]
and model_family.model_name in FLAG_EMBEDDER_MODEL_LIST
):
return True
return False
return f"FlagEmbedding engine only supports pytorch format and models in FLAG_EMBEDDER_MODEL_LIST, got format: {model_spec.model_format}, model: {model_family.model_name}"
37 changes: 33 additions & 4 deletions xinference/model/embedding/llama_cpp/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,16 +225,45 @@ def _handle_embedding():
return Embedding(**r) # type: ignore

@classmethod
def check_lib(cls) -> bool:
return importlib.util.find_spec("xllamacpp") is not None
def check_lib(cls) -> Union[bool, str]:
return (
True
if importlib.util.find_spec("xllamacpp") is not None
else "xllamacpp library is not installed"
)

@classmethod
def match_json(
cls,
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
) -> Union[bool, str]:
# Check library availability
lib_result = cls.check_lib()
if lib_result != True:
return lib_result

# Check model format compatibility
if model_spec.model_format not in ["ggufv2"]:
return False
return f"llama.cpp embedding only supports GGUF v2 format, got: {model_spec.model_format}"

# Check embedding-specific requirements
if not hasattr(model_spec, "model_file_name_template"):
return "GGUF embedding model requires proper file configuration (missing model_file_name_template)"

# Check model dimensions for llama.cpp compatibility
model_dimensions = model_family.dimensions
if model_dimensions > 4096: # llama.cpp may have limitations
return f"Large embedding model may have compatibility issues with llama.cpp ({model_dimensions} dimensions)"

# Check platform-specific considerations
import platform

current_platform = platform.system()

# llama.cpp works across platforms but may have performance differences
if current_platform == "Windows":
return "llama.cpp embedding may have limited performance on Windows"

return True
76 changes: 76 additions & 0 deletions xinference/model/embedding/match_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""
Error handling result structures for embedding model engine matching.

This module provides structured error handling for engine matching operations,
allowing engines to provide detailed failure reasons and suggestions.
"""

from dataclasses import dataclass
from typing import Any, Dict, Optional


@dataclass
class MatchResult:
    """Outcome of an engine-matching check, with optional failure details.

    A successful result carries only ``is_match=True``; a failed one may
    additionally explain why the engine cannot serve the model and give
    a machine-readable error category plus technical notes.
    """

    is_match: bool
    reason: Optional[str] = None
    error_type: Optional[str] = None
    technical_details: Optional[str] = None

    @classmethod
    def success(cls) -> "MatchResult":
        """Build a result representing a successful match."""
        return cls(is_match=True)

    @classmethod
    def failure(
        cls,
        reason: str,
        error_type: Optional[str] = None,
        technical_details: Optional[str] = None,
    ) -> "MatchResult":
        """Build a failed result carrying a reason and optional details."""
        return cls(
            is_match=False,
            reason=reason,
            error_type=error_type,
            technical_details=technical_details,
        )

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for API responses; detail keys appear only on failure."""
        payload: Dict[str, Any] = {"is_match": self.is_match}
        if self.is_match:
            return payload
        # Only emit detail fields that were actually populated.
        for field_name in ("reason", "error_type", "technical_details"):
            value = getattr(self, field_name)
            if value:
                payload[field_name] = value
        return payload

    def to_error_string(self) -> str:
        """Flatten to a plain string for the legacy string-based protocol."""
        if self.is_match:
            return "Available"
        return self.reason or "Unknown error"


# Error type constants for better categorization
class ErrorType:
    """String constants categorizing why an engine match failed."""

    HARDWARE_REQUIREMENT = "hardware_requirement"
    OS_REQUIREMENT = "os_requirement"
    MODEL_FORMAT = "model_format"
    DEPENDENCY_MISSING = "dependency_missing"
    MODEL_COMPATIBILITY = "model_compatibility"
    DIMENSION_MISMATCH = "dimension_mismatch"
    VERSION_REQUIREMENT = "version_requirement"
    CONFIGURATION_ERROR = "configuration_error"
    ENGINE_UNAVAILABLE = "engine_unavailable"
51 changes: 46 additions & 5 deletions xinference/model/embedding/sentence_transformers/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,15 +424,56 @@ def base64_to_image(base64_str: str) -> Image.Image:
return result

@classmethod
def check_lib(cls) -> bool:
return importlib.util.find_spec("sentence_transformers") is not None
def check_lib(cls) -> Union[bool, str]:
return (
True
if importlib.util.find_spec("sentence_transformers") is not None
else "sentence_transformers library is not installed"
)

@classmethod
def match_json(
cls,
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
# As default embedding engine, sentence-transformer support all models
return model_spec.model_format in ["pytorch"]
) -> Union[bool, str]:
# Check library availability
lib_result = cls.check_lib()
if lib_result != True:
return lib_result

# Check model format compatibility
if model_spec.model_format not in ["pytorch"]:
return f"Sentence Transformers only supports pytorch format, got: {model_spec.model_format}"

# Check model dimensions compatibility
model_dimensions = model_family.dimensions
if model_dimensions > 8192: # Extremely large embedding models
return f"Extremely large embedding model detected ({model_dimensions} dimensions), may have performance issues"

# Check token limits
max_tokens = model_family.max_tokens
if max_tokens > 131072: # Extremely high token limits (128K)
return f"Extremely high token limit model detected (max_tokens: {max_tokens}), may cause memory issues"

# Check for special model requirements
model_name = model_family.model_name.lower()

# Check Qwen2 GTE models
if "gte" in model_name and "qwen2" in model_name:
# These models have specific requirements
if not hasattr(cls, "_check_qwen_gte_requirements"):
return "Qwen2 GTE models require special handling"

# Check Qwen3 models
if "qwen3" in model_name:
# Qwen3 has flash attention requirements - basic check
try:
pass

# This would be checked during actual loading
except Exception:
return "Qwen3 embedding model may have compatibility issues"

return True
87 changes: 77 additions & 10 deletions xinference/model/embedding/vllm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from ..core import EmbeddingModel, EmbeddingModelFamilyV2, EmbeddingSpecV1

logger = logging.getLogger(__name__)
SUPPORTED_MODELS_PREFIXES = ["bge", "gte", "text2vec", "m3e", "gte", "Qwen3"]
SUPPORTED_MODELS_PREFIXES = ["bge", "gte", "text2vec", "m3e", "gte", "qwen3"]


class VLLMEmbeddingModel(EmbeddingModel):
Expand All @@ -32,16 +32,44 @@ def __init__(self, *args, **kwargs):

def load(self):
try:
# Handle vLLM-transformers config conflict by setting environment variable
import os

os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers_cache_vllm"

from vllm import LLM

except ImportError:
except ImportError as e:
error_message = "Failed to import module 'vllm'"
installation_guide = [
"Please make sure 'vllm' is installed. ",
"You can install it by `pip install vllm`\n",
]

# Check if it's a config conflict error
if "aimv2" in str(e):
error_message = (
"vLLM has a configuration conflict with transformers library"
)
installation_guide = [
"This is a known issue with certain vLLM and transformers versions.",
"Try upgrading transformers or using a different vLLM version.\n",
]

raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
except Exception as e:
# Handle config registration conflicts
if "aimv2" in str(e) and "already used by a Transformers config" in str(e):
error_message = (
"vLLM has a configuration conflict with transformers library"
)
installation_guide = [
"This is a known issue with certain vLLM and transformers versions.",
"Try: pip install --upgrade transformers vllm\n",
]
raise RuntimeError(f"{error_message}\n\n{''.join(installation_guide)}")
raise

if self.model_family.model_name in {
"Qwen3-Embedding-0.6B",
"Qwen3-Embedding-4B",
Expand Down Expand Up @@ -149,21 +177,60 @@ def create_embedding(
return result

@classmethod
def check_lib(cls) -> bool:
return importlib.util.find_spec("vllm") is not None
def check_lib(cls) -> Union[bool, str]:
return (
True
if importlib.util.find_spec("vllm") is not None
else "vllm library is not installed"
)

@classmethod
def match_json(
cls,
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
if model_spec.model_format in ["pytorch"]:
prefix = model_family.model_name.split("-", 1)[0]
if prefix in SUPPORTED_MODELS_PREFIXES:
return True
return False
) -> Union[bool, str]:
# Check library availability first
lib_result = cls.check_lib()
if lib_result != True:
return lib_result

# Check model format compatibility
if model_spec.model_format not in ["pytorch"]:
return f"VLLM Embedding engine only supports pytorch format models, got format: {model_spec.model_format}"

# Check model name prefix matching
prefix = model_family.model_name.split("-", 1)[0]
if prefix.lower() not in [p.lower() for p in SUPPORTED_MODELS_PREFIXES]:
return f"VLLM Embedding engine only supports models with prefixes {SUPPORTED_MODELS_PREFIXES}, got model: {model_family.model_name}"

# Additional runtime compatibility checks for vLLM version
try:
import vllm
from packaging.version import Version

vllm_version = Version(vllm.__version__)

# Check for vLLM version compatibility issues
if vllm_version >= Version("0.10.0") and vllm_version < Version("0.11.0"):
# vLLM 0.10.x has V1 engine issues on CPU
import platform

if platform.system() == "Darwin" and platform.machine() in [
"arm64",
"arm",
]:
# Check if this is likely to run on CPU (most common for testing)
return f"vLLM {vllm_version} has compatibility issues with embedding models on Apple Silicon CPUs. Consider using a different platform or vLLM version."
elif vllm_version >= Version("0.11.0"):
# vLLM 0.11+ should have fixed the config conflict issue
pass
except Exception:
# If version check fails, continue with basic validation
pass

return True

def wait_for_load(self):
# set context length after engine inited
Expand Down
Loading
Loading