Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
daa305a
FEAT: add engine ability display
OliverBryant Oct 13, 2025
5347c4b
feat: frontend supports engine ability display
yiboyasss Oct 13, 2025
2466777
FEAT: add engine ability display
OliverBryant Oct 14, 2025
8e1fa20
FEAT: add engine ability display
OliverBryant Oct 14, 2025
da58bf4
FEAT: add engine ability display
OliverBryant Oct 14, 2025
38aad40
FEAT: add engine ability display
OliverBryant Oct 14, 2025
a679c3b
FEAT: add engine ability display
OliverBryant Oct 14, 2025
340ff70
FEAT: add engine ability display
OliverBryant Oct 14, 2025
19e1e2a
FEAT: add engine ability display
OliverBryant Oct 14, 2025
cc84a84
FEAT: add engine ability display
OliverBryant Oct 14, 2025
d9b3a43
FEAT: add engine ability display
OliverBryant Oct 14, 2025
d9d3136
modify accomplishment measure
OliverBryant Oct 21, 2025
08450ac
modify accomplishment measure
OliverBryant Oct 21, 2025
e793cd4
modify accomplishment measure
OliverBryant Oct 21, 2025
27ea341
modify accomplishment measure
OliverBryant Oct 21, 2025
114ec63
modify accomplishment measure
OliverBryant Oct 21, 2025
c17b78e
mypy test
OliverBryant Oct 21, 2025
b194751
mypy test
OliverBryant Oct 21, 2025
2aa43d7
mypy test
OliverBryant Oct 21, 2025
173e494
mypy test
OliverBryant Oct 21, 2025
bc41700
mypy test
OliverBryant Oct 21, 2025
fc9b422
mypy test
OliverBryant Oct 21, 2025
5030b26
mypy fix
OliverBryant Oct 21, 2025
cf51732
mypy fix
OliverBryant Oct 21, 2025
0660aab
mypy fix
OliverBryant Oct 21, 2025
996f3cd
mypy fix
OliverBryant Oct 21, 2025
41b0735
mypy fix
OliverBryant Oct 22, 2025
c760a58
Modify class name
OliverBryant Oct 22, 2025
6615014
Modify class name
OliverBryant Oct 22, 2025
2105c83
commit
OliverBryant Oct 22, 2025
eb1bb43
new engine ability display
OliverBryant Oct 29, 2025
26ca06f
pre-commit
OliverBryant Oct 29, 2025
48a272d
mypy-error
OliverBryant Oct 29, 2025
0acb471
fix mlx CI bug
OliverBryant Oct 29, 2025
1b973b4
fix CI bug
OliverBryant Oct 30, 2025
f52824a
modify embedding sentence_transformers
OliverBryant Nov 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions xinference/model/embedding/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def __init__(

@classmethod
@abstractmethod
def check_lib(cls) -> bool:
def check_lib(cls) -> Union[bool, str]:
pass

@classmethod
Expand All @@ -168,7 +168,7 @@ def match_json(
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
) -> Union[bool, str]:
pass

@classmethod
Expand All @@ -177,13 +177,15 @@ def match(
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
):
) -> bool:
"""
Return if the model_spec can be matched.
"""
if not cls.check_lib():
lib_result = cls.check_lib()
if lib_result != True:
return False
return cls.match_json(model_family, model_spec, quantization)
match_result = cls.match_json(model_family, model_spec, quantization)
return match_result == True

@abstractmethod
def load(self):
Expand Down
17 changes: 13 additions & 4 deletions xinference/model/embedding/flag/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,19 +285,28 @@ def encode(
return result

@classmethod
def check_lib(cls) -> bool:
return importlib.util.find_spec("FlagEmbedding") is not None
def check_lib(cls) -> Union[bool, str]:
return (
True
if importlib.util.find_spec("FlagEmbedding") is not None
else "FlagEmbedding library is not installed"
)

@classmethod
def match_json(
cls,
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
) -> Union[bool, str]:
# Check library availability first
lib_result = cls.check_lib()
if lib_result != True:
return lib_result

if (
model_spec.model_format in ["pytorch"]
and model_family.model_name in FLAG_EMBEDDER_MODEL_LIST
):
return True
return False
return f"FlagEmbedding engine only supports pytorch format and models in FLAG_EMBEDDER_MODEL_LIST, got format: {model_spec.model_format}, model: {model_family.model_name}"
37 changes: 33 additions & 4 deletions xinference/model/embedding/llama_cpp/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,16 +225,45 @@ def _handle_embedding():
return Embedding(**r) # type: ignore

@classmethod
def check_lib(cls) -> bool:
return importlib.util.find_spec("xllamacpp") is not None
def check_lib(cls) -> Union[bool, str]:
return (
True
if importlib.util.find_spec("xllamacpp") is not None
else "xllamacpp library is not installed"
)

@classmethod
def match_json(
cls,
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
) -> Union[bool, str]:
# Check library availability
lib_result = cls.check_lib()
if lib_result != True:
return lib_result

# Check model format compatibility
if model_spec.model_format not in ["ggufv2"]:
return False
return f"llama.cpp embedding only supports GGUF v2 format, got: {model_spec.model_format}"

# Check embedding-specific requirements
if not hasattr(model_spec, "model_file_name_template"):
return "GGUF embedding model requires proper file configuration (missing model_file_name_template)"

# Check model dimensions for llama.cpp compatibility
model_dimensions = model_family.dimensions
if model_dimensions > 4096: # llama.cpp may have limitations
return f"Large embedding model may have compatibility issues with llama.cpp ({model_dimensions} dimensions)"

# Check platform-specific considerations
import platform

current_platform = platform.system()

# llama.cpp works across platforms but may have performance differences
if current_platform == "Windows":
return "llama.cpp embedding may have limited performance on Windows"

return True
76 changes: 76 additions & 0 deletions xinference/model/embedding/match_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""
Error handling result structures for embedding model engine matching.

This module provides structured error handling for engine matching operations,
allowing engines to provide detailed failure reasons and suggestions.
"""

from dataclasses import dataclass
from typing import Any, Dict, Optional


@dataclass
class MatchResult:
    """Outcome of an engine-matching check, with optional failure details.

    A successful result carries only ``is_match=True``; a failed one may
    additionally explain why the engine cannot serve the model and give
    a machine-readable error category plus technical notes.
    """

    is_match: bool
    reason: Optional[str] = None
    error_type: Optional[str] = None
    technical_details: Optional[str] = None

    @classmethod
    def success(cls) -> "MatchResult":
        """Build a result representing a successful match."""
        return cls(is_match=True)

    @classmethod
    def failure(
        cls,
        reason: str,
        error_type: Optional[str] = None,
        technical_details: Optional[str] = None,
    ) -> "MatchResult":
        """Build a failed result carrying a reason and optional details."""
        return cls(
            is_match=False,
            reason=reason,
            error_type=error_type,
            technical_details=technical_details,
        )

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for API responses; detail keys appear only on failure."""
        payload: Dict[str, Any] = {"is_match": self.is_match}
        if self.is_match:
            return payload
        # Only emit detail fields that were actually populated.
        for field_name in ("reason", "error_type", "technical_details"):
            value = getattr(self, field_name)
            if value:
                payload[field_name] = value
        return payload

    def to_error_string(self) -> str:
        """Flatten to a plain string for the legacy string-based protocol."""
        if self.is_match:
            return "Available"
        return self.reason or "Unknown error"


# Error type constants for better categorization
class ErrorType:
    """String constants categorizing why an engine match failed."""

    HARDWARE_REQUIREMENT = "hardware_requirement"
    OS_REQUIREMENT = "os_requirement"
    MODEL_FORMAT = "model_format"
    DEPENDENCY_MISSING = "dependency_missing"
    MODEL_COMPATIBILITY = "model_compatibility"
    DIMENSION_MISMATCH = "dimension_mismatch"
    VERSION_REQUIREMENT = "version_requirement"
    CONFIGURATION_ERROR = "configuration_error"
    ENGINE_UNAVAILABLE = "engine_unavailable"
51 changes: 46 additions & 5 deletions xinference/model/embedding/sentence_transformers/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,15 +424,56 @@ def base64_to_image(base64_str: str) -> Image.Image:
return result

@classmethod
def check_lib(cls) -> bool:
return importlib.util.find_spec("sentence_transformers") is not None
def check_lib(cls) -> Union[bool, str]:
return (
True
if importlib.util.find_spec("sentence_transformers") is not None
else "sentence_transformers library is not installed"
)

@classmethod
def match_json(
cls,
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
# As default embedding engine, sentence-transformer support all models
return model_spec.model_format in ["pytorch"]
) -> Union[bool, str]:
# Check library availability
lib_result = cls.check_lib()
if lib_result != True:
return lib_result

# Check model format compatibility
if model_spec.model_format not in ["pytorch"]:
return f"Sentence Transformers only supports pytorch format, got: {model_spec.model_format}"

# Check model dimensions compatibility
model_dimensions = model_family.dimensions
if model_dimensions > 8192: # Extremely large embedding models
return f"Extremely large embedding model detected ({model_dimensions} dimensions), may have performance issues"

# Check token limits
max_tokens = model_family.max_tokens
if max_tokens > 131072: # Extremely high token limits (128K)
return f"Extremely high token limit model detected (max_tokens: {max_tokens}), may cause memory issues"

# Check for special model requirements
model_name = model_family.model_name.lower()

# Check Qwen2 GTE models
if "gte" in model_name and "qwen2" in model_name:
# These models have specific requirements
if not hasattr(cls, "_check_qwen_gte_requirements"):
return "Qwen2 GTE models require special handling"

# Check Qwen3 models
if "qwen3" in model_name:
# Qwen3 has flash attention requirements - basic check
try:
pass

# This would be checked during actual loading
except Exception:
return "Qwen3 embedding model may have compatibility issues"

return True
87 changes: 77 additions & 10 deletions xinference/model/embedding/vllm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from ..core import EmbeddingModel, EmbeddingModelFamilyV2, EmbeddingSpecV1

logger = logging.getLogger(__name__)
SUPPORTED_MODELS_PREFIXES = ["bge", "gte", "text2vec", "m3e", "gte", "Qwen3"]
SUPPORTED_MODELS_PREFIXES = ["bge", "gte", "text2vec", "m3e", "gte", "qwen3"]


class VLLMEmbeddingModel(EmbeddingModel):
Expand All @@ -32,16 +32,44 @@ def __init__(self, *args, **kwargs):

def load(self):
try:
# Handle vLLM-transformers config conflict by setting environment variable
import os

os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers_cache_vllm"

from vllm import LLM

except ImportError:
except ImportError as e:
error_message = "Failed to import module 'vllm'"
installation_guide = [
"Please make sure 'vllm' is installed. ",
"You can install it by `pip install vllm`\n",
]

# Check if it's a config conflict error
if "aimv2" in str(e):
error_message = (
"vLLM has a configuration conflict with transformers library"
)
installation_guide = [
"This is a known issue with certain vLLM and transformers versions.",
"Try upgrading transformers or using a different vLLM version.\n",
]

raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
except Exception as e:
# Handle config registration conflicts
if "aimv2" in str(e) and "already used by a Transformers config" in str(e):
error_message = (
"vLLM has a configuration conflict with transformers library"
)
installation_guide = [
"This is a known issue with certain vLLM and transformers versions.",
"Try: pip install --upgrade transformers vllm\n",
]
raise RuntimeError(f"{error_message}\n\n{''.join(installation_guide)}")
raise

if self.model_family.model_name in {
"Qwen3-Embedding-0.6B",
"Qwen3-Embedding-4B",
Expand Down Expand Up @@ -149,21 +177,60 @@ def create_embedding(
return result

@classmethod
def check_lib(cls) -> bool:
return importlib.util.find_spec("vllm") is not None
def check_lib(cls) -> Union[bool, str]:
return (
True
if importlib.util.find_spec("vllm") is not None
else "vllm library is not installed"
)

@classmethod
def match_json(
cls,
model_family: EmbeddingModelFamilyV2,
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
if model_spec.model_format in ["pytorch"]:
prefix = model_family.model_name.split("-", 1)[0]
if prefix in SUPPORTED_MODELS_PREFIXES:
return True
return False
) -> Union[bool, str]:
# Check library availability first
lib_result = cls.check_lib()
if lib_result != True:
return lib_result

# Check model format compatibility
if model_spec.model_format not in ["pytorch"]:
return f"VLLM Embedding engine only supports pytorch format models, got format: {model_spec.model_format}"

# Check model name prefix matching
prefix = model_family.model_name.split("-", 1)[0]
if prefix.lower() not in [p.lower() for p in SUPPORTED_MODELS_PREFIXES]:
return f"VLLM Embedding engine only supports models with prefixes {SUPPORTED_MODELS_PREFIXES}, got model: {model_family.model_name}"

# Additional runtime compatibility checks for vLLM version
try:
import vllm
from packaging.version import Version

vllm_version = Version(vllm.__version__)

# Check for vLLM version compatibility issues
if vllm_version >= Version("0.10.0") and vllm_version < Version("0.11.0"):
# vLLM 0.10.x has V1 engine issues on CPU
import platform

if platform.system() == "Darwin" and platform.machine() in [
"arm64",
"arm",
]:
# Check if this is likely to run on CPU (most common for testing)
return f"vLLM {vllm_version} has compatibility issues with embedding models on Apple Silicon CPUs. Consider using a different platform or vLLM version."
elif vllm_version >= Version("0.11.0"):
# vLLM 0.11+ should have fixed the config conflict issue
pass
except Exception:
# If version check fails, continue with basic validation
pass

return True

def wait_for_load(self):
# set context length after engine inited
Expand Down
Loading
Loading