Commit c9b87ac

Merge branch 'master' into v4.0-release

2 parents: 363c6ed + 8d73d4f

9 files changed: +202 −152 lines

.github/workflows/tests.yml

Lines changed: 3 additions & 0 deletions
@@ -11,6 +11,9 @@ on:
       - v*-release
   workflow_dispatch:
 
+env:
+  TRANSFORMERS_IS_CI: 1
+
 jobs:
 
   test_sampling:
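
The new workflow-level env block sets TRANSFORMERS_IS_CI=1 for every job. A hedged sketch of what the tests can do with it (illustrative only; the actual consumers live in the test suite, not in this diff):

import os

# Illustrative: test helpers can branch on the variable set by the workflow
# to enable CI-only behavior such as stricter checks or extra logging.
if os.environ.get("TRANSFORMERS_IS_CI"):
    print("Running under CI")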

docs/.htaccess

Lines changed: 2 additions & 4 deletions
@@ -36,15 +36,13 @@ Redirect 301 /docs/pretrained-models/msmarco.html /docs/pretrained-models/msmarc
 Redirect 301 /docs/examples/training/sts/README.html /examples/sentence_transformer/training/sts/README.html
 
 # Moved example pages for v4.0
-Redirect 301 /examples/applications/README.html /examples/cross_encoder/applications/README.html
-Redirect 301 /examples/training/ms_marco/cross_encoder_README.html /examples/cross_encoder/training/ms_marco/cross_encoder_README.html
-Redirect 301 /examples/training/README.html /examples/cross_encoder/training/README.html
+Redirect 301 /examples/training/ms_marco/cross_encoder_README.html /examples/cross_encoder/training/ms_marco/README.html
+Redirect 301 /examples/applications/cross-encoder/README.html /examples/cross_encoder/applications/README.html
 Redirect 301 /examples/applications/clustering/README.html /examples/sentence_transformer/applications/clustering/README.html
 Redirect 301 /examples/applications/embedding-quantization/README.html /examples/sentence_transformer/applications/embedding-quantization/README.html
 Redirect 301 /examples/applications/image-search/README.html /examples/sentence_transformer/applications/image-search/README.html
 Redirect 301 /examples/applications/parallel-sentence-mining/README.html /examples/sentence_transformer/applications/parallel-sentence-mining/README.html
 Redirect 301 /examples/applications/paraphrase-mining/README.html /examples/sentence_transformer/applications/paraphrase-mining/README.html
-Redirect 301 /examples/applications/README.html /examples/sentence_transformer/applications/README.html
 Redirect 301 /examples/applications/retrieve_rerank/README.html /examples/sentence_transformer/applications/retrieve_rerank/README.html
 Redirect 301 /examples/applications/semantic-search/README.html /examples/sentence_transformer/applications/semantic-search/README.html
 Redirect 301 /examples/applications/text-summarization/README.html /examples/sentence_transformer/applications/text-summarization/README.html

docs/sentence_transformer/usage/efficiency.rst

Lines changed: 11 additions & 1 deletion
@@ -99,7 +99,11 @@ To convert a model to ONNX format, you can use the following code:
     sentences = ["This is an example sentence", "Each sentence is converted"]
     embeddings = model.encode(sentences)
 
-If the model path or repository already contains a model in ONNX format, Sentence Transformers will automatically use it. Otherwise, it will convert the model to ONNX the format.
+If the model path or repository already contains a model in ONNX format, Sentence Transformers will automatically use it. Otherwise, it will convert the model to the ONNX format.
+
+.. note::
+
+   If you wish to use the ONNX model outside of Sentence Transformers, you'll need to perform pooling and/or normalization yourself. The ONNX export only converts the Transformer component, which outputs token embeddings, not sentence embeddings. To get sentence embeddings, you'll need to apply the appropriate pooling strategy (like mean pooling) and any normalization that the original model uses.
 
 All keyword arguments passed via ``model_kwargs`` will be passed on to :meth:`ORTModel.from_pretrained <optimum.onnxruntime.ORTModel.from_pretrained>`. Some notable arguments include:

@@ -291,6 +295,12 @@ To convert a model to OpenVINO format, you can use the following code:
     sentences = ["This is an example sentence", "Each sentence is converted"]
     embeddings = model.encode(sentences)
 
+If the model path or repository already contains a model in OpenVINO format, Sentence Transformers will automatically use it. Otherwise, it will convert the model to the OpenVINO format.
+
+.. note::
+
+   If you wish to use the OpenVINO model outside of Sentence Transformers, you'll need to perform pooling and/or normalization yourself. The OpenVINO export only converts the Transformer component, which outputs token embeddings, not sentence embeddings. To get sentence embeddings, you'll need to apply the appropriate pooling strategy (like mean pooling) and any normalization that the original model uses.
+
 .. raw:: html
 
     All keyword arguments passed via <code>model_kwargs</code> will be passed on to <a href="https://huggingface.co/docs/optimum/intel/openvino/reference#optimum.intel.openvino.modeling_base.OVBaseModel.from_pretrained"><code style="color: #404040; font-weight: 700;">OVBaseModel.from_pretrained()</code></a>. Some notable arguments include:
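
Both added notes describe the same manual step: the exported graph stops at token embeddings, so pooling and normalization must be reapplied by hand. A minimal sketch of that step for an ONNX export, assuming a mean-pooling model such as sentence-transformers/all-MiniLM-L6-v2 (the "model.onnx" path and model name are assumptions for illustration, not part of this diff):

import numpy as np
import onnxruntime
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
session = onnxruntime.InferenceSession("model.onnx")  # assumed export path

inputs = tokenizer(["This is an example sentence"], padding=True, return_tensors="np")
# The exported Transformer returns token embeddings: (batch, seq_len, hidden).
# Some exports omit token_type_ids; drop any key the session does not expect.
token_embeddings = session.run(None, dict(inputs))[0]

# Mean pooling: average the token embeddings, ignoring padding positions
mask = inputs["attention_mask"][..., None].astype(np.float32)
sentence_embeddings = (token_embeddings * mask).sum(axis=1) / mask.sum(axis=1).clip(min=1e-9)

# L2 normalization, as used by models trained for cosine similarity
sentence_embeddings /= np.linalg.norm(sentence_embeddings, axis=1, keepdims=True)

The identical pooling applies to an OpenVINO export; only the runtime call changes.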

sentence_transformers/SentenceTransformer.py

Lines changed: 128 additions & 0 deletions
@@ -391,6 +391,134 @@ def get_backend(self) -> Literal["torch", "onnx", "openvino"]:
         """
         return self.backend
 
+    # Return a single tensor because we're passing a single sentence.
+    @overload
+    def encode(
+        self,
+        sentences: str,
+        prompt_name: str | None = ...,
+        prompt: str | None = ...,
+        batch_size: int = ...,
+        show_progress_bar: bool | None = ...,
+        output_value: Literal["sentence_embedding", "token_embeddings"] = ...,
+        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
+        convert_to_numpy: Literal[False] = ...,
+        convert_to_tensor: bool = ...,
+        device: str | None = ...,
+        normalize_embeddings: bool = ...,
+        **kwargs,
+    ) -> Tensor: ...
+
+    # Return a single array, because convert_to_numpy is True
+    # and "sentence_embeddings" is passed
+    @overload
+    def encode(
+        self,
+        sentences: str | list[str] | np.ndarray,
+        prompt_name: str | None = ...,
+        prompt: str | None = ...,
+        batch_size: int = ...,
+        show_progress_bar: bool | None = ...,
+        output_value: Literal["sentence_embedding"] = ...,
+        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
+        convert_to_numpy: Literal[True] = ...,
+        convert_to_tensor: Literal[False] = ...,
+        device: str | None = ...,
+        normalize_embeddings: bool = ...,
+        **kwargs,
+    ) -> np.ndarray: ...
+
+    # Return a single tensor, because convert_to_tensor is True
+    # and "sentence_embeddings" is passed
+    @overload
+    def encode(
+        self,
+        sentences: str | list[str] | np.ndarray,
+        prompt_name: str | None = ...,
+        prompt: str | None = ...,
+        batch_size: int = ...,
+        show_progress_bar: bool | None = ...,
+        output_value: Literal["sentence_embedding"] = ...,
+        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
+        convert_to_numpy: bool = ...,
+        convert_to_tensor: Literal[True] = ...,
+        device: str | None = ...,
+        normalize_embeddings: bool = ...,
+        **kwargs,
+    ) -> Tensor: ...
+
+    # Return a list of tensors. Value of convert_ doesn't matter.
+    @overload
+    def encode(
+        self,
+        sentences: list[str] | np.ndarray,
+        prompt_name: str | None = ...,
+        prompt: str | None = ...,
+        batch_size: int = ...,
+        show_progress_bar: bool | None = ...,
+        output_value: Literal["sentence_embedding", "token_embeddings"] = ...,
+        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
+        convert_to_numpy: bool = ...,
+        convert_to_tensor: bool = ...,
+        device: str | None = ...,
+        normalize_embeddings: bool = ...,
+        **kwargs,
+    ) -> list[Tensor]: ...
+
+    # Return a list of dict of features, ignore the conversion args.
+    @overload
+    def encode(
+        self,
+        sentences: list[str] | np.ndarray,
+        prompt_name: str | None = ...,
+        prompt: str | None = ...,
+        batch_size: int = ...,
+        show_progress_bar: bool | None = ...,
+        output_value: None = ...,
+        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
+        convert_to_numpy: bool = ...,
+        convert_to_tensor: bool = ...,
+        device: str | None = ...,
+        normalize_embeddings: bool = ...,
+        **kwargs,
+    ) -> list[dict[str, Tensor]]: ...
+
+    # Return a dict of features, ignore the conversion args.
+    @overload
+    def encode(
+        self,
+        sentences: str,
+        prompt_name: str | None = ...,
+        prompt: str | None = ...,
+        batch_size: int = ...,
+        show_progress_bar: bool | None = ...,
+        output_value: None = ...,
+        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
+        convert_to_numpy: bool = ...,
+        convert_to_tensor: bool = ...,
+        device: str | None = ...,
+        normalize_embeddings: bool = ...,
+        **kwargs,
+    ) -> dict[str, Tensor]: ...
+
+    # If "token_embeddings" is True, then the output is a single tensor.
+    @overload
+    def encode(
+        self,
+        sentences: str,
+        prompt_name: str | None = ...,
+        prompt: str | None = ...,
+        batch_size: int = ...,
+        show_progress_bar: bool | None = ...,
+        output_value: Literal["token_embeddings"] = ...,
+        precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = ...,
+        convert_to_numpy: bool = ...,
+        convert_to_tensor: bool = ...,
+        device: str | None = ...,
+        normalize_embeddings: bool = ...,
+        **kwargs,
+    ) -> Tensor: ...
+
     def encode(
         self,
         sentences: str | list[str] | np.ndarray,
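
These overloads replace the deleted SentenceTransformer.pyi stub (next file) so that type checkers can infer encode's return type directly from the call site. A small illustration of how the stubs resolve (the model name is an example, not taken from this diff; runtime behavior is unchanged):

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # example model

# str input with the default convert_to_numpy=True resolves to np.ndarray
embedding = model.encode("This is an example sentence")

# list input with convert_to_tensor=True resolves to a single stacked Tensor
embeddings = model.encode(["First sentence", "Second sentence"], convert_to_tensor=True)

# output_value=None resolves to the raw feature dict instead of embeddings
features = model.encode("This is an example sentence", output_value=None)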

sentence_transformers/SentenceTransformer.pyi

Lines changed: 0 additions & 136 deletions
This file was deleted.

sentence_transformers/cross_encoder/CrossEncoder.py

Lines changed: 39 additions & 5 deletions
@@ -167,11 +167,8 @@ def __init__(
             token=token,
             **model_kwargs,
         )
-        if "model_max_length" not in tokenizer_kwargs:
-            if max_length is not None:
-                tokenizer_kwargs["model_max_length"] = max_length
-            elif hasattr(self.config, "max_position_embeddings"):
-                tokenizer_kwargs["model_max_length"] = self.config.max_position_embeddings
+        if "model_max_length" not in tokenizer_kwargs and max_length is not None:
+            tokenizer_kwargs["model_max_length"] = max_length
 
         self.tokenizer = AutoTokenizer.from_pretrained(
             model_name_or_path,

@@ -182,6 +179,8 @@ def __init__(
             token=token,
             **tokenizer_kwargs,
         )
+        if "model_max_length" not in tokenizer_kwargs and hasattr(self.config, "max_position_embeddings"):
+            self.tokenizer.model_max_length = min(self.tokenizer.model_max_length, self.config.max_position_embeddings)
 
         # Check if a readme exists
         model_card_path = load_file_path(

@@ -272,6 +271,10 @@ def num_labels(self) -> int:
     def max_length(self) -> int:
         return self.tokenizer.model_max_length
 
+    @max_length.setter
+    def max_length(self, value: int) -> None:
+        self.tokenizer.model_max_length = value
+
     @property
     @deprecated(
         "The `default_activation_function` property was renamed and is now deprecated. "
@@ -583,6 +586,37 @@ def push_to_hub(
         create_pr: bool = False,
         tags: list[str] | None = None,
     ) -> str:
+        """
+        Upload the CrossEncoder model to the Hugging Face Hub.
+
+        Example:
+            ::
+
+                from sentence_transformers import CrossEncoder
+
+                model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")
+                model.push_to_hub("username/my-crossencoder-model")
+                # => "https://huggingface.co/username/my-crossencoder-model"
+
+        Args:
+            repo_id (str): The name of the repository on the Hugging Face Hub, e.g. "username/repo_name",
+                "organization/repo_name" or just "repo_name".
+            token (str, optional): The authentication token to use for the Hugging Face Hub API.
+                If not provided, will use the token stored via the Hugging Face CLI.
+            private (bool, optional): Whether to create a private repository. If not specified,
+                the repository will be public.
+            safe_serialization (bool, optional): Whether or not to convert the model weights in safetensors
+                format for safer serialization. Defaults to True.
+            commit_message (str, optional): The commit message to use for the push. Defaults to "Add new CrossEncoder model".
+            exist_ok (bool, optional): If True, do not raise an error if the repository already exists.
+                Ignored if ``create_pr=True``. Defaults to False.
+            revision (str, optional): The git branch to commit to. Defaults to the head of the 'main' branch.
+            create_pr (bool, optional): Whether to create a Pull Request with the upload or directly commit. Defaults to False.
+            tags (list[str], optional): A list of tags to add to the model card. Defaults to None.
+
+        Returns:
+            str: URL of the commit or pull request (if create_pr=True)
+        """
         api = HfApi(token=token)
         repo_url = api.create_repo(
             repo_id=repo_id,
