Skip to content

Commit dd76c03

Browse files
committed
Merge branch 'master' into v3.2-release; version to 3.2.1
2 parents 539bf92 + f286d9f commit dd76c03

20 files changed

+219
-71
lines changed

.github/workflows/tests.yml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,10 @@ jobs:
6363
python -m pip install --upgrade pip
6464
python -m pip install '.[train, onnx, openvino, dev]'
6565
66+
- name: Install model2vec
67+
run: python -m pip install model2vec
68+
if: ${{ contains(fromJSON('["3.10", "3.11", "3.12"]'), matrix.python-version) }}
69+
6670
- name: Run unit tests
6771
run: |
6872
python -m pytest --durations 20 -sv tests/

examples/applications/embedding-quantization/semantic_search_usearch.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
from sentence_transformers.quantization import quantize_embeddings, semantic_search_usearch
77

88
# 1. Load the quora corpus with questions
9-
dataset = load_dataset("quora", split="train").map(
9+
dataset = load_dataset("quora", split="train", trust_remote_code=True).map(
1010
lambda batch: {"text": [text for sample in batch["questions"] for text in sample["text"]]},
1111
batched=True,
1212
remove_columns=["questions", "is_duplicate"],
@@ -26,7 +26,7 @@
2626
# 4. Choose a target precision for the corpus embeddings
2727
corpus_precision = "binary"
2828
# Valid options are: "float32", "uint8", "int8", "ubinary", and "binary"
29-
# But usearch only supports "float32", "int8", and "binary"
29+
# But usearch only supports "float32", "int8", "binary" and "ubinary"
3030

3131
# 5. Encode the corpus
3232
full_corpus_embeddings = model.encode(corpus, normalize_embeddings=True, show_progress_bar=True)

pyproject.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "sentence-transformers"
3-
version = "3.2.0"
3+
version = "3.2.1"
44
description = "State-of-the-Art Text Embeddings"
55
license = { text = "Apache 2.0" }
66
readme = "README.md"
@@ -49,8 +49,8 @@ Repository = "https://github.com/UKPLab/sentence-transformers/"
4949

5050
[project.optional-dependencies]
5151
train = ["datasets", "accelerate>=0.20.3"]
52-
onnx = ["optimum[onnxruntime]>=1.23.0"]
53-
onnx-gpu = ["optimum[onnxruntime-gpu]>=1.23.0"]
52+
onnx = ["optimum[onnxruntime]>=1.23.1"]
53+
onnx-gpu = ["optimum[onnxruntime-gpu]>=1.23.1"]
5454
openvino = ["optimum-intel[openvino]>=1.20.0"]
5555
dev = ["datasets", "accelerate>=0.20.3", "pre-commit", "pytest", "pytest-cov"]
5656

@@ -100,4 +100,4 @@ testpaths = [
100100
addopts = "--strict-markers -m 'not slow'"
101101
markers = [
102102
"slow: marks tests as slow"
103-
]
103+
]

sentence_transformers/SentenceTransformer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1718,10 +1718,10 @@ def _load_sbert_model(
17181718

17191719
# Try to initialize the module with a lot of kwargs, but only if the module supports them
17201720
# Otherwise we fall back to the load method
1721-
# try:
1722-
module = module_class(model_name_or_path, cache_dir=cache_folder, backend=self.backend, **kwargs)
1723-
# except TypeError:
1724-
# module = module_class.load(model_name_or_path)
1721+
try:
1722+
module = module_class(model_name_or_path, cache_dir=cache_folder, backend=self.backend, **kwargs)
1723+
except TypeError:
1724+
module = module_class.load(model_name_or_path)
17251725
else:
17261726
# Normalize does not require any files to be loaded
17271727
if module_class == Normalize:

sentence_transformers/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
__version__ = "3.2.0"
3+
__version__ = "3.2.1"
44
__MODEL_HUB_ORGANIZATION__ = "sentence-transformers"
55

66
import importlib

sentence_transformers/backend.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,9 @@ def export_optimized_onnx_model(
7878
or not isinstance(model[0], Transformer)
7979
or not isinstance(model[0].auto_model, ORTModelForFeatureExtraction)
8080
):
81-
raise ValueError('The model must be a SentenceTransformer model loaded with `backend="onnx"`.')
81+
raise ValueError(
82+
'The model must be a Transformer-based SentenceTransformer model loaded with `backend="onnx"`.'
83+
)
8284

8385
ort_model: ORTModelForFeatureExtraction = model[0].auto_model
8486
optimizer = ORTOptimizer.from_pretrained(ort_model)
@@ -158,7 +160,9 @@ def export_dynamic_quantized_onnx_model(
158160
or not isinstance(model[0], Transformer)
159161
or not isinstance(model[0].auto_model, ORTModelForFeatureExtraction)
160162
):
161-
raise ValueError('The model must be a SentenceTransformer model loaded with `backend="onnx"`.')
163+
raise ValueError(
164+
'The model must be a Transformer-based SentenceTransformer model loaded with `backend="onnx"`.'
165+
)
162166

163167
ort_model: ORTModelForFeatureExtraction = model[0].auto_model
164168
quantizer = ORTQuantizer.from_pretrained(ort_model)

sentence_transformers/losses/CachedGISTEmbedLoss.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from torch.utils.checkpoint import get_device_states, set_device_states
1111

1212
from sentence_transformers import SentenceTransformer
13-
from sentence_transformers.models import Transformer
13+
from sentence_transformers.models import StaticEmbedding, Transformer
1414

1515

1616
class RandContext:
@@ -139,6 +139,11 @@ def __init__(
139139
trainer.train()
140140
"""
141141
super().__init__()
142+
if isinstance(model[0], StaticEmbedding):
143+
raise ValueError(
144+
"CachedGISTEmbedLoss is not compatible with a SentenceTransformer model based on a StaticEmbedding. "
145+
"Consider using GISTEmbedLoss instead."
146+
)
142147
self.model = model
143148
self.guide = guide
144149
self.temperature = temperature

sentence_transformers/losses/CachedMultipleNegativesRankingLoss.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from torch.utils.checkpoint import get_device_states, set_device_states
1111

1212
from sentence_transformers import SentenceTransformer, util
13+
from sentence_transformers.models import StaticEmbedding
1314

1415

1516
class RandContext:
@@ -145,6 +146,12 @@ def __init__(
145146
trainer.train()
146147
"""
147148
super().__init__()
149+
if isinstance(model[0], StaticEmbedding):
150+
raise ValueError(
151+
"CachedMultipleNegativesRankingLoss is not compatible with a SentenceTransformer model based on a StaticEmbedding. "
152+
"Consider using MultipleNegativesRankingLoss instead."
153+
)
154+
148155
self.model = model
149156
self.scale = scale
150157
self.similarity_fct = similarity_fct

sentence_transformers/losses/CachedMultipleNegativesSymmetricRankingLoss.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from sentence_transformers import SentenceTransformer, util
1212
from sentence_transformers.losses.CachedMultipleNegativesRankingLoss import RandContext
13+
from sentence_transformers.models import StaticEmbedding
1314

1415

1516
def _backward_hook(
@@ -114,6 +115,12 @@ def __init__(
114115
- Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup: https://arxiv.org/pdf/2101.06983.pdf
115116
"""
116117
super().__init__()
118+
if isinstance(model[0], StaticEmbedding):
119+
raise ValueError(
120+
"CachedMultipleNegativesSymmetricRankingLoss is not compatible with a SentenceTransformer model based on a StaticEmbedding. "
121+
"Consider using MultipleNegativesSymmetricRankingLoss instead."
122+
)
123+
117124
self.model = model
118125
self.scale = scale
119126
self.similarity_fct = similarity_fct

sentence_transformers/losses/DenoisingAutoEncoderLoss.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, PreTrainedModel
88

99
from sentence_transformers import SentenceTransformer
10+
from sentence_transformers.models import StaticEmbedding
1011

1112
logger = logging.getLogger(__name__)
1213

@@ -73,6 +74,12 @@ def __init__(
7374
)
7475
"""
7576
super().__init__()
77+
78+
if isinstance(model[0], StaticEmbedding):
79+
raise ValueError(
80+
"DenoisingAutoEncoderLoss is not compatible with a SentenceTransformer model based on a StaticEmbedding."
81+
)
82+
7683
self.encoder = model # This will be the final model used during the inference time.
7784
self.tokenizer_encoder = model.tokenizer
7885

0 commit comments

Comments
 (0)