@@ -85,11 +85,12 @@ def run_text_embedding_genai(
     documents: list[str],
     config: TextEmbeddingPipeline.Config | None = None,
     task: Literal["embed_documents", "embed_query"] = "embed_documents",
+    device: str = "CPU",
 ):
     if not config:
         config = TextEmbeddingPipeline.Config()
 
-    pipeline = TextEmbeddingPipeline(models_path, "CPU", config)
+    pipeline = TextEmbeddingPipeline(models_path, device, config)
 
     if config.batch_size:
         documents = documents[: config.batch_size]
@@ -170,12 +171,12 @@ def run_qwen3_embedding_optimum(
 MAX_EMBEDDING_ERROR = 2e-6 if sys.platform != "darwin" else 0.02  # ARM64 macs have different results
 
 
-def validate_embedding_results(result_1: EmbeddingResult, result_2: EmbeddingResult):
+def validate_embedding_results(result_1: EmbeddingResult, result_2: EmbeddingResult, threshold: float = MAX_EMBEDDING_ERROR):
     np_result_1 = np.array(result_1)
     np_result_2 = np.array(result_2)
 
     max_error = np.abs(np_result_1 - np_result_2).max()
-    assert max_error < MAX_EMBEDDING_ERROR, f"Max error: {max_error} is greater than allowed {MAX_EMBEDDING_ERROR}"
+    assert max_error < threshold, f"Max error: {max_error} is greater than allowed {threshold}"
 
 
 def run_text_embedding_pipeline_with_ref(
@@ -334,6 +335,53 @@ def test_qwen3_embedding(download_and_convert_embeddings_models, dataset_documen
     validate_embedding_results(embeddings_genai, embeddings_opt.tolist())
 
 
+@pytest.mark.parametrize("download_and_convert_embeddings_models", ["Qwen/Qwen3-Embedding-0.6B"], indirect=True)
+@pytest.mark.parametrize(
+    "config",
+    [
+        TextEmbeddingPipeline.Config(normalize=False, pooling_type=TextEmbeddingPipeline.PoolingType.LAST_TOKEN, padding_side="left", batch_size=1),
+        TextEmbeddingPipeline.Config(normalize=False, pooling_type=TextEmbeddingPipeline.PoolingType.LAST_TOKEN, batch_size=1),
+    ],
+)
+@pytest.mark.precommit
+@pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 174635")
+def test_qwen3_embedding_set_batch(download_and_convert_embeddings_models, dataset_documents, config):
+    opt_model, hf_tokenizer, models_path = download_and_convert_embeddings_models
+
+    docs_to_embed = dataset_documents[: config.batch_size] if config.batch_size else dataset_documents
+    embeddings_opt = run_qwen3_embedding_optimum(opt_model, hf_tokenizer, docs_to_embed, config.padding_side)
+    refs_to_validate = embeddings_opt[: config.batch_size] if config.batch_size else embeddings_opt
+    embeddings_genai = run_text_embedding_genai(models_path, docs_to_embed, config, "embed_documents")
+    target_to_validate = embeddings_genai[: config.batch_size] if config.batch_size else embeddings_genai
+    validate_embedding_results(target_to_validate, refs_to_validate.tolist())
+
+
+@pytest.mark.parametrize("download_and_convert_embeddings_models", ["Qwen/Qwen3-Embedding-0.6B"], indirect=True)
+@pytest.mark.parametrize(
+    "config",
+    [
+        TextEmbeddingPipeline.Config(normalize=False, pooling_type=TextEmbeddingPipeline.PoolingType.LAST_TOKEN, padding_side="left", batch_size=1, max_length=200, pad_to_max_length=True),
+        TextEmbeddingPipeline.Config(normalize=False, pooling_type=TextEmbeddingPipeline.PoolingType.LAST_TOKEN, batch_size=1, max_length=200, pad_to_max_length=True),
+    ],
+)
+@pytest.mark.precommit
+@pytest.mark.skipif(
+    sys.platform == "darwin" or platform.machine() in ["aarch64", "arm64", "ARM64"],
+    reason="NPU plugin is available only on Linux and Windows x86_64",
+)
+@pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 174635")
+def test_qwen3_embedding_npu(download_and_convert_embeddings_models, dataset_documents, config):
+    opt_model, hf_tokenizer, models_path = download_and_convert_embeddings_models
+
+    docs_to_embed = dataset_documents[: config.batch_size] if config.batch_size else dataset_documents
+    embeddings_opt = run_qwen3_embedding_optimum(opt_model, hf_tokenizer, docs_to_embed, config.padding_side)
+    refs_to_validate = embeddings_opt[: config.batch_size] if config.batch_size else embeddings_opt
+    embeddings_genai = run_text_embedding_genai(models_path, docs_to_embed, config, "embed_documents", device="NPU")
+    target_to_validate = embeddings_genai[: config.batch_size] if config.batch_size else embeddings_genai
+    threshold = 0.1
+    validate_embedding_results(target_to_validate, refs_to_validate.tolist(), threshold)
+
+
 @pytest.mark.parametrize("download_and_convert_embeddings_models", EMBEDDINGS_TEST_MODELS, indirect=True)
 @pytest.mark.parametrize(
     "config",