feat: add async embedding methods and update tests for output dimensionality for Gemini embedding compat (#1031)

mdrxy · web-flow · commit c1ff72f7b69e · 2025-07-14T15:46:34.000-04:00
diff --git a/libs/genai/langchain_google_genai/embeddings.py b/libs/genai/langchain_google_genai/embeddings.py
@@ -17,7 +17,10 @@
     GoogleGenerativeAIError,
     get_client_info,
 )
-from langchain_google_genai._genai_extension import build_generative_service
+from langchain_google_genai._genai_extension import (
+    build_generative_async_service,
+    build_generative_service,
+)
 
 _MAX_TOKENS_PER_BATCH = 20000
 _DEFAULT_BATCH_SIZE = 100
@@ -29,8 +32,8 @@ class GoogleGenerativeAIEmbeddings(BaseModel, Embeddings):
     To use, you must have either:
 
         1. The ``GOOGLE_API_KEY`` environment variable set with your API key, or
-        2. Pass your API key using the google_api_key kwarg
-        to the GoogleGenerativeAIEmbeddings constructor.
+        2. Pass your API key using the google_api_key kwarg to the
+        GoogleGenerativeAIEmbeddings constructor.
 
     Example:
         .. code-block:: python
@@ -42,6 +45,7 @@ class GoogleGenerativeAIEmbeddings(BaseModel, Embeddings):
     """
 
     client: Any = None  #: :meta private:
+    async_client: Any = None  #: :meta private:
     model: str = Field(
         ...,
         description="The name of the embedding model to use. "
@@ -100,6 +104,13 @@ def validate_environment(self) -> Self:
             client_options=self.client_options,
             transport=self.transport,
         )
+        self.async_client = build_generative_async_service(
+            credentials=self.credentials,
+            api_key=google_api_key,
+            client_info=client_info,
+            client_options=self.client_options,
+            transport=self.transport,
+        )
         return self
 
     @staticmethod
@@ -166,12 +177,12 @@ def _prepare_batches(texts: List[str], batch_size: int) -> List[List[str]]:
     def _prepare_request(
         self,
         text: str,
+        *,
         task_type: Optional[str] = None,
         title: Optional[str] = None,
         output_dimensionality: Optional[int] = None,
     ) -> EmbedContentRequest:
         task_type = self.task_type or task_type or "RETRIEVAL_DOCUMENT"
-        # https://ai.google.dev/api/rest/v1/models/batchEmbedContents#EmbedContentRequest
         request = EmbedContentRequest(
             content={"parts": [{"text": text}]},
             model=self.model,
@@ -190,16 +201,17 @@ def embed_documents(
         titles: Optional[List[str]] = None,
         output_dimensionality: Optional[int] = None,
     ) -> List[List[float]]:
-        """Embed a list of strings. Google Generative AI currently
-        sets a max batch size of 100 strings.
+        """Embed a list of strings using the `batch endpoint <https://ai.google.dev/api/embeddings#method:-models.batchembedcontents>`__.
+
+        Google Generative AI currently sets a max batch size of 100 strings.
 
         Args:
             texts: List[str] The list of strings to embed.
             batch_size: [int] The batch size of embeddings to send to the model
-            task_type: `task_type <https://ai.google.dev/api/rest/v1/TaskType>`__
+            task_type: `task_type <https://ai.google.dev/api/embeddings#tasktype>`__
             titles: An optional list of titles for texts provided.
-            Only applicable when TaskType is ``'RETRIEVAL_DOCUMENT'``.
-            output_dimensionality: Optional `reduced dimension for the output embedding <https://ai.google.dev/api/rest/v1/models/batchEmbedContents#EmbedContentRequest>`__.
+              Only applicable when TaskType is ``'RETRIEVAL_DOCUMENT'``.
+            output_dimensionality: Optional `reduced dimension for the output embedding <https://ai.google.dev/api/embeddings#EmbedContentRequest>`__.
         Returns:
             List of embeddings, one for each text.
         """
@@ -236,25 +248,26 @@ def embed_documents(
     def embed_query(
         self,
         text: str,
+        *,
         task_type: Optional[str] = None,
         title: Optional[str] = None,
         output_dimensionality: Optional[int] = None,
     ) -> List[float]:
-        """Embed a text, using the `non-batch endpoint <https://ai.google.dev/api/rest/v1/models/embedContent#EmbedContentRequest>`__.
+        """Embed a text, using the `non-batch endpoint <https://ai.google.dev/api/embeddings#method:-models.embedcontent>`__.
 
         Args:
             text: The text to embed.
-            task_type: `task_type <https://ai.google.dev/api/rest/v1/TaskType>`__
+            task_type: `task_type <https://ai.google.dev/api/embeddings#tasktype>`__
             title: An optional title for the text.
-            Only applicable when TaskType is ``'RETRIEVAL_DOCUMENT'``.
-            output_dimensionality: Optional `reduced dimension for the output embedding <https://ai.google.dev/api/rest/v1/models/batchEmbedContents#EmbedContentRequest>`__.
+              Only applicable when TaskType is ``'RETRIEVAL_DOCUMENT'``.
+            output_dimensionality: Optional `reduced dimension for the output embedding <https://ai.google.dev/api/embeddings#EmbedContentRequest>`__.
 
         Returns:
             Embedding for the text.
         """
         task_type_to_use = task_type if task_type else self.task_type
         if task_type_to_use is None:
-            task_type_to_use = "RETRIEVAL_QUERY"  # Default to RETRIEVAL_QUERY
+            task_type_to_use = "RETRIEVAL_QUERY"
         try:
             request: EmbedContentRequest = self._prepare_request(
                 text=text,
@@ -266,3 +279,93 @@ def embed_query(
         except Exception as e:
             raise GoogleGenerativeAIError(f"Error embedding content: {e}") from e
         return list(result.embedding.values)
+
+    async def aembed_documents(
+        self,
+        texts: List[str],
+        *,
+        batch_size: int = _DEFAULT_BATCH_SIZE,
+        task_type: Optional[str] = None,
+        titles: Optional[List[str]] = None,
+        output_dimensionality: Optional[int] = None,
+    ) -> List[List[float]]:
+        """Embed a list of strings using the `batch endpoint <https://ai.google.dev/api/embeddings#method:-models.batchembedcontents>`__.
+
+        Google Generative AI currently sets a max batch size of 100 strings.
+
+        Args:
+            texts: List[str] The list of strings to embed.
+            batch_size: [int] The batch size of embeddings to send to the model
+            task_type: `task_type <https://ai.google.dev/api/embeddings#tasktype>`__
+            titles: An optional list of titles for texts provided.
+                Only applicable when TaskType is ``'RETRIEVAL_DOCUMENT'``.
+            output_dimensionality: Optional `reduced dimension for the output embedding <https://ai.google.dev/api/embeddings#EmbedContentRequest>`__.
+        Returns:
+            List of embeddings, one for each text.
+        """
+        embeddings: List[List[float]] = []
+        batch_start_index = 0
+        for batch in GoogleGenerativeAIEmbeddings._prepare_batches(texts, batch_size):
+            if titles:
+                titles_batch = titles[
+                    batch_start_index : batch_start_index + len(batch)
+                ]
+                batch_start_index += len(batch)
+            else:
+                titles_batch = [None] * len(batch)  # type: ignore[list-item]
+
+            requests = [
+                self._prepare_request(
+                    text=text,
+                    task_type=task_type,
+                    title=title,
+                    output_dimensionality=output_dimensionality,
+                )
+                for text, title in zip(batch, titles_batch)
+            ]
+
+            try:
+                result = await self.async_client.batch_embed_contents(
+                    BatchEmbedContentsRequest(requests=requests, model=self.model)
+                )
+            except Exception as e:
+                raise GoogleGenerativeAIError(f"Error embedding content: {e}") from e
+            embeddings.extend([list(e.values) for e in result.embeddings])
+        return embeddings
+
+    async def aembed_query(
+        self,
+        text: str,
+        *,
+        task_type: Optional[str] = None,
+        title: Optional[str] = None,
+        output_dimensionality: Optional[int] = None,
+    ) -> List[float]:
+        """Embed a text, using the `non-batch endpoint <https://ai.google.dev/api/embeddings#method:-models.embedcontent>`__.
+
+        Args:
+            text: The text to embed.
+            task_type: `task_type <https://ai.google.dev/api/embeddings#tasktype>`__
+            title: An optional title for the text.
+                Only applicable when TaskType is ``'RETRIEVAL_DOCUMENT'``.
+            output_dimensionality: Optional `reduced dimension for the output embedding <https://ai.google.dev/api/embeddings#EmbedContentRequest>`__.
+
+        Returns:
+            Embedding for the text.
+        """
+        task_type_to_use = task_type if task_type else self.task_type
+        if task_type_to_use is None:
+            task_type_to_use = "RETRIEVAL_QUERY"
+        try:
+            request: EmbedContentRequest = self._prepare_request(
+                text=text,
+                task_type=task_type,
+                title=title,
+                output_dimensionality=output_dimensionality,
+            )
+            result: EmbedContentResponse = await self.async_client.embed_content(
+                request
+            )
+        except Exception as e:
+            raise GoogleGenerativeAIError(f"Error embedding content: {e}") from e
+        return list(result.embedding.values)
diff --git a/libs/genai/tests/integration_tests/test_chat_models.py b/libs/genai/tests/integration_tests/test_chat_models.py
@@ -130,13 +130,20 @@ def test_chat_google_genai_invoke_with_image() -> None:
     """Test invoke tokens with image from ChatGoogleGenerativeAI."""
     llm = ChatGoogleGenerativeAI(model=_IMAGE_OUTPUT_MODEL)
 
-    result = llm.invoke(
-        "Generate an image of a cat. Then, say meow!",
-        config=dict(tags=["meow"]),
-        generation_config=dict(
-            top_k=2, top_p=1, temperature=0.7, response_modalities=["TEXT", "IMAGE"]
-        ),
-    )
+    for _ in range(3):
+        result = llm.invoke(
+            "Generate an image of a cat. Then, say meow!",
+            config=dict(tags=["meow"]),
+            generation_config=dict(
+                top_k=2, top_p=1, temperature=0.7, response_modalities=["TEXT", "IMAGE"]
+            ),
+        )
+        if (
+            isinstance(result.content, list)
+            and len(result.content) > 0
+            and isinstance(result.content[0], dict)
+        ):
+            break
     assert isinstance(result, AIMessage)
     assert isinstance(result.content, list)
     assert isinstance(result.content[0], dict)
@@ -155,11 +162,18 @@ def test_chat_google_genai_invoke_with_modalities() -> None:
         response_modalities=[Modality.TEXT, Modality.IMAGE],  # type: ignore[list-item]
     )
 
-    result = llm.invoke(
-        "Generate an image of a cat. Then, say meow!",
-        config=dict(tags=["meow"]),
-        generation_config=dict(top_k=2, top_p=1, temperature=0.7),
-    )
+    for _ in range(3):
+        result = llm.invoke(
+            "Generate an image of a cat. Then, say meow!",
+            config=dict(tags=["meow"]),
+            generation_config=dict(top_k=2, top_p=1, temperature=0.7),
+        )
+        if (
+            isinstance(result.content, list)
+            and len(result.content) > 0
+            and isinstance(result.content[0], dict)
+        ):
+            break
     assert isinstance(result, AIMessage)
     assert isinstance(result.content, list)
     assert isinstance(result.content[0], dict)
diff --git a/libs/genai/tests/integration_tests/test_embeddings.py b/libs/genai/tests/integration_tests/test_embeddings.py
@@ -5,7 +5,8 @@
 from langchain_google_genai._common import GoogleGenerativeAIError
 from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
 
-_MODEL = "models/embedding-001"
+_MODEL = "models/gemini-embedding-001"
+_OUTPUT_DIMENSIONALITY = 768
 
 
 @pytest.mark.parametrize(
@@ -19,7 +20,7 @@
 def test_embed_query_different_lengths(query: str) -> None:
     """Test embedding queries of different lengths."""
     model = GoogleGenerativeAIEmbeddings(model=_MODEL)
-    result = model.embed_query(query)
+    result = model.embed_query(query, output_dimensionality=_OUTPUT_DIMENSIONALITY)
     assert len(result) == 768
     assert isinstance(result, list)
 
@@ -35,7 +36,9 @@ def test_embed_query_different_lengths(query: str) -> None:
 async def test_aembed_query_different_lengths(query: str) -> None:
     """Test embedding queries of different lengths."""
     model = GoogleGenerativeAIEmbeddings(model=_MODEL)
-    result = await model.aembed_query(query)
+    result = await model.aembed_query(
+        query, output_dimensionality=_OUTPUT_DIMENSIONALITY
+    )
     assert len(result) == 768
     assert isinstance(result, list)
 
@@ -45,7 +48,9 @@ def test_embed_documents() -> None:
     model = GoogleGenerativeAIEmbeddings(
         model=_MODEL,
     )
-    result = model.embed_documents(["Hello world", "Good day, world"])
+    result = model.embed_documents(
+        ["Hello world", "Good day, world"], output_dimensionality=_OUTPUT_DIMENSIONALITY
+    )
     assert len(result) == 2
     assert len(result[0]) == 768
     assert len(result[1]) == 768
@@ -58,7 +63,9 @@ async def test_aembed_documents() -> None:
     model = GoogleGenerativeAIEmbeddings(
         model=_MODEL,
     )
-    result = await model.aembed_documents(["Hello world", "Good day, world"])
+    result = await model.aembed_documents(
+        ["Hello world", "Good day, world"], output_dimensionality=_OUTPUT_DIMENSIONALITY
+    )
     assert len(result) == 2
     assert len(result[0]) == 768
     assert len(result[1]) == 768
@@ -69,23 +76,25 @@ async def test_aembed_documents() -> None:
 def test_invalid_model_error_handling() -> None:
     """Test error handling with an invalid model name."""
     with pytest.raises(GoogleGenerativeAIError):
-        GoogleGenerativeAIEmbeddings(model="invalid_model").embed_query("Hello world")
+        GoogleGenerativeAIEmbeddings(model="invalid_model").embed_query(
+            "Hello world", output_dimensionality=_OUTPUT_DIMENSIONALITY
+        )
 
 
 def test_invalid_api_key_error_handling() -> None:
     """Test error handling with an invalid API key."""
     with pytest.raises(GoogleGenerativeAIError):
         GoogleGenerativeAIEmbeddings(
             model=_MODEL, google_api_key=SecretStr("invalid_key")
-        ).embed_query("Hello world")
+        ).embed_query("Hello world", output_dimensionality=_OUTPUT_DIMENSIONALITY)
 
 
 def test_embed_documents_consistency() -> None:
     """Test embedding consistency for the same document."""
     model = GoogleGenerativeAIEmbeddings(model=_MODEL)
     doc = "Consistent document for testing"
-    result1 = model.embed_documents([doc])
-    result2 = model.embed_documents([doc])
+    result1 = model.embed_documents([doc], output_dimensionality=_OUTPUT_DIMENSIONALITY)
+    result2 = model.embed_documents([doc], output_dimensionality=_OUTPUT_DIMENSIONALITY)
     assert result1 == result2
 
 
@@ -94,8 +103,12 @@ def test_embed_documents_quality() -> None:
     model = GoogleGenerativeAIEmbeddings(model=_MODEL)
     similar_docs = ["Document A", "Similar Document A"]
     dissimilar_docs = ["Document A", "Completely Different Zebra"]
-    similar_embeddings = model.embed_documents(similar_docs)
-    dissimilar_embeddings = model.embed_documents(dissimilar_docs)
+    similar_embeddings = model.embed_documents(
+        similar_docs, output_dimensionality=_OUTPUT_DIMENSIONALITY
+    )
+    dissimilar_embeddings = model.embed_documents(
+        dissimilar_docs, output_dimensionality=_OUTPUT_DIMENSIONALITY
+    )
     similar_distance = np.linalg.norm(
         np.array(similar_embeddings[0]) - np.array(similar_embeddings[1])
     )
@@ -109,16 +122,20 @@ def test_embed_query_task_type() -> None:
     """Test for task_type"""
 
     embeddings = GoogleGenerativeAIEmbeddings(model=_MODEL, task_type="clustering")
-    emb = embeddings.embed_query("How does alphafold work?", output_dimensionality=768)
+    emb = embeddings.embed_query(
+        "How does alphafold work?", output_dimensionality=_OUTPUT_DIMENSIONALITY
+    )
 
     embeddings2 = GoogleGenerativeAIEmbeddings(model=_MODEL)
     emb2 = embeddings2.embed_query(
-        "How does alphafold work?", task_type="clustering", output_dimensionality=768
+        "How does alphafold work?",
+        task_type="clustering",
+        output_dimensionality=_OUTPUT_DIMENSIONALITY,
     )
 
     embeddings3 = GoogleGenerativeAIEmbeddings(model=_MODEL)
     emb3 = embeddings3.embed_query(
-        "How does alphafold work?", output_dimensionality=768
+        "How does alphafold work?", output_dimensionality=_OUTPUT_DIMENSIONALITY
     )
 
     assert emb == emb2