chore: fix style issues and typings

kmruiz · kmruiz · commit 4284c0972fd0 · 2025-10-20T16:20:48.000+02:00
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -75,7 +75,6 @@
     "@typescript-eslint/parser": "^8.44.0",
     "@vitest/coverage-v8": "^3.2.4",
     "@vitest/eslint-plugin": "^1.3.4",
-    "ai": "^5.0.72",
     "duplexpair": "^1.0.2",
     "eslint": "^9.34.0",
     "eslint-config-prettier": "^10.1.8",
@@ -103,6 +102,7 @@
     "@mongodb-js/devtools-proxy-support": "^0.5.3",
     "@mongosh/arg-parser": "^3.19.0",
     "@mongosh/service-provider-node-driver": "^3.17.0",
+    "ai": "^5.0.72",
     "bson": "^6.10.4",
     "express": "^5.1.0",
     "lru-cache": "^11.1.0",
diff --git a/src/common/search/embeddingsProvider.ts b/src/common/search/embeddingsProvider.ts
@@ -6,8 +6,7 @@ import assert from "assert";
 import { createFetch } from "@mongodb-js/devtools-proxy-support";
 import { z } from "zod";
 
-const zEmbeddingsInput = z.string();
-type EmbeddingsInput = z.infer<typeof zEmbeddingsInput>;
+type EmbeddingsInput = string;
 type Embeddings = number[];
 
 type EmbeddingParameters = {
@@ -20,7 +19,7 @@ interface EmbeddingsProvider<SupportedModels extends string> {
     embed(modelId: SupportedModels, content: EmbeddingsInput[], parameters: EmbeddingParameters): Promise<Embeddings[]>;
 }
 
-export const zVoyageModels = z.enum(["voyage-3-large", "voyage-3.5", "voyage-3.5-lite", "voyage-code-3"]);
+const zVoyageModels = z.enum(["voyage-3-large", "voyage-3.5", "voyage-3.5-lite", "voyage-code-3"]);
 
 type VoyageModels = z.infer<typeof zVoyageModels>;
 class VoyageEmbeddingsProvider implements EmbeddingsProvider<VoyageModels> {
diff --git a/src/common/search/vectorSearchEmbeddingsManager.ts b/src/common/search/vectorSearchEmbeddingsManager.ts
@@ -273,15 +273,15 @@ export class VectorSearchEmbeddingsManager {
             return providerEmbeddings;
         }
 
-        const oneDocument: Document = await provider
+        const oneDocument: Document = (await provider
             .aggregate(database, collection, [{ $sample: { size: 1 } }, { $project: { embeddings: path } }])
-            .next();
+            .next()) as Document;
 
         if (!oneDocument) {
             return providerEmbeddings;
         }
 
-        const sampleEmbeddings = oneDocument.embeddings;
+        const sampleEmbeddings = oneDocument.embeddings as number[] | BSON.Binary;
         const adaptedEmbeddings = providerEmbeddings.map((embeddings) => {
             // now map based on the sample embeddings
             if (Array.isArray(sampleEmbeddings) && Array.isArray(embeddings)) {
diff --git a/src/tools/mongodb/read/aggregate.ts b/src/tools/mongodb/read/aggregate.ts
@@ -13,7 +13,7 @@ import { operationWithFallback } from "../../../helpers/operationWithFallback.js
 import { AGG_COUNT_MAX_TIME_MS_CAP, ONE_MB, CURSOR_LIMITS_TO_LLM_TEXT } from "../../../helpers/constants.js";
 import { zEJSON } from "../../args.js";
 import { LogId } from "../../../common/logger.js";
-import { SupportedEmbeddingModels, zSupportedEmbeddingModels } from "../../../common/search/embeddingsProvider.js";
+import { zSupportedEmbeddingModels } from "../../../common/search/embeddingsProvider.js";
 
 const AnyStage = zEJSON();
 const VectorSearchStage = z.object({
@@ -47,9 +47,11 @@ const VectorSearchStage = z.object({
             filter: zEJSON()
                 .optional()
                 .describe("MQL filter that can only use pre-filter fields from the index definition."),
-            embeddingModel: zSupportedEmbeddingModels.describe(
-                "The embedding model to use to generate embeddings before search. Note to LLM: If unsure, ask the user before providing one."
-            ),
+            embeddingModel: zSupportedEmbeddingModels
+                .optional()
+                .describe(
+                    "The embedding model to use to generate embeddings before search. Note to LLM: If unsure, ask the user before providing one."
+                ),
         })
         .passthrough(),
 });
@@ -224,32 +226,36 @@ export class AggregateTool extends MongoDBToolBase {
         pipeline: Document[];
     }): Promise<Document[]> {
         for (const stage of pipeline) {
-            if (stage.$vectorSearch) {
-                if ("queryVector" in stage.$vectorSearch && Array.isArray(stage.$vectorSearch.queryVector)) {
-                    // if it's already embeddings, don't do anything
+            if ("$vectorSearch" in stage) {
+                const { $vectorSearch: vectorSearchStage } = stage as z.infer<typeof VectorSearchStage>;
+
+                if (Array.isArray(vectorSearchStage.queryVector)) {
                     continue;
                 }
 
-                if (!("embeddingModel" in stage.$vectorSearch)) {
+                if (!vectorSearchStage.embeddingModel) {
                     throw new MongoDBError(
                         ErrorCodes.AtlasVectorSearchInvalidQuery,
                         "embeddingModel is mandatory if queryVector is a raw string."
                     );
                 }
 
-                const model = stage.$vectorSearch.embeddingModel as SupportedEmbeddingModels;
-                delete stage.$vectorSearch.embeddingModel;
+                const model = vectorSearchStage.embeddingModel;
+                delete vectorSearchStage.embeddingModel;
 
                 const [embeddings] = await this.session.vectorSearchEmbeddingsManager.generateEmbeddings({
                     database,
                     collection,
-                    path: stage.$vectorSearch.path,
+                    path: vectorSearchStage.path,
                     model,
-                    rawValues: stage.$vectorSearch.queryVector,
+                    rawValues: [vectorSearchStage.queryVector],
                     inputType: "query",
                 });
 
-                stage.$vectorSearch.queryVector = embeddings;
+                // $vectorSearch.queryVector can be a BSON.Binary, which is neither a number nor an array.
+                // That is not exactly valid from the LLM's perspective, which is why we overwrite the
+                // stage in an untyped way: what we expose and what we can use are different.
+                vectorSearchStage.queryVector = embeddings as number[];
             }
         }