/**
 * Expands brace alternations such as `*.{ts,tsx}` into the list of plain
 * glob patterns they stand for.
 *
 * Groups may appear several times in one pattern (`{a,b}.{c,d}`) and may be
 * nested (`{a,{b,c}}`). Alternatives are expanded left to right, so the
 * result order follows the order in which the options are written.
 *
 * @param pattern - Include pattern that may contain `{a,b}` groups.
 * @returns The expanded patterns, or `null` when the braces are malformed:
 *   an opening brace that is never closed, a closing brace without a
 *   matching opener, or an empty group (`{}`).
 */
function expandIncludePatterns(pattern: string): string[] | null {
  /**
   * Returns the index of the `}` that closes the `{` at `open`, taking
   * nested groups into account, or -1 when the group is never closed.
   */
  const findClosingBrace = (text: string, open: number): number => {
    let depth = 0;
    for (let i = open; i < text.length; i += 1) {
      const ch = text.charAt(i);
      if (ch === "{") {
        depth += 1;
      } else if (ch === "}") {
        depth -= 1;
        if (depth === 0) {
          return i;
        }
      }
    }
    return -1;
  };

  /**
   * Splits the inside of a group on commas that are not inside a nested
   * group, so `a,{b,c}` yields `["a", "{b,c}"]`.
   */
  const splitAlternatives = (inner: string): string[] => {
    const options: string[] = [];
    let depth = 0;
    let current = "";
    for (const ch of inner) {
      if (ch === "," && depth === 0) {
        options.push(current);
        current = "";
        continue;
      }
      if (ch === "{") {
        depth += 1;
      } else if (ch === "}") {
        depth -= 1;
      }
      current += ch;
    }
    options.push(current);
    return options;
  };

  const expanded: string[] = [];

  /**
   * Expands the first group of `current` and recurses on every alternative.
   * Returns false as soon as a malformed brace is found.
   */
  const expand = (current: string): boolean => {
    const open = current.indexOf("{");
    const firstClose = current.indexOf("}");

    if (open === -1) {
      // No group left: any remaining `}` has no opener and is an error.
      if (firstClose !== -1) {
        return false;
      }
      expanded.push(current);
      return true;
    }

    // A `}` that appears before the first `{` cannot belong to any group.
    if (firstClose !== -1 && firstClose < open) {
      return false;
    }

    const close = findClosingBrace(current, open);
    if (close === -1) {
      return false;
    }

    const inner = current.slice(open + 1, close);
    if (!inner) {
      return false;
    }

    const prefix = current.slice(0, open);
    const suffix = current.slice(close + 1);
    return splitAlternatives(inner).every((option) =>
      expand(prefix + option + suffix)
    );
  };

  return expand(pattern) ? expanded : null;
}
/**
 * Validates the glob pattern used for include filters.
 *
 * A pattern is accepted when it is non-empty, contains no NUL byte or line
 * break, and its brace groups can be expanded by `expandIncludePatterns`.
 *
 * @param pattern - Include pattern supplied by the caller.
 * @returns `true` when the pattern can be used as an include filter.
 */
function isValidIncludePattern(pattern: string): boolean {
  if (!pattern) {
    return false;
  }

  const forbidden = ["\x00", "\n", "\r"];
  if (forbidden.some((ch) => pattern.includes(ch))) {
    return false;
  }

  // Brace expansion is the only step that can reject a pattern: once every
  // regex metacharacter is escaped, the glob-to-regex conversion cannot fail.
  return expandIncludePatterns(pattern) !== null;
}

/**
 * Returns true if the basename matches the include pattern.
 *
 * `*` matches any run of characters and `?` matches a single character;
 * brace groups are expanded first and any alternative may match.
 *
 * @param basename - File name without directory components.
 * @param pattern - Include pattern, e.g. `*.ts` or `*.{ts,tsx}`.
 * @returns `true` when at least one expanded pattern matches the whole name;
 *   `false` when nothing matches or the pattern's braces are malformed.
 */
function matchIncludePattern(basename: string, pattern: string): boolean {
  const candidates = expandIncludePatterns(pattern);
  if (!candidates) {
    return false;
  }

  return candidates.some((candidate) => {
    // Escape everything, then turn the escaped wildcards back into regex.
    const source = candidate
      .replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
      .replace(/\\\*/g, ".*")
      .replace(/\\\?/g, ".");
    return new RegExp(`^${source}$`).test(basename);
  });
}

/**
 * Translates a glob pattern into an anchored regular expression that is
 * matched against `/`-separated relative paths.
 *
 * - `**` followed by `/` at the start of a path segment matches zero or more
 *   whole directories, so `**` + `/*.ts` also matches a file directly in the
 *   search root.
 * - Any other `**` matches any characters, including `/`.
 * - `*` matches any characters except `/`; `?` matches one character except `/`.
 * - Every other character is matched literally.
 */
function globToRegExp(pattern: string): RegExp {
  let source = "";
  let index = 0;

  while (index < pattern.length) {
    const ch = pattern.charAt(index);

    if (ch === "*" && pattern.charAt(index + 1) === "*") {
      // Treat a run of three or more stars like `**`.
      let end = index;
      while (pattern.charAt(end) === "*") {
        end += 1;
      }
      const startsSegment = index === 0 || pattern.charAt(index - 1) === "/";
      if (startsSegment && pattern.charAt(end) === "/") {
        // Optional directory prefix: consumes the slash after the stars too.
        source += "(?:.*/)?";
        index = end + 1;
      } else {
        source += ".*";
        index = end;
      }
    } else if (ch === "*") {
      source += "[^/]*";
      index += 1;
    } else if (ch === "?") {
      source += "[^/]";
      index += 1;
    } else {
      source += ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
      index += 1;
    }
  }

  return new RegExp(`^${source}$`);
}

/**
 * Simple glob matching function that handles `**` patterns.
 *
 * Patterns containing `**` are matched against the whole relative path.
 * Patterns without `**` match when either the whole relative path or just
 * the file's basename matches, so `*.ts` finds TypeScript files at any depth.
 *
 * @param filePath - Path relative to the search directory, using the
 *   platform's separator.
 * @param pattern - Glob pattern written with `/` separators.
 * @returns `true` when the path matches the pattern.
 */
function matchGlob(filePath: string, pattern: string): boolean {
  // Patterns use "/", but path.join produces "\" on Windows.
  const normalizedPath = filePath.split(path.sep).join("/");
  const regex = globToRegExp(pattern);

  if (regex.test(normalizedPath)) {
    return true;
  }
  if (pattern.includes("**")) {
    return false;
  }
  return regex.test(path.basename(filePath));
}

/**
 * Recursively finds files matching a glob pattern.
 *
 * Directories that cannot be read and files that cannot be stat'ed are
 * skipped silently so that one inaccessible entry does not abort the search.
 *
 * @param basePath - Absolute directory to start walking from.
 * @param pattern - Glob pattern matched against paths relative to `basePath`.
 * @param rootPath - Absolute root used to build the returned virtual paths.
 * @returns One entry per matching file with its `/`-separated path relative
 *   to `rootPath` (prefixed with `/`) and its modification time in ISO-8601.
 */
async function findFilesMatchingPattern(
  basePath: string,
  pattern: string,
  rootPath: string
): Promise<Array<{ virtualPath: string; modifiedAt: string }>> {
  const results: Array<{ virtualPath: string; modifiedAt: string }> = [];

  async function walkDir(
    dirPath: string,
    relativePath: string = ""
  ): Promise<void> {
    let entries;
    try {
      entries = await fs.readdir(dirPath, { withFileTypes: true });
    } catch {
      // Unreadable directory: skip this subtree.
      return;
    }

    for (const entry of entries) {
      const fullPath = path.join(dirPath, entry.name);
      const relativeFilePath = path.join(relativePath, entry.name);

      if (entry.isDirectory()) {
        await walkDir(fullPath, relativeFilePath);
        continue;
      }
      if (!entry.isFile() || !matchGlob(relativeFilePath, pattern)) {
        continue;
      }

      try {
        const stats = await fs.stat(fullPath);
        const virtualPath = `/${path
          .relative(rootPath, fullPath)
          .replace(/\\/g, "/")}`;
        results.push({
          virtualPath,
          modifiedAt: new Date(stats.mtime).toISOString(),
        });
      } catch {
        // Skip files we can't stat
      }
    }
  }

  await walkDir(basePath);
  return results;
}
export interface FilesystemFileSearchMiddlewareConfig {
  /**
   * Root directory to search.
   */
  rootPath: string;
  /**
   * Whether to use ripgrep for search.
   * Falls back to Node.js based search if ripgrep unavailable.
   *
   * @see https://github.com/BurntSushi/ripgrep
   * @default false
   */
  useRipgrep?: boolean;
  /**
   * Maximum file size to search in MB (default: 10).
   */
  maxFileSizeMb?: number;
}

/**
 * Provides Glob and Grep search over filesystem files.
 *
 * This middleware adds two tools that search through local filesystem:
 * - Glob: Fast file pattern matching by file path
 * - Grep: Fast content search using ripgrep or JavaScript fallback
 *
 * Both tools take virtual paths rooted at `config.rootPath` (`/` is the root
 * itself) and report results as virtual paths as well. Invalid or missing
 * search directories are reported as "no results" rather than thrown.
 *
 * @param config - Root directory, ripgrep switch and file size limit.
 * @returns A middleware exposing the `glob_search` and `grep_search` tools.
 *
 * @example
 * ```typescript
 * import { createAgent } from "langchain";
 * import { filesystemFileSearchMiddleware } from "langchain/agents/middleware";
 *
 * const agent = createAgent({
 *   model: model,
 *   tools: [],
 *   middleware: [
 *     filesystemFileSearchMiddleware({ rootPath: "/workspace" }),
 *   ],
 * });
 * ```
 */
export function filesystemFileSearchMiddleware(
  config: FilesystemFileSearchMiddlewareConfig
) {
  /** Matches per virtual file path, each as `[lineNumber, lineText]`. */
  type GrepMatches = Map<string, Array<[number, string]>>;

  const rootPath = path.resolve(config.rootPath);
  const useRipgrep = config.useRipgrep ?? false;
  const maxFileSizeBytes = (config.maxFileSizeMb ?? 10) * 1024 * 1024;

  /**
   * Converts an absolute filesystem path into a `/`-separated virtual path
   * relative to the configured root.
   */
  function toVirtualPath(fullPath: string): string {
    return `/${path.relative(rootPath, fullPath).replace(/\\/g, "/")}`;
  }

  /**
   * Appends one match to the result map, creating the file entry on demand.
   */
  function recordMatch(
    results: GrepMatches,
    virtualPath: string,
    lineNumber: number,
    text: string
  ): void {
    const existing = results.get(virtualPath);
    if (existing) {
      existing.push([lineNumber, text]);
    } else {
      results.set(virtualPath, [[lineNumber, text]]);
    }
  }

  /**
   * Validates and resolves a virtual path to filesystem path.
   *
   * @throws Error when the path contains `..` or `~`, or when the resolved
   *   path is not the root directory or a descendant of it.
   */
  function validateAndResolvePath(virtualPath: string): string {
    const normalizedPath = virtualPath.startsWith("/")
      ? virtualPath
      : `/${virtualPath}`;

    // Check for path traversal
    if (normalizedPath.includes("..") || normalizedPath.includes("~")) {
      throw new Error("Path traversal not allowed");
    }

    // Convert virtual path to filesystem path
    const fullPath = path.resolve(rootPath, normalizedPath.replace(/^\//, ""));

    // Ensure path is within root. A plain string-prefix test is not enough:
    // with root "/work", the input "//work-secret" resolves to the sibling
    // "/work-secret", which shares the prefix. Compare path components.
    const fromRoot = path.relative(rootPath, fullPath);
    const escapesRoot =
      fromRoot === ".." ||
      fromRoot.startsWith(`..${path.sep}`) ||
      path.isAbsolute(fromRoot);
    if (escapesRoot) {
      throw new Error(`Path outside root directory: ${normalizedPath}`);
    }

    // NOTE(review): symbolic links are not resolved here, so a link inside
    // the root that points elsewhere is still accepted — confirm whether
    // that is acceptable for the intended deployments.
    return fullPath;
  }

  /**
   * Resolves a virtual directory path, returning `undefined` when the path
   * is rejected, does not exist, or is not a directory.
   */
  async function resolveSearchDirectory(
    virtualPath: string
  ): Promise<string | undefined> {
    try {
      const fullPath = validateAndResolvePath(virtualPath);
      const stats = await fs.stat(fullPath);
      return stats.isDirectory() ? fullPath : undefined;
    } catch {
      return undefined;
    }
  }

  /**
   * Search using ripgrep subprocess.
   *
   * Falls back to `nodeSearch` when ripgrep cannot be started, reports an
   * error, times out, or produces more output than the buffer allows.
   */
  async function ripgrepSearch(
    pattern: string,
    basePath: string,
    include?: string
  ): Promise<GrepMatches> {
    const baseFull = await resolveSearchDirectory(basePath);
    if (baseFull === undefined) {
      return new Map();
    }

    const args = ["--json"];
    // Apply the same size limit as the Node.js search; a number without a
    // suffix is interpreted by ripgrep as bytes.
    if (Number.isFinite(maxFileSizeBytes) && maxFileSizeBytes >= 0) {
      args.push("--max-filesize", String(Math.floor(maxFileSizeBytes)));
    }
    if (include) {
      args.push("--glob", include);
    }
    // "--" keeps a pattern starting with "-" from being parsed as a flag.
    args.push("--", pattern, baseFull);

    let stdout: string;
    try {
      const output = await execFileAsync("rg", args, {
        timeout: 30000,
        maxBuffer: 10 * 1024 * 1024,
      });
      stdout = output.stdout;
    } catch (error) {
      // Exit status 1 means the search ran and found nothing; that is a
      // valid (empty) result, not a reason to repeat the search in Node.js.
      if (
        typeof error === "object" &&
        error !== null &&
        "code" in error &&
        error.code === 1
      ) {
        return new Map();
      }
      // Fallback to JavaScript search if ripgrep unavailable or times out
      return nodeSearch(pattern, basePath, include);
    }

    const results: GrepMatches = new Map();
    for (const line of stdout.split("\n")) {
      if (!line.trim()) continue;
      try {
        const event = JSON.parse(line);
        if (event.type !== "match") continue;

        const filePath = event.data.path.text;
        const lineText = event.data.lines.text;
        const lineNumber = event.data.line_number;
        // Skip events that lack the text fields read above.
        if (typeof filePath !== "string" || typeof lineText !== "string") {
          continue;
        }

        recordMatch(
          results,
          toVirtualPath(filePath),
          lineNumber,
          lineText.replace(/\r?\n$/, "")
        );
      } catch {
        // Skip invalid JSON lines
        continue;
      }
    }
    return results;
  }

  /**
   * Search using JavaScript regex (fallback).
   *
   * Walks the directory tree, skipping files that fail the include filter,
   * exceed the size limit, or cannot be read.
   */
  async function nodeSearch(
    pattern: string,
    basePath: string,
    include?: string
  ): Promise<GrepMatches> {
    const baseFull = await resolveSearchDirectory(basePath);
    if (baseFull === undefined) {
      return new Map();
    }

    let regex: RegExp;
    try {
      regex = new RegExp(pattern);
    } catch {
      return new Map();
    }

    const results: GrepMatches = new Map();

    async function walkDir(dirPath: string): Promise<void> {
      let entries;
      try {
        entries = await fs.readdir(dirPath, { withFileTypes: true });
      } catch {
        return;
      }

      for (const entry of entries) {
        const fullPath = path.join(dirPath, entry.name);

        if (entry.isDirectory()) {
          await walkDir(fullPath);
          continue;
        }
        if (!entry.isFile()) {
          continue;
        }
        if (include && !matchIncludePattern(entry.name, include)) {
          continue;
        }

        let content: string;
        try {
          const stats = await fs.stat(fullPath);
          if (stats.size > maxFileSizeBytes) {
            // Skip files that are too large
            continue;
          }
          content = await fs.readFile(fullPath, "utf-8");
        } catch {
          // Skip files we can't stat or read
          continue;
        }

        const virtualPath = toVirtualPath(fullPath);
        // Split on LF or CRLF so a trailing "\r" neither defeats `$`-anchored
        // patterns nor leaks into the reported line text.
        content.split(/\r?\n/).forEach((text, index) => {
          if (regex.test(text)) {
            recordMatch(results, virtualPath, index + 1, text);
          }
        });
      }
    }

    await walkDir(baseFull);
    return results;
  }

  /**
   * Format grep results based on output mode.
   *
   * @param results - The results of the grep search.
   * @param outputMode - The output mode to use.
   * @returns The formatted results, or "No matches found" when empty.
   */
  function formatGrepResults(
    results: GrepMatches,
    outputMode: OutputMode
  ): string {
    const sortedFiles = Array.from(results.keys()).sort();

    let lines: string[];
    switch (outputMode) {
      case "content":
        lines = sortedFiles.flatMap((filePath) =>
          (results.get(filePath) ?? []).map(
            ([lineNum, line]) => `${filePath}:${lineNum}:${line}`
          )
        );
        break;
      case "count":
        lines = sortedFiles.map(
          (filePath) => `${filePath}:${results.get(filePath)?.length ?? 0}`
        );
        break;
      case "files_with_matches":
      default:
        lines = sortedFiles;
        break;
    }

    return lines.join("\n") || "No matches found";
  }

  const globSearch = tool(
    async ({ pattern, path: searchPath = "/" }) => {
      const baseFull = await resolveSearchDirectory(searchPath);
      if (baseFull === undefined) {
        return "No files found";
      }

      const matching = await findFilesMatchingPattern(
        baseFull,
        pattern,
        rootPath
      );

      if (matching.length === 0) {
        return "No files found";
      }

      // Sort by modification time (most recent first)
      matching.sort((a, b) => b.modifiedAt.localeCompare(a.modifiedAt));

      return matching.map((m) => m.virtualPath).join("\n");
    },
    {
      name: "glob_search",
      description: `Fast file pattern matching tool that works with any codebase size.

Supports glob patterns like **/*.js or src/**/*.ts.
Returns matching file paths sorted by modification time.
Use this tool when you need to find files by name patterns.`,
      schema: z.object({
        pattern: z
          .string()
          .describe("The glob pattern to match files against."),
        path: z
          .string()
          .default("/")
          .describe(
            "The directory to search in. If not specified, searches from root."
          ),
      }),
    }
  );

  const grepSearch = tool(
    async ({
      pattern,
      path: searchPath = "/",
      include = undefined,
      output_mode = "files_with_matches",
    }) => {
      // Compile regex pattern (for validation)
      try {
        new RegExp(pattern);
      } catch (e) {
        const errorMessage =
          e && typeof e === "object" && "message" in e
            ? String(e.message)
            : String(e);
        return `Invalid regex pattern: ${errorMessage}`;
      }

      if (include && !isValidIncludePattern(include)) {
        return "Invalid include pattern";
      }

      // Try ripgrep first if enabled
      let results: GrepMatches | undefined;
      if (useRipgrep) {
        try {
          results = await ripgrepSearch(pattern, searchPath, include);
        } catch {
          // Fallback to JavaScript search
          results = undefined;
        }
      }

      // JavaScript fallback if ripgrep failed or is disabled
      if (typeof results === "undefined") {
        results = await nodeSearch(pattern, searchPath, include);
      }

      if (results.size === 0) {
        return "No matches found";
      }

      // Format output based on mode
      return formatGrepResults(results, output_mode);
    },
    {
      name: "grep_search",
      description: `Fast content search tool that works with any codebase size.

Searches file contents using regular expressions. Supports full regex
syntax and filters files by pattern with the include parameter.`,
      schema: z.object({
        pattern: z
          .string()
          .describe(
            "The regular expression pattern to search for in file contents."
          ),
        path: z
          .string()
          .default("/")
          .describe(
            "The directory to search in. If not specified, searches from root."
          ),
        include: z
          .string()
          .optional()
          .describe('File pattern to filter (e.g., "*.js", "*.{ts,tsx}").'),
        output_mode: z
          .enum(OUTPUT_MODES)
          .default("files_with_matches")
          .describe(
            `Output format:
- "files_with_matches": Only file paths containing matches (default)
- "content": Matching lines with file:line:content format
- "count": Count of matches per file`
          ),
      }),
    }
  );

  return createMiddleware({
    name: "filesystemFileSearchMiddleware",
    tools: [globSearch, grepSearch],
  });
}
/**
 * Invokes a middleware tool with the given arguments and returns its string
 * result. Throws when the tool was not found so a failed lookup shows up as
 * a clear test failure.
 */
async function invokeTool(
  tool: ClientTool | ServerTool | undefined,
  args: Record<string, unknown>
): Promise<string> {
  if (!tool) {
    throw new Error("Tool not found");
  }
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const result = await (tool as any).invoke(args);
  return result as string;
}

describe("filesystemFileSearchMiddleware", () => {
  type Middleware = ReturnType<typeof filesystemFileSearchMiddleware>;

  let testRoot: string;
  let middleware: Middleware;

  /** Looks up a tool by name on the given middleware (default: per-test one). */
  const findTool = (name: string, source: Middleware = middleware) =>
    source.tools?.find((t) => t.name === name);

  /** Runs `glob_search` on the per-test middleware. */
  const glob = (args: Record<string, unknown>) =>
    invokeTool(findTool("glob_search"), args);

  /** Runs `grep_search`, optionally on a differently configured middleware. */
  const grep = (args: Record<string, unknown>, source?: Middleware) =>
    invokeTool(findTool("grep_search", source), args);

  /** Splits tool output into its non-empty lines. */
  const toLines = (result: string): string[] =>
    result.split("\n").filter(Boolean);

  /** True when any output line contains the given fragment. */
  const mentions = (lines: string[], fragment: string): boolean =>
    lines.some((line) => line.includes(fragment));

  beforeEach(async () => {
    // Create a temporary directory for each test
    testRoot = await fs.mkdtemp(path.join(tmpdir(), "langchain-test-"));
    middleware = filesystemFileSearchMiddleware({ rootPath: testRoot });
  });

  afterEach(async () => {
    // Clean up temporary directory
    try {
      await fs.rm(testRoot, { recursive: true, force: true });
    } catch {
      // Ignore cleanup errors
    }
  });

  describe("glob_search tool", () => {
    it("should find files matching a simple pattern", async () => {
      await fs.writeFile(path.join(testRoot, "file1.ts"), "content1");
      await fs.writeFile(path.join(testRoot, "file2.ts"), "content2");
      await fs.writeFile(path.join(testRoot, "file3.js"), "content3");

      expect(findTool("glob_search")).toBeDefined();

      const files = toLines(await glob({ pattern: "*.ts" }));

      expect(files.length).toBeGreaterThanOrEqual(2);
      expect(mentions(files, "file1.ts")).toBe(true);
      expect(mentions(files, "file2.ts")).toBe(true);
      expect(mentions(files, "file3.js")).toBe(false);
    });

    it("should find files in subdirectories with ** pattern", async () => {
      const subDir = path.join(testRoot, "src", "components");
      await fs.mkdir(subDir, { recursive: true });
      await fs.writeFile(
        path.join(subDir, "Button.tsx"),
        "export const Button"
      );
      await fs.writeFile(path.join(subDir, "Input.tsx"), "export const Input");

      const files = toLines(await glob({ pattern: "**/*.tsx" }));

      expect(files.length).toBeGreaterThanOrEqual(2);
      expect(mentions(files, "Button.tsx")).toBe(true);
      expect(mentions(files, "Input.tsx")).toBe(true);
    });

    it("should search in a specific subdirectory", async () => {
      const subDir = path.join(testRoot, "src");
      await fs.mkdir(subDir, { recursive: true });
      await fs.writeFile(path.join(subDir, "index.ts"), "export");
      await fs.writeFile(path.join(testRoot, "root.ts"), "export");

      const files = toLines(await glob({ pattern: "*.ts", path: "/src" }));

      expect(mentions(files, "src/index.ts")).toBe(true);
      expect(mentions(files, "root.ts")).toBe(false);
    });

    it("should return 'No files found' when no matches exist", async () => {
      expect(await glob({ pattern: "*.nonexistent" })).toBe("No files found");
    });

    it("should return 'No files found' for invalid path", async () => {
      const result = await glob({ pattern: "*.ts", path: "/nonexistent" });

      expect(result).toBe("No files found");
    });

    it("should sort results by modification time (most recent first)", async () => {
      const file1 = path.join(testRoot, "file1.ts");
      const file2 = path.join(testRoot, "file2.ts");
      await fs.writeFile(file1, "content1");
      await fs.writeFile(file2, "content2");

      // Set the timestamps explicitly instead of sleeping between writes:
      // a short delay can be below the filesystem's timestamp resolution.
      const older = new Date("2020-01-01T00:00:00.000Z");
      const newer = new Date("2021-01-01T00:00:00.000Z");
      await fs.utimes(file1, older, older);
      await fs.utimes(file2, newer, newer);

      const files = toLines(await glob({ pattern: "*.ts" }));

      // file2 should come before file1 (more recent)
      expect(files[0]).toContain("file2.ts");
      expect(files[1]).toContain("file1.ts");
    });

    it("should prevent path traversal attacks", async () => {
      expect(await glob({ pattern: "*", path: "/../" })).toBe("No files found");
    });

    it("should prevent path traversal with ~", async () => {
      expect(await glob({ pattern: "*", path: "/~/" })).toBe("No files found");
    });
  });

  describe("grep_search tool", () => {
    beforeEach(async () => {
      // Create test files with content
      await fs.writeFile(
        path.join(testRoot, "file1.ts"),
        "export const hello = 'world';\nconst test = 123;"
      );
      await fs.writeFile(
        path.join(testRoot, "file2.ts"),
        "export const goodbye = 'world';\nconst test = 456;"
      );
      await fs.writeFile(
        path.join(testRoot, "file3.js"),
        "export const hello = 'world';"
      );
    });

    it("should find files containing a pattern (files_with_matches mode)", async () => {
      const files = toLines(
        await grep({ pattern: "hello", output_mode: "files_with_matches" })
      );

      expect(files.length).toBeGreaterThanOrEqual(2);
      expect(mentions(files, "file1.ts")).toBe(true);
      expect(mentions(files, "file3.js")).toBe(true);
    });

    it("should return content with line numbers (content mode)", async () => {
      const lines = toLines(
        await grep({ pattern: "hello", output_mode: "content" })
      );

      expect(lines.length).toBeGreaterThanOrEqual(2);
      expect(mentions(lines, "file1.ts:1:")).toBe(true);
      expect(mentions(lines, "hello")).toBe(true);
    });

    it("should return match counts (count mode)", async () => {
      const lines = toLines(
        await grep({ pattern: "test", output_mode: "count" })
      );

      expect(lines.length).toBeGreaterThanOrEqual(2);
      const file1Line = lines.find((l) => l.includes("file1.ts"));
      const file2Line = lines.find((l) => l.includes("file2.ts"));
      expect(file1Line).toContain(":1");
      expect(file2Line).toContain(":1");
    });

    it("should filter files by include pattern", async () => {
      const files = toLines(
        await grep({
          pattern: "hello",
          include: "*.ts",
          output_mode: "files_with_matches",
        })
      );

      expect(mentions(files, "file1.ts")).toBe(true);
      expect(mentions(files, "file3.js")).toBe(false);
    });

    it("should support brace expansion in include pattern", async () => {
      const files = toLines(
        await grep({
          pattern: "hello",
          include: "*.{ts,js}",
          output_mode: "files_with_matches",
        })
      );

      expect(mentions(files, "file1.ts")).toBe(true);
      expect(mentions(files, "file3.js")).toBe(true);
    });

    it("should return 'No matches found' when pattern doesn't match", async () => {
      const result = await grep({ pattern: "nonexistentpattern12345" });

      expect(result).toBe("No matches found");
    });

    it("should return error for invalid regex pattern", async () => {
      const result = await grep({ pattern: "[invalid" });

      expect(result).toContain("Invalid regex pattern");
    });

    it("should return error for invalid include pattern", async () => {
      const result = await grep({ pattern: "hello", include: "*.{ts" });

      expect(result).toBe("Invalid include pattern");
    });

    it("should search in a specific subdirectory", async () => {
      const subDir = path.join(testRoot, "src");
      await fs.mkdir(subDir, { recursive: true });
      await fs.writeFile(
        path.join(subDir, "index.ts"),
        "export const hello = 'world';"
      );

      const files = toLines(
        await grep({
          pattern: "hello",
          path: "/src",
          output_mode: "files_with_matches",
        })
      );

      expect(mentions(files, "src/index.ts")).toBe(true);
      expect(mentions(files, "file1.ts")).toBe(false);
    });

    it("should prevent path traversal attacks", async () => {
      const result = await grep({ pattern: ".*", path: "/../" });

      expect(result).toBe("No matches found");
    });

    it("should skip files that are too large", async () => {
      // Create a large file (exceeds default 10MB limit)
      const largeContent = "x".repeat(11 * 1024 * 1024); // 11MB
      await fs.writeFile(path.join(testRoot, "large.ts"), largeContent);

      const files = toLines(
        await grep({ pattern: "x", output_mode: "files_with_matches" })
      );

      // Large file should be skipped
      expect(mentions(files, "large.ts")).toBe(false);
    });

    it("should respect custom max file size", async () => {
      const customMiddleware = filesystemFileSearchMiddleware({
        rootPath: testRoot,
        maxFileSizeMb: 1, // 1MB limit
      });

      // Create a file just over 1MB
      const largeContent = "x".repeat(1.1 * 1024 * 1024);
      await fs.writeFile(path.join(testRoot, "medium.ts"), largeContent);

      const files = toLines(
        await grep(
          { pattern: "x", output_mode: "files_with_matches" },
          customMiddleware
        )
      );

      // File should be skipped due to size limit
      expect(mentions(files, "medium.ts")).toBe(false);
    });
  });

  describe("ripgrep integration", () => {
    beforeEach(async () => {
      await fs.writeFile(
        path.join(testRoot, "test.ts"),
        "export const hello = 'world';"
      );
    });

    it("should fallback to Node.js search when ripgrep fails", async () => {
      const middlewareWithRipgrep = filesystemFileSearchMiddleware({
        rootPath: testRoot,
        useRipgrep: true,
      });

      // Even if ripgrep is enabled but fails, should fallback to Node.js search
      const result = await grep(
        { pattern: "hello", output_mode: "files_with_matches" },
        middlewareWithRipgrep
      );

      // Should still work with Node.js fallback
      expect(result).not.toBe("No matches found");
    });

    it("should use Node.js search by default", async () => {
      const result = await grep({
        pattern: "hello",
        output_mode: "files_with_matches",
      });

      // Should work without ripgrep
      expect(result).not.toBe("No matches found");
    });
  });

  describe("edge cases", () => {
    it("should handle empty directory", async () => {
      expect(await glob({ pattern: "*" })).toBe("No files found");
    });

    it("should handle files with special characters in names", async () => {
      await fs.writeFile(path.join(testRoot, "file-with-dash.ts"), "content");
      await fs.writeFile(
        path.join(testRoot, "file_with_underscore.ts"),
        "content"
      );

      const files = toLines(await glob({ pattern: "*.ts" }));

      expect(mentions(files, "file-with-dash.ts")).toBe(true);
      expect(mentions(files, "file_with_underscore.ts")).toBe(true);
    });

    it("should handle nested directories", async () => {
      const deepDir = path.join(testRoot, "a", "b", "c", "d");
      await fs.mkdir(deepDir, { recursive: true });
      await fs.writeFile(path.join(deepDir, "deep.ts"), "content");

      const files = toLines(await glob({ pattern: "**/*.ts" }));

      expect(mentions(files, "deep.ts")).toBe(true);
    });

    it("should handle regex special characters in search pattern", async () => {
      await fs.writeFile(
        path.join(testRoot, "special.ts"),
        "const x = (a + b) * c;"
      );

      const files = toLines(
        await grep({
          pattern: "\\(a \\+ b\\)",
          output_mode: "files_with_matches",
        })
      );

      expect(mentions(files, "special.ts")).toBe(true);
    });

    it("should handle unreadable files gracefully", async () => {
      const unreadableFile = path.join(testRoot, "unreadable.ts");
      await fs.writeFile(unreadableFile, "content");
      await fs.writeFile(path.join(testRoot, "readable.ts"), "content");

      // Drop all permissions so reading really fails where the platform
      // enforces it. On Windows, or when running as a privileged user, the
      // file may stay readable; the assertions below hold either way.
      await fs.chmod(unreadableFile, 0o000);
      try {
        const result = await grep({
          pattern: "content",
          output_mode: "files_with_matches",
        });

        // Should not throw, should return results for readable files
        expect(typeof result).toBe("string");
        expect(result).toContain("readable.ts");
      } finally {
        // Restore permissions so cleanup never depends on them.
        await fs.chmod(unreadableFile, 0o644);
      }
    });

    it("should handle binary files gracefully", async () => {
      // Create a binary file (simulated with non-UTF8 content)
      const binaryFile = path.join(testRoot, "binary.bin");
      await fs.writeFile(binaryFile, Buffer.from([0xff, 0xfe, 0xfd]));

      // Should not throw when encountering binary files
      const result = await grep({
        pattern: ".*",
        output_mode: "files_with_matches",
      });

      expect(typeof result).toBe("string");
    });
  });

  describe("middleware structure", () => {
    it("should have correct middleware name", () => {
      expect(middleware.name).toBe("filesystemFileSearchMiddleware");
    });

    it("should expose both tools", () => {
      expect(middleware.tools).toBeDefined();
      expect(middleware.tools?.length).toBe(2);
      expect(findTool("glob_search")).toBeDefined();
      expect(findTool("grep_search")).toBeDefined();
    });

    it("should have correct tool schemas", () => {
      const globTool = findTool("glob_search");
      const grepTool = findTool("grep_search");

      expect(globTool).toBeDefined();
      expect(grepTool).toBeDefined();

      // Verify schemas are defined
      if (globTool && "schema" in globTool) {
        expect(globTool.schema).toBeDefined();
      }
      if (grepTool && "schema" in grepTool) {
        expect(grepTool.schema).toBeDefined();
      }
    });
  });
});