From 36e7eaf195d41eeb575395ad98c961ae2eb4adbc Mon Sep 17 00:00:00 2001
From: Jack Herrington
Date: Thu, 29 Jan 2026 18:27:06 -0800
Subject: [PATCH 1/6] fix: improve tool execution reliability and prevent race conditions

- Add LLM simulator adapter for deterministic testing of tool flows
- Add comprehensive unit tests for tool execution scenarios:
  - Server tool execution
  - Client tool execution
  - Approval workflows
  - Multi-tool and parallel tool execution
  - Tool sequences
  - Error handling
- Add E2E test infrastructure for tool scenarios
- Fix client tool race conditions by tracking pending tool executions
- Prevent duplicate continuation attempts with continuationPending flag
- Guard against concurrent stream processing in streamResponse
- Add approval info to ToolCall type for server-side decision tracking
- Include approval info in model message conversion for approval workflows
- Check ModelMessage format for approval info extraction in chat activity

This change improves the reliability of tool execution, especially for:
- Client tools with async execute functions
- Approval-based tool workflows
- Sequential tool execution scenarios
---
 .changeset/tool-race-conditions.md                 |  18 +
 .../typescript/ai-client/src/chat-client.ts        |  93 ++-
 .../ai/src/activities/chat/index.ts                |  11 +-
 .../ai/src/activities/chat/messages.ts             |   8 +
 packages/typescript/ai/src/types.ts                |   5 +
 .../smoke-tests/adapters/package.json              |   9 +
 .../smoke-tests/adapters/src/llm-simulator.ts      | 529 +++++++++++++++
 .../adapters/src/tests/tools/approval.test.ts      | 380 +++++++++++
 .../src/tests/tools/client-tool.test.ts            | 279 ++++++++
 .../src/tests/tools/error-handling.test.ts         | 350 ++++++++++
 .../src/tests/tools/multi-tool.test.ts             | 268 ++++++++
 .../src/tests/tools/sequences.test.ts              | 419 ++++++++++++
 .../src/tests/tools/server-tool.test.ts            | 343 ++++++++++
 .../smoke-tests/adapters/vitest.config.ts          |  26 +
 .../typescript/smoke-tests/e2e/package.json        |   4 +-
 .../e2e/src/routes/api.tools-test.ts               | 553 +++++++++++++++
 .../smoke-tests/e2e/src/routes/tools-test.tsx      | 616 +++++++++++++++++
 .../e2e/tests/tools/approval-flow.spec.ts          | 332 +++++++++
 .../e2e/tests/tools/client-tool.spec.ts            | 272 ++++++++
 .../e2e/tests/tools/race-conditions.spec.ts        | 415 ++++++++++++
 .../tools/server-client-sequence.spec.ts           | 304 +++++++++
 pnpm-lock.yaml                                     |   9 +
 testing/panel/src/components/Header.tsx            |  19 +
 testing/panel/src/lib/simulator-tools.ts           | 163 +++++
 testing/panel/src/routeTree.gen.ts                 |  42 ++
 .../panel/src/routes/api.simulator-chat.ts         | 277 ++++++++
 testing/panel/src/routes/simulator.tsx             | 630 ++++++++++++++++++
 27 files changed, 6352 insertions(+), 22 deletions(-)
 create mode 100644 .changeset/tool-race-conditions.md
 create mode 100644 packages/typescript/smoke-tests/adapters/src/llm-simulator.ts
 create mode 100644 packages/typescript/smoke-tests/adapters/src/tests/tools/approval.test.ts
 create mode 100644 packages/typescript/smoke-tests/adapters/src/tests/tools/client-tool.test.ts
 create mode 100644 packages/typescript/smoke-tests/adapters/src/tests/tools/error-handling.test.ts
 create mode 100644 packages/typescript/smoke-tests/adapters/src/tests/tools/multi-tool.test.ts
 create mode 100644 packages/typescript/smoke-tests/adapters/src/tests/tools/sequences.test.ts
 create mode 100644 packages/typescript/smoke-tests/adapters/src/tests/tools/server-tool.test.ts
 create mode 100644 packages/typescript/smoke-tests/adapters/vitest.config.ts
 create mode 100644 packages/typescript/smoke-tests/e2e/src/routes/api.tools-test.ts
 create mode 100644
packages/typescript/smoke-tests/e2e/src/routes/tools-test.tsx create mode 100644 packages/typescript/smoke-tests/e2e/tests/tools/approval-flow.spec.ts create mode 100644 packages/typescript/smoke-tests/e2e/tests/tools/client-tool.spec.ts create mode 100644 packages/typescript/smoke-tests/e2e/tests/tools/race-conditions.spec.ts create mode 100644 packages/typescript/smoke-tests/e2e/tests/tools/server-client-sequence.spec.ts create mode 100644 testing/panel/src/lib/simulator-tools.ts create mode 100644 testing/panel/src/routes/api.simulator-chat.ts create mode 100644 testing/panel/src/routes/simulator.tsx diff --git a/.changeset/tool-race-conditions.md b/.changeset/tool-race-conditions.md new file mode 100644 index 000000000..4bd1c0862 --- /dev/null +++ b/.changeset/tool-race-conditions.md @@ -0,0 +1,18 @@ +--- +"@tanstack/ai": patch +"@tanstack/ai-client": patch +--- + +fix: improve tool execution reliability and prevent race conditions + +- Fix client tool execution race conditions by tracking pending tool executions +- Prevent duplicate continuation attempts with continuationPending flag +- Guard against concurrent stream processing in streamResponse +- Add approval info to ToolCall type for server-side decision tracking +- Include approval info in model message conversion for approval workflows +- Check ModelMessage format for approval info extraction in chat activity + +This change improves the reliability of tool execution, especially for: +- Client tools with async execute functions +- Approval-based tool workflows +- Sequential tool execution scenarios diff --git a/packages/typescript/ai-client/src/chat-client.ts b/packages/typescript/ai-client/src/chat-client.ts index bb0a46937..a280260a8 100644 --- a/packages/typescript/ai-client/src/chat-client.ts +++ b/packages/typescript/ai-client/src/chat-client.ts @@ -27,6 +27,10 @@ export class ChatClient { private currentStreamId: string | null = null private currentMessageId: string | null = null private postStreamActions: Array<() => Promise> = [] + // Track pending client tool executions to await them before stream finalization + private pendingToolExecutions: Map> = new Map() + // Flag to deduplicate continuation checks during action draining + private continuationPending = false private callbacksRef: { current: { @@ -127,31 +131,41 @@ export class ChatClient { ) } }, - onToolCall: async (args: { + onToolCall: (args: { toolCallId: string toolName: string input: any }) => { // Handle client-side tool execution automatically const clientTool = this.clientToolsRef.current.get(args.toolName) - if (clientTool?.execute) { - try { - const output = await clientTool.execute(args.input) - await this.addToolResult({ - toolCallId: args.toolCallId, - tool: args.toolName, - output, - state: 'output-available', - }) - } catch (error: any) { - await this.addToolResult({ - toolCallId: args.toolCallId, - tool: args.toolName, - output: null, - state: 'output-error', - errorText: error.message, - }) - } + const executeFunc = clientTool?.execute + if (executeFunc) { + // Create and track the execution promise + const executionPromise = (async () => { + try { + const output = await executeFunc(args.input) + await this.addToolResult({ + toolCallId: args.toolCallId, + tool: args.toolName, + output, + state: 'output-available', + }) + } catch (error: any) { + await this.addToolResult({ + toolCallId: args.toolCallId, + tool: args.toolName, + output: null, + state: 'output-error', + errorText: error.message, + }) + } finally { + // Remove from pending when 
complete + this.pendingToolExecutions.delete(args.toolCallId) + } + })() + + // Track the pending execution + this.pendingToolExecutions.set(args.toolCallId, executionPromise) } }, onApprovalRequest: (args: { @@ -227,6 +241,12 @@ export class ChatClient { await new Promise((resolve) => setTimeout(resolve, 0)) } + // Wait for all pending tool executions to complete before finalizing + // This ensures client tools finish before we check for continuation + if (this.pendingToolExecutions.size > 0) { + await Promise.all(this.pendingToolExecutions.values()) + } + // Finalize the stream this.processor.finalizeStream() @@ -294,9 +314,17 @@ export class ChatClient { * Stream a response from the LLM */ private async streamResponse(): Promise { + // Guard against concurrent streams - if already loading, skip + if (this.isLoading) { + return + } + this.setIsLoading(true) this.setError(undefined) this.abortController = new AbortController() + // Reset pending tool executions for the new stream + this.pendingToolExecutions.clear() + let streamCompletedSuccessfully = false try { // Get model messages for the LLM @@ -319,6 +347,7 @@ export class ChatClient { ) await this.processStream(stream) + streamCompletedSuccessfully = true } catch (err) { if (err instanceof Error) { if (err.name === 'AbortError') { @@ -333,6 +362,20 @@ export class ChatClient { // Drain any actions that were queued while the stream was in progress await this.drainPostStreamActions() + + // Continue conversation if the stream ended with a tool result (server tool completed) + if (streamCompletedSuccessfully) { + const messages = this.processor.getMessages() + const lastPart = messages.at(-1)?.parts.at(-1) + + if (lastPart?.type === 'tool-result' && this.shouldAutoSend()) { + try { + await this.checkForContinuation() + } catch (error) { + console.error('Failed to continue flow after tool result:', error) + } + } + } } } @@ -476,8 +519,18 @@ export class ChatClient { * Check if we should continue the flow and do so if needed */ private async checkForContinuation(): Promise { + // Prevent duplicate continuation attempts + if (this.continuationPending || this.isLoading) { + return + } + if (this.shouldAutoSend()) { - await this.streamResponse() + this.continuationPending = true + try { + await this.streamResponse() + } finally { + this.continuationPending = false + } } } diff --git a/packages/typescript/ai/src/activities/chat/index.ts b/packages/typescript/ai/src/activities/chat/index.ts index feb5ef995..bb0c46ebf 100644 --- a/packages/typescript/ai/src/activities/chat/index.ts +++ b/packages/typescript/ai/src/activities/chat/index.ts @@ -731,7 +731,7 @@ class TextEngine< const clientToolResults = new Map() for (const message of this.messages) { - // todo remove any and fix this + // Check for UIMessage format (parts array) if (message.role === 'assistant' && (message as any).parts) { const parts = (message as any).parts for (const part of parts) { @@ -752,6 +752,15 @@ class TextEngine< } } } + + // Check for ModelMessage format (toolCalls array with approval info) + if (message.role === 'assistant' && message.toolCalls) { + for (const toolCall of message.toolCalls) { + if (toolCall.approval) { + approvals.set(toolCall.approval.id, toolCall.approval.approved) + } + } + } } return { approvals, clientToolResults } diff --git a/packages/typescript/ai/src/activities/chat/messages.ts b/packages/typescript/ai/src/activities/chat/messages.ts index 14c8dc621..c0ba16678 100644 --- a/packages/typescript/ai/src/activities/chat/messages.ts +++ 
b/packages/typescript/ai/src/activities/chat/messages.ts @@ -104,6 +104,14 @@ export function uiMessageToModelMessages( name: p.name, arguments: p.arguments, }, + // Include approval info if tool was approved/denied (for server to know the decision) + ...(p.state === 'approval-responded' && + p.approval?.approved !== undefined && { + approval: { + id: p.approval.id, + approved: p.approval.approved, + }, + }), })) : undefined diff --git a/packages/typescript/ai/src/types.ts b/packages/typescript/ai/src/types.ts index 7bd3c52d9..4e363c889 100644 --- a/packages/typescript/ai/src/types.ts +++ b/packages/typescript/ai/src/types.ts @@ -91,6 +91,11 @@ export interface ToolCall { name: string arguments: string // JSON string } + /** Approval info for tools requiring user approval (included in messages sent back to server) */ + approval?: { + id: string + approved: boolean + } } // ============================================================================ diff --git a/packages/typescript/smoke-tests/adapters/package.json b/packages/typescript/smoke-tests/adapters/package.json index cee813b65..1df9c3de4 100644 --- a/packages/typescript/smoke-tests/adapters/package.json +++ b/packages/typescript/smoke-tests/adapters/package.json @@ -6,8 +6,16 @@ "author": "", "license": "MIT", "type": "module", + "exports": { + ".": { + "import": "./src/llm-simulator.ts", + "types": "./src/llm-simulator.ts" + } + }, "scripts": { "start": "tsx src/cli.ts", + "test": "vitest run", + "test:watch": "vitest", "typecheck": "tsc --noEmit" }, "dependencies": { @@ -26,6 +34,7 @@ "dotenv": "^17.2.3", "tsx": "^4.21.0", "typescript": "5.9.3", + "vitest": "^4.0.14", "zod": "^4.2.0" } } diff --git a/packages/typescript/smoke-tests/adapters/src/llm-simulator.ts b/packages/typescript/smoke-tests/adapters/src/llm-simulator.ts new file mode 100644 index 000000000..e475d41e5 --- /dev/null +++ b/packages/typescript/smoke-tests/adapters/src/llm-simulator.ts @@ -0,0 +1,529 @@ +import type { + StreamChunk, + ChatOptions, + StructuredOutputOptions, + StructuredOutputResult, +} from '@tanstack/ai' + +/** + * Defines a tool call in the simulator script + */ +export interface SimulatorToolCall { + /** Tool name to call */ + name: string + /** Arguments to pass to the tool (will be JSON stringified) */ + arguments: Record + /** Optional custom tool call ID (auto-generated if not provided) */ + id?: string +} + +/** + * Defines a single iteration (LLM turn) in the simulator script + */ +export interface SimulatorIteration { + /** Text content to stream (optional) */ + content?: string + /** Tool calls to make (optional) */ + toolCalls?: Array + /** Finish reason - defaults to 'stop' if no tool calls, 'tool_calls' if has tool calls */ + finishReason?: 'stop' | 'tool_calls' | 'length' | null +} + +/** + * Complete script defining the LLM behavior + */ +export interface SimulatorScript { + /** Array of iterations (LLM turns) */ + iterations: Array + /** Model name to report in chunks (default: 'simulator-model') */ + model?: string +} + +/** + * LLM Simulator Adapter + * + * A deterministic mock adapter that yields predictable responses + * based on a pre-defined script. Useful for testing tool execution + * flows without depending on actual LLM behavior. 
+ * + * @example + * ```typescript + * const script: SimulatorScript = { + * iterations: [ + * { + * content: "Let me check the temperature", + * toolCalls: [{ name: "get_temperature", arguments: { location: "Paris" } }] + * }, + * { + * content: "The temperature in Paris is 70 degrees" + * } + * ] + * } + * + * const adapter = createLLMSimulator(script) + * const stream = chat({ adapter, model: 'simulator', messages, tools }) + * ``` + */ +export class LLMSimulatorAdapter { + readonly kind = 'text' as const + readonly name = 'llm-simulator' + readonly models = ['simulator-model'] as const + + private script: SimulatorScript + private iterationIndex = 0 + private toolCallCounter = 0 + + constructor(script: SimulatorScript) { + this.script = script + } + + /** + * Reset the simulator to start from the first iteration + */ + reset(): void { + this.iterationIndex = 0 + this.toolCallCounter = 0 + } + + /** + * Get the current iteration index + */ + getCurrentIteration(): number { + return this.iterationIndex + } + + async *chatStream( + options: ChatOptions>, + ): AsyncIterable { + // Determine iteration based on message history for stateless operation across requests. + // This is primarily for E2E tests where each HTTP request creates a new adapter instance. + // + // Only apply message-based iteration when: + // 1. We're at index 0 (fresh adapter instance) + // 2. The script contains the tool calls we see in messages (full conversation script) + // + // For "continuation scripts" (unit tests) that only contain remaining iterations, + // we rely on the stateful iterationIndex. + if (this.iterationIndex === 0) { + const iterationFromMessages = this.determineIterationFromMessages( + options.messages, + ) + if (iterationFromMessages !== null && iterationFromMessages > 0) { + // Check if this script is a "full script" by seeing if iteration 0 + // has a tool call that matches one in the messages + const firstIterationToolCalls = this.script.iterations[0]?.toolCalls + const messagesHaveMatchingToolCall = + firstIterationToolCalls?.some((tc) => + this.isToolCallInMessages(tc.name, options.messages), + ) ?? false + + if ( + messagesHaveMatchingToolCall && + iterationFromMessages < this.script.iterations.length + ) { + // Full script mode: use message-based iteration + this.iterationIndex = iterationFromMessages + } + // Otherwise: continuation script mode, keep iterationIndex at 0 + } + } + + const iteration = this.script.iterations[this.iterationIndex] + const model = this.script.model || 'simulator-model' + const runId = this.generateId() + const messageId = this.generateId() + + // Yield RUN_STARTED event + yield { + type: 'RUN_STARTED', + runId, + model, + timestamp: Date.now(), + } as StreamChunk + + if (!iteration) { + // No more iterations - just return RUN_FINISHED + yield { + type: 'RUN_FINISHED', + runId, + model, + timestamp: Date.now(), + finishReason: 'stop', + } as StreamChunk + return + } + + // Yield content chunks if content is provided (AG-UI format) + if (iteration.content) { + // Yield TEXT_MESSAGE_START + yield { + type: 'TEXT_MESSAGE_START', + messageId, + model, + timestamp: Date.now(), + role: 'assistant', + } as StreamChunk + + // Split content into chunks for more realistic streaming + const words = iteration.content.split(' ') + let accumulated = '' + + for (let i = 0; i < words.length; i++) { + const word = words[i] + const delta = i === 0 ? 
word : ` ${word}` + accumulated += delta + + yield { + type: 'TEXT_MESSAGE_CONTENT', + messageId, + model, + timestamp: Date.now(), + delta, + content: accumulated, + } as StreamChunk + } + + // Yield TEXT_MESSAGE_END + yield { + type: 'TEXT_MESSAGE_END', + messageId, + model, + timestamp: Date.now(), + } as StreamChunk + } + + // Yield tool call chunks if tool calls are provided (AG-UI format) + if (iteration.toolCalls && iteration.toolCalls.length > 0) { + for (let i = 0; i < iteration.toolCalls.length; i++) { + const toolCall = iteration.toolCalls[i]! + const toolCallId = + toolCall.id || `call-${++this.toolCallCounter}-${Date.now()}` + const toolName = toolCall.name + const argsStr = JSON.stringify(toolCall.arguments) + + // Yield TOOL_CALL_START + yield { + type: 'TOOL_CALL_START', + toolCallId, + toolName, + model, + timestamp: Date.now(), + } as StreamChunk + + // Yield TOOL_CALL_ARGS (full arguments at once for simplicity) + yield { + type: 'TOOL_CALL_ARGS', + toolCallId, + toolName, + model, + timestamp: Date.now(), + delta: argsStr, + args: argsStr, + } as StreamChunk + + // Note: TOOL_CALL_END is NOT emitted here because the chat activity + // emits it after the tool is actually executed. The LLM only signals + // that it wants to call a tool, it doesn't produce the result. + } + } + + // Determine finish reason + let finishReason = iteration.finishReason + if (finishReason === undefined) { + finishReason = + iteration.toolCalls && iteration.toolCalls.length > 0 + ? 'tool_calls' + : 'stop' + } + + // Yield RUN_FINISHED event + yield { + type: 'RUN_FINISHED', + runId, + model, + timestamp: Date.now(), + finishReason, + } as StreamChunk + + // Advance to next iteration for next call + this.iterationIndex++ + } + + async structuredOutput( + _options: StructuredOutputOptions>, + ): Promise> { + // Simple mock implementation + return { + data: {}, + rawText: '{}', + } + } + + private generateId(): string { + return `sim-${Date.now()}-${Math.random().toString(36).substring(7)}` + } + + /** + * Check if a tool with the given name appears in the messages + */ + private isToolCallInMessages( + toolName: string, + messages: Array<{ + role: string + toolCalls?: Array<{ function: { name: string } }> + }>, + ): boolean { + for (const msg of messages) { + if (msg.role === 'assistant' && msg.toolCalls) { + for (const tc of msg.toolCalls) { + if (tc.function.name === toolName) { + return true + } + } + } + } + return false + } + + /** + * Determine which iteration we should be on based on message history. + * This enables stateless operation across requests - each request can + * determine the correct iteration based on how many tool call rounds + * have been completed. + * + * Logic: + * - Count assistant messages that have tool calls + * - For each such message, check if there are corresponding tool results + * - Tool results can be in: + * 1. Separate `role: 'tool'` messages with `toolCallId` + * 2. 
The `parts` array of assistant messages with `output` set + * - Completed tool call rounds = iterations we've already processed + */ + private determineIterationFromMessages( + messages: Array<{ + role: string + toolCalls?: Array<{ + id: string + approval?: { id: string; approved: boolean } + }> + toolCallId?: string + parts?: Array<{ + type: string + id?: string + output?: any + approval?: { approved?: boolean } + }> + }>, + ): number | null { + if (!messages || messages.length === 0) { + return 0 // Fresh conversation, start at iteration 0 + } + + // Find all assistant messages with tool calls + const assistantToolCallMessages = messages.filter( + (m) => m.role === 'assistant' && m.toolCalls && m.toolCalls.length > 0, + ) + + if (assistantToolCallMessages.length === 0) { + // No tool calls in history, might be first iteration or continuation + // Check if there's a user message (fresh start) + const hasUserMessage = messages.some((m) => m.role === 'user') + return hasUserMessage ? 0 : null + } + + // Get all completed tool call IDs from: + // 1. Separate tool result messages (role: 'tool') + // 2. Parts array with output set (client tool results) - UIMessage format + // 3. Parts array with approval.approved set (approval responses) - UIMessage format + // 4. toolCalls array with approval.approved set (approval responses) - ModelMessage format + const completedToolIds = new Set() + + for (const msg of messages) { + // Check for role: 'tool' messages (server tool results) + if (msg.role === 'tool' && msg.toolCallId) { + completedToolIds.add(msg.toolCallId) + } + + // Check for UIMessage format: parts with output or approval responses + if (msg.parts) { + for (const part of msg.parts) { + if (part.type === 'tool-call' && part.id) { + // Client tool results have output set + if (part.output !== undefined) { + completedToolIds.add(part.id) + } + // Approval tools are complete when approval.approved is set (true or false) + if (part.approval?.approved !== undefined) { + completedToolIds.add(part.id) + } + } + } + } + + // Check for ModelMessage format: toolCalls with approval info + if (msg.role === 'assistant' && msg.toolCalls) { + for (const tc of msg.toolCalls) { + // Approval tools are complete when approval.approved is set + if (tc.approval?.approved !== undefined) { + completedToolIds.add(tc.id) + } + } + } + } + + // Count how many complete tool call rounds we have + let completedRounds = 0 + for (const assistantMsg of assistantToolCallMessages) { + const toolCalls = assistantMsg.toolCalls as Array<{ id: string }> + const allToolsComplete = toolCalls.every((tc) => + completedToolIds.has(tc.id), + ) + if (allToolsComplete) { + completedRounds++ + } + } + + // The next iteration is completedRounds (0-indexed) + // e.g., if we've completed 1 round, we're on iteration 1 + return completedRounds + } +} + +/** + * Create a new LLM Simulator adapter with the given script + */ +export function createLLMSimulator( + script: SimulatorScript, +): LLMSimulatorAdapter { + return new LLMSimulatorAdapter(script) +} + +// ============================================================================ +// Pre-built Scripts for Common Scenarios +// ============================================================================ + +/** + * Script builders for common test scenarios + */ +export const SimulatorScripts = { + /** + * Script for a single server tool call + */ + singleServerTool( + toolName: string, + toolArgs: Record, + responseContent: string, + ): SimulatorScript { + return { + iterations: [ + { + 
content: `I'll use the ${toolName} tool.`, + toolCalls: [{ name: toolName, arguments: toolArgs }], + }, + { + content: responseContent, + }, + ], + } + }, + + /** + * Script for a single client tool call (no server execute) + */ + singleClientTool( + toolName: string, + toolArgs: Record, + responseContent: string, + ): SimulatorScript { + return { + iterations: [ + { + content: `I'll use the ${toolName} tool.`, + toolCalls: [{ name: toolName, arguments: toolArgs }], + }, + { + content: responseContent, + }, + ], + } + }, + + /** + * Script for a tool that requires approval + */ + approvalTool( + toolName: string, + toolArgs: Record, + responseAfterApproval: string, + ): SimulatorScript { + return { + iterations: [ + { + content: `I need to use ${toolName}, which requires your approval.`, + toolCalls: [{ name: toolName, arguments: toolArgs }], + }, + { + content: responseAfterApproval, + }, + ], + } + }, + + /** + * Script for sequential tool calls (tool A then tool B) + */ + sequentialTools( + tool1: { name: string; args: Record }, + tool2: { name: string; args: Record }, + finalResponse: string, + ): SimulatorScript { + return { + iterations: [ + { + content: `First, I'll use ${tool1.name}.`, + toolCalls: [{ name: tool1.name, arguments: tool1.args }], + }, + { + content: `Now I'll use ${tool2.name}.`, + toolCalls: [{ name: tool2.name, arguments: tool2.args }], + }, + { + content: finalResponse, + }, + ], + } + }, + + /** + * Script for multiple tools in the same turn + */ + parallelTools( + tools: Array<{ name: string; args: Record }>, + responseContent: string, + ): SimulatorScript { + return { + iterations: [ + { + content: `I'll use multiple tools at once.`, + toolCalls: tools.map((t) => ({ name: t.name, arguments: t.args })), + }, + { + content: responseContent, + }, + ], + } + }, + + /** + * Script for a simple text response (no tools) + */ + textOnly(content: string): SimulatorScript { + return { + iterations: [ + { + content, + }, + ], + } + }, +} diff --git a/packages/typescript/smoke-tests/adapters/src/tests/tools/approval.test.ts b/packages/typescript/smoke-tests/adapters/src/tests/tools/approval.test.ts new file mode 100644 index 000000000..72821c7ae --- /dev/null +++ b/packages/typescript/smoke-tests/adapters/src/tests/tools/approval.test.ts @@ -0,0 +1,380 @@ +import { describe, expect, it, vi } from 'vitest' +import { chat, maxIterations, toolDefinition } from '@tanstack/ai' +import { z } from 'zod' +import { SimulatorScripts, createLLMSimulator } from '../../llm-simulator' +import type { SimulatorScript } from '../../llm-simulator' + +/** + * Helper to collect all chunks from a stream + */ +async function collectChunks(stream: AsyncIterable): Promise> { + const chunks: Array = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + return chunks +} + +describe('Approval Flow Tests', () => { + describe('Approval Requested', () => { + it('should emit approval-requested for tools with needsApproval', async () => { + const script = SimulatorScripts.approvalTool( + 'delete_file', + { path: '/tmp/important.txt' }, + 'The file has been deleted.', + ) + const adapter = createLLMSimulator(script) + + const executeFn = vi.fn(async (args: { path: string }) => { + return JSON.stringify({ deleted: true, path: args.path }) + }) + + const deleteTool = toolDefinition({ + name: 'delete_file', + description: 'Delete a file from the filesystem', + inputSchema: z.object({ + path: z.string().describe('The file path to delete'), + }), + needsApproval: true, + }).server(executeFn) + + 
const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Delete /tmp/important.txt' }], + tools: [deleteTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Should have approval-requested chunk (emitted as CUSTOM event) + const approvalChunks = chunks.filter( + (c: any) => c.type === 'CUSTOM' && c.name === 'approval-requested', + ) + expect(approvalChunks.length).toBe(1) + + const approvalChunk = approvalChunks[0] as any + expect(approvalChunk.data.toolName).toBe('delete_file') + expect(approvalChunk.data.input).toEqual({ path: '/tmp/important.txt' }) + expect(approvalChunk.data.approval.needsApproval).toBe(true) + + // Tool should NOT be executed yet (waiting for approval) + expect(executeFn).not.toHaveBeenCalled() + }) + + it('should stop iteration when approval is needed', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'I will delete the file for you.', + toolCalls: [ + { name: 'dangerous_action', arguments: { confirm: true } }, + ], + }, + { + content: 'Action completed.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const executeFn = vi.fn(async () => 'done') + + const tool = toolDefinition({ + name: 'dangerous_action', + description: 'A dangerous action requiring approval', + inputSchema: z.object({ confirm: z.boolean() }), + needsApproval: true, + }).server(executeFn) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Do the dangerous thing' }], + tools: [tool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Should stop after first iteration + const finishedChunks = chunks.filter((c) => c.type === 'RUN_FINISHED') + expect(finishedChunks.length).toBeGreaterThanOrEqual(1) + + // Tool should not be executed + expect(executeFn).not.toHaveBeenCalled() + + // Simulator should be waiting at iteration 1 + expect(adapter.getCurrentIteration()).toBe(1) + }) + }) + + describe('Approval Accepted', () => { + it('should execute tool when approval is granted via messages', async () => { + const script: SimulatorScript = { + iterations: [ + { + // After receiving approval, LLM responds + content: 'The file has been successfully deleted.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const executeFn = vi.fn(async (args: { path: string }) => { + return JSON.stringify({ deleted: true, path: args.path }) + }) + + const deleteTool = toolDefinition({ + name: 'delete_file', + description: 'Delete a file', + inputSchema: z.object({ path: z.string() }), + needsApproval: true, + }).server(executeFn) + + // Messages with approval already granted + const messagesWithApproval = [ + { role: 'user' as const, content: 'Delete /tmp/test.txt' }, + { + role: 'assistant' as const, + content: 'I will delete the file.', + toolCalls: [ + { + id: 'call-1', + type: 'function' as const, + function: { + name: 'delete_file', + arguments: '{"path":"/tmp/test.txt"}', + }, + }, + ], + parts: [ + { + type: 'tool-call' as const, + id: 'call-1', + name: 'delete_file', + arguments: '{"path":"/tmp/test.txt"}', + state: 'approval-responded' as const, + approval: { + id: 'approval_call-1', + needsApproval: true, + approved: true, // User approved + }, + }, + ], + }, + ] + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: messagesWithApproval, + tools: [deleteTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await 
collectChunks(stream) + + // Tool should have been executed because approval was granted + expect(executeFn).toHaveBeenCalledTimes(1) + expect(executeFn).toHaveBeenCalledWith({ path: '/tmp/test.txt' }) + + // Should have tool_result chunk + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + expect(toolResultChunks.length).toBe(1) + }) + }) + + describe('Approval Denied', () => { + it('should not execute tool when approval is denied', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'I understand. I will not delete the file.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const executeFn = vi.fn(async () => 'deleted') + + const deleteTool = toolDefinition({ + name: 'delete_file', + description: 'Delete a file', + inputSchema: z.object({ path: z.string() }), + needsApproval: true, + }).server(executeFn) + + // Messages with approval denied + const messagesWithDenial = [ + { role: 'user' as const, content: 'Delete /tmp/test.txt' }, + { + role: 'assistant' as const, + content: 'I will delete the file.', + toolCalls: [ + { + id: 'call-1', + type: 'function' as const, + function: { + name: 'delete_file', + arguments: '{"path":"/tmp/test.txt"}', + }, + }, + ], + parts: [ + { + type: 'tool-call' as const, + id: 'call-1', + name: 'delete_file', + arguments: '{"path":"/tmp/test.txt"}', + state: 'approval-responded' as const, + approval: { + id: 'approval_call-1', + needsApproval: true, + approved: false, // User denied + }, + }, + ], + }, + ] + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: messagesWithDenial, + tools: [deleteTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Tool should NOT have been executed + expect(executeFn).not.toHaveBeenCalled() + + // Should have content response + const contentChunks = chunks.filter((c) => c.type === 'TEXT_MESSAGE_CONTENT') + expect(contentChunks.length).toBeGreaterThan(0) + }) + }) + + describe('Multiple Approval Tools', () => { + it('should handle multiple tools requiring approval', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'I need to perform two dangerous operations.', + toolCalls: [ + { name: 'tool_a', arguments: { value: 'A' } }, + { name: 'tool_b', arguments: { value: 'B' } }, + ], + }, + { + content: 'Both operations completed.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const executeFnA = vi.fn(async () => 'A done') + const executeFnB = vi.fn(async () => 'B done') + + const toolA = toolDefinition({ + name: 'tool_a', + description: 'Tool A', + inputSchema: z.object({ value: z.string() }), + needsApproval: true, + }).server(executeFnA) + + const toolB = toolDefinition({ + name: 'tool_b', + description: 'Tool B', + inputSchema: z.object({ value: z.string() }), + needsApproval: true, + }).server(executeFnB) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Do both operations' }], + tools: [toolA, toolB], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Should have approval-requested for both tools (emitted as CUSTOM events) + const approvalChunks = chunks.filter( + (c: any) => c.type === 'CUSTOM' && c.name === 'approval-requested', + ) + expect(approvalChunks.length).toBe(2) + + // Neither tool should be executed + expect(executeFnA).not.toHaveBeenCalled() + expect(executeFnB).not.toHaveBeenCalled() + }) + }) + + describe('Mixed Approval 
and Non-Approval Tools', () => { + it('should execute non-approval tools and request approval for approval tools', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'I will check status and then delete.', + toolCalls: [ + { name: 'check_status', arguments: { id: '123' } }, + { name: 'delete_item', arguments: { id: '123' } }, + ], + }, + { + content: 'Done.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const checkExecute = vi.fn(async () => ({ status: 'active' })) + const deleteExecute = vi.fn(async () => ({ deleted: true })) + + const checkTool = toolDefinition({ + name: 'check_status', + description: 'Check status', + inputSchema: z.object({ id: z.string() }), + // No needsApproval - will execute immediately + }).server(checkExecute) + + const deleteTool = toolDefinition({ + name: 'delete_item', + description: 'Delete item', + inputSchema: z.object({ id: z.string() }), + needsApproval: true, // Needs approval + }).server(deleteExecute) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Check and delete 123' }], + tools: [checkTool, deleteTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Non-approval tool should execute + expect(checkExecute).toHaveBeenCalledTimes(1) + + // Approval tool should NOT execute (waiting for approval) + expect(deleteExecute).not.toHaveBeenCalled() + + // Should have approval request for delete tool (emitted as CUSTOM event) + const approvalChunks = chunks.filter( + (c: any) => c.type === 'CUSTOM' && c.name === 'approval-requested', + ) + expect(approvalChunks.length).toBe(1) + expect((approvalChunks[0] as any).data.toolName).toBe('delete_item') + + // Check tool should have been executed (verify via mock call) + expect(checkExecute).toHaveBeenCalledWith({ id: '123' }) + }) + }) +}) diff --git a/packages/typescript/smoke-tests/adapters/src/tests/tools/client-tool.test.ts b/packages/typescript/smoke-tests/adapters/src/tests/tools/client-tool.test.ts new file mode 100644 index 000000000..4a0c82d21 --- /dev/null +++ b/packages/typescript/smoke-tests/adapters/src/tests/tools/client-tool.test.ts @@ -0,0 +1,279 @@ +import { describe, expect, it, vi } from 'vitest' +import { chat, maxIterations, toolDefinition } from '@tanstack/ai' +import { z } from 'zod' +import { SimulatorScripts, createLLMSimulator } from '../../llm-simulator' +import type { SimulatorScript } from '../../llm-simulator' + +/** + * Helper to collect all chunks from a stream + */ +async function collectChunks(stream: AsyncIterable): Promise> { + const chunks: Array = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + return chunks +} + +describe('Client Tool Tests', () => { + describe('Client Tool Without Execute (Definition Only)', () => { + it('should emit tool-input-available for client tool without execute', async () => { + const script = SimulatorScripts.singleClientTool( + 'show_notification', + { message: 'Hello World', type: 'info' }, + 'I have shown the notification.', + ) + const adapter = createLLMSimulator(script) + + // Client tool definition without execute function + const notificationTool = toolDefinition({ + name: 'show_notification', + description: 'Show a notification to the user', + inputSchema: z.object({ + message: z.string(), + type: z.enum(['info', 'warning', 'error']), + }), + }).client() // No execute function - client will handle it + + const stream = chat({ + adapter, + model: 'simulator-model', + 
messages: [{ role: 'user', content: 'Show me a hello notification' }], + tools: [notificationTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Should have TOOL_CALL_START chunks + const toolCallChunks = chunks.filter((c) => c.type === 'TOOL_CALL_START') + expect(toolCallChunks.length).toBeGreaterThan(0) + + // Should have tool-input-available chunks (for client-side handling, emitted as CUSTOM event) + const inputAvailableChunks = chunks.filter( + (c: any) => c.type === 'CUSTOM' && c.name === 'tool-input-available', + ) + expect(inputAvailableChunks.length).toBe(1) + + const inputChunk = inputAvailableChunks[0] as any + expect(inputChunk.data.toolName).toBe('show_notification') + expect(inputChunk.data.input).toEqual({ message: 'Hello World', type: 'info' }) + }) + + it('should stop iteration when client tool needs input', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'I need to show something on screen.', + toolCalls: [ + { name: 'render_component', arguments: { component: 'Chart' } }, + ], + }, + { + // This iteration should NOT be reached until client provides result + content: 'The component has been rendered.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const clientTool = toolDefinition({ + name: 'render_component', + description: 'Render a UI component', + inputSchema: z.object({ component: z.string() }), + }).client() // No execute - waits for client + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Show me a chart' }], + tools: [clientTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // The stream should stop after first iteration (waiting for client) + const finishedChunks = chunks.filter((c) => c.type === 'RUN_FINISHED') + expect(finishedChunks.length).toBeGreaterThanOrEqual(1) + + // Should have tool-input-available (emitted as CUSTOM event) + const inputChunks = chunks.filter( + (c: any) => c.type === 'CUSTOM' && c.name === 'tool-input-available', + ) + expect(inputChunks.length).toBe(1) + + // Simulator should still be on iteration 1 (not advanced) + expect(adapter.getCurrentIteration()).toBe(1) + }) + }) + + describe('Client Tool With Execute', () => { + it('should execute client tool with execute function', async () => { + const script = SimulatorScripts.singleClientTool( + 'get_location', + {}, + 'You are in New York.', + ) + const adapter = createLLMSimulator(script) + + const executeFn = vi.fn(async () => { + return { latitude: 40.7128, longitude: -74.006, city: 'New York' } + }) + + const locationTool = toolDefinition({ + name: 'get_location', + description: 'Get current location', + inputSchema: z.object({}), + outputSchema: z.object({ + latitude: z.number(), + longitude: z.number(), + city: z.string(), + }), + }).client(executeFn) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Where am I?' 
}], + tools: [locationTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Client tool with execute should behave like server tool + expect(executeFn).toHaveBeenCalledTimes(1) + + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + expect(toolResultChunks.length).toBe(1) + + const result = JSON.parse((toolResultChunks[0] as any).result) + expect(result.city).toBe('New York') + }) + }) + + describe('Simulating Client Tool Results (Message Injection)', () => { + it('should continue when client tool result is provided via messages', async () => { + // This simulates what happens when client sends back tool result + const script: SimulatorScript = { + iterations: [ + { + // LLM will receive the tool result and respond + content: + 'Based on the uploaded file, I can see it contains 100 lines.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const uploadTool = toolDefinition({ + name: 'upload_file', + description: 'Upload a file', + inputSchema: z.object({ filename: z.string() }), + }).client() // No execute - client handles + + // Simulate messages with tool result already present + // (as if client had previously provided the result) + const messagesWithToolResult = [ + { role: 'user' as const, content: 'Upload my file' }, + { + role: 'assistant' as const, + content: 'I will upload the file for you.', + toolCalls: [ + { + id: 'call-1', + type: 'function' as const, + function: { + name: 'upload_file', + arguments: '{"filename":"test.txt"}', + }, + }, + ], + parts: [ + { + type: 'tool-call' as const, + id: 'call-1', + name: 'upload_file', + arguments: '{"filename":"test.txt"}', + state: 'complete' as const, + output: { success: true, lines: 100 }, + }, + ], + }, + ] + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: messagesWithToolResult, + tools: [uploadTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Should get the response content + const contentChunks = chunks.filter((c) => c.type === 'TEXT_MESSAGE_CONTENT') + expect(contentChunks.length).toBeGreaterThan(0) + + const fullContent = contentChunks.map((c) => (c as any).content).join('') + expect(fullContent).toContain('100 lines') + }) + }) + + describe('Mixed Client Tools', () => { + it('should handle multiple client tools with different states', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'Let me help with both tasks.', + toolCalls: [ + { name: 'client_tool_a', arguments: { value: 'A' } }, + { name: 'client_tool_b', arguments: { value: 'B' } }, + ], + }, + { + content: 'Both tasks completed.', + }, + ], + } + const adapter = createLLMSimulator(script) + + // One client tool with execute, one without + const toolA = toolDefinition({ + name: 'client_tool_a', + description: 'Tool A', + inputSchema: z.object({ value: z.string() }), + }).client(async (args) => ({ processed: args.value })) + + const toolB = toolDefinition({ + name: 'client_tool_b', + description: 'Tool B', + inputSchema: z.object({ value: z.string() }), + }).client() // No execute + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Do both' }], + tools: [toolA, toolB], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Tool B should have tool-input-available (no execute, emitted as CUSTOM event) + const inputChunks = chunks.filter( + (c: any) => c.type === 'CUSTOM' && 
c.name === 'tool-input-available', + ) + expect(inputChunks.length).toBeGreaterThanOrEqual(1) + + // At least one should be for tool_b + const toolBInputs = inputChunks.filter( + (c: any) => c.data?.toolName === 'client_tool_b', + ) + expect(toolBInputs.length).toBe(1) + }) + }) +}) diff --git a/packages/typescript/smoke-tests/adapters/src/tests/tools/error-handling.test.ts b/packages/typescript/smoke-tests/adapters/src/tests/tools/error-handling.test.ts new file mode 100644 index 000000000..125b271cd --- /dev/null +++ b/packages/typescript/smoke-tests/adapters/src/tests/tools/error-handling.test.ts @@ -0,0 +1,350 @@ +import { describe, expect, it, vi } from 'vitest' +import { chat, maxIterations, toolDefinition } from '@tanstack/ai' +import { z } from 'zod' +import { createLLMSimulator } from '../../llm-simulator' +import type { SimulatorScript } from '../../llm-simulator' + +/** + * Helper to collect all chunks from a stream + */ +async function collectChunks(stream: AsyncIterable): Promise> { + const chunks: Array = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + return chunks +} + +describe('Error Handling Tests', () => { + describe('Tool Execution Errors', () => { + it('should handle tool that throws an error', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'Let me try that operation.', + toolCalls: [{ name: 'failing_tool', arguments: { input: 'test' } }], + }, + { + content: 'I encountered an error.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const failingExecute = vi.fn(async () => { + throw new Error('Tool execution failed: database connection error') + }) + + const failingTool = toolDefinition({ + name: 'failing_tool', + description: 'A tool that fails', + inputSchema: z.object({ input: z.string() }), + }).server(failingExecute) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Run the failing tool' }], + tools: [failingTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Tool should have been called + expect(failingExecute).toHaveBeenCalledTimes(1) + + // Should have a tool result with error + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + expect(toolResultChunks.length).toBe(1) + + const result = JSON.parse((toolResultChunks[0] as any).result) + expect(result.error).toContain('database connection error') + }) + + it('should handle async rejection in tool', async () => { + const script: SimulatorScript = { + iterations: [ + { + toolCalls: [{ name: 'async_fail', arguments: {} }], + }, + { + content: 'Error handled.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const asyncFailExecute = vi.fn(async () => { + return Promise.reject(new Error('Async rejection')) + }) + + const tool = toolDefinition({ + name: 'async_fail', + description: 'Async failing tool', + inputSchema: z.object({}), + }).server(asyncFailExecute) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'test' }], + tools: [tool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + expect(toolResultChunks.length).toBe(1) + + const result = JSON.parse((toolResultChunks[0] as any).result) + expect(result.error).toContain('Async rejection') + }) + }) + + describe('Unknown Tool', () => { + it('should handle call to unknown tool 
gracefully', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'Using the tool.', + toolCalls: [{ name: 'unknown_tool', arguments: { x: 1 } }], + }, + { + content: 'Done.', + }, + ], + } + const adapter = createLLMSimulator(script) + + // Only register a different tool + const knownTool = toolDefinition({ + name: 'known_tool', + description: 'A known tool', + inputSchema: z.object({}), + }).server(async () => 'result') + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'test' }], + tools: [knownTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Should have a tool result with error about unknown tool + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + expect(toolResultChunks.length).toBe(1) + + const result = JSON.parse((toolResultChunks[0] as any).result) + expect(result.error).toContain('Unknown tool') + }) + }) + + describe('Tool With No Execute', () => { + it('should emit tool-input-available for tool definition without execute', async () => { + const script: SimulatorScript = { + iterations: [ + { + toolCalls: [ + { name: 'no_execute_tool', arguments: { data: 'test' } }, + ], + }, + ], + } + const adapter = createLLMSimulator(script) + + // Tool definition only (no .server() or .client() with execute) + const toolDef = toolDefinition({ + name: 'no_execute_tool', + description: 'Tool without execute', + inputSchema: z.object({ data: z.string() }), + }) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'test' }], + tools: [toolDef], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Should emit tool-input-available since there's no execute (emitted as CUSTOM event) + const inputChunks = chunks.filter( + (c: any) => c.type === 'CUSTOM' && c.name === 'tool-input-available', + ) + expect(inputChunks.length).toBe(1) + }) + }) + + describe('Empty Tool Calls', () => { + it('should handle iteration with no tool calls gracefully', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'I will just respond without tools.', + // No toolCalls + }, + ], + } + const adapter = createLLMSimulator(script) + + const tool = toolDefinition({ + name: 'unused_tool', + description: 'Tool', + inputSchema: z.object({}), + }).server(vi.fn()) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'test' }], + tools: [tool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Should have content but no tool calls or results + const contentChunks = chunks.filter((c) => c.type === 'TEXT_MESSAGE_CONTENT') + const toolCallChunks = chunks.filter((c) => c.type === 'TOOL_CALL_START') + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + + expect(contentChunks.length).toBeGreaterThan(0) + expect(toolCallChunks.length).toBe(0) + expect(toolResultChunks.length).toBe(0) + }) + }) + + describe('Max Iterations', () => { + it('should stop after max iterations are reached', async () => { + // Script that would loop forever + const script: SimulatorScript = { + iterations: [ + { toolCalls: [{ name: 'loop_tool', arguments: {} }] }, + { toolCalls: [{ name: 'loop_tool', arguments: {} }] }, + { toolCalls: [{ name: 'loop_tool', arguments: {} }] }, + { toolCalls: [{ name: 'loop_tool', arguments: {} }] }, + { toolCalls: [{ name: 
'loop_tool', arguments: {} }] }, + { toolCalls: [{ name: 'loop_tool', arguments: {} }] }, + { toolCalls: [{ name: 'loop_tool', arguments: {} }] }, + { toolCalls: [{ name: 'loop_tool', arguments: {} }] }, + { toolCalls: [{ name: 'loop_tool', arguments: {} }] }, + { toolCalls: [{ name: 'loop_tool', arguments: {} }] }, + ], + } + const adapter = createLLMSimulator(script) + + const execute = vi.fn(async () => 'continue') + + const tool = toolDefinition({ + name: 'loop_tool', + description: 'Looping tool', + inputSchema: z.object({}), + }).server(execute) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'loop' }], + tools: [tool], + agentLoopStrategy: maxIterations(3), // Limit to 3 iterations + }) + + const chunks = await collectChunks(stream) + + // Should stop at max iterations + expect(execute.mock.calls.length).toBeLessThanOrEqual(3) + + // Should have RUN_FINISHED chunks + const finishedChunks = chunks.filter((c) => c.type === 'RUN_FINISHED') + expect(finishedChunks.length).toBeGreaterThan(0) + }) + }) + + describe('Tool Returns Non-String', () => { + it('should handle tool returning object (auto-stringify)', async () => { + const script: SimulatorScript = { + iterations: [ + { + toolCalls: [{ name: 'object_tool', arguments: {} }], + }, + { + content: 'Got the object.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const tool = toolDefinition({ + name: 'object_tool', + description: 'Returns object', + inputSchema: z.object({}), + }).server(async () => { + // Return object directly (should be stringified) + return { key: 'value', nested: { a: 1 } } + }) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'test' }], + tools: [tool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + expect(toolResultChunks.length).toBe(1) + + // Should be valid JSON + const result = (toolResultChunks[0] as any).result + const parsed = JSON.parse(result) + expect(parsed.key).toBe('value') + expect(parsed.nested.a).toBe(1) + }) + + it('should handle tool returning number', async () => { + const script: SimulatorScript = { + iterations: [ + { + toolCalls: [{ name: 'number_tool', arguments: {} }], + }, + { + content: 'Got the number.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const tool = toolDefinition({ + name: 'number_tool', + description: 'Returns number', + inputSchema: z.object({}), + }).server(async () => 42) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'test' }], + tools: [tool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + expect(toolResultChunks.length).toBe(1) + + // Number should be stringified + const result = (toolResultChunks[0] as any).result + expect(result).toBe('42') + }) + }) +}) diff --git a/packages/typescript/smoke-tests/adapters/src/tests/tools/multi-tool.test.ts b/packages/typescript/smoke-tests/adapters/src/tests/tools/multi-tool.test.ts new file mode 100644 index 000000000..32894484e --- /dev/null +++ b/packages/typescript/smoke-tests/adapters/src/tests/tools/multi-tool.test.ts @@ -0,0 +1,268 @@ +import { describe, expect, it, vi } from 'vitest' +import { chat, maxIterations, toolDefinition } from '@tanstack/ai' +import { z } from 'zod' 
+import { SimulatorScripts, createLLMSimulator } from '../../llm-simulator' +import type { SimulatorScript } from '../../llm-simulator' + +/** + * Helper to collect all chunks from a stream + */ +async function collectChunks(stream: AsyncIterable): Promise> { + const chunks: Array = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + return chunks +} + +describe('Multi-Tool Tests', () => { + describe('Parallel Tool Execution', () => { + it('should execute multiple tools in the same iteration', async () => { + const script = SimulatorScripts.parallelTools( + [ + { name: 'get_weather', args: { city: 'NYC' } }, + { name: 'get_time', args: { timezone: 'EST' } }, + { name: 'get_news', args: { category: 'tech' } }, + ], + 'Here is the weather, time, and news.', + ) + const adapter = createLLMSimulator(script) + + const weatherExecute = vi.fn(async () => + JSON.stringify({ temp: 72, condition: 'sunny' }), + ) + const timeExecute = vi.fn(async () => + JSON.stringify({ time: '14:30', timezone: 'EST' }), + ) + const newsExecute = vi.fn(async () => + JSON.stringify({ headlines: ['AI advances'] }), + ) + + const weatherTool = toolDefinition({ + name: 'get_weather', + description: 'Get weather', + inputSchema: z.object({ city: z.string() }), + }).server(weatherExecute) + + const timeTool = toolDefinition({ + name: 'get_time', + description: 'Get time', + inputSchema: z.object({ timezone: z.string() }), + }).server(timeExecute) + + const newsTool = toolDefinition({ + name: 'get_news', + description: 'Get news', + inputSchema: z.object({ category: z.string() }), + }).server(newsExecute) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [ + { role: 'user', content: 'Give me weather, time, and news' }, + ], + tools: [weatherTool, timeTool, newsTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // All three tools should be executed + expect(weatherExecute).toHaveBeenCalledTimes(1) + expect(timeExecute).toHaveBeenCalledTimes(1) + expect(newsExecute).toHaveBeenCalledTimes(1) + + // Should have 3 tool results + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + expect(toolResultChunks.length).toBe(3) + + // Should have 3 tool calls + const toolCallChunks = chunks.filter((c) => c.type === 'TOOL_CALL_START') + expect(toolCallChunks.length).toBe(3) + }) + + it('should handle different tool types in parallel', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'Executing multiple operations.', + toolCalls: [ + { name: 'server_tool', arguments: { value: 1 } }, + { name: 'approval_tool', arguments: { action: 'delete' } }, + { name: 'client_tool', arguments: { display: 'chart' } }, + ], + }, + { + content: 'Operations initiated.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const serverExecute = vi.fn(async () => ({ result: 'server done' })) + const approvalExecute = vi.fn(async () => ({ + result: 'approved action done', + })) + + const serverTool = toolDefinition({ + name: 'server_tool', + description: 'Server tool', + inputSchema: z.object({ value: z.number() }), + }).server(serverExecute) + + const approvalTool = toolDefinition({ + name: 'approval_tool', + description: 'Approval tool', + inputSchema: z.object({ action: z.string() }), + needsApproval: true, + }).server(approvalExecute) + + const clientTool = toolDefinition({ + name: 'client_tool', + description: 'Client tool', + inputSchema: z.object({ display: z.string() }), + }).client() // No 
execute + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Do all three' }], + tools: [serverTool, approvalTool, clientTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Server tool should execute + expect(serverExecute).toHaveBeenCalledTimes(1) + + // Approval tool should NOT execute (waiting for approval) + expect(approvalExecute).not.toHaveBeenCalled() + + // Should have approval-requested for approval tool (emitted as CUSTOM event) + const approvalChunks = chunks.filter( + (c: any) => c.type === 'CUSTOM' && c.name === 'approval-requested', + ) + expect(approvalChunks.length).toBe(1) + + // Should have tool-input-available for client tool (emitted as CUSTOM event) + const inputChunks = chunks.filter( + (c: any) => c.type === 'CUSTOM' && c.name === 'tool-input-available', + ) + expect(inputChunks.length).toBe(1) + }) + }) + + describe('Same Tool Called Multiple Times', () => { + it('should handle the same tool called multiple times with different args', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'Checking multiple cities.', + toolCalls: [ + { name: 'get_weather', arguments: { city: 'NYC' }, id: 'call-1' }, + { name: 'get_weather', arguments: { city: 'LA' }, id: 'call-2' }, + { + name: 'get_weather', + arguments: { city: 'Chicago' }, + id: 'call-3', + }, + ], + }, + { + content: 'Here is the weather for all three cities.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const weatherExecute = vi.fn(async (args: { city: string }) => { + const temps: Record = { NYC: 70, LA: 85, Chicago: 60 } + return JSON.stringify({ city: args.city, temp: temps[args.city] || 0 }) + }) + + const weatherTool = toolDefinition({ + name: 'get_weather', + description: 'Get weather', + inputSchema: z.object({ city: z.string() }), + }).server(weatherExecute) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [ + { role: 'user', content: 'Weather in NYC, LA, and Chicago' }, + ], + tools: [weatherTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Should be called 3 times + expect(weatherExecute).toHaveBeenCalledTimes(3) + expect(weatherExecute).toHaveBeenCalledWith({ city: 'NYC' }) + expect(weatherExecute).toHaveBeenCalledWith({ city: 'LA' }) + expect(weatherExecute).toHaveBeenCalledWith({ city: 'Chicago' }) + + // Should have 3 tool results + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + expect(toolResultChunks.length).toBe(3) + }) + }) + + describe('Tool Selection', () => { + it('should only execute tools that are called', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'I only need tool B.', + toolCalls: [{ name: 'tool_b', arguments: {} }], + }, + { + content: 'Done with B.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const executeA = vi.fn(async () => 'A') + const executeB = vi.fn(async () => 'B') + const executeC = vi.fn(async () => 'C') + + const toolA = toolDefinition({ + name: 'tool_a', + description: 'Tool A', + inputSchema: z.object({}), + }).server(executeA) + + const toolB = toolDefinition({ + name: 'tool_b', + description: 'Tool B', + inputSchema: z.object({}), + }).server(executeB) + + const toolC = toolDefinition({ + name: 'tool_c', + description: 'Tool C', + inputSchema: z.object({}), + }).server(executeC) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: 
[{ role: 'user', content: 'Just use B' }], + tools: [toolA, toolB, toolC], + agentLoopStrategy: maxIterations(10), + }) + + await collectChunks(stream) + + // Only B should be executed + expect(executeA).not.toHaveBeenCalled() + expect(executeB).toHaveBeenCalledTimes(1) + expect(executeC).not.toHaveBeenCalled() + }) + }) +}) diff --git a/packages/typescript/smoke-tests/adapters/src/tests/tools/sequences.test.ts b/packages/typescript/smoke-tests/adapters/src/tests/tools/sequences.test.ts new file mode 100644 index 000000000..fa2444cc1 --- /dev/null +++ b/packages/typescript/smoke-tests/adapters/src/tests/tools/sequences.test.ts @@ -0,0 +1,419 @@ +import { describe, expect, it, vi } from 'vitest' +import { chat, maxIterations, toolDefinition } from '@tanstack/ai' +import { z } from 'zod' +import { SimulatorScripts, createLLMSimulator } from '../../llm-simulator' +import type { SimulatorScript } from '../../llm-simulator' + +/** + * Helper to collect all chunks from a stream + */ +async function collectChunks(stream: AsyncIterable): Promise> { + const chunks: Array = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + return chunks +} + +describe('Tool Sequence Tests', () => { + describe('Server Tool -> Server Tool', () => { + it('should execute sequential server tools', async () => { + const script = SimulatorScripts.sequentialTools( + { name: 'get_user', args: { userId: '123' } }, + { name: 'get_orders', args: { userId: '123' } }, + 'User has 5 orders.', + ) + const adapter = createLLMSimulator(script) + + const getUserExecute = vi.fn(async (args: { userId: string }) => { + return JSON.stringify({ id: args.userId, name: 'John' }) + }) + + const getOrdersExecute = vi.fn(async (args: { userId: string }) => { + return JSON.stringify({ orders: [1, 2, 3, 4, 5], count: 5 }) + }) + + const getUserTool = toolDefinition({ + name: 'get_user', + description: 'Get user by ID', + inputSchema: z.object({ userId: z.string() }), + }).server(getUserExecute) + + const getOrdersTool = toolDefinition({ + name: 'get_orders', + description: 'Get orders for user', + inputSchema: z.object({ userId: z.string() }), + }).server(getOrdersExecute) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Get orders for user 123' }], + tools: [getUserTool, getOrdersTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Both tools should be executed in sequence + expect(getUserExecute).toHaveBeenCalledTimes(1) + expect(getOrdersExecute).toHaveBeenCalledTimes(1) + + // Verify call order + expect(getUserExecute.mock.invocationCallOrder[0]).toBeLessThan( + getOrdersExecute.mock.invocationCallOrder[0]!, + ) + + // Should have 2 tool results + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + expect(toolResultChunks.length).toBe(2) + }) + + it('should pass first tool result to context for second tool', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'Let me fetch the data.', + toolCalls: [{ name: 'fetch_data', arguments: { source: 'api' } }], + }, + { + content: 'Now I will process it.', + toolCalls: [ + { name: 'process_data', arguments: { format: 'json' } }, + ], + }, + { + content: 'Data processed successfully.', + }, + ], + } + const adapter = createLLMSimulator(script) + + let fetchResult = '' + const fetchExecute = vi.fn(async () => { + fetchResult = JSON.stringify({ raw: 'data123' }) + return fetchResult + }) + + const processExecute = vi.fn(async () 
=> { + // In a real scenario, this would use the fetch result + return JSON.stringify({ processed: true }) + }) + + const fetchTool = toolDefinition({ + name: 'fetch_data', + description: 'Fetch data', + inputSchema: z.object({ source: z.string() }), + }).server(fetchExecute) + + const processTool = toolDefinition({ + name: 'process_data', + description: 'Process data', + inputSchema: z.object({ format: z.string() }), + }).server(processExecute) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Fetch and process data' }], + tools: [fetchTool, processTool], + agentLoopStrategy: maxIterations(10), + }) + + await collectChunks(stream) + + expect(fetchExecute).toHaveBeenCalledTimes(1) + expect(processExecute).toHaveBeenCalledTimes(1) + + // Process should be called after fetch + expect(fetchExecute.mock.invocationCallOrder[0]).toBeLessThan( + processExecute.mock.invocationCallOrder[0]!, + ) + }) + }) + + describe('Server Tool -> Client Tool', () => { + it('should execute server tool then request client tool input', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'First, let me check the data.', + toolCalls: [{ name: 'server_check', arguments: { id: 'abc' } }], + }, + { + content: 'Now please confirm on screen.', + toolCalls: [ + { name: 'client_confirm', arguments: { message: 'Proceed?' } }, + ], + }, + { + content: 'Great, all done!', + }, + ], + } + const adapter = createLLMSimulator(script) + + const serverExecute = vi.fn(async () => JSON.stringify({ valid: true })) + + const serverTool = toolDefinition({ + name: 'server_check', + description: 'Server-side check', + inputSchema: z.object({ id: z.string() }), + }).server(serverExecute) + + const clientTool = toolDefinition({ + name: 'client_confirm', + description: 'Client-side confirmation', + inputSchema: z.object({ message: z.string() }), + }).client() // No execute - handled by client + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Check and confirm' }], + tools: [serverTool, clientTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Server tool should execute + expect(serverExecute).toHaveBeenCalledTimes(1) + + // Should have tool result for server tool + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + expect(toolResultChunks.length).toBe(1) + + // Should have tool-input-available for client tool (emitted as CUSTOM event) + const inputChunks = chunks.filter( + (c: any) => c.type === 'CUSTOM' && c.name === 'tool-input-available', + ) + expect(inputChunks.length).toBe(1) + expect((inputChunks[0] as any).data.toolName).toBe('client_confirm') + }) + }) + + describe('Client Tool -> Server Tool', () => { + it('should execute client tool result then continue to server tool', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'Now I will process on server.', + toolCalls: [ + { name: 'server_process', arguments: { data: 'processed' } }, + ], + }, + { + content: 'Processing complete.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const serverExecute = vi.fn(async (args: any) => { + return { result: args.data + '_done' } + }) + + const clientTool = toolDefinition({ + name: 'client_collect', + description: 'Collect input from client', + inputSchema: z.object({}), + }).client() + + const serverTool = toolDefinition({ + name: 'server_process', + description: 'Process on server', + 
inputSchema: z.object({ data: z.string() }), + }).server(serverExecute) + + // Simulate that client tool already completed + const messagesWithClientResult = [ + { role: 'user' as const, content: 'Collect and process' }, + { + role: 'assistant' as const, + content: 'Let me collect your input.', + toolCalls: [ + { + id: 'call-1', + type: 'function' as const, + function: { + name: 'client_collect', + arguments: '{}', + }, + }, + ], + parts: [ + { + type: 'tool-call' as const, + id: 'call-1', + name: 'client_collect', + arguments: '{}', + state: 'complete' as const, + output: { userInput: 'client_data' }, + }, + ], + }, + ] + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: messagesWithClientResult, + tools: [clientTool, serverTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Server tool should execute + expect(serverExecute).toHaveBeenCalledTimes(1) + expect(serverExecute).toHaveBeenCalledWith({ data: 'processed' }) + + // Should have tool results (may include the client tool result that was injected) + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + expect(toolResultChunks.length).toBeGreaterThanOrEqual(1) + }) + }) + + describe('Three Tool Sequence', () => { + it('should handle A -> B -> C tool sequence', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'Step 1', + toolCalls: [{ name: 'tool_a', arguments: { step: 1 } }], + }, + { + content: 'Step 2', + toolCalls: [{ name: 'tool_b', arguments: { step: 2 } }], + }, + { + content: 'Step 3', + toolCalls: [{ name: 'tool_c', arguments: { step: 3 } }], + }, + { + content: 'All three steps completed.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const callOrder: string[] = [] + + const toolA = toolDefinition({ + name: 'tool_a', + description: 'Tool A', + inputSchema: z.object({ step: z.number() }), + }).server(async () => { + callOrder.push('A') + return 'A done' + }) + + const toolB = toolDefinition({ + name: 'tool_b', + description: 'Tool B', + inputSchema: z.object({ step: z.number() }), + }).server(async () => { + callOrder.push('B') + return 'B done' + }) + + const toolC = toolDefinition({ + name: 'tool_c', + description: 'Tool C', + inputSchema: z.object({ step: z.number() }), + }).server(async () => { + callOrder.push('C') + return 'C done' + }) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Do A, B, C' }], + tools: [toolA, toolB, toolC], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // All tools should execute in order + expect(callOrder).toEqual(['A', 'B', 'C']) + + // Should have 3 tool results + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + expect(toolResultChunks.length).toBe(3) + }) + }) + + describe('Parallel Tools in Sequence', () => { + it('should handle parallel tools followed by another tool', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'First, getting data from two sources.', + toolCalls: [ + { name: 'source_a', arguments: {} }, + { name: 'source_b', arguments: {} }, + ], + }, + { + content: 'Now combining results.', + toolCalls: [{ name: 'combine', arguments: {} }], + }, + { + content: 'Here are the combined results.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const callOrder: string[] = [] + + const sourceA = toolDefinition({ + name: 'source_a', + description: 'Source A', + 
inputSchema: z.object({}), + }).server(async () => { + callOrder.push('A') + return JSON.stringify({ source: 'A', data: [1, 2] }) + }) + + const sourceB = toolDefinition({ + name: 'source_b', + description: 'Source B', + inputSchema: z.object({}), + }).server(async () => { + callOrder.push('B') + return JSON.stringify({ source: 'B', data: [3, 4] }) + }) + + const combine = toolDefinition({ + name: 'combine', + description: 'Combine data', + inputSchema: z.object({}), + }).server(async () => { + callOrder.push('combine') + return JSON.stringify({ combined: [1, 2, 3, 4] }) + }) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Get and combine data' }], + tools: [sourceA, sourceB, combine], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // A and B should be called before combine + expect(callOrder.indexOf('A')).toBeLessThan(callOrder.indexOf('combine')) + expect(callOrder.indexOf('B')).toBeLessThan(callOrder.indexOf('combine')) + + // Should have 3 tool results + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + expect(toolResultChunks.length).toBe(3) + }) + }) +}) diff --git a/packages/typescript/smoke-tests/adapters/src/tests/tools/server-tool.test.ts b/packages/typescript/smoke-tests/adapters/src/tests/tools/server-tool.test.ts new file mode 100644 index 000000000..e32d30877 --- /dev/null +++ b/packages/typescript/smoke-tests/adapters/src/tests/tools/server-tool.test.ts @@ -0,0 +1,343 @@ +import { describe, expect, it, vi } from 'vitest' +import { chat, maxIterations, toolDefinition } from '@tanstack/ai' +import { z } from 'zod' +import { SimulatorScripts, createLLMSimulator } from '../../llm-simulator' +import type { SimulatorScript } from '../../llm-simulator' + +/** + * Helper to collect all chunks from a stream + */ +async function collectChunks(stream: AsyncIterable): Promise> { + const chunks: Array = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + return chunks +} + +describe('Server Tool Tests', () => { + describe('Single Server Tool Execution', () => { + it('should execute a server tool and return the result', async () => { + const script = SimulatorScripts.singleServerTool( + 'get_temperature', + { location: 'San Francisco' }, + 'The temperature in San Francisco is 70 degrees.', + ) + const adapter = createLLMSimulator(script) + + const executeFn = vi.fn(async (args: { location: string }) => { + return `${args.location}: 70°F` + }) + + const temperatureTool = toolDefinition({ + name: 'get_temperature', + description: 'Get the current temperature for a location', + inputSchema: z.object({ + location: z.string().describe('The city name'), + }), + }).server(executeFn) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [ + { + role: 'user', + content: 'What is the temperature in San Francisco?', + }, + ], + tools: [temperatureTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + // Verify the tool was called + expect(executeFn).toHaveBeenCalledTimes(1) + expect(executeFn).toHaveBeenCalledWith({ location: 'San Francisco' }) + + // Verify we got tool call and tool result chunks + const toolCallChunks = chunks.filter((c) => c.type === 'TOOL_CALL_START') + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + + expect(toolCallChunks.length).toBeGreaterThan(0) + expect(toolResultChunks.length).toBe(1) + + // Verify the tool result content + const 
resultChunk = toolResultChunks[0] as any + expect(resultChunk.result).toContain('San Francisco') + expect(resultChunk.result).toContain('70') + }) + + it('should handle a tool with complex nested arguments', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'Let me search for that.', + toolCalls: [ + { + name: 'search_products', + arguments: { + query: 'laptop', + filters: { + minPrice: 500, + maxPrice: 2000, + brands: ['Apple', 'Dell'], + }, + limit: 10, + }, + }, + ], + }, + { + content: 'I found 5 laptops matching your criteria.', + }, + ], + } + const adapter = createLLMSimulator(script) + + const executeFn = vi.fn(async (args: any) => { + return JSON.stringify({ + products: [{ name: 'MacBook Pro', price: 1999 }], + total: 5, + }) + }) + + const searchTool = toolDefinition({ + name: 'search_products', + description: 'Search for products', + inputSchema: z.object({ + query: z.string(), + filters: z.object({ + minPrice: z.number(), + maxPrice: z.number(), + brands: z.array(z.string()), + }), + limit: z.number(), + }), + }).server(executeFn) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Find me a laptop' }], + tools: [searchTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + expect(executeFn).toHaveBeenCalledTimes(1) + expect(executeFn).toHaveBeenCalledWith({ + query: 'laptop', + filters: { + minPrice: 500, + maxPrice: 2000, + brands: ['Apple', 'Dell'], + }, + limit: 10, + }) + + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + expect(toolResultChunks.length).toBe(1) + }) + + it('should handle a tool that returns JSON', async () => { + const script = SimulatorScripts.singleServerTool( + 'get_user', + { userId: '123' }, + 'Here is the user information.', + ) + const adapter = createLLMSimulator(script) + + // Return an object (will be JSON.stringified by the framework) + const executeFn = vi.fn(async (args: { userId: string }) => { + return { + id: args.userId, + name: 'John Doe', + email: 'john@example.com', + } + }) + + const getUserTool = toolDefinition({ + name: 'get_user', + description: 'Get user by ID', + inputSchema: z.object({ + userId: z.string(), + }), + outputSchema: z.object({ + id: z.string(), + name: z.string(), + email: z.string(), + }), + }).server(executeFn) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Get user 123' }], + tools: [getUserTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + + expect(toolResultChunks.length).toBe(1) + // The result is the JSON-stringified tool output + const result = (toolResultChunks[0] as any).result + expect(result).toContain('123') + expect(result).toContain('John Doe') + }) + + it('should handle tool that returns an object result', async () => { + const script = SimulatorScripts.singleServerTool( + 'echo', + { message: 'Hello' }, + 'Echo complete.', + ) + const adapter = createLLMSimulator(script) + + // Return an object (framework handles stringification) + const executeFn = vi.fn(async (args: { message: string }) => { + return { echoed: args.message.toUpperCase() } + }) + + const echoTool = toolDefinition({ + name: 'echo', + description: 'Echo a message', + inputSchema: z.object({ message: z.string() }), + }).server(executeFn) + + const stream = chat({ + adapter, + model: 'simulator-model', 
+ messages: [{ role: 'user', content: 'Echo hello' }], + tools: [echoTool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + + expect(executeFn).toHaveBeenCalledWith({ message: 'Hello' }) + expect(toolResultChunks.length).toBe(1) + expect((toolResultChunks[0] as any).result).toContain('HELLO') + }) + }) + + describe('Tool Execution Tracking', () => { + it('should track tool call ID correctly', async () => { + const script: SimulatorScript = { + iterations: [ + { + toolCalls: [ + { name: 'test_tool', arguments: {}, id: 'custom-call-id-123' }, + ], + }, + { content: 'Done' }, + ], + } + const adapter = createLLMSimulator(script) + + const executeFn = vi.fn(async () => 'result') + + const tool = toolDefinition({ + name: 'test_tool', + description: 'Test tool', + inputSchema: z.object({}), + }).server(executeFn) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'test' }], + tools: [tool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + const toolCallChunks = chunks.filter((c) => c.type === 'TOOL_CALL_START') + const toolResultChunks = chunks.filter((c) => c.type === 'TOOL_CALL_END') + + expect(toolCallChunks.length).toBeGreaterThan(0) + expect((toolCallChunks[0] as any).toolCallId).toBe('custom-call-id-123') + expect((toolResultChunks[0] as any).toolCallId).toBe('custom-call-id-123') + }) + + it('should generate tool call ID if not provided', async () => { + const script: SimulatorScript = { + iterations: [ + { + toolCalls: [{ name: 'test_tool', arguments: {} }], + }, + { content: 'Done' }, + ], + } + const adapter = createLLMSimulator(script) + + const tool = toolDefinition({ + name: 'test_tool', + description: 'Test tool', + inputSchema: z.object({}), + }).server(async () => 'result') + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'test' }], + tools: [tool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + const toolCallChunks = chunks.filter((c) => c.type === 'TOOL_CALL_START') + + expect(toolCallChunks.length).toBeGreaterThan(0) + expect((toolCallChunks[0] as any).toolCallId).toMatch(/^call-\d+-\d+$/) + }) + }) + + describe('Content and Tool Call Together', () => { + it('should handle content followed by tool call in same iteration', async () => { + const script: SimulatorScript = { + iterations: [ + { + content: 'Let me check that for you.', + toolCalls: [ + { name: 'check_status', arguments: { id: 'order-123' } }, + ], + }, + { content: 'Your order is on its way!' 
}, + ], + } + const adapter = createLLMSimulator(script) + + const executeFn = vi.fn(async () => JSON.stringify({ status: 'shipped' })) + + const tool = toolDefinition({ + name: 'check_status', + description: 'Check order status', + inputSchema: z.object({ id: z.string() }), + }).server(executeFn) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages: [{ role: 'user', content: 'Check my order' }], + tools: [tool], + agentLoopStrategy: maxIterations(10), + }) + + const chunks = await collectChunks(stream) + + const contentChunks = chunks.filter((c) => c.type === 'TEXT_MESSAGE_CONTENT') + const toolCallChunks = chunks.filter((c) => c.type === 'TOOL_CALL_START') + + // Should have content chunks from both iterations + expect(contentChunks.length).toBeGreaterThan(0) + expect(toolCallChunks.length).toBeGreaterThan(0) + expect(executeFn).toHaveBeenCalledWith({ id: 'order-123' }) + }) + }) +}) diff --git a/packages/typescript/smoke-tests/adapters/vitest.config.ts b/packages/typescript/smoke-tests/adapters/vitest.config.ts new file mode 100644 index 000000000..fbb124cd4 --- /dev/null +++ b/packages/typescript/smoke-tests/adapters/vitest.config.ts @@ -0,0 +1,26 @@ +import { defineConfig } from 'vitest/config' +import path from 'path' + +export default defineConfig({ + resolve: { + alias: { + '@tanstack/ai': path.resolve(__dirname, '../../ai/src/index.ts'), + }, + }, + test: { + globals: true, + environment: 'node', + include: ['src/**/*.test.ts'], + coverage: { + provider: 'v8', + reporter: ['text', 'json', 'html', 'lcov'], + exclude: [ + 'node_modules/', + 'dist/', + 'output/', + '**/*.test.ts', + '**/*.config.ts', + ], + }, + }, +}) diff --git a/packages/typescript/smoke-tests/e2e/package.json b/packages/typescript/smoke-tests/e2e/package.json index 530c4b87a..58b9cbbbf 100644 --- a/packages/typescript/smoke-tests/e2e/package.json +++ b/packages/typescript/smoke-tests/e2e/package.json @@ -22,10 +22,12 @@ "@tanstack/react-router": "^1.141.1", "@tanstack/react-start": "^1.141.1", "@tanstack/router-plugin": "^1.139.7", + "@tanstack/tests-adapters": "workspace:*", "react": "^19.2.3", "react-dom": "^19.2.3", "tailwindcss": "^4.1.18", - "vite-tsconfig-paths": "^5.1.4" + "vite-tsconfig-paths": "^5.1.4", + "zod": "^4.2.0" }, "devDependencies": { "@playwright/test": "^1.57.0", diff --git a/packages/typescript/smoke-tests/e2e/src/routes/api.tools-test.ts b/packages/typescript/smoke-tests/e2e/src/routes/api.tools-test.ts new file mode 100644 index 000000000..5eed91e07 --- /dev/null +++ b/packages/typescript/smoke-tests/e2e/src/routes/api.tools-test.ts @@ -0,0 +1,553 @@ +import { createFileRoute } from '@tanstack/react-router' +import { + chat, + maxIterations, + toServerSentEventsResponse, + toolDefinition, +} from '@tanstack/ai' +import { z } from 'zod' +import { + createLLMSimulator, + type SimulatorScript, +} from '@tanstack/tests-adapters' + +/** + * Pre-defined test scenarios for tool testing + */ +const SCENARIOS: Record = { + // Simple text response (no tools) + 'text-only': { + iterations: [ + { + content: 'This is a simple text response without any tools.', + }, + ], + }, + + // Single server tool + 'server-tool-single': { + iterations: [ + { + content: 'Let me get the weather for you.', + toolCalls: [ + { name: 'get_weather', arguments: { city: 'San Francisco' } }, + ], + }, + { + content: 'The weather in San Francisco is 72°F and sunny.', + }, + ], + }, + + // Single client tool + 'client-tool-single': { + iterations: [ + { + content: 'I need to show you a notification.', + 
toolCalls: [ + { + name: 'show_notification', + arguments: { message: 'Hello from the AI!', type: 'info' }, + }, + ], + }, + { + content: 'The notification has been shown.', + }, + ], + }, + + // Approval tool + 'approval-tool': { + iterations: [ + { + content: 'I need your permission to delete this file.', + toolCalls: [ + { + name: 'delete_file', + arguments: { path: '/tmp/test.txt' }, + }, + ], + }, + { + content: 'The file has been deleted.', + }, + ], + }, + + // Server tool -> Client tool sequence + 'sequence-server-client': { + iterations: [ + { + content: 'First, let me fetch the data.', + toolCalls: [{ name: 'fetch_data', arguments: { source: 'api' } }], + }, + { + content: 'Now let me display it on screen.', + toolCalls: [ + { + name: 'display_chart', + arguments: { type: 'bar', data: [1, 2, 3] }, + }, + ], + }, + { + content: 'The chart is now displayed.', + }, + ], + }, + + // Multiple tools in parallel + 'parallel-tools': { + iterations: [ + { + content: 'Let me gather all the information at once.', + toolCalls: [ + { name: 'get_weather', arguments: { city: 'NYC' } }, + { name: 'get_time', arguments: { timezone: 'EST' } }, + ], + }, + { + content: 'Here is the weather and time for NYC.', + }, + ], + }, + + // ========================================================================= + // RACE CONDITION / EVENT FLOW SCENARIOS + // These test the client-side event handling and continuation logic + // ========================================================================= + + // Two client tools in sequence - tests continuation after first client tool completes + 'sequential-client-tools': { + iterations: [ + { + content: 'First notification coming.', + toolCalls: [ + { + name: 'show_notification', + arguments: { message: 'First notification', type: 'info' }, + }, + ], + }, + { + content: 'Second notification coming.', + toolCalls: [ + { + name: 'show_notification', + arguments: { message: 'Second notification', type: 'warning' }, + }, + ], + }, + { + content: 'Both notifications have been shown.', + }, + ], + }, + + // Multiple client tools in parallel (same turn) - tests handling of concurrent client executions + 'parallel-client-tools': { + iterations: [ + { + content: 'Showing multiple things at once.', + toolCalls: [ + { + name: 'show_notification', + arguments: { message: 'Parallel 1', type: 'info' }, + }, + { + name: 'display_chart', + arguments: { type: 'bar', data: [1, 2, 3] }, + }, + ], + }, + { + content: 'All displayed.', + }, + ], + }, + + // Two approvals in sequence - tests approval flow continuation + 'sequential-approvals': { + iterations: [ + { + content: 'First I need to delete file A.', + toolCalls: [ + { + name: 'delete_file', + arguments: { path: '/tmp/a.txt' }, + }, + ], + }, + { + content: 'Now I need to delete file B.', + toolCalls: [ + { + name: 'delete_file', + arguments: { path: '/tmp/b.txt' }, + }, + ], + }, + { + content: 'Both files have been processed.', + }, + ], + }, + + // Multiple approvals in parallel (same turn) - tests handling of concurrent approvals + 'parallel-approvals': { + iterations: [ + { + content: 'I need to delete multiple files at once.', + toolCalls: [ + { + name: 'delete_file', + arguments: { path: '/tmp/parallel-a.txt' }, + }, + { + name: 'delete_file', + arguments: { path: '/tmp/parallel-b.txt' }, + }, + ], + }, + { + content: 'All files have been processed.', + }, + ], + }, + + // Client tool followed by approval - tests mixed flow + 'client-then-approval': { + iterations: [ + { + content: 'First a notification.', + 
toolCalls: [ + { + name: 'show_notification', + arguments: { message: 'Before approval', type: 'info' }, + }, + ], + }, + { + content: 'Now I need approval to delete.', + toolCalls: [ + { + name: 'delete_file', + arguments: { path: '/tmp/after-notify.txt' }, + }, + ], + }, + { + content: 'Complete.', + }, + ], + }, + + // Approval followed by client tool - tests that approval doesn't block subsequent client tools + 'approval-then-client': { + iterations: [ + { + content: 'First I need approval.', + toolCalls: [ + { + name: 'delete_file', + arguments: { path: '/tmp/before-notify.txt' }, + }, + ], + }, + { + content: 'Now showing notification.', + toolCalls: [ + { + name: 'show_notification', + arguments: { message: 'After approval', type: 'info' }, + }, + ], + }, + { + content: 'Complete.', + }, + ], + }, + + // Server tool followed by two client tools - tests complex continuation + 'server-then-two-clients': { + iterations: [ + { + content: 'Fetching data first.', + toolCalls: [{ name: 'fetch_data', arguments: { source: 'db' } }], + }, + { + content: 'First client action.', + toolCalls: [ + { + name: 'show_notification', + arguments: { message: 'Data fetched', type: 'info' }, + }, + ], + }, + { + content: 'Second client action.', + toolCalls: [ + { + name: 'display_chart', + arguments: { type: 'line', data: [10, 20, 30] }, + }, + ], + }, + { + content: 'All done.', + }, + ], + }, + + // Three client tools in sequence - stress test continuation logic + 'triple-client-sequence': { + iterations: [ + { + content: 'First step.', + toolCalls: [ + { + name: 'show_notification', + arguments: { message: 'Step 1', type: 'info' }, + }, + ], + }, + { + content: 'Second step.', + toolCalls: [ + { + name: 'display_chart', + arguments: { type: 'pie', data: [25, 25, 50] }, + }, + ], + }, + { + content: 'Third step.', + toolCalls: [ + { + name: 'show_notification', + arguments: { message: 'Step 3', type: 'warning' }, + }, + ], + }, + { + content: 'All three steps complete.', + }, + ], + }, +} + +/** + * Server-side tool definitions (for tools that execute on the server) + */ +const serverTools = { + get_weather: toolDefinition({ + name: 'get_weather', + description: 'Get weather for a city', + inputSchema: z.object({ + city: z.string(), + }), + }).server(async (args) => { + return JSON.stringify({ + city: args.city, + temperature: 72, + condition: 'sunny', + }) + }), + + fetch_data: toolDefinition({ + name: 'fetch_data', + description: 'Fetch data from a source', + inputSchema: z.object({ + source: z.string(), + }), + }).server(async (args) => { + return JSON.stringify({ + source: args.source, + data: [1, 2, 3, 4, 5], + }) + }), + + get_time: toolDefinition({ + name: 'get_time', + description: 'Get current time in timezone', + inputSchema: z.object({ + timezone: z.string(), + }), + }).server(async (args) => { + return JSON.stringify({ + timezone: args.timezone, + time: '14:30:00', + }) + }), + + delete_file: toolDefinition({ + name: 'delete_file', + description: 'Delete a file (requires approval)', + inputSchema: z.object({ + path: z.string(), + }), + needsApproval: true, + }).server(async (args) => { + return JSON.stringify({ + deleted: true, + path: args.path, + }) + }), +} + +/** + * Client-side tool definitions (tools that execute on the client) + * These use .client() without an execute function - execution happens on client side + */ +const clientToolDefinitions = { + show_notification: toolDefinition({ + name: 'show_notification', + description: 'Show a notification to the user', + inputSchema: 
z.object({ + message: z.string(), + type: z.enum(['info', 'warning', 'error']), + }), + }).client(), + + display_chart: toolDefinition({ + name: 'display_chart', + description: 'Display a chart on the screen', + inputSchema: z.object({ + type: z.enum(['bar', 'line', 'pie']), + data: z.array(z.number()), + }), + }).client(), +} + +export const Route = createFileRoute('/api/tools-test')({ + server: { + handlers: { + POST: async ({ request }) => { + const requestSignal = request.signal + + if (requestSignal?.aborted) { + return new Response(null, { status: 499 }) + } + + const abortController = new AbortController() + + try { + const body = await request.json() + // scenario is in body.data (from useChat body option) or body directly (legacy) + const messages = body.messages + const scenario = body.data?.scenario || body.scenario || 'text-only' + + // Get the script for this scenario + const script = SCENARIOS[scenario] + if (!script) { + return new Response( + JSON.stringify({ + error: `Unknown scenario: ${scenario}. Available: ${Object.keys(SCENARIOS).join(', ')}`, + }), + { + status: 400, + headers: { 'Content-Type': 'application/json' }, + }, + ) + } + + // Create simulator with the script + const adapter = createLLMSimulator(script) + + // Determine which tools to include based on the scenario + const tools = getToolsForScenario(scenario) + + const stream = chat({ + adapter, + model: 'simulator-model', + messages, + tools, + agentLoopStrategy: maxIterations(20), + abortController, + }) + + return toServerSentEventsResponse(stream, { abortController }) + } catch (error: any) { + console.error('[Tools Test API] Error:', error) + if (error.name === 'AbortError' || abortController.signal.aborted) { + return new Response(null, { status: 499 }) + } + return new Response( + JSON.stringify({ + error: error.message || 'An error occurred', + }), + { + status: 500, + headers: { 'Content-Type': 'application/json' }, + }, + ) + } + }, + }, + }, +}) + +/** + * Get the tools needed for a specific scenario + */ +function getToolsForScenario(scenario: string) { + switch (scenario) { + case 'text-only': + return [] + + case 'server-tool-single': + return [serverTools.get_weather] + + case 'client-tool-single': + return [clientToolDefinitions.show_notification] + + case 'approval-tool': + return [serverTools.delete_file] + + case 'sequence-server-client': + return [serverTools.fetch_data, clientToolDefinitions.display_chart] + + case 'parallel-tools': + return [serverTools.get_weather, serverTools.get_time] + + // Race condition / event flow scenarios + case 'sequential-client-tools': + return [clientToolDefinitions.show_notification] + + case 'parallel-client-tools': + return [ + clientToolDefinitions.show_notification, + clientToolDefinitions.display_chart, + ] + + case 'sequential-approvals': + return [serverTools.delete_file] + + case 'parallel-approvals': + return [serverTools.delete_file] + + case 'client-then-approval': + return [clientToolDefinitions.show_notification, serverTools.delete_file] + + case 'approval-then-client': + return [serverTools.delete_file, clientToolDefinitions.show_notification] + + case 'server-then-two-clients': + return [ + serverTools.fetch_data, + clientToolDefinitions.show_notification, + clientToolDefinitions.display_chart, + ] + + case 'triple-client-sequence': + return [ + clientToolDefinitions.show_notification, + clientToolDefinitions.display_chart, + ] + + default: + return [] + } +} diff --git a/packages/typescript/smoke-tests/e2e/src/routes/tools-test.tsx 
b/packages/typescript/smoke-tests/e2e/src/routes/tools-test.tsx new file mode 100644 index 000000000..d9c572382 --- /dev/null +++ b/packages/typescript/smoke-tests/e2e/src/routes/tools-test.tsx @@ -0,0 +1,616 @@ +import { useState, useCallback, useRef, useEffect } from 'react' +import { createFileRoute } from '@tanstack/react-router' +import { useChat, fetchServerSentEvents } from '@tanstack/ai-react' +import { toolDefinition } from '@tanstack/ai' +import { z } from 'zod' + +/** + * Event log entry for tracking tool execution flow + */ +interface ToolEvent { + timestamp: number + type: + | 'execution-start' + | 'execution-complete' + | 'approval-granted' + | 'approval-denied' + | 'error' + toolName: string + toolCallId?: string + details?: string +} + +/** + * Client-side tool definitions with execute functions + * These track execution for testing purposes + */ +function createTrackedTools( + addEvent: (event: Omit) => void, +) { + const showNotificationTool = toolDefinition({ + name: 'show_notification', + description: 'Show a notification to the user', + inputSchema: z.object({ + message: z.string(), + type: z.enum(['info', 'warning', 'error']), + }), + outputSchema: z.object({ + displayed: z.boolean(), + timestamp: z.number(), + }), + }).client(async (args) => { + addEvent({ + type: 'execution-start', + toolName: 'show_notification', + details: args.message, + }) + + // Simulate async work + await new Promise((r) => setTimeout(r, 50)) + + addEvent({ + type: 'execution-complete', + toolName: 'show_notification', + details: args.message, + }) + + return { + displayed: true, + timestamp: Date.now(), + } + }) + + const displayChartTool = toolDefinition({ + name: 'display_chart', + description: 'Display a chart on the screen', + inputSchema: z.object({ + type: z.enum(['bar', 'line', 'pie']), + data: z.array(z.number()), + }), + outputSchema: z.object({ + rendered: z.boolean(), + chartId: z.string(), + }), + }).client(async (args) => { + addEvent({ + type: 'execution-start', + toolName: 'display_chart', + details: args.type, + }) + + // Simulate async work + await new Promise((r) => setTimeout(r, 50)) + + addEvent({ + type: 'execution-complete', + toolName: 'display_chart', + details: args.type, + }) + + return { + rendered: true, + chartId: `chart-${Date.now()}`, + } + }) + + return [showNotificationTool, displayChartTool] +} + +// Available test scenarios +const SCENARIOS = [ + { id: 'text-only', label: 'Text Only (No Tools)', category: 'basic' }, + { id: 'server-tool-single', label: 'Single Server Tool', category: 'basic' }, + { id: 'client-tool-single', label: 'Single Client Tool', category: 'basic' }, + { id: 'approval-tool', label: 'Approval Required Tool', category: 'basic' }, + { + id: 'sequence-server-client', + label: 'Server → Client Sequence', + category: 'basic', + }, + { id: 'parallel-tools', label: 'Parallel Tools', category: 'basic' }, + // Race condition / event flow scenarios + { + id: 'sequential-client-tools', + label: 'Sequential Client Tools (2)', + category: 'race', + }, + { + id: 'parallel-client-tools', + label: 'Parallel Client Tools', + category: 'race', + }, + { + id: 'sequential-approvals', + label: 'Sequential Approvals (2)', + category: 'race', + }, + { id: 'parallel-approvals', label: 'Parallel Approvals', category: 'race' }, + { id: 'client-then-approval', label: 'Client → Approval', category: 'race' }, + { id: 'approval-then-client', label: 'Approval → Client', category: 'race' }, + { + id: 'server-then-two-clients', + label: 'Server → 2 Clients', + 
category: 'race', + }, + { + id: 'triple-client-sequence', + label: 'Triple Client Sequence', + category: 'race', + }, +] + +function ToolsTestPage() { + const [scenario, setScenario] = useState('text-only') + const [toolEvents, setToolEvents] = useState>([]) + const [testStartTime, setTestStartTime] = useState(null) + const [testComplete, setTestComplete] = useState(false) + + // Track approvals we've responded to (to avoid duplicate responses) + const respondedApprovals = useRef>(new Set()) + + // Create event logger + const addEvent = useCallback((event: Omit) => { + setToolEvents((prev) => [...prev, { ...event, timestamp: Date.now() }]) + }, []) + + // Create tracked tools (memoized since addEvent is stable) + const clientTools = useRef(createTrackedTools(addEvent)).current + + const { messages, sendMessage, isLoading, stop, addToolApprovalResponse } = + useChat({ + // Include scenario in ID so client is recreated when scenario changes + id: `tools-test-${scenario}`, + connection: fetchServerSentEvents('/api/tools-test'), + body: { scenario }, + tools: clientTools, + onFinish: () => { + setTestComplete(true) + }, + }) + + // Track when test completes (all tool calls are complete and not loading) + useEffect(() => { + if (!isLoading && testStartTime && messages.length > 1) { + // Get all tool results (for server tools) + const resultIds = new Set( + messages.flatMap((msg) => + msg.parts + .filter((p) => p.type === 'tool-result') + .map((p) => (p as { toolCallId: string }).toolCallId), + ), + ) + + // Check if any tool calls are still pending + const allToolCalls = messages.flatMap((msg) => + msg.parts.filter((p) => p.type === 'tool-call'), + ) + const pendingCalls = allToolCalls.filter( + (tc) => + tc.state !== 'complete' && + tc.state !== 'output-available' && + tc.output === undefined && + !resultIds.has(tc.id), + ) + if (pendingCalls.length === 0 && allToolCalls.length > 0) { + setTestComplete(true) + } + } + }, [isLoading, messages, testStartTime]) + + const handleSendMessage = useCallback(() => { + // Reset test state + setToolEvents([]) + setTestComplete(false) + setTestStartTime(Date.now()) + respondedApprovals.current.clear() + sendMessage('Run the test scenario') + }, [sendMessage]) + + // Extract tool call parts from messages for display + const toolCalls = messages.flatMap((msg) => + msg.parts + .filter((p) => p.type === 'tool-call') + .map((p) => ({ + messageId: msg.id, + ...p, + })), + ) + + // Extract tool result parts (for server tools) + const toolResultIds = new Set( + messages.flatMap((msg) => + msg.parts + .filter((p) => p.type === 'tool-result') + .map((p) => (p as { toolCallId: string }).toolCallId), + ), + ) + + // Extract approval requests + const pendingApprovals = toolCalls.filter( + (tc) => tc.approval?.needsApproval && tc.state === 'approval-requested', + ) + + return ( +
+

Tool Testing Page

+ + {/* Scenario Selector */} +
+ + + {testComplete && ( + + ✓ Test Complete + + )} +
+ + {/* Controls */} +
+ + {isLoading && ( + + )} +
+ + {/* Pending Approvals */} + {pendingApprovals.length > 0 && ( +
+

+ Pending Approvals ( + {pendingApprovals.length}) +

+ {pendingApprovals.map((tc) => ( +
+ + {tc.name}: {JSON.stringify(tc.arguments)} + + + +
+ ))} +
+ )} + + {/* Event Log - tracks execution flow for testing */} +
+

+ Event Log ({toolEvents.length}) +

+ {toolEvents.length === 0 ? ( +

No events yet

+ ) : ( +
+ {toolEvents.map((event, i) => ( +
+ [ + {new Date(event.timestamp) + .toISOString() + .split('T')[1] + ?.slice(0, 12)} + ] {event.type}: {event.toolName} + {event.details ? ` - ${event.details}` : ''} +
+ ))} +
+ )} +
+ + {/* Tool Calls Display */} +
+

Tool Calls

+ {toolCalls.length === 0 ? ( +

No tool calls yet

+ ) : ( + + + + + + + + + + + {toolCalls.map((tc) => ( + + + + + + + ))} + +
ToolStateArgumentsOutput
{tc.name} + + {tc.state} + + + {typeof tc.arguments === 'string' + ? tc.arguments + : JSON.stringify(tc.arguments)} + + {tc.output ? JSON.stringify(tc.output) : '-'} +
+ )} +
+ + {/* Messages JSON Display */} +
+
+          {JSON.stringify(messages, null, 2)}
+        
+
+ + {/* Test metadata for assertions */} +
+ tc.state === 'complete' || + tc.state === 'output-available' || + tc.output !== undefined || + toolResultIds.has(tc.id), + ).length + } + data-event-count={toolEvents.length} + data-execution-start-count={ + toolEvents.filter((e) => e.type === 'execution-start').length + } + data-execution-complete-count={ + toolEvents.filter((e) => e.type === 'execution-complete').length + } + data-approval-granted-count={ + toolEvents.filter((e) => e.type === 'approval-granted').length + } + data-approval-denied-count={ + toolEvents.filter((e) => e.type === 'approval-denied').length + } + /> + + {/* Event log as JSON for easy parsing in tests */} +
Gv="data:image/svg+xml,%3csvg%20width='400'%20height='400'%20viewBox='0%200%20400%20400'%20fill='none'%20xmlns='http://www.w3.org/2000/svg'%3e%3cpath%20d='M136.444%20221.556C123.558%20225.213%20115.104%20231.625%20109.535%20238.032C114.869%20233.364%20122.014%20229.08%20131.652%20226.348C141.51%20223.554%20149.92%20223.574%20156.869%20224.915V219.481C150.941%20218.939%20144.145%20219.371%20136.444%20221.556ZM108.946%20175.876L61.0895%20188.484C61.0895%20188.484%2061.9617%20189.716%2063.5767%20191.36L104.153%20180.668C104.153%20180.668%20103.578%20188.077%2098.5847%20194.705C108.03%20187.559%20108.946%20175.876%20108.946%20175.876ZM149.005%20288.347C81.6582%20306.486%2046.0272%20228.438%2035.2396%20187.928C30.2556%20169.229%2028.0799%20155.067%2027.5%20145.928C27.4377%20144.979%2027.4665%20144.179%2027.5336%20143.446C24.04%20143.657%2022.3674%20145.473%2022.7077%20150.721C23.2876%20159.855%2025.4633%20174.016%2030.4473%20192.721C41.2301%20233.225%2076.8659%20311.273%20144.213%20293.134C158.872%20289.185%20169.885%20281.992%20178.152%20272.81C170.532%20279.692%20160.995%20285.112%20149.005%20288.347ZM161.661%20128.11V132.903H188.077C187.535%20131.206%20186.989%20129.677%20186.447%20128.11H161.661Z'%20fill='%232D4552'/%3e%3cpath%20d='M193.981%20167.584C205.861%20170.958%20212.144%20179.287%20215.465%20186.658L228.711%20190.42C228.711%20190.42%20226.904%20164.623%20203.57%20157.995C181.741%20151.793%20168.308%20170.124%20166.674%20172.496C173.024%20167.972%20182.297%20164.268%20193.981%20167.584ZM299.422%20186.777C277.573%20180.547%20264.145%20198.916%20262.535%20201.255C268.89%20196.736%20278.158%20193.031%20289.837%20196.362C301.698%20199.741%20307.976%20208.06%20311.307%20215.436L324.572%20219.212C324.572%20219.212%20322.736%20193.41%20299.422%20186.777ZM286.262%20254.795L176.072%20223.99C176.072%20223.99%20177.265%20230.038%20181.842%20237.869L274.617%20263.805C282.255%20259.386%20286.262%20254.795%20286.262%20254.795ZM209.867%20321.102C122.618%20297.71%20133.166%20186.543%20147.284%20133.865C153.097%20112.156%20159.073%2096.0203%20164.029%2085.204C161.072%2084.5953%20158.623%2086.1529%20156.203%2091.0746C150.941%20101.747%20144.212%20119.124%20137.7%20143.45C123.586%20196.127%20113.038%20307.29%20200.283%20330.682C241.406%20341.699%20273.442%20324.955%20297.323%20298.659C274.655%20319.19%20245.714%20330.701%20209.867%20321.102Z'%20fill='%232D4552'/%3e%3cpath%20d='M161.661%20262.296V239.863L99.3324%20257.537C99.3324%20257.537%20103.938%20230.777%20136.444%20221.556C146.302%20218.762%20154.713%20218.781%20161.661%20220.123V128.11H192.869C189.471%20117.61%20186.184%20109.526%20183.423%20103.909C178.856%2094.612%20174.174%20100.775%20163.545%20109.665C156.059%20115.919%20137.139%20129.261%20108.668%20136.933C80.1966%20144.61%2057.179%20142.574%2047.5752%20140.911C33.9601%20138.562%2026.8387%20135.572%2027.5049%20145.928C28.0847%20155.062%2030.2605%20169.224%2035.2445%20187.928C46.0272%20228.433%2081.663%20306.481%20149.01%20288.342C166.602%20283.602%20179.019%20274.233%20187.626%20262.291H161.661V262.296ZM61.0848%20188.484L108.946%20175.876C108.946%20175.876%20107.551%20194.288%2089.6087%20199.018C71.6614%20203.743%2061.0848%20188.484%2061.0848%20188.484Z'%20fill='%23E2574C'/%3e%3cpath%20d='M341.786%20129.174C329.345%20131.355%20299.498%20134.072%20262.612%20124.185C225.716%20114.304%20201.236%2097.0224%20191.537%2088.8994C177.788%2077.3834%20171.74%2069.3802%20165.788%2081.4857C160.526%2092.163%20153.797%20109.54%20147.284%20133.866C133.171%20186.543%20122.623%20297.706%20209.867%20321.098C29
7.093%20344.47%20343.53%20242.92%20357.644%20190.238C364.157%20165.917%20367.013%20147.5%20367.799%20135.625C368.695%20122.173%20359.455%20126.078%20341.786%20129.174ZM166.497%20172.756C166.497%20172.756%20180.246%20151.372%20203.565%20158C226.899%20164.628%20228.706%20190.425%20228.706%20190.425L166.497%20172.756ZM223.42%20268.713C182.403%20256.698%20176.077%20223.99%20176.077%20223.99L286.262%20254.796C286.262%20254.791%20264.021%20280.578%20223.42%20268.713ZM262.377%20201.495C262.377%20201.495%20276.107%20180.126%20299.422%20186.773C322.736%20193.411%20324.572%20219.208%20324.572%20219.208L262.377%20201.495Z'%20fill='%232EAD33'/%3e%3cpath%20d='M139.88%20246.04L99.3324%20257.532C99.3324%20257.532%20103.737%20232.44%20133.607%20222.496L110.647%20136.33L108.663%20136.933C80.1918%20144.611%2057.1742%20142.574%2047.5704%20140.911C33.9554%20138.563%2026.834%20135.572%2027.5001%20145.929C28.08%20155.063%2030.2557%20169.224%2035.2397%20187.929C46.0225%20228.433%2081.6583%20306.481%20149.005%20288.342L150.989%20287.719L139.88%20246.04ZM61.0848%20188.485L108.946%20175.876C108.946%20175.876%20107.551%20194.288%2089.6087%20199.018C71.6615%20203.743%2061.0848%20188.485%2061.0848%20188.485Z'%20fill='%23D65348'/%3e%3cpath%20d='M225.27%20269.163L223.415%20268.712C182.398%20256.698%20176.072%20223.99%20176.072%20223.99L232.89%20239.872L262.971%20124.281L262.607%20124.185C225.711%20114.304%20201.232%2097.0224%20191.532%2088.8994C177.783%2077.3834%20171.735%2069.3802%20165.783%2081.4857C160.526%2092.163%20153.797%20109.54%20147.284%20133.866C133.171%20186.543%20122.623%20297.706%20209.867%20321.097L211.655%20321.5L225.27%20269.163ZM166.497%20172.756C166.497%20172.756%20180.246%20151.372%20203.565%20158C226.899%20164.628%20228.706%20190.425%20228.706%20190.425L166.497%20172.756Z'%20fill='%231D8D22'/%3e%3cpath%20d='M141.946%20245.451L131.072%20248.537C133.641%20263.019%20138.169%20276.917%20145.276%20289.195C146.513%20288.922%20147.74%20288.687%20149%20288.342C152.302%20287.451%20155.364%20286.348%20158.312%20285.145C150.371%20273.361%20145.118%20259.789%20141.946%20245.451ZM137.7%20143.451C132.112%20164.307%20127.113%20194.326%20128.489%20224.436C130.952%20223.367%20133.554%20222.371%20136.444%20221.551L138.457%20221.101C136.003%20188.939%20141.308%20156.165%20147.284%20133.866C148.799%20128.225%20150.318%20122.978%20151.832%20118.085C149.393%20119.637%20146.767%20121.228%20143.776%20122.867C141.759%20129.093%20139.722%20135.898%20137.7%20143.451Z'%20fill='%23C04B41'/%3e%3c/svg%3e",Gf=o5,yr=document.createElement("link");yr.rel="shortcut icon";yr.href=Gv;document.head.appendChild(yr);const Xv=()=>{const[u,i]=ct.useState();return ct.useEffect(()=>{const c=new Vv;c.load().then(()=>{var f;(f=document.getElementById("playwrightReportBase64"))==null||f.remove(),i(c)})},[]),m.jsx(z5,{children:m.jsx(Bv,{report:u})})};window.onload=()=>{q5(),E5.createRoot(document.querySelector("#root")).render(m.jsx(Xv,{}))};class Vv{constructor(){dn(this,"_entries",new Map);dn(this,"_json")}async load(){const i=document.getElementById("playwrightReportBase64").textContent,c=new Gf.ZipReader(new Gf.Data64URIReader(i),{useWebWorkers:!1});for(const f of await c.getEntries())this._entries.set(f.filename,f);this._json=await this.entry("report.json")}json(){return this._json}async entry(i){const c=this._entries.get(i),f=new Gf.TextWriter;return await c.getData(f),JSON.parse(await f.getData())}} + + -
+
diff --git a/testing/panel/src/routes/api.simulator-chat.ts b/testing/panel/src/routes/api.simulator-chat.ts
index 1e62c4237..1d62e691f 100644
--- a/testing/panel/src/routes/api.simulator-chat.ts
+++ b/testing/panel/src/routes/api.simulator-chat.ts
@@ -3,12 +3,12 @@
 import { chat, maxIterations, toServerSentEventsResponse } from '@tanstack/ai'
 import type { AIAdapter, ChatOptions, StreamChunk } from '@tanstack/ai'
 import {
-  serverTool,
-  serverToolWithApproval,
   clientServerTool,
   clientServerToolWithApproval,
   clientToolDef,
   clientToolWithApprovalDef,
+  serverTool,
+  serverToolWithApproval,
 } from '@/lib/simulator-tools'
 
 /**
@@ -27,8 +27,9 @@ interface ParsedToolCall {
   arguments: Record
 }
 
-function parseToolCalls(message: string): ParsedToolCall[] {
-  const toolCalls: ParsedToolCall[] = []
+function parseToolCalls(message: string): Array<ParsedToolCall> {
+  TOOL_CALL_REGEX.lastIndex = 0
+  const toolCalls: Array<ParsedToolCall> = []
   let match
 
   while ((match = TOOL_CALL_REGEX.exec(message)) !== null) {
diff --git a/testing/panel/src/routes/simulator.tsx b/testing/panel/src/routes/simulator.tsx
index 9ac30ba01..1ae9b5f54 100644
--- a/testing/panel/src/routes/simulator.tsx
+++ b/testing/panel/src/routes/simulator.tsx
@@ -1,13 +1,13 @@
 import { useEffect, useRef, useState } from 'react'
 import { createFileRoute } from '@tanstack/react-router'
 import {
-  Send,
-  Square,
-  Zap,
   FlaskConical,
-  Server,
   Monitor,
+  Send,
+  Server,
   ShieldCheck,
+  Square,
+  Zap,
 } from 'lucide-react'
 import ReactMarkdown from 'react-markdown'
 import rehypeRaw from 'rehype-raw'
@@ -20,10 +20,10 @@
 import { clientTools } from '@tanstack/ai-client'
 
 import type { UIMessage } from '@tanstack/ai-react'
 import {
-  clientToolDef,
-  clientToolWithApprovalDef,
   clientServerToolDef,
   clientServerToolWithApprovalDef,
+  clientToolDef,
+  clientToolWithApprovalDef,
   createClientResult,
 } from '@/lib/simulator-tools'
@@ -52,6 +52,17 @@ const tools = clientTools(
   clientServerToolWithApprovalClient,
 )
 
+// Static Tailwind class mappings for JIT compatibility
+const categoryColors: Record<
+  string,
+  { text: string; hoverBorder: string }
+> = {
+  cyan: { text: 'text-cyan-400', hoverBorder: 'hover:border-cyan-500/30' },
+  purple: { text: 'text-purple-400', hoverBorder: 'hover:border-purple-500/30' },
+  yellow: { text: 'text-yellow-400', hoverBorder: 'hover:border-yellow-500/30' },
+  green: { text: 'text-green-400', hoverBorder: 'hover:border-green-500/30' },
+}
+
 // Tool injection templates
 const TOOL_TEMPLATES = [
   {
@@ -200,11 +211,11 @@ function Messages({
                           className="text-white prose dark:prose-invert max-w-none"
                         >
                           {part.content}
@@ -231,7 +242,13 @@ function Messages({
                               {JSON.stringify(
-                                JSON.parse(part.arguments),
+                                (() => {
+                                  try {
+                                    return JSON.parse(part.arguments)
+                                  } catch {
+                                    return part.arguments
+                                  }
+                                })(),
                                 null,
                                 2,
                               )}
@@ -485,7 +502,9 @@ function ToolInjectionPanel({
         {TOOL_TEMPLATES.map((category) => (
           
                {category.category}
@@ -495,7 +514,7 @@ function ToolInjectionPanel({