Skip to content

Commit 72ea95e

Browse files
fix: Fix max_tool_calls for openai provider and add integration tests for the max_tool_calls feature (llamastack#4190)
# Problem

OpenAI gpt-4 returned an error when built-in and MCP calls were skipped due to the max_tool_calls parameter. The following is from the server log:

```
RuntimeError: OpenAI response failed: Error code: 400 - {'error': {'message': "An assistant message with 'tool_calls' must be followed by tool messages responding to each 'tool_call_id'. The following tool_call_ids did not have response messages: call_Yi9V1QNpN73dJCAgP2Arcjej", 'type': 'invalid_request_error', 'param': 'messages', 'code': None}}
```

# What does this PR do?

- Fixes the error returned by OpenAI/GPT when calls were skipped due to max_tool_calls. We now return a tool message that explicitly states that the call was skipped.
- Adds integration tests as a follow-up to PR [#4062](llamastack#4062)

Part 2 for issue [#3563](llamastack#3563)

## Test Plan

- Added integration tests
- Added new recordings

---------

Co-authored-by: Ashwin Bharambe <[email protected]>
1 parent f18870a commit 72ea95e

11 files changed

+8386
-168
lines changed

src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
OpenAIResponseUsage,
6767
OpenAIResponseUsageInputTokensDetails,
6868
OpenAIResponseUsageOutputTokensDetails,
69+
OpenAIToolMessageParam,
6970
Safety,
7071
WebSearchToolTypes,
7172
)
@@ -906,10 +907,16 @@ async def _coordinate_tool_execution(
906907
"""Coordinate execution of both function and non-function tool calls."""
907908
# Execute non-function tool calls
908909
for tool_call in non_function_tool_calls:
909-
# Check if total calls made to built-in and mcp tools exceed max_tool_calls
910+
# if total calls made to built-in and mcp tools exceed max_tool_calls
911+
# then create a tool response message indicating the call was skipped
910912
if self.max_tool_calls is not None and self.accumulated_builtin_tool_calls >= self.max_tool_calls:
911913
logger.info(f"Ignoring built-in and mcp tool call since reached the limit of {self.max_tool_calls=}.")
912-
break
914+
skipped_call_message = OpenAIToolMessageParam(
915+
content=f"Tool call skipped: maximum tool calls limit ({self.max_tool_calls}) reached.",
916+
tool_call_id=tool_call.id,
917+
)
918+
next_turn_messages.append(skipped_call_message)
919+
continue
913920

914921
# Find the item_id for this tool call
915922
matching_item_id = None

tests/integration/agents/test_openai_responses.py

Lines changed: 0 additions & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -516,169 +516,3 @@ def test_response_with_instructions(openai_client, client_with_models, text_mode
516516

517517
# Verify instructions from previous response was not carried over to the next response
518518
assert response_with_instructions2.instructions == instructions2
519-
520-
521-
@pytest.mark.skip(reason="Tool calling is not reliable.")
522-
def test_max_tool_calls_with_function_tools(openai_client, client_with_models, text_model_id):
523-
"""Test handling of max_tool_calls with function tools in responses."""
524-
if isinstance(client_with_models, LlamaStackAsLibraryClient):
525-
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
526-
527-
client = openai_client
528-
max_tool_calls = 1
529-
530-
tools = [
531-
{
532-
"type": "function",
533-
"name": "get_weather",
534-
"description": "Get weather information for a specified location",
535-
"parameters": {
536-
"type": "object",
537-
"properties": {
538-
"location": {
539-
"type": "string",
540-
"description": "The city name (e.g., 'New York', 'London')",
541-
},
542-
},
543-
},
544-
},
545-
{
546-
"type": "function",
547-
"name": "get_time",
548-
"description": "Get current time for a specified location",
549-
"parameters": {
550-
"type": "object",
551-
"properties": {
552-
"location": {
553-
"type": "string",
554-
"description": "The city name (e.g., 'New York', 'London')",
555-
},
556-
},
557-
},
558-
},
559-
]
560-
561-
# First create a response that triggers function tools
562-
response = client.responses.create(
563-
model=text_model_id,
564-
input="Can you tell me the weather in Paris and the current time?",
565-
tools=tools,
566-
stream=False,
567-
max_tool_calls=max_tool_calls,
568-
)
569-
570-
# Verify we got two function calls and that the max_tool_calls do not affect function tools
571-
assert len(response.output) == 2
572-
assert response.output[0].type == "function_call"
573-
assert response.output[0].name == "get_weather"
574-
assert response.output[0].status == "completed"
575-
assert response.output[1].type == "function_call"
576-
assert response.output[1].name == "get_time"
577-
assert response.output[0].status == "completed"
578-
579-
# Verify we have a valid max_tool_calls field
580-
assert response.max_tool_calls == max_tool_calls
581-
582-
583-
def test_max_tool_calls_invalid(openai_client, client_with_models, text_model_id):
584-
"""Test handling of invalid max_tool_calls in responses."""
585-
if isinstance(client_with_models, LlamaStackAsLibraryClient):
586-
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
587-
588-
client = openai_client
589-
590-
input = "Search for today's top technology news."
591-
invalid_max_tool_calls = 0
592-
tools = [
593-
{"type": "web_search"},
594-
]
595-
596-
# Create a response with an invalid max_tool_calls value i.e. 0
597-
# Handle ValueError from LLS and BadRequestError from OpenAI client
598-
with pytest.raises((ValueError, BadRequestError)) as excinfo:
599-
client.responses.create(
600-
model=text_model_id,
601-
input=input,
602-
tools=tools,
603-
stream=False,
604-
max_tool_calls=invalid_max_tool_calls,
605-
)
606-
607-
error_message = str(excinfo.value)
608-
assert f"Invalid max_tool_calls={invalid_max_tool_calls}; should be >= 1" in error_message, (
609-
f"Expected error message about invalid max_tool_calls, got: {error_message}"
610-
)
611-
612-
613-
def test_max_tool_calls_with_builtin_tools(openai_client, client_with_models, text_model_id):
614-
"""Test handling of max_tool_calls with built-in tools in responses."""
615-
if isinstance(client_with_models, LlamaStackAsLibraryClient):
616-
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
617-
618-
client = openai_client
619-
620-
input = "Search for today's top technology and a positive news story. You MUST make exactly two separate web search calls."
621-
max_tool_calls = [1, 5]
622-
tools = [
623-
{"type": "web_search"},
624-
]
625-
626-
# First create a response that triggers web_search tools without max_tool_calls
627-
response = client.responses.create(
628-
model=text_model_id,
629-
input=input,
630-
tools=tools,
631-
stream=False,
632-
)
633-
634-
# Verify we got two web search calls followed by a message
635-
assert len(response.output) == 3
636-
assert response.output[0].type == "web_search_call"
637-
assert response.output[0].status == "completed"
638-
assert response.output[1].type == "web_search_call"
639-
assert response.output[1].status == "completed"
640-
assert response.output[2].type == "message"
641-
assert response.output[2].status == "completed"
642-
assert response.output[2].role == "assistant"
643-
644-
# Next create a response that triggers web_search tools with max_tool_calls set to 1
645-
response_2 = client.responses.create(
646-
model=text_model_id,
647-
input=input,
648-
tools=tools,
649-
stream=False,
650-
max_tool_calls=max_tool_calls[0],
651-
)
652-
653-
# Verify we got one web search tool call followed by a message
654-
assert len(response_2.output) == 2
655-
assert response_2.output[0].type == "web_search_call"
656-
assert response_2.output[0].status == "completed"
657-
assert response_2.output[1].type == "message"
658-
assert response_2.output[1].status == "completed"
659-
assert response_2.output[1].role == "assistant"
660-
661-
# Verify we have a valid max_tool_calls field
662-
assert response_2.max_tool_calls == max_tool_calls[0]
663-
664-
# Finally create a response that triggers web_search tools with max_tool_calls set to 5
665-
response_3 = client.responses.create(
666-
model=text_model_id,
667-
input=input,
668-
tools=tools,
669-
stream=False,
670-
max_tool_calls=max_tool_calls[1],
671-
)
672-
673-
# Verify we got two web search calls followed by a message
674-
assert len(response_3.output) == 3
675-
assert response_3.output[0].type == "web_search_call"
676-
assert response_3.output[0].status == "completed"
677-
assert response_3.output[1].type == "web_search_call"
678-
assert response_3.output[1].status == "completed"
679-
assert response_3.output[2].type == "message"
680-
assert response_3.output[2].status == "completed"
681-
assert response_3.output[2].role == "assistant"
682-
683-
# Verify we have a valid max_tool_calls field
684-
assert response_3.max_tool_calls == max_tool_calls[1]

0 commit comments

Comments
 (0)