
Commit 59a50af

[Frontend] OpenAI Responses API supports Tool/Function calling - non-harmony (vllm-project#26874)
Signed-off-by: chaunceyjiang <[email protected]>
1 parent 981cadb commit 59a50af

File tree: 12 files changed (+404, −30 lines)
Lines changed: 83 additions & 0 deletions (new file)

```python
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Set up this example by starting a vLLM OpenAI-compatible server with tool call
options enabled.
Reasoning models can be used through the Responses API as seen here:
https://platform.openai.com/docs/api-reference/responses
For example:
vllm serve Qwen/Qwen3-1.7B --reasoning-parser qwen3 \
    --structured-outputs-config.backend xgrammar \
    --enable-auto-tool-choice --tool-call-parser hermes
"""

import json

from openai import OpenAI
from utils import get_first_model


def get_weather(latitude: float, longitude: float) -> str:
    """
    Mock function to simulate getting weather data.
    In a real application, this would call an external weather API.
    """
    return f"Current temperature at ({latitude}, {longitude}) is 20°C."


tools = [
    {
        "type": "function",
        "name": "get_weather",
        "description": "Get current temperature for provided coordinates in celsius.",
        "parameters": {
            "type": "object",
            "properties": {
                "latitude": {"type": "number"},
                "longitude": {"type": "number"},
            },
            "required": ["latitude", "longitude"],
            "additionalProperties": False,
        },
        "strict": True,
    }
]

input_messages = [
    {"role": "user", "content": "What's the weather like in Paris today?"}
]


def main():
    base_url = "http://0.0.0.0:8000/v1"
    client = OpenAI(base_url=base_url, api_key="empty")
    model = get_first_model(client)
    response = client.responses.create(
        model=model, input=input_messages, tools=tools, tool_choice="required"
    )

    for out in response.output:
        if out.type == "function_call":
            print("Function call:", out.name, out.arguments)
            tool_call = out
    args = json.loads(tool_call.arguments)
    result = get_weather(args["latitude"], args["longitude"])

    input_messages.append(tool_call)  # append model's function call message
    input_messages.append(
        {  # append result message
            "type": "function_call_output",
            "call_id": tool_call.call_id,
            "output": str(result),
        }
    )
    response_2 = client.responses.create(
        model=model,
        input=input_messages,
        tools=tools,
    )
    print(response_2.output_text)


if __name__ == "__main__":
    main()
```
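One caveat when adapting this example: with `tool_choice="required"` the model is forced to emit a function call, but with `"auto"` it may answer in plain text, so the dispatch step should tolerate a missing call. A minimal hedged sketch of such a guard, reusing the example's `response`, `json`, and `get_weather` from above:

```python
def dispatch(response) -> None:
    """Handle a Responses API result whether or not it contains a tool call."""
    # Find the first function_call output item, if any.
    tool_call = next(
        (out for out in response.output if out.type == "function_call"), None
    )
    if tool_call is None:
        # tool_choice="auto" can let the model answer directly.
        print(response.output_text)
    else:
        args = json.loads(tool_call.arguments)
        print(get_weather(args["latitude"], args["longitude"]))
```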

tests/v1/entrypoints/openai/responses/conftest.py renamed to tests/v1/entrypoints/openai/serving_responses/conftest.py

Lines changed: 6 additions & 1 deletion

```diff
@@ -15,8 +15,13 @@ def default_server_args():
         "--max-model-len",
         "8192",
         "--enforce-eager",  # For faster startup.
+        "--enable-auto-tool-choice",
+        "--structured-outputs-config.backend",
+        "xgrammar",
+        "--tool-call-parser",
+        "hermes",
         "--reasoning-parser",
-        "deepseek_r1",
+        "qwen3",
     ]
```
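These fixture flags mirror the serve invocation from the example's docstring (the fixture additionally sets `--max-model-len 8192` and `--enforce-eager` for faster test startup). For reference, the equivalent manual launch is:

```bash
vllm serve Qwen/Qwen3-1.7B --reasoning-parser qwen3 \
    --structured-outputs-config.backend xgrammar \
    --enable-auto-tool-choice --tool-call-parser hermes
```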

Lines changed: 198 additions & 0 deletions (new file)

```python
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import json

import openai  # use the official client for correctness check
import pytest

MODEL_NAME = "Qwen/Qwen3-1.7B"
tools = [
    {
        "type": "function",
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description": "The city to find the weather for, e.g. 'Vienna'",
                    "default": "Vienna",
                },
                "country": {
                    "type": "string",
                    "description": "The country that the city is in, e.g. 'Austria'",
                },
                "unit": {
                    "type": "string",
                    "description": "The unit to fetch the temperature in",
                    "enum": ["celsius", "fahrenheit"],
                },
                "options": {
                    "$ref": "#/$defs/WeatherOptions",
                    "description": "Optional parameters for weather query",
                },
            },
            "required": ["country", "unit"],
            "$defs": {
                "WeatherOptions": {
                    "title": "WeatherOptions",
                    "type": "object",
                    "additionalProperties": False,
                    "properties": {
                        "unit": {
                            "type": "string",
                            "enum": ["celsius", "fahrenheit"],
                            "default": "celsius",
                            "description": "Temperature unit",
                            "title": "Temperature Unit",
                        },
                        "include_forecast": {
                            "type": "boolean",
                            "default": False,
                            "description": "Whether to include a 24-hour forecast",
                            "title": "Include Forecast",
                        },
                        "language": {
                            "type": "string",
                            "default": "zh-CN",
                            "description": "Language of the response",
                            "title": "Language",
                            "enum": ["zh-CN", "en-US", "ja-JP"],
                        },
                    },
                },
            },
        },
    },
    {
        "type": "function",
        "name": "get_forecast",
        "description": "Get the weather forecast for a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description": "The city to get the forecast for, e.g. 'Vienna'",
                    "default": "Vienna",
                },
                "country": {
                    "type": "string",
                    "description": "The country that the city is in, e.g. 'Austria'",
                },
                "days": {
                    "type": "integer",
                    "description": "Number of days to get the forecast for (1-7)",
                },
                "unit": {
                    "type": "string",
                    "description": "The unit to fetch the temperature in",
                    "enum": ["celsius", "fahrenheit"],
                },
            },
            "required": ["country", "days", "unit"],
        },
    },
]


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("tool_choice", ["auto", "required"])
async def test_function_tool_use(
    client: openai.AsyncOpenAI, model_name: str, tool_choice: str
):
    prompt = [
        {
            "role": "user",
            "content": "Can you tell me what the current weather is in Berlin and the "
            "forecast for the next 5 days, in fahrenheit?",
        },
    ]
    response = await client.responses.create(
        model=model_name,
        input=prompt,
        tools=tools,
        tool_choice=tool_choice,
    )

    assert len(response.output) >= 1
    tool_call = None
    reasoning = None
    for out in response.output:
        if out.type == "function_call":
            tool_call = out
        if out.type == "reasoning":
            reasoning = out
    assert tool_call is not None
    assert tool_call.type == "function_call"
    assert json.loads(tool_call.arguments) is not None
    assert reasoning is not None
    assert reasoning.type == "reasoning"


@pytest.mark.asyncio
async def test_named_tool_use(client: openai.AsyncOpenAI):
    def get_weather(latitude: float, longitude: float) -> str:
        """
        Mock function to simulate getting weather data.
        In a real application, this would call an external weather API.
        """
        return f"Current temperature at ({latitude}, {longitude}) is 20°C."

    tools = [
        {
            "type": "function",
            "name": "get_weather",
            "description": (
                "Get current temperature for provided coordinates in celsius."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "latitude": {"type": "number"},
                    "longitude": {"type": "number"},
                },
                "required": ["latitude", "longitude"],
                "additionalProperties": False,
            },
            "strict": True,
        }
    ]

    input_messages = [
        {"role": "user", "content": "What's the weather like in Paris today?"}
    ]

    response = await client.responses.create(
        model=MODEL_NAME,
        input=input_messages,
        tools=tools,
        tool_choice={"type": "function", "name": "get_weather"},
    )
    assert len(response.output) >= 1
    tool_call = None  # initialize so the assert below fails cleanly, not with NameError
    for out in response.output:
        if out.type == "function_call":
            tool_call = out
    assert tool_call is not None
    assert tool_call.type == "function_call"
    assert tool_call.name == "get_weather"
    args = json.loads(tool_call.arguments)
    assert args["latitude"] is not None
    assert args["longitude"] is not None
    # call the tool
    result = get_weather(args["latitude"], args["longitude"])
    input_messages.append(tool_call)  # append model's function call message
    input_messages.append(
        {  # append result message
            "type": "function_call_output",
            "call_id": tool_call.call_id,
            "output": str(result),
        }
    )
    # create a new response with the tool call result
    response_2 = await client.responses.create(model=MODEL_NAME, input=input_messages)
    # check the output
    assert len(response_2.output_text) > 0
```
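Assuming the new tests sit alongside the renamed conftest (which supplies the `client` fixture and the server flags above), a typical invocation would be:

```bash
# Path inferred from the conftest rename above; the new test file's exact
# name is not shown in this diff extract.
pytest tests/v1/entrypoints/openai/serving_responses/ -k "tool_use"
```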

vllm/entrypoints/openai/serving_engine.py

Lines changed: 6 additions & 6 deletions

```diff
@@ -1098,13 +1098,13 @@ async def _preprocess_chat(
         )
 
         if should_parse_tools:
-            if not isinstance(request, ChatCompletionRequest):
-                msg = "Tool usage is only supported for Chat Completions API"
+            if not isinstance(request, ChatCompletionRequest | ResponsesRequest):
+                msg = (
+                    "Tool usage is only supported for Chat Completions API "
+                    "or Responses API requests."
+                )
                 raise NotImplementedError(msg)
-
-            request = tool_parser(tokenizer).adjust_request(  # type: ignore
-                request=request
-            )
+            request = tool_parser(tokenizer).adjust_request(request=request)  # type: ignore
 
         if tokenizer is None:
             assert isinstance(request_prompt, str), (
```