11# SPDX-License-Identifier: Apache-2.0
22"""Example Python client for `vllm.entrypoints.api_server`
3+ Start the demo server:
4+ python -m vllm.entrypoints.api_server --model <model_name>
5+
36NOTE: The API server is used only for demonstration and simple performance
47benchmarks. It is not intended for production use.
58For production use, we recommend `vllm serve` and the OpenAI client API.
69"""
710
811import argparse
912import json
13+ from argparse import Namespace
1014from collections .abc import Iterable
1115
1216import requests
@@ -27,7 +31,6 @@ def post_http_request(prompt: str,
2731 pload = {
2832 "prompt" : prompt ,
2933 "n" : n ,
30- "use_beam_search" : True ,
3134 "temperature" : 0.0 ,
3235 "max_tokens" : 16 ,
3336 "stream" : stream ,
@@ -55,14 +58,7 @@ def get_response(response: requests.Response) -> list[str]:
5558 return output
5659
5760
58- if __name__ == "__main__" :
59- parser = argparse .ArgumentParser ()
60- parser .add_argument ("--host" , type = str , default = "localhost" )
61- parser .add_argument ("--port" , type = int , default = 8000 )
62- parser .add_argument ("--n" , type = int , default = 4 )
63- parser .add_argument ("--prompt" , type = str , default = "San Francisco is a" )
64- parser .add_argument ("--stream" , action = "store_true" )
65- args = parser .parse_args ()
61+ def main (args : Namespace ):
6662 prompt = args .prompt
6763 api_url = f"http://{ args .host } :{ args .port } /generate"
6864 n = args .n
@@ -83,3 +79,14 @@ def get_response(response: requests.Response) -> list[str]:
8379 output = get_response (response )
8480 for i , line in enumerate (output ):
8581 print (f"Beam candidate { i } : { line !r} " , flush = True )
82+
83+
if __name__ == "__main__":
    # Script entry point: build the command-line interface, parse it, and
    # pass the resulting namespace to main().
    parser = argparse.ArgumentParser()

    # The value-carrying options all follow the same (flag, type, default)
    # pattern, so register them from a table, in their original order.
    for flag, value_type, fallback in (
        ("--host", str, "localhost"),
        ("--port", int, 8000),
        ("--n", int, 1),
        ("--prompt", str, "San Francisco is a"),
    ):
        parser.add_argument(flag, type=value_type, default=fallback)

    # --stream takes no value; it is False unless given on the command line.
    parser.add_argument("--stream", action="store_true")

    args = parser.parse_args()
    main(args)
0 commit comments