|
60 | 60 | StatefulSemaphore,
|
61 | 61 | api_server_logger,
|
62 | 62 | console_logger,
|
| 63 | + is_package_installed, |
63 | 64 | is_port_available,
|
64 | 65 | retrive_model_from_server,
|
65 | 66 | )
|
66 | 67 |
|
67 | 68 | parser = FlexibleArgumentParser()
|
68 | 69 | parser.add_argument("--port", default=8000, type=int, help="port to the http server")
|
69 | 70 | parser.add_argument("--host", default="0.0.0.0", type=str, help="host to the http server")
|
70 |
| -parser.add_argument("--workers", default=1, type=int, help="number of workers") |
| 71 | +parser.add_argument("--workers", default=None, type=int, help="number of workers") |
71 | 72 | parser.add_argument("--metrics-port", default=8001, type=int, help="port for metrics server")
|
72 | 73 | parser.add_argument("--controller-port", default=-1, type=int, help="port for controller server")
|
73 | 74 | parser.add_argument(
|
|
82 | 83 | )
|
83 | 84 | parser = EngineArgs.add_cli_args(parser)
|
84 | 85 | args = parser.parse_args()
|
| 86 | + |
| 87 | +if args.workers is None: |
| 88 | + # On GPU builds, the number of uvicorn workers is derived from `max-num-seqs` (max_num_seqs // 32, clamped to the range 1-8) |
| 89 | + if is_package_installed("paddlepaddle-gpu"): |
| 90 | + args.workers = max(min(int(args.max_num_seqs // 32), 8), 1) |
| 91 | + else: |
| 92 | + args.workers = 1 |
| 93 | +console_logger.info(f"Number of api-server workers: {args.workers}.") |
| 94 | + |
85 | 95 | args.model = retrive_model_from_server(args.model, args.revision)
|
86 | 96 | chat_template = load_chat_template(args.chat_template, args.model)
|
87 | 97 | if args.tool_parser_plugin:
|
|
0 commit comments