Skip to content

Commit f361d03

Browse files
committed
[Feature] Setting number of apiserver workers automatically
1 parent 27f2e7a commit f361d03

File tree

3 files changed

+21
-2
lines changed

3 files changed

+21
-2
lines changed

fastdeploy/engine/common_engine.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -589,7 +589,7 @@ def _insert_zmq_task_to_scheduler(self):
589589
else:
590590
err, data = self.zmq_server.receive_pyobj_once(block)
591591
if err is not None:
592-
llm_logger.error("Engine stops inserting zmq task into scheduler, err:{err}")
592+
llm_logger.error(f"Engine stops inserting zmq task into scheduler, err:{err}")
593593
break
594594

595595
request, insert_task = None, []

fastdeploy/entrypoints/openai/api_server.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,14 +60,15 @@
6060
StatefulSemaphore,
6161
api_server_logger,
6262
console_logger,
63+
is_package_installed,
6364
is_port_available,
6465
retrive_model_from_server,
6566
)
6667

6768
parser = FlexibleArgumentParser()
6869
parser.add_argument("--port", default=8000, type=int, help="port to the http server")
6970
parser.add_argument("--host", default="0.0.0.0", type=str, help="host to the http server")
70-
parser.add_argument("--workers", default=1, type=int, help="number of workers")
71+
parser.add_argument("--workers", default=None, type=int, help="number of workers")
7172
parser.add_argument("--metrics-port", default=8001, type=int, help="port for metrics server")
7273
parser.add_argument("--controller-port", default=-1, type=int, help="port for controller server")
7374
parser.add_argument(
@@ -82,6 +83,15 @@
8283
)
8384
parser = EngineArgs.add_cli_args(parser)
8485
args = parser.parse_args()
86+
87+
if args.workers is None:
88+
# --workers was not given, so choose the number of uvicorn workers automatically.
# When the `paddlepaddle-gpu` package is installed, use one worker per 32 of
# `max-num-seqs`, clamped to the range [1, 8]; otherwise use a single worker.
89+
if is_package_installed("paddlepaddle-gpu"):
90+
args.workers = max(min(int(args.max_num_seqs // 32), 8), 1)
91+
else:
92+
args.workers = 1
93+
console_logger.info(f"Number of api-server workers: {args.workers}.")
94+
8595
args.model = retrive_model_from_server(args.model, args.revision)
8696
chat_template = load_chat_template(args.chat_template, args.model)
8797
if args.tool_parser_plugin:

fastdeploy/utils.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import tarfile
2828
import time
2929
from datetime import datetime
30+
from importlib.metadata import PackageNotFoundError, distribution
3031
from logging.handlers import BaseRotatingHandler
3132
from pathlib import Path
3233
from typing import Literal, TypeVar, Union
@@ -668,6 +669,14 @@ def import_from_path(module_name: str, file_path: Union[str, os.PathLike]):
668669
return module
669670

670671

672+
def is_package_installed(package_name: str) -> bool:
    """
    Check whether a distribution package is installed in the current environment.

    Args:
        package_name: Distribution name as known to the installer (for example
            "paddlepaddle-gpu"), which is not necessarily the import name.

    Returns:
        True if `importlib.metadata.distribution` finds metadata for the name,
        False if the name is empty or no such distribution is found.
    """
    # An empty name can never match a distribution. Some Python versions raise
    # ValueError for it rather than PackageNotFoundError, so answer directly
    # instead of letting that exception escape.
    if not package_name:
        return False
    try:
        distribution(package_name)
    except PackageNotFoundError:
        return False
    return True
678+
679+
671680
def version():
672681
"""
673682
Prints the contents of the version.txt file located in the parent directory of this script.

0 commit comments

Comments
 (0)