Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion fastdeploy/engine/common_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ def _insert_zmq_task_to_scheduler(self):
else:
err, data = self.zmq_server.receive_pyobj_once(block)
if err is not None:
llm_logger.error("Engine stops inserting zmq task into scheduler, err:{err}")
llm_logger.error(f"Engine stops inserting zmq task into scheduler, err:{err}")
break

request, insert_task = None, []
Expand Down
12 changes: 11 additions & 1 deletion fastdeploy/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,15 @@
StatefulSemaphore,
api_server_logger,
console_logger,
is_package_installed,
is_port_available,
retrive_model_from_server,
)

parser = FlexibleArgumentParser()
parser.add_argument("--port", default=8000, type=int, help="port to the http server")
parser.add_argument("--host", default="0.0.0.0", type=str, help="host to the http server")
parser.add_argument("--workers", default=1, type=int, help="number of workers")
parser.add_argument("--workers", default=None, type=int, help="number of workers")
parser.add_argument("--metrics-port", default=8001, type=int, help="port for metrics server")
parser.add_argument("--controller-port", default=-1, type=int, help="port for controller server")
parser.add_argument(
Expand All @@ -82,6 +83,15 @@
)
parser = EngineArgs.add_cli_args(parser)
args = parser.parse_args()

# No explicit --workers value was given: pick a default for the uvicorn workers.
if args.workers is None:
    if not is_package_installed("paddlepaddle-gpu"):
        # Without the GPU build of Paddle, a single worker is used.
        args.workers = 1
    else:
        # In GPU, the worker count follows `max-num-seqs`:
        # one worker per 32 sequences, clamped to the range [1, 8].
        workers_by_seqs = int(args.max_num_seqs // 32)
        args.workers = max(1, min(workers_by_seqs, 8))
console_logger.info(f"Number of api-server workers: {args.workers}.")

args.model = retrive_model_from_server(args.model, args.revision)
chat_template = load_chat_template(args.chat_template, args.model)
if args.tool_parser_plugin:
Expand Down
9 changes: 9 additions & 0 deletions fastdeploy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import tarfile
import time
from datetime import datetime
from importlib.metadata import PackageNotFoundError, distribution
from logging.handlers import BaseRotatingHandler
from pathlib import Path
from typing import Literal, TypeVar, Union
Expand Down Expand Up @@ -668,6 +669,14 @@ def import_from_path(module_name: str, file_path: Union[str, os.PathLike]):
return module


def is_package_installed(package_name: str) -> bool:
    """
    Check whether a distribution package is installed in the current environment.

    Args:
        package_name: Name of the distribution as published (for example
            "paddlepaddle-gpu"), which is not necessarily the import name.

    Returns:
        True if metadata for the distribution can be found, False otherwise
        (including when `package_name` is empty or None).
    """
    # An empty name can never identify a package. Newer Python versions raise
    # ValueError (not PackageNotFoundError) for it, so reject it up front to
    # keep this helper a pure True/False check.
    if not package_name:
        return False
    try:
        distribution(package_name)
    except PackageNotFoundError:
        return False
    return True


def version():
"""
Prints the contents of the version.txt file located in the parent directory of this script.
Expand Down
Loading