Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions framework/ServeTest/baseline_pic_mtp.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<response>The first image shows a bank branch with the signage "中国工商银行" (Industrial and Commercial Bank of China), and the second image also shows a bank branch with the signage "中国工商银行" (Industrial and Commercial Bank of China). Therefore, both images are related to Industrial and Commercial Bank of China.</response><|im_end|>
1 change: 1 addition & 0 deletions framework/ServeTest/baseline_text_mtp.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<response>“温故而知新”出自《论语·为政》,意为复习旧知识从而获得新的理解和体会。这一思想强调了学习的循环性和持续性,认为通过对已学内容的回顾和反思,可以深化对知识的理解,并从中发现新的价值和应用场景。在学习过程中,温故而知新不仅有助于巩固基础知识,还能培养批判性思维和创新能力,使学习更加高效和有意义。</response><|im_end|>
47 changes: 38 additions & 9 deletions framework/ServeTest/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import subprocess
import sys
import time
import traceback

import requests
import yaml
Expand Down Expand Up @@ -58,10 +59,12 @@ def get_available_port(env_key: str, default_start: int):
# Ports used by the served process. Each lookup passes the previously chosen
# port + 1 as its default start, so the order of these assignments matters.
# NOTE(review): get_available_port(env_key, default_start) is defined outside
# this view; presumably it honors the named environment variable and otherwise
# searches from default_start -- confirm against its definition.
FD_API_PORT = get_available_port("FD_API_PORT", FLASK_PORT + 1)
FD_ENGINE_QUEUE_PORT = get_available_port("FD_ENGINE_QUEUE_PORT", FD_API_PORT + 1)
FD_METRICS_PORT = get_available_port("FD_METRICS_PORT", FD_ENGINE_QUEUE_PORT + 1)
FD_CACHE_QUEUE_PORT = get_available_port("FD_CACHE_QUEUE_PORT", FD_METRICS_PORT + 1)
# Default parameters keyed by command-line flag name; the port flags take the
# values resolved above.
# NOTE(review): presumably merged into the config handed to build_command --
# the merge itself is not visible here; verify.
DEFAULT_PARAMS = {
"--port": FD_API_PORT,
"--engine-worker-queue-port": FD_ENGINE_QUEUE_PORT,
"--metrics-port": FD_METRICS_PORT,
"--cache-queue-port": FD_CACHE_QUEUE_PORT,
# Boolean switch rather than a valued option.
"--enable-logprob": True,
}

Expand All @@ -78,6 +81,7 @@ def build_command(config):
# 添加配置参数
for key, value in config.items():
if "--enable" in key:
value = bool(value if isinstance(value, bool) else eval(value))
if value:
cmd.append(key)
else:
Expand Down Expand Up @@ -175,19 +179,34 @@ def stop_server(signum=None, frame=None):
# 终止进程组(包括所有子进程)
os.killpg(os.getpgid(pid_port["PID"]), signal.SIGTERM)
except Exception as e:
print(f"Failed to stop server: {e}")
print(f"Failed to stop server: {e}, {str(traceback.format_exc())}")
try:
result = subprocess.run(
f"ps -efww | grep '\-\-cache_queue_port {FD_CACHE_QUEUE_PORT}' | grep -v grep", shell=True, capture_output=True, text=True
)
for line in result.stdout.strip().split("\n"):
if not line:
continue
parts = line.split()
pid = int(parts[1])
print(f"Killing PID: {pid}")
os.kill(pid, signal.SIGKILL)
except Exception as e:
print(f"Failed to kill cache manager process: {e}, {str(traceback.format_exc())}")

for port in [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT]:
for port in [FD_API_PORT, FD_ENGINE_QUEUE_PORT, FD_METRICS_PORT, FD_CACHE_QUEUE_PORT]:
try:
output = subprocess.check_output(f"lsof -i:{port} -t", shell=True).decode().strip()
for pid in output.splitlines():
os.kill(int(pid), signal.SIGKILL)
print(f"Killed process on port {port}, pid={pid}")
except Exception as e:
print(f"Failed to killed process on port: {e}")
print(f"Failed to kill process on port: {e}, {str(traceback.format_exc())}")
# 若log目录存在,则重命名为log_timestamp
if os.path.isdir("./log"):
os.rename("./log", "./log_{}".format(time.strftime("%Y%m%d%H%M%S")))
if os.path.exists("gemm_profiles.json"):
os.remove("gemm_profiles.json")

if signum:
sys.exit(0)
Expand Down Expand Up @@ -229,8 +248,10 @@ def start_service():
# 构建命令
cmd = build_command(final_config)
except Exception as e:
error_msg = f"Failed to start service: {e}, {str(traceback.format_exc())}"
print(error_msg)
return Response(
json.dumps({"status": "error", "message": str(e)}, ensure_ascii=False),
json.dumps({"status": "error", "message": error_msg}, ensure_ascii=False),
status=500,
content_type="application/json",
)
Expand Down Expand Up @@ -264,8 +285,10 @@ def start_service():

return Response(json.dumps(json_data, ensure_ascii=False), status=200, content_type="application/json")
except Exception as e:
error_msg = f"Failed to start service: {e}, {str(traceback.format_exc())}"
print(error_msg)
return Response(
json.dumps({"status": "error", "message": str(e)}, ensure_ascii=False),
json.dumps({"status": "error", "message": error_msg}, ensure_ascii=False),
status=500,
content_type="application/json",
)
Expand Down Expand Up @@ -295,8 +318,10 @@ def switch_service():
# 构建命令
cmd = build_command(final_config)
except Exception as e:
error_msg = f"Failed to switch service: {e}, {str(traceback.format_exc())}"
print(error_msg)
return Response(
json.dumps({"status": "error", "message": str(e)}, ensure_ascii=False),
json.dumps({"status": "error", "message": error_msg}, ensure_ascii=False),
status=500,
content_type="application/json",
)
Expand Down Expand Up @@ -330,8 +355,10 @@ def switch_service():

return Response(json.dumps(json_data, ensure_ascii=False), status=200, content_type="application/json")
except Exception as e:
error_msg = f"Failed to switch service: {e}, {str(traceback.format_exc())}"
print(error_msg)
return Response(
json.dumps({"status": "error", "message": str(e)}, ensure_ascii=False),
json.dumps({"status": "error", "message": error_msg}, ensure_ascii=False),
status=500,
content_type="application/json",
)
Expand Down Expand Up @@ -406,8 +433,10 @@ def get_config():
)

except Exception as e:
error_msg = f"{e}, {str(traceback.format_exc())}"
print(error_msg)
return Response(
json.dumps({"message": "api_server.log解析失败,请检查log", "error": str(e)}, ensure_ascii=False),
json.dumps({"message": "api_server.log解析失败,请检查log", "error": error_msg}, ensure_ascii=False),
status=500,
content_type="application/json",
)
Expand Down Expand Up @@ -447,7 +476,7 @@ def tail_file(path, lines=50):
with open(path, "r", encoding="utf-8", errors="ignore") as f:
return "".join(f.readlines()[-lines:])
except Exception as e:
return f"[无法读取 {path}]: {e}\n"
return f"[无法读取 {path}]: {e}, {str(traceback.format_exc())}\n"

result = f"服务启动超时,耗时:[{timeout}s]\n\n"
result += "==== server.log tail 50 ====\n"
Expand Down
4 changes: 4 additions & 0 deletions framework/ServeTest/mtp.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
speculative_config:
method: mtp
num_speculative_tokens: 1
model: /MODELDATA/safetensor_ckpt_step1600/mtp/
Loading
Loading