139 changes: 139 additions & 0 deletions .github/workflows/api_eval.yml
@@ -0,0 +1,139 @@
name: api_eval

on:
workflow_dispatch:
inputs:
repo_org:
required: false
description: 'Tested repository in org/repo form. Default is InternLM/lmdeploy'
type: string
default: 'InternLM/lmdeploy'
repo_ref:
required: false
description: 'Branch, tag, or commit id to test. Default is "main"'
type: string
default: 'main'
backend:
required: true
description: "Set the backend testcase filter: turbomind, pytorch, or both. Default is ['turbomind', 'pytorch']"
type: string
default: "['turbomind', 'pytorch']"


env:
HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.run_id }}
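# COV_PARAM points pytest-cov at the lmdeploy package installed in the container image rather than the source checkout.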
COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
FAIL_CONFIG: '--lf'
TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ github.run_id }}
OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL

jobs:
linux-build:
if: ${{ !cancelled() }}
strategy:
matrix:
pyver: [py310]
runs-on: ubuntu-latest
env:
PYTHON_VERSION: ${{ matrix.pyver }}
PLAT_NAME: manylinux2014_x86_64
DOCKER_TAG: cuda12.4
OUTPUT_FOLDER: cuda12.4_dist_${{ github.run_id }}
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
ref: ${{github.event.inputs.repo_ref || 'main'}}
- name: Build
run: |
echo ${PYTHON_VERSION}
echo ${PLAT_NAME}
echo ${DOCKER_TAG}
echo ${OUTPUT_FOLDER}
echo ${GITHUB_RUN_ID}
# remove -it
sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
- name: Upload Artifacts
uses: actions/upload-artifact@v4
with:
if-no-files-found: error
path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
retention-days: 1
name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}

test_evaluation:
needs: linux-build
if: ${{ !cancelled() }}
runs-on: [self-hosted, test-140]
timeout-minutes: 2400
strategy:
fail-fast: false
matrix:
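# The workflow_dispatch "backend" input is parsed as a JSON list; each entry becomes its own matrix job.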
backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
container:
image: openmmlab/lmdeploy:latest-cu12.8
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
- /nvme/github-actions/resources:/root/resources
- /nvme/github-actions/opencompass-data:/root/opencompass-data
- /nvme/qa_test_models/evaluation-reports:/root/evaluation-reports
- /nvme/qa_test_models:/nvme/qa_test_models
- /mnt/shared:/mnt/shared
- /mnt/bigdisk:/mnt/bigdisk
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
- /mnt/187:/mnt/187
steps:
- name: Show working directory
run: |
echo "Working directory set to: $(pwd)"
- name: Clone repository
uses: actions/checkout@v2
with:
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
ref: ${{github.event.inputs.repo_ref || 'main'}}
- name: Download Artifacts
uses: actions/download-artifact@v4
with:
name: my-artifact-${{ github.run_id }}-py310
- name: Install lmdeploy - dependency
run: |
python3 -m pip install -r requirements_cuda.txt
- name: Install lmdeploy
run: |
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Install opencompass
run: |
python3 -m pip install opencompass
- name: Check env
run: |
python3 -m pip list
lmdeploy check_env
rm -rf allure-results
mkdir -p ${{ env.REPORT_DIR }}/.pytest_cache
ln -s ${{ env.REPORT_DIR }}/.pytest_cache autotest
- name: Setup data and run evaluation
if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind')
run: |
overall_exit=0
ln -s /mnt/187/opencompass-data/data ./data
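# Each marker group uses a different xdist width so roughly 8 GPUs stay busy: 8x 1-GPU, 4x 2-GPU, 2x 4-GPU and 1x 8-GPU test workers.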
pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_1 and not pr_test and ${{matrix.backend}}" -n 8 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_2 and not pr_test and ${{matrix.backend}}" -n 4 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_4 and not pr_test and ${{matrix.backend}}" -n 2 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_8 and not pr_test and ${{matrix.backend}}" -n 1 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
exit $overall_exit
- name: Clear workspace
if: always()
run: |
export workdir=$(pwd)
rm -rf $workdir/*
13 changes: 13 additions & 0 deletions autotest/config.yaml
@@ -6,6 +6,7 @@ benchmark_path: /nvme/qa_test_models/benchmark-reports
dataset_path: /nvme/qa_test_models/datasets/ShareGPT_V3_unfiltered_cleaned_split.json
env_tag: a100


tp_config:
Llama-4-Scout-17B-16E-Instruct: 4
Meta-Llama-3-1-70B-Instruct: 4
@@ -22,6 +23,7 @@ tp_config:
Qwen3-32B: 2
Qwen3-30B-A3B: 2
Qwen3-30B-A3B-Base: 2
Qwen2.5-32B-Instruct: 2
Qwen2.5-72B-Instruct: 4
Qwen2.5-VL-32B-Instruct: 2
DeepSeek-V2-Lite-Chat: 2
@@ -37,6 +39,7 @@ tp_config:
gpt-oss-120b-bf16: 4



turbomind_chat_model:
- meta-llama/Llama-3.2-1B-Instruct
- meta-llama/Llama-3.2-3B-Instruct
@@ -370,3 +373,13 @@ benchmark_model:
- deepseek-ai/DeepSeek-V2-Lite-Chat
- lmsys/gpt-oss-20b-bf16
- lmsys/gpt-oss-120b-bf16


evaluate_model:
- google/gemma-2-9b-it
- google/gemma-2-27b-it
- meta-llama/Meta-Llama-3-1-8B-Instruct
- Qwen/Qwen2.5-7B-Instruct
- Qwen/Qwen2.5-32B-Instruct
- Qwen/Qwen1.5-MoE-A2.7B-Chat
- Qwen/Qwen3-30B-A3B
10 changes: 9 additions & 1 deletion autotest/conftest.py
@@ -1,3 +1,4 @@
import copy
import os

import pytest
@@ -23,7 +24,14 @@ def config():

with open(config_path) as f:
env_config = yaml.load(f.read(), Loader=yaml.SafeLoader)
return env_config

config_copy = copy.deepcopy(env_config)
github_run_id = os.environ.get('GITHUB_RUN_ID', 'local_run')
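# Suffix log_path with the run id so concurrent CI runs write logs to separate directories.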
if 'log_path' in config_copy:
config_copy['log_path'] = os.path.join(config_copy['log_path'], str(github_run_id))
os.makedirs(config_copy['log_path'], exist_ok=True)

return config_copy


@pytest.fixture(scope='session')
40 changes: 40 additions & 0 deletions autotest/evaluate/eval_config_chat.py
@@ -0,0 +1,40 @@
from mmengine.config import read_base
from opencompass.models import OpenAISDK

with read_base():
from opencompass.configs.datasets.gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets # noqa: F401, E501
from opencompass.configs.datasets.mmlu.mmlu_gen_4d595a import mmlu_datasets # noqa: F401, E501
from opencompass.configs.summarizers.groups.mmlu import mmlu_summary_groups # noqa: F401, E501

datasets = sum([v for k, v in locals().items() if k.endswith('_datasets')], [])

MODEL_NAME = ''
MODEL_PATH = ''
API_BASE = ''
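# NOTE: the empty MODEL_NAME / MODEL_PATH / API_BASE above are placeholders; presumably the evaluation harness fills them in before OpenCompass loads this config.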

api_meta_template = dict(round=[
dict(role='HUMAN', api_role='HUMAN'),
dict(role='BOT', api_role='BOT', generate=True),
])

models = [
dict(
type=OpenAISDK,
abbr=f'{MODEL_NAME}-lmdeploy-api',
openai_api_base=API_BASE,
key='EMPTY',
path=MODEL_PATH,
meta_template=api_meta_template,
max_out_len=2048,
batch_size=500,
temperature=0.1,
)
]

summarizer = dict(
dataset_abbrs=[
['mmlu', 'naive_average'],
['gsm8k', 'accuracy'],
],
summary_groups=sum([v for k, v in locals().items() if k.endswith('_summary_groups')], []),
)
119 changes: 119 additions & 0 deletions autotest/evaluate/test_api_evaluate.py
@@ -0,0 +1,119 @@
import pytest
from utils.config_utils import get_evaluate_pytorch_model_list, get_evaluate_turbomind_model_list, get_workerid
from utils.evaluate_utils import restful_test
from utils.run_restful_chat import start_restful_api, stop_restful_api

DEFAULT_PORT = 23333


@pytest.fixture(scope='function', autouse=True)
def prepare_environment(request, config, worker_id):
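"""Start the restful API server for the parametrized model/backend, yield the params to the test, then stop the server."""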
param = request.param
model = param['model']
backend = param['backend']
model_path = config.get('model_path') + '/' + model
pid, startRes = start_restful_api(config, param, model, model_path, backend, worker_id)
yield param
stop_restful_api(pid, startRes, param)


def get_turbomind_model_list(tp_num):
model_list = get_evaluate_turbomind_model_list(tp_num, kvint_list=[4, 8])
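# cuda_prefix is cleared for every entry; presumably GPU binding is left to the CI runner rather than a CUDA_VISIBLE_DEVICES prefix.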
new_model_list = []
for model in model_list:
model['cuda_prefix'] = None
new_model_list.append(model)
return new_model_list


def get_pytorch_model_list(tp_num):
model_list = get_evaluate_pytorch_model_list(tp_num, kvint_list=[4, 8])
new_model_list = []
for model in model_list:
model['cuda_prefix'] = None
new_model_list.append(model)
return new_model_list


def run_test(config, run_id, prepare_environment, worker_id):
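"""Run the restful evaluation; each pytest-xdist worker uses its own port (DEFAULT_PORT + worker index) so parallel servers do not collide."""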
if get_workerid(worker_id) is None:
result, msg = restful_test(config, run_id, prepare_environment, worker_id=worker_id)
else:
result, msg = restful_test(config,
run_id,
prepare_environment,
worker_id=worker_id,
port=DEFAULT_PORT + get_workerid(worker_id))
return result, msg


@pytest.mark.turbomind
@pytest.mark.gpu_num_1
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('prepare_environment', get_turbomind_model_list(tp_num=1), indirect=True)
def test_turbomind_restful_tp1(config, run_id, prepare_environment, worker_id):
result, msg = run_test(config, run_id, prepare_environment, worker_id)
assert result, msg


@pytest.mark.turbomind
@pytest.mark.gpu_num_2
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('prepare_environment', get_turbomind_model_list(tp_num=2), indirect=True)
def test_turbomind_restful_tp2(config, run_id, prepare_environment, worker_id):
result, msg = run_test(config, run_id, prepare_environment, worker_id)
assert result, msg


@pytest.mark.turbomind
@pytest.mark.gpu_num_4
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('prepare_environment', get_turbomind_model_list(tp_num=4), indirect=True)
def test_turbomind_restful_tp4(config, run_id, prepare_environment, worker_id):
result, msg = run_test(config, run_id, prepare_environment, worker_id)
assert result, msg


@pytest.mark.turbomind
@pytest.mark.gpu_num_8
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('prepare_environment', get_turbomind_model_list(tp_num=8), indirect=True)
def test_turbomind_restful_tp8(config, run_id, prepare_environment, worker_id):
result, msg = run_test(config, run_id, prepare_environment, worker_id)
assert result, msg


@pytest.mark.pytorch
@pytest.mark.gpu_num_1
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('prepare_environment', get_pytorch_model_list(tp_num=1), indirect=True)
def test_pytorch_restful_tp1(config, run_id, prepare_environment, worker_id):
result, msg = run_test(config, run_id, prepare_environment, worker_id)
assert result, msg


@pytest.mark.pytorch
@pytest.mark.gpu_num_2
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('prepare_environment', get_pytorch_model_list(tp_num=2), indirect=True)
def test_pytorch_restful_tp2(config, run_id, prepare_environment, worker_id):
result, msg = run_test(config, run_id, prepare_environment, worker_id)
assert result, msg


@pytest.mark.pytorch
@pytest.mark.gpu_num_4
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('prepare_environment', get_pytorch_model_list(tp_num=4), indirect=True)
def test_pytorch_restful_tp4(config, run_id, prepare_environment, worker_id):
result, msg = run_test(config, run_id, prepare_environment, worker_id)
assert result, msg


@pytest.mark.pytorch
@pytest.mark.gpu_num_8
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('prepare_environment', get_pytorch_model_list(tp_num=8), indirect=True)
def test_pytorch_restful_tp8(config, run_id, prepare_environment, worker_id):
result, msg = run_test(config, run_id, prepare_environment, worker_id)
assert result, msg