InternLM · littlegy · Sep 18, 2025 · Sep 18, 2025 · Sep 18, 2025 · Sep 18, 2025
diff --git a/.github/workflows/api_eval.yml b/.github/workflows/api_eval.yml
@@ -0,0 +1,139 @@
+name: api_eval
+
+on:
+  workflow_dispatch:  # manual trigger only; no push/PR/schedule events are declared
+    inputs:
+      repo_org:
+        required: false
+        description: 'Tested repository in owner/name form. Default is InternLM/lmdeploy'
+        type: string
+        default: 'InternLM/lmdeploy'
+      repo_ref:
+        required: false
+        description: 'Set branch or tag or commit id. Default is "main"'
+        type: string
+        default: 'main'
+      backend:
+        required: true
+        description: 'JSON array of backends to test: ["turbomind"], ["pytorch"] or ["turbomind", "pytorch"]. Default is ["turbomind", "pytorch"]'
+        type: string
+        default: '["turbomind", "pytorch"]'  # valid JSON; parsed by fromJSON() in the test_evaluation matrix
+
+
+env:  # workflow-level values; a job-level env entry with the same name takes precedence
+  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache  # same host path the test container mounts at /root/.cache/pip
+  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai  # same host path the test container mounts at /etc/localtime
+  OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}  # NOTE(review): linux-build overrides this with a cuda12.4 name
+  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
+  REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.run_id }}  # per-run dir: pytest cache and --alluredir target
+  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy  # not referenced by name in this workflow file
+  FAIL_CONFIG: '--lf'  # not referenced by name in this workflow file
+  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ github.run_id }}  # not referenced by name in this workflow file
+  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy  # not referenced by name in this workflow file
+  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt  # not referenced by name in this workflow file
+  DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL  # not referenced by name in this workflow file
+
+jobs:
+  linux-build:  # builds the wheel with builder/manywheel/build_wheel.sh and uploads it as a run-scoped artifact
+    if: ${{ !cancelled() }}
+    strategy:
+      matrix:
+        pyver: [py310]  # the test job downloads the artifact by its hard-coded "-py310" name
+    runs-on: ubuntu-latest
+    env:
+      PYTHON_VERSION: ${{ matrix.pyver }}
+      PLAT_NAME: manylinux2014_x86_64
+      DOCKER_TAG: cuda12.4
+      OUTPUT_FOLDER: cuda12.4_dist_${{ github.run_id }}  # takes precedence over the workflow-level OUTPUT_FOLDER
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}  # fallback when the input is empty
+          ref: ${{github.event.inputs.repo_ref || 'main'}}  # fallback when the input is empty
+      - name: Build
+        run: |
+          echo ${PYTHON_VERSION}
+          echo ${PLAT_NAME}
+          echo ${DOCKER_TAG}
+          echo ${OUTPUT_FOLDER}
+          echo ${GITHUB_RUN_ID}
+          # remove -it
+          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
+          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
+      - name: Upload Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          if-no-files-found: error  # an empty output folder fails the job instead of uploading nothing
+          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}  # the folder name passed to build_wheel.sh above
+          retention-days: 1
+          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}  # must match the name used by the download step
+
+  test_evaluation:  # runs autotest/evaluate/test_api_evaluate.py once per backend in the matrix
+    needs: linux-build  # the wheel installed below is the artifact uploaded by this job
+    if: ${{ !cancelled() }}
+    runs-on: [self-hosted, test-140]
+    timeout-minutes: 2400
+    strategy:
+      fail-fast: false  # keep the other backend running when one matrix entry fails
+      matrix:
+        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
+    container:
+      image: openmmlab/lmdeploy:latest-cu12.8
+      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
+      volumes:
+        - /nvme/github-actions/pip-cache:/root/.cache/pip
+        - /nvme/github-actions/packages:/root/packages
+        - /nvme/github-actions/resources:/root/resources
+        - /nvme/github-actions/opencompass-data:/root/opencompass-data
+        - /nvme/qa_test_models/evaluation-reports:/root/evaluation-reports
+        - /nvme/qa_test_models:/nvme/qa_test_models
+        - /mnt/shared:/mnt/shared
+        - /mnt/bigdisk:/mnt/bigdisk
+        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
+        - /mnt/187:/mnt/187  # holds the opencompass data that ./data is symlinked to before the tests run
+    steps:
+      - name: Show working directory
+        run: |
+          echo "Working directory set to: $(pwd)"
+      - name: Clone repository
+        uses: actions/checkout@v2
+        with:
+          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
+          ref: ${{github.event.inputs.repo_ref || 'main'}}
+      - name: Download Artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: my-artifact-${{ github.run_id }}-py310  # name produced by linux-build for pyver py310
+      - name: Install lmdeploy - dependency
+        run: |
+          python3 -m pip install -r requirements_cuda.txt
+      - name: Install lmdeploy
+        run: |
+          python3 -m pip install lmdeploy-*.whl --no-deps
+          python3 -m pip install -r requirements/test.txt
+      - name: Install opencompass
+        run: |
+          python3 -m pip install opencompass
+      - name: Check env
+        run: |
+          python3 -m pip list
+          lmdeploy check_env
+          rm -rf allure-results
+          mkdir -p ${{ env.REPORT_DIR }}/.pytest_cache
+          ln -s ${{ env.REPORT_DIR }}/.pytest_cache autotest
+      - name: Run API evaluation tests
+        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind')
+        run: |
+          overall_exit=0  # ends up holding the exit code of the last failing pytest run, if any
+          ln -s /mnt/187/opencompass-data/data ./data
+          pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_1 and not pr_test and ${{matrix.backend}}" -n 8 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
+          pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_2 and not pr_test and ${{matrix.backend}}" -n 4 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
+          pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_4 and not pr_test and ${{matrix.backend}}" -n 2 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
+          pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_8 and not pr_test and ${{matrix.backend}}" -n 1 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
+          exit $overall_exit
+      - name: Clear workspace
+        if: always()
+        run: |
+          workdir="$(pwd)"
+          rm -rf "${workdir:?}"/*  # quoted; :? aborts instead of expanding to /* if workdir is ever empty
diff --git a/autotest/config.yaml b/autotest/config.yaml
@@ -6,6 +6,7 @@ benchmark_path: /nvme/qa_test_models/benchmark-reports
 dataset_path: /nvme/qa_test_models/datasets/ShareGPT_V3_unfiltered_cleaned_split.json
 env_tag: a100
 
+
 tp_config:
     Llama-4-Scout-17B-16E-Instruct: 4
     Meta-Llama-3-1-70B-Instruct: 4
@@ -22,6 +23,7 @@ tp_config:
     Qwen3-32B: 2
     Qwen3-30B-A3B: 2
     Qwen3-30B-A3B-Base: 2
+    Qwen2.5-32B-Instruct: 2
     Qwen2.5-72B-Instruct: 4
     Qwen2.5-VL-32B-Instruct: 2
     DeepSeek-V2-Lite-Chat: 2
@@ -37,6 +39,7 @@ tp_config:
     gpt-oss-120b-bf16: 4
 
 
+
 turbomind_chat_model:
     - meta-llama/Llama-3.2-1B-Instruct
     - meta-llama/Llama-3.2-3B-Instruct
@@ -370,3 +373,13 @@ benchmark_model:
     - deepseek-ai/DeepSeek-V2-Lite-Chat
     - lmsys/gpt-oss-20b-bf16
     - lmsys/gpt-oss-120b-bf16
+
+
+evaluate_model:
+    - google/gemma-2-9b-it
+    - google/gemma-2-27b-it
+    - meta-llama/Meta-Llama-3-1-8B-Instruct
+    - Qwen/Qwen2.5-7B-Instruct
+    - Qwen/Qwen2.5-32B-Instruct
+    - Qwen/Qwen1.5-MoE-A2.7B-Chat
+    - Qwen/Qwen3-30B-A3B
diff --git a/autotest/conftest.py b/autotest/conftest.py
@@ -1,3 +1,4 @@
+import copy
 import os
 
 import pytest
@@ -23,7 +24,14 @@ def config():
 
     with open(config_path) as f:
         env_config = yaml.load(f.read(), Loader=yaml.SafeLoader)
-    return env_config
+
+    config_copy = copy.deepcopy(env_config)
+    github_run_id = os.environ.get('GITHUB_RUN_ID', 'local_run')
+    if 'log_path' in config_copy:
+        config_copy['log_path'] = os.path.join(config_copy['log_path'], str(github_run_id))
+        os.makedirs(config_copy['log_path'], exist_ok=True)
+
+    return config_copy
 
 
 @pytest.fixture(scope='session')

diff --git a/autotest/evaluate/eval_config_chat.py b/autotest/evaluate/eval_config_chat.py
@@ -0,0 +1,40 @@
+from mmengine.config import read_base
+from opencompass.models import OpenAISDK
+
+with read_base():  # gsm8k and mmlu definitions are taken from opencompass' own config modules
+    from opencompass.configs.datasets.gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets  # noqa: F401, E501
+    from opencompass.configs.datasets.mmlu.mmlu_gen_4d595a import mmlu_datasets  # noqa: F401, E501
+    from opencompass.configs.summarizers.groups.mmlu import mmlu_summary_groups  # noqa: F401, E501
+
+datasets = sum([v for k, v in locals().items() if k.endswith('_datasets')], [])  # merge all *_datasets lists
+# Empty placeholders; presumably filled in by the test harness before this config is used -- TODO confirm.
+MODEL_NAME = ''
+MODEL_PATH = ''
+API_BASE = ''
+
+api_meta_template = dict(round=[  # one HUMAN entry, then a BOT entry flagged generate=True
+    dict(role='HUMAN', api_role='HUMAN'),
+    dict(role='BOT', api_role='BOT', generate=True),
+])
+
+models = [  # a single OpenAISDK model entry whose base URL is API_BASE
+    dict(
+        type=OpenAISDK,
+        abbr=f'{MODEL_NAME}-lmdeploy-api',
+        openai_api_base=API_BASE,
+        key='EMPTY',
+        path=MODEL_PATH,
+        meta_template=api_meta_template,
+        max_out_len=2048,
+        batch_size=500,
+        temperature=0.1,
+    )
+]
+
+summarizer = dict(  # reports mmlu naive_average and gsm8k accuracy
+    dataset_abbrs=[
+        ['mmlu', 'naive_average'],
+        ['gsm8k', 'accuracy'],
+    ],
+    summary_groups=sum([v for k, v in locals().items() if k.endswith('_summary_groups')], []),
+)
diff --git a/autotest/evaluate/test_api_evaluate.py b/autotest/evaluate/test_api_evaluate.py
@@ -0,0 +1,119 @@
+import pytest
+from utils.config_utils import get_evaluate_pytorch_model_list, get_evaluate_turbomind_model_list, get_workerid
+from utils.evaluate_utils import restful_test
+from utils.run_restful_chat import start_restful_api, stop_restful_api
+
+DEFAULT_PORT = 23333
+
+
+@pytest.fixture(scope='function', autouse=True)
+def prepare_environment(request, config, worker_id):
+    param = request.param
+    model = param['model']
+    backend = param['backend']
+    model_path = config.get('model_path') + '/' + model
+    pid, startRes = start_restful_api(config, param, model, model_path, backend, worker_id)
+    yield param
+    stop_restful_api(pid, startRes, param)
+
+
+def get_turbomind_model_list(tp_num):
+    model_list = get_evaluate_turbomind_model_list(tp_num, kvint_list=[4, 8])
+    new_model_list = []
+    for model in model_list:
+        model['cuda_prefix'] = None
+        new_model_list.append(model)
+    return new_model_list
+
+
+def get_pytorch_model_list(tp_num):
+    model_list = get_evaluate_pytorch_model_list(tp_num, kvint_list=[4, 8])
+    new_model_list = []
+    for model in model_list:
+        model['cuda_prefix'] = None
+        new_model_list.append(model)
+    return new_model_list
+
+
+def run_test(config, run_id, prepare_environment, worker_id):
+    if get_workerid(worker_id) is None:
+        result, msg = restful_test(config, run_id, prepare_environment, worker_id=worker_id)
+    else:
+        result, msg = restful_test(config,
+                                   run_id,
+                                   prepare_environment,
+                                   worker_id=worker_id,
+                                   port=DEFAULT_PORT + get_workerid(worker_id))
+    return result, msg
+
+
+@pytest.mark.turbomind
+@pytest.mark.gpu_num_1
+@pytest.mark.flaky(reruns=0)
+@pytest.mark.parametrize('prepare_environment', get_turbomind_model_list(tp_num=1), indirect=True)
+def test_turbomind_restful_tp1(config, run_id, prepare_environment, worker_id):
+    result, msg = run_test(config, run_id, prepare_environment, worker_id)
+    assert result, msg
+
+
+@pytest.mark.turbomind
+@pytest.mark.gpu_num_2
+@pytest.mark.flaky(reruns=0)
+@pytest.mark.parametrize('prepare_environment', get_turbomind_model_list(tp_num=2), indirect=True)
+def test_turbomind_restful_tp2(config, run_id, prepare_environment, worker_id):
+    result, msg = run_test(config, run_id, prepare_environment, worker_id)
+    assert result, msg
+
+
+@pytest.mark.turbomind
+@pytest.mark.gpu_num_4
+@pytest.mark.flaky(reruns=0)
+@pytest.mark.parametrize('prepare_environment', get_turbomind_model_list(tp_num=4), indirect=True)
+def test_turbomind_restful_tp4(config, run_id, prepare_environment, worker_id):
+    result, msg = run_test(config, run_id, prepare_environment, worker_id)
+    assert result, msg
+
+
+@pytest.mark.turbomind
+@pytest.mark.gpu_num_8
+@pytest.mark.flaky(reruns=0)
+@pytest.mark.parametrize('prepare_environment', get_turbomind_model_list(tp_num=8), indirect=True)
+def test_turbomind_restful_tp8(config, run_id, prepare_environment, worker_id):
+    result, msg = run_test(config, run_id, prepare_environment, worker_id)
+    assert result, msg
+
+
+@pytest.mark.pytorch
+@pytest.mark.gpu_num_1
+@pytest.mark.flaky(reruns=0)
+@pytest.mark.parametrize('prepare_environment', get_pytorch_model_list(tp_num=1), indirect=True)
+def test_pytorch_restful_tp1(config, run_id, prepare_environment, worker_id):
+    result, msg = run_test(config, run_id, prepare_environment, worker_id)
+    assert result, msg
+
+
+@pytest.mark.pytorch
+@pytest.mark.gpu_num_2
+@pytest.mark.flaky(reruns=0)
+@pytest.mark.parametrize('prepare_environment', get_pytorch_model_list(tp_num=2), indirect=True)
+def test_pytorch_restful_tp2(config, run_id, prepare_environment, worker_id):
+    result, msg = run_test(config, run_id, prepare_environment, worker_id)
+    assert result, msg
+
+
+@pytest.mark.pytorch
+@pytest.mark.gpu_num_4
+@pytest.mark.flaky(reruns=0)
+@pytest.mark.parametrize('prepare_environment', get_pytorch_model_list(tp_num=4), indirect=True)
+def test_pytorch_restful_tp4(config, run_id, prepare_environment, worker_id):
+    result, msg = run_test(config, run_id, prepare_environment, worker_id)
+    assert result, msg
+
+
+@pytest.mark.pytorch
+@pytest.mark.gpu_num_8
+@pytest.mark.flaky(reruns=0)
+@pytest.mark.parametrize('prepare_environment', get_pytorch_model_list(tp_num=8), indirect=True)
+def test_pytorch_restful_tp8(config, run_id, prepare_environment, worker_id):
+    result, msg = run_test(config, run_id, prepare_environment, worker_id)
+    assert result, msg