Merged

Changes from 35 commits (36 commits in total):
372b2c5  AUTOTEST: add device type for ascend (littlegy, Aug 27, 2025)
b9ad171  Merge branch 'InternLM:main' into test (littlegy, Aug 28, 2025)
28907ca  AOTOTEST: fix lint (littlegy, Aug 29, 2025)
ecee43a  Merge branch 'InternLM:main' into test (littlegy, Aug 29, 2025)
fb57ada  AUTOTEST: add pipeline test timeout (littlegy, Aug 29, 2025)
4869f64  AUTOTEST: fix lint flake8 (littlegy, Aug 29, 2025)
d8f85d4  Create api_eva.yml (littlegy, Aug 29, 2025)
82188f7  WORKFLOW: add ascend workflow (littlegy, Sep 4, 2025)
ddaa36c  WORKFLOW: update ascend runner (littlegy, Sep 4, 2025)
2cda270  fix yaml (littlegy, Sep 4, 2025)
755cee6  fix yaml ii (littlegy, Sep 4, 2025)
28f4df6  fix yaml ii (littlegy, Sep 4, 2025)
0ef80d6  fix yaml ii (littlegy, Sep 4, 2025)
c6c6183  fix yaml ii (littlegy, Sep 4, 2025)
39b6dc3  fix yaml ii (littlegy, Sep 4, 2025)
88c8461  fix yaml ii (littlegy, Sep 4, 2025)
82cec68  update ascend (littlegy, Sep 5, 2025)
49b3d59  update ascend (littlegy, Sep 5, 2025)
4b591d4  update ascend (littlegy, Sep 5, 2025)
ee49e02  AUTOTEST: update hw yml (littlegy, Sep 10, 2025)
85bac1e  AUTOTEST: fix hw yml (littlegy, Sep 10, 2025)
06568e2  AUTOTEST: add ascend device (littlegy, Sep 10, 2025)
c1ae0b1  CI: fix yml (littlegy, Sep 10, 2025)
55b2f76  CI: add pip cache (littlegy, Sep 10, 2025)
b012c5a  CI: add pip cache (littlegy, Sep 10, 2025)
43e6666  CI: add pip cache (littlegy, Sep 10, 2025)
b3dcf40  CI: add pip cache (littlegy, Sep 10, 2025)
b8ace1d  Merge branch 'InternLM:main' into hw_runner (littlegy, Sep 11, 2025)
dd8bac7  TEST: update ascend test (littlegy, Sep 11, 2025)
9bda185  TEST: rm api eval (littlegy, Sep 11, 2025)
9ce6864  TEST: update chat test (littlegy, Sep 11, 2025)
4e62a33  TEST: fix tmp dir (littlegy, Sep 11, 2025)
ff2ed1d  Merge branch 'main' into hw_runner (littlegy, Sep 12, 2025)
858cb5a  TEST: fix lint (littlegy, Sep 12, 2025)
a786543  TEST: update ascend config (littlegy, Sep 15, 2025)
5e06c9f  TEST: rm ascend config (littlegy, Sep 16, 2025)
171 changes: 171 additions & 0 deletions .github/workflows/daily_ete_test_ascend.yml
@@ -0,0 +1,171 @@
name: daily_ete_test_ascend

on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is InternLM'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      backend:
        required: true
        description: 'Set backend testcase filter: turbomind or pytorch or turbomind, pytorch. Default is "["turbomind", "pytorch"]"'
        type: string
        default: "['pytorch']"
      model:
        required: true
        description: 'Set testcase module filter: llm, vllm. Default contains all models'
        type: string
        default: "['llm']"
      function:
        required: true
        description: 'Set testcase function filter: chat, restful, pipeline. Default contains all functions'
        type: string
        default: '["pipeline", "restful", "chat"]'
      regression_func:
        required: true
        description: 'regression functions'
        type: string
        default: "['tools']"

env:
  REPORT_DIR: /test/test-reports/${{ github.run_id }}
  COV_PARAM: --cov /usr/local/python3.10.17/lib/python3.10/site-packages/lmdeploy
  FAIL_CONFIG: ${{ github.event_name == 'push' && github.run_attempt != 1 && '--lf --lfnf none' || '--lf' }}
  TEST_CODE_PATH: /test/test_pkg/lmdeploy/${{ github.run_id }}
  LOG_PATH: /test/log/${{ github.run_id }}
  TMPDIR: /mnt/deeplink/docker-tmp/qa_tmp
  RAY_TMPDIR: /mnt/deeplink/docker-tmp/qa_tmp/ray

jobs:
  download_pkgs:
    if: ${{ !cancelled() }}
    runs-on: [self-hosted, ascend-013]
    timeout-minutes: 50
    container:
      image: crpi-4crprmm5baj1v8iv.cn-hangzhou.personal.cr.aliyuncs.com/lmdeploy_dlinfer/ascend:910b-latest
      options: "--device=/dev/davinci0 --device=/dev/davinci1 --device=/dev/davinci2 --device=/dev/davinci3 --device=/dev/davinci4 --device=/dev/davinci5 --device=/dev/davinci6 --device=/dev/davinci7 --device=/dev/davinci_manager --device=/dev/devmm_svm --device=/dev/hisi_hdc -e PIP_CACHE_DIR=/root/.cache/pip --shm-size=150g --pull never"
      volumes:
        - /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro
        - /usr/local/sbin:/usr/local/sbin:ro
        - /var/log/npu/slog:/var/log/npu/slog
        - /var/log/npu/profiling:/var/log/npu/profiling
        - /var/log/npu/dump:/var/log/npu/dump
        - /var/log/npu:/usr/slog
        - /etc/hccn.conf:/etc/hccn.conf:ro
        - /root/qa_test:/test
        - /mnt:/mnt
        - /root/.cache/pip:/root/.cache/pip
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        if: ${{ !cancelled() }}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Copy repository
        if: ${{ !cancelled() }}
        run: rm -rf ${{ env.TEST_CODE_PATH }} && mkdir ${{ env.TEST_CODE_PATH }} && cp -r . ${{ env.TEST_CODE_PATH }}

  test_tools:
    if: ${{ !cancelled() && contains(fromJSON(github.event.inputs.regression_func), 'tools') }}
    runs-on: [self-hosted, ascend-013]
    needs: download_pkgs
    timeout-minutes: 300
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]') }}
        model: ${{ fromJSON(inputs.model || '["llm", "mllm"]') }}
        function: ${{ fromJSON(inputs.function || '["pipeline","restful","chat"]') }}
        exclude:
          - backend: turbomind
            model: mllm
            function: chat
          - backend: pytorch
            model: mllm
            function: chat
    container:
      image: crpi-4crprmm5baj1v8iv.cn-hangzhou.personal.cr.aliyuncs.com/lmdeploy_dlinfer/ascend:910b-latest
      options: "--device=/dev/davinci0 --device=/dev/davinci1 --device=/dev/davinci2 --device=/dev/davinci3 --device=/dev/davinci4 --device=/dev/davinci5 --device=/dev/davinci6 --device=/dev/davinci7 --device=/dev/davinci_manager --device=/dev/devmm_svm --device=/dev/hisi_hdc -e PIP_CACHE_DIR=/root/.cache/pip --shm-size=150g --pull never"
      volumes:
        - /usr/local/Ascend/driver:/usr/local/Ascend/driver
        - /usr/local/sbin:/usr/local/sbin
        - /var/log/npu/slog:/var/log/npu/slog
        - /var/log/npu/profiling:/var/log/npu/profiling
        - /var/log/npu/dump:/var/log/npu/dump
        - /var/log/npu:/usr/slog
        - /etc/hccn.conf:/etc/hccn.conf
        - /root/qa_test:/test
        - /mnt:/mnt
        - /root/.cache/pip:/root/.cache/pip
    steps:
      - name: Copy repository and Artifacts
        run: |
          cp -r ${{ env.TEST_CODE_PATH }}/. .
      - name: Install lmdeploy - offline
        run: |
          python3 -m pip install -r requirements_ascend.txt -i https://mirrors.aliyun.com/pypi/simple/
      - name: Install lmdeploy - test
        run: |
          python3 -m pip install -r requirements/test.txt -i https://mirrors.aliyun.com/pypi/simple/
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          rm -rf ${{ env.LOG_PATH }}/*
          mkdir ${{ env.REPORT_DIR }}/.pytest_cache -p
          ln -s ${{ env.REPORT_DIR }}/.pytest_cache autotest
      - name: Test lmdeploy - chat
        continue-on-error: true
        if: ${{ (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'llm' && matrix.function == 'chat' }}
        run: |
          pytest autotest/tools/chat/test_command_chat_hf_${{ matrix.backend }}.py -m 'gpu_num_1 and test_ascend' -n 8 --device ascend --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
          mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/chat/test_command_chat_hf_${{ matrix.backend }}.py -m 'gpu_num_2 and test_ascend' -n 4 --device ascend --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
          mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/chat/test_command_chat_hf_${{ matrix.backend }}.py -m 'gpu_num_4 and test_ascend' -n 2 --device ascend --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
          mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/chat/test_command_chat_hf_${{ matrix.backend }}.py -m 'gpu_num_8 and test_ascend' --device ascend --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
          mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - pipeline
        continue-on-error: true
        if: ${{ matrix.function == 'pipeline' }}
        run: |
          pytest autotest/tools/pipeline/test_pipeline_chat_${{ matrix.backend }}_${{ matrix.model }}.py -m 'gpu_num_1 and test_ascend' -n 8 --device ascend --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
          mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/pipeline/test_pipeline_chat_${{ matrix.backend }}_${{ matrix.model }}.py -m 'gpu_num_2 and test_ascend' -n 4 --device ascend --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
          mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/pipeline/test_pipeline_chat_${{ matrix.backend }}_${{ matrix.model }}.py -m 'gpu_num_4 and test_ascend' -n 2 --device ascend --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
          mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/pipeline/test_pipeline_chat_${{ matrix.backend }}_${{ matrix.model }}.py -m 'gpu_num_8 and test_ascend' --device ascend --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
          mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - restful
        continue-on-error: true
        if: ${{ matrix.function == 'restful' }}
        run: |
          pytest autotest/tools/restful/test_restful_chat_hf_${{ matrix.backend }}_${{ matrix.model }}.py -m 'gpu_num_1 and test_ascend' -n 8 --device ascend --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
          mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/restful/test_restful_chat_hf_${{ matrix.backend }}_${{ matrix.model }}.py -m 'gpu_num_2 and test_ascend' -n 4 --device ascend --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
          mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/restful/test_restful_chat_hf_${{ matrix.backend }}_${{ matrix.model }}.py -m 'gpu_num_4 and test_ascend' -n 2 --device ascend --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
          mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S')
          pytest autotest/tools/restful/test_restful_chat_hf_${{ matrix.backend }}_${{ matrix.model }}.py -m 'gpu_num_8 and test_ascend' --device ascend --alluredir=${{ env.REPORT_DIR }} ${{ env.COV_PARAM }} || true
          mv .coverage ${{ env.REPORT_DIR }}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Clear workfile
        if: always()
        run: |
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir
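For reference, the backend, model, and function inputs above are JSON-encoded lists that fromJSON expands into the test_tools matrix before the exclude entries are applied. A minimal Python sketch of that expansion, with plain lists standing in for the parsed inputs (illustrative only, not lmdeploy code):

from itertools import product

# Matrix axes as the workflow defaults parse, e.g. fromJSON("['pytorch']").
backends = ['pytorch']
models = ['llm']
functions = ['pipeline', 'restful', 'chat']

# The two exclude entries from the test_tools job.
excludes = [
    {'backend': 'turbomind', 'model': 'mllm', 'function': 'chat'},
    {'backend': 'pytorch', 'model': 'mllm', 'function': 'chat'},
]

combos = [{'backend': b, 'model': m, 'function': f}
          for b, m, f in product(backends, models, functions)]
# A combination is dropped when it matches every key of an exclude entry.
jobs = [c for c in combos
        if not any(all(c[k] == v for k, v in ex.items()) for ex in excludes)]
print(jobs)  # with the defaults: three pytorch/llm jobs (pipeline, restful, chat)

Note also how each test step pairs the xdist worker count with the per-test card count (gpu_num_1 with -n 8, gpu_num_2 with -n 4, gpu_num_4 with -n 2, gpu_num_8 serial), so every tier can use all eight mounted davinci devices.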
10 changes: 8 additions & 2 deletions autotest/benchmark/test_throughput_performance.py
@@ -1,3 +1,5 @@
+import os
+
 import pytest
 from utils.benchmark_utils import throughput_test
 from utils.config_utils import get_benchmark_model_list, get_cuda_id_by_workerid, get_cuda_prefix_by_workerid
@@ -92,11 +94,15 @@ def test_throughput_func_tp2(config, run_id, run_config, worker_id):
         'tp_num': 1
     }])
 def test_throughput_prtest_tp1(config, run_id, run_config, worker_id):
+    device_type = os.environ.get('DEVICE', 'cuda')
+    if device_type == 'ascend':
+        env_var = 'ASCEND_RT_VISIBLE_DEVICES='
+    else:
+        env_var = 'CUDA_VISIBLE_DEVICES='
     result, msg = throughput_test(config,
                                   run_id,
                                   run_config,
-                                  cuda_prefix='CUDA_VISIBLE_DEVICES=' +
-                                  str(int(get_cuda_id_by_workerid(worker_id)) + 5),
+                                  cuda_prefix=f'{env_var}' + str(int(get_cuda_id_by_workerid(worker_id)) + 5),
                                   worker_id=worker_id,
                                   is_smoke=True)

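The effect of the change is easiest to see with concrete values. A minimal sketch, assuming get_cuda_id_by_workerid('gw0') returns '0' for pytest-xdist worker gw0 (that return value is an assumption for illustration); the + 5 offset moves the smoke test onto the upper cards:

import os

# Assume the run was started with `pytest --device ascend`, so conftest
# has exported DEVICE=ascend.
os.environ['DEVICE'] = 'ascend'

device_type = os.environ.get('DEVICE', 'cuda')
env_var = ('ASCEND_RT_VISIBLE_DEVICES=' if device_type == 'ascend'
           else 'CUDA_VISIBLE_DEVICES=')

worker_card = 0  # stand-in for int(get_cuda_id_by_workerid('gw0'))
prefix = env_var + str(worker_card + 5)
print(prefix)  # ASCEND_RT_VISIBLE_DEVICES=5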
35 changes: 35 additions & 0 deletions autotest/config-ascend.yaml
@@ -0,0 +1,35 @@
model_path: /mnt/deeplink/group01/deeplink-test/weight
resource_path: /nvme/qa_test_models/resource
dst_path: /nvme/qa_test_models/autotest_model
log_path: /test/log
benchmark_path: /nvme/qa_test_models/benchmark-reports
dataset_path: /nvme/qa_test_models/datasets/ShareGPT_V3_unfiltered_cleaned_split.json
env_tag: a100
[Review comment from a Collaborator, on `env_tag: a100`]: What's this tag used for?

tp_config:
  Qwen2.5-32B-Instruct: 2

pytorch_chat_model:
- /Qwen3-0.6B

pytorch_vl_model:
- /Qwen3-0.6B

pytorch_base_model:
- /Qwen3-0.6B

pytorch_quatization:
  awq:
    - meta-llama/Meta-Llama-3-8B-Instruct
  w8a8:
    - meta-llama/Meta-Llama-3-8B-Instruct
  no_kvint4:
    - /Qwen3-0.6B
  no_kvint8:
    - /Qwen3-0.6B
[Review comment from a Collaborator]: On the Ascend platform, LMDeploy doesn't support kv quantization, so should we configure it?

longtext_model:
- /Qwen3-0.6B

benchmark_model:
- /Qwen3-0.6B
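For orientation, a minimal sketch of how a harness might resolve tensor parallelism from this file; the helper and the default of 1 are assumptions for illustration, not lmdeploy's actual utilities:

import yaml

# Load the device-specific config selected by the conftest change below.
with open('autotest/config-ascend.yaml') as f:
    config = yaml.safe_load(f)

def get_tp_num(config: dict, model: str) -> int:
    # Illustrative lookup: models listed in tp_config get their tp value,
    # everything else runs on a single card.
    return config.get('tp_config', {}).get(model, 1)

print(get_tp_num(config, 'Qwen2.5-32B-Instruct'))  # 2
print(get_tp_num(config, '/Qwen3-0.6B'))           # 1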
25 changes: 24 additions & 1 deletion autotest/conftest.py
@@ -10,7 +10,17 @@

 @pytest.fixture(scope='session')
 def config():
-    config_path = os.path.join(config_file)
+    # Use device-specific config file if DEVICE environment variable is set
+    device = os.environ.get('DEVICE', '')
+    if device:
+        device_config_path = f'autotest/config-{device}.yaml'
+        if os.path.exists(device_config_path):
+            config_path = device_config_path
+        else:
+            config_path = config_file
+    else:
+        config_path = config_file

     with open(config_path) as f:
         env_config = yaml.load(f.read(), Loader=yaml.SafeLoader)
     return env_config
@@ -34,8 +44,21 @@ def common_case_config():

 def pytest_addoption(parser):
     parser.addoption('--run_id', action='store', default='', help='github run_id')
+    parser.addoption('--device', action='store', default='', help='device config suffix')
+
+
+def pytest_configure(config):
+    # Set DEVICE environment variable before test execution
+    device = config.getoption('--device')
+    if device:
+        os.environ['DEVICE'] = device


 @pytest.fixture(scope='session')
 def run_id(request):
     return request.config.getoption('--run_id')
+
+
+@pytest.fixture(scope='session')
+def device(request):
+    return request.config.getoption('--device')
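Taken together, the --device option, pytest_configure, and the config fixture give each run a device-specific config file. A sketch of the resulting selection order, assuming the default config path is autotest/config.yaml (that name is illustrative):

import os

# `pytest ... --device ascend` makes pytest_configure export DEVICE=ascend;
# the config fixture then prefers autotest/config-ascend.yaml and falls
# back to the default config file when no device-specific file exists.
os.environ['DEVICE'] = 'ascend'

config_file = 'autotest/config.yaml'  # assumed default path
device = os.environ.get('DEVICE', '')
candidate = f'autotest/config-{device}.yaml' if device else ''
config_path = candidate if candidate and os.path.exists(candidate) else config_file
print(config_path)  # autotest/config-ascend.yaml when that file exists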