vllm-project
diff --git a/.github/Dockerfile.buildwheel b/.github/Dockerfile.buildwheel
Lines changed: 3 additions & 1 deletion
diff --git a/.github/workflows/_e2e_nightly_multi_node.yaml b/.github/workflows/_e2e_nightly_multi_node.yaml
Lines changed: 4 additions & 4 deletions
diff --git a/.github/workflows/_e2e_nightly_single_node.yaml b/.github/workflows/_e2e_nightly_single_node.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/_e2e_nightly_single_node_models.yaml b/.github/workflows/_e2e_nightly_single_node_models.yaml
Lines changed: 14 additions & 10 deletions
diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml
Lines changed: 20 additions & 22 deletions
diff --git a/.github/workflows/_nightly_image_build.yaml b/.github/workflows/_nightly_image_build.yaml
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/format_pr_body.yaml b/.github/workflows/format_pr_body.yaml
Lines changed: 3 additions & 3 deletions
diff --git a/.github/workflows/image_310p_openeuler.yml b/.github/workflows/image_310p_openeuler.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/image_310p_ubuntu.yml b/.github/workflows/image_310p_ubuntu.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/image_a3_openeuler.yml b/.github/workflows/image_a3_openeuler.yml
Lines changed: 1 addition & 1 deletion
@@ -15,13 +15,15 @@
 # This file is a part of the vllm-ascend project.
 #
 ARG PY_VERSION=3.11
-FROM quay.io/ascend/manylinux:8.3.rc1-910b-manylinux_2_28-py${PY_VERSION}
+FROM quay.io/ascend/manylinux:8.3.rc2-910b-manylinux_2_28-py${PY_VERSION}
 
 ARG COMPILE_CUSTOM_KERNELS=1
+ARG SOC_VERSION="ascend910b1"
 
 # Define environments
 ENV DEBIAN_FRONTEND=noninteractive
 ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
+ENV SOC_VERSION=$SOC_VERSION
 RUN yum update -y && \
     yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
     rm -rf /var/cache/yum
 
@@ -15,7 +15,7 @@ on:
         required: false
         type: string
         description: base image for pods
-        default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11"
+        default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11"
       config_file_path:
         required: true
         type: string
@@ -32,7 +32,7 @@ on:
         description: how many pods will be pulled up via lws.yaml, indicates number of nodes we need
       vllm_version:
         required: false
-        default: "v0.11.2"
+        default: "86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24"
         type: string
         description: vllm version to use
       vllm_ascend_remote_url:
@@ -69,7 +69,7 @@ jobs:
     # This is the runner with no NPU for k8s controller
     runs-on: ${{ inputs.runner }}
     container:
-      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
+      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
       env:
         KUBECONFIG: /tmp/kubeconfig
         KUBECTL: /root/.cache/.kube/kubectl
@@ -106,7 +106,7 @@ jobs:
             echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > $KUBECONFIG
 
         - name: Checkout code
-          uses: actions/checkout@v6
+          uses: actions/checkout@v6.0.0
 
         - name: Prepare scripts
           run: |
 
@@ -29,7 +29,7 @@ on:
       image:
         required: false
         type: string
-        default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11"
+        default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11"
       tests:
         required: true
         type: string
 
@@ -59,7 +59,7 @@ jobs:
     name: ${{inputs.model_list}} accuracy test
     runs-on: ${{ inputs.runner }}
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
+      image: "${{ inputs.image }}"
       env:
         VLLM_USE_MODELSCOPE: True
         GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
@@ -78,15 +78,15 @@ jobs:
           git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
 
       - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.0
 
       - name: Install system dependencies
         run: |
           apt-get -y install `cat packages.txt`
           apt-get -y install gcc g++ cmake libnuma-dev
 
       - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.0
         with:
           repository: vllm-project/vllm
           ref: ${{ inputs.vllm }}
@@ -108,11 +108,14 @@ jobs:
         if: ${{ inputs.runner == 'linux-aarch64-a2-4' && contains(inputs.model_list, 'Qwen3-Next-80B-A3B-Instruct') }}
         shell: bash -l {0}
         run: |
-          wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/Ascend-BiSheng-toolkit_aarch64.run -O /tmp/Ascend-BiSheng-toolkit_aarch64.run
-          chmod a+x /tmp/Ascend-BiSheng-toolkit_aarch64.run
-          /tmp/Ascend-BiSheng-toolkit_aarch64.run --install
-          . /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
-          python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev20250914-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
+          . /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
+          python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
+
+      - name: Install tensorflow (for Molmo-7B-D-0924)
+        if: ${{ inputs.runner == 'linux-aarch64-a2-1' && contains(inputs.model_list, 'Molmo-7B-D-0924') }}
+        shell: bash -l {0}
+        run: |
+          pip install tensorflow --no-cache-dir
 
       - name: Resolve vllm-ascend version
         run: |
@@ -132,7 +135,7 @@ jobs:
           echo "GHA_VLLM_ASCEND_VERSION=$RESOLVED_VERSION" >> $GITHUB_ENV
 
       - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.0
         with:
           repository: vllm-project/vllm-ascend
           path: ./vllm-ascend
@@ -175,6 +178,7 @@ jobs:
         id: report
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
+          HF_DATASETS_OFFLINE: True
           VLLM_USE_MODELSCOPE: True
           VLLM_CI_RUNNER: ${{ inputs.runner }}
           VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
@@ -225,4 +229,4 @@ jobs:
           path: ./benchmarks/accuracy/
           if-no-files-found: warn
           retention-days: 90
-          overwrite: true
+          overwrite: true
@@ -40,15 +40,15 @@ jobs:
           apt install git -y
 
       - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.0
 
       - name: Install system dependencies
         run: |
           apt-get -y install `cat packages.txt`
           apt-get -y install gcc g++ cmake libnuma-dev
 
       - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.0
         with:
           repository: vllm-project/vllm
           ref: ${{ inputs.vllm }}
@@ -91,14 +91,13 @@ jobs:
           pytest -sv tests/e2e/singlecard/test_completion_with_prompt_embeds.py
           pytest -sv tests/e2e/singlecard/test_aclgraph.py
           pytest -sv tests/e2e/singlecard/test_aclgraph_mem.py
-          pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
           pytest -sv tests/e2e/singlecard/test_bge_model.py
           pytest -sv tests/e2e/singlecard/test_camem.py
-          pytest -sv tests/e2e/singlecard/test_chunked.py
           pytest -sv tests/e2e/singlecard/test_embedding.py
           # pytest -sv tests/e2e/singlecard/test_embedding_aclgraph.py
           pytest -sv tests/e2e/singlecard/test_guided_decoding.py
-          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
+          # FIXME: LoRA does not work with torch 2.8; re-enable this test once that is fixed.
+          # pytest -sv tests/e2e/singlecard/test_ilama_lora.py
           pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
           pytest -sv tests/e2e/singlecard/test_quantization.py
           pytest -sv tests/e2e/singlecard/test_sampler.py
@@ -134,15 +133,15 @@ jobs:
           apt install git -y
 
       - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.0
 
       - name: Install system dependencies
         run: |
           apt-get -y install `cat packages.txt`
           apt-get -y install gcc g++ cmake libnuma-dev
 
       - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.0
         with:
           repository: vllm-project/vllm
           ref: ${{ inputs.vllm }}
@@ -179,6 +178,7 @@ jobs:
           VLLM_USE_MODELSCOPE: True
         if: ${{ inputs.type == 'full' }}
         run: |
+          pytest -sv tests/e2e/multicard/test_quantization.py
           pytest -sv tests/e2e/multicard/test_aclgraph_capture_replay.py
           pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
           pytest -sv tests/e2e/multicard/test_full_graph_mode.py
@@ -187,7 +187,8 @@ jobs:
           pytest -sv tests/e2e/multicard/test_external_launcher.py
           pytest -sv tests/e2e/multicard/test_single_request_aclgraph.py
           pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
-          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
+          # FIXME: LoRA does not work with torch 2.8; re-enable this test once that is fixed.
+          # pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
 
           # To avoid oom, we need to run the test in a single process.
           pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
@@ -211,7 +212,7 @@ jobs:
     if: ${{ needs.e2e.result == 'success' && needs.e2e-2-cards.result == 'success' && inputs.type == 'full' }}
     runs-on: linux-aarch64-a3-4
     container:
-      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
+      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
       env:
         VLLM_LOGGING_LEVEL: ERROR
         VLLM_USE_MODELSCOPE: True
@@ -230,7 +231,7 @@ jobs:
           git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
 
       - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.0
         with:
           path: ./vllm-ascend
 
@@ -240,7 +241,7 @@ jobs:
           apt-get -y install gcc g++ cmake libnuma-dev
 
       - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v6
+        uses: actions/checkout@v6.0.0
         with:
           repository: vllm-project/vllm
           ref: ${{ inputs.vllm }}
@@ -265,20 +266,17 @@ jobs:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           VLLM_USE_MODELSCOPE: True
         run: |
-          pytest -sv \
-            tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe \
-            tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC 
-            # tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP \
-            # tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_W8A8_WITH_EP
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
+          # pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
+          # pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_W8A8_WITH_EP
+          pytest -sv tests/e2e/multicard/test_data_parallel_tp2.py
 
       - name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
         shell: bash -l {0}
         run: |
-          wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/Ascend-BiSheng-toolkit_aarch64.run -O /tmp/Ascend-BiSheng-toolkit_aarch64.run
-          chmod a+x /tmp/Ascend-BiSheng-toolkit_aarch64.run
-          /tmp/Ascend-BiSheng-toolkit_aarch64.run --install
-          . /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
-          python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev20250914-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
+          . /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
+          python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
 
       - name: Run vllm-project/vllm-ascend Qwen3 Next test
         working-directory: ./vllm-ascend
@@ -287,5 +285,5 @@ jobs:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           VLLM_USE_MODELSCOPE: True
         run: |
-          . /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
+          . /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
           pytest -sv tests/e2e/multicard/test_qwen3_next.py
@@ -19,7 +19,7 @@ jobs:
       image-tag: ${{ steps.build-image.outputs.image-tag }}
 
     steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v6.0.0
 
       - name: Show build target
         run: |
@@ -45,7 +45,7 @@ jobs:
             --network host \
             --platform linux/arm64 \
             -f .github/Dockerfile.nightly.${TARGET} \
-            --build-arg CANN_VERSION="8.3.rc1" \
+            --build-arg CANN_VERSION="8.3.rc2" \
             --build-arg UBUNTU_VERSION="22.04" \
             --build-arg PYTHON_VERSION="3.11" \
             -t "$IMAGE_TAG" .
 
@@ -36,14 +36,14 @@ jobs:
 
       - name: Get vLLM version
         run: |
-          VLLM_COMMIT=v0.11.2
+          VLLM_COMMIT=86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
           echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
 
       - name: Checkout repository
-        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.2.2
+        uses: actions/checkout@c2d88d3ecc89a9ef08eebf45d9637801dcee7eb5 # v4.2.2
 
       - name: Set up Python
-        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
+        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
 
       - name: Get vLLM release version
         run: |
 
@@ -62,7 +62,7 @@ jobs:
       }}
     if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
     steps:
-    - uses: actions/checkout@v6
+    - uses: actions/checkout@v6.0.0
       with:
         fetch-depth: 0
         persist-credentials: false
 
@@ -58,7 +58,7 @@ jobs:
     runs-on: ubuntu-latest
     if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
     steps:
-    - uses: actions/checkout@v6
+    - uses: actions/checkout@v6.0.0
       with:
         fetch-depth: 0
         persist-credentials: false
 
@@ -62,7 +62,7 @@ jobs:
       }}
     if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
     steps:
-    - uses: actions/checkout@v6
+    - uses: actions/checkout@v6.0.0
       with:
         fetch-depth: 0
         persist-credentials: false