Skip to content

Commit 8461e6c

Browse files
committed
merge main
Signed-off-by: Ronald1995 <[email protected]>
2 parents a0da596 + 7271f0d commit 8461e6c

File tree

335 files changed

+28985
-7845
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

335 files changed

+28985
-7845
lines changed

.github/Dockerfile.buildwheel

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,15 @@
1515
# This file is a part of the vllm-ascend project.
1616
#
1717
ARG PY_VERSION=3.11
18-
FROM quay.io/ascend/manylinux:8.3.rc1-910b-manylinux_2_28-py${PY_VERSION}
18+
FROM quay.io/ascend/manylinux:8.3.rc2-910b-manylinux_2_28-py${PY_VERSION}
1919

2020
ARG COMPILE_CUSTOM_KERNELS=1
21+
ARG SOC_VERSION="ascend910b1"
2122

2223
# Define environments
2324
ENV DEBIAN_FRONTEND=noninteractive
2425
ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
26+
ENV SOC_VERSION=$SOC_VERSION
2527
RUN yum update -y && \
2628
yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
2729
rm -rf /var/cache/yum

.github/workflows/_e2e_nightly_multi_node.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ on:
1515
required: false
1616
type: string
1717
description: base image for pods
18-
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11"
18+
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11"
1919
config_file_path:
2020
required: true
2121
type: string
@@ -32,7 +32,7 @@ on:
3232
description: how many pods will be pulled up via lws.yaml, indicates number of nodes we need
3333
vllm_version:
3434
required: false
35-
default: "v0.11.2"
35+
default: "86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24"
3636
type: string
3737
description: vllm version to use
3838
vllm_ascend_remote_url:
@@ -69,7 +69,7 @@ jobs:
6969
# This is the runner with no NPU for k8s controller
7070
runs-on: ${{ inputs.runner }}
7171
container:
72-
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
72+
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
7373
env:
7474
KUBECONFIG: /tmp/kubeconfig
7575
KUBECTL: /root/.cache/.kube/kubectl
@@ -106,7 +106,7 @@ jobs:
106106
echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > $KUBECONFIG
107107
108108
- name: Checkout code
109-
uses: actions/checkout@v6
109+
uses: actions/checkout@v6.0.0
110110

111111
- name: Prepare scripts
112112
run: |

.github/workflows/_e2e_nightly_single_node.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ on:
2929
image:
3030
required: false
3131
type: string
32-
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11"
32+
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11"
3333
tests:
3434
required: true
3535
type: string

.github/workflows/_e2e_nightly_single_node_models.yaml

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ jobs:
5959
name: ${{inputs.model_list}} accuracy test
6060
runs-on: ${{ inputs.runner }}
6161
container:
62-
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
62+
image: "${{ inputs.image }}"
6363
env:
6464
VLLM_USE_MODELSCOPE: True
6565
GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
@@ -78,15 +78,15 @@ jobs:
7878
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
7979
8080
- name: Checkout vllm-project/vllm-ascend repo
81-
uses: actions/checkout@v6
81+
uses: actions/checkout@v6.0.0
8282

8383
- name: Install system dependencies
8484
run: |
8585
apt-get -y install `cat packages.txt`
8686
apt-get -y install gcc g++ cmake libnuma-dev
8787
8888
- name: Checkout vllm-project/vllm repo
89-
uses: actions/checkout@v6
89+
uses: actions/checkout@v6.0.0
9090
with:
9191
repository: vllm-project/vllm
9292
ref: ${{ inputs.vllm }}
@@ -108,11 +108,14 @@ jobs:
108108
if: ${{ inputs.runner == 'linux-aarch64-a2-4' && contains(inputs.model_list, 'Qwen3-Next-80B-A3B-Instruct') }}
109109
shell: bash -l {0}
110110
run: |
111-
wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/Ascend-BiSheng-toolkit_aarch64.run -O /tmp/Ascend-BiSheng-toolkit_aarch64.run
112-
chmod a+x /tmp/Ascend-BiSheng-toolkit_aarch64.run
113-
/tmp/Ascend-BiSheng-toolkit_aarch64.run --install
114-
. /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
115-
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev20250914-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
111+
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
112+
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
113+
114+
- name: Install tensorflow (for Molmo-7B-D-0924)
115+
if: ${{ inputs.runner == 'linux-aarch64-a2-1' && contains(inputs.model_list, 'Molmo-7B-D-0924') }}
116+
shell: bash -l {0}
117+
run: |
118+
pip install tensorflow --no-cache-dir
116119
117120
- name: Resolve vllm-ascend version
118121
run: |
@@ -132,7 +135,7 @@ jobs:
132135
echo "GHA_VLLM_ASCEND_VERSION=$RESOLVED_VERSION" >> $GITHUB_ENV
133136
134137
- name: Checkout vllm-project/vllm-ascend repo
135-
uses: actions/checkout@v6
138+
uses: actions/checkout@v6.0.0
136139
with:
137140
repository: vllm-project/vllm-ascend
138141
path: ./vllm-ascend
@@ -175,6 +178,7 @@ jobs:
175178
id: report
176179
env:
177180
VLLM_WORKER_MULTIPROC_METHOD: spawn
181+
HF_DATASETS_OFFLINE: True
178182
VLLM_USE_MODELSCOPE: True
179183
VLLM_CI_RUNNER: ${{ inputs.runner }}
180184
VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
@@ -225,4 +229,4 @@ jobs:
225229
path: ./benchmarks/accuracy/
226230
if-no-files-found: warn
227231
retention-days: 90
228-
overwrite: true
232+
overwrite: true

.github/workflows/_e2e_test.yaml

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -40,15 +40,15 @@ jobs:
4040
apt install git -y
4141
4242
- name: Checkout vllm-project/vllm-ascend repo
43-
uses: actions/checkout@v6
43+
uses: actions/checkout@v6.0.0
4444

4545
- name: Install system dependencies
4646
run: |
4747
apt-get -y install `cat packages.txt`
4848
apt-get -y install gcc g++ cmake libnuma-dev
4949
5050
- name: Checkout vllm-project/vllm repo
51-
uses: actions/checkout@v6
51+
uses: actions/checkout@v6.0.0
5252
with:
5353
repository: vllm-project/vllm
5454
ref: ${{ inputs.vllm }}
@@ -91,14 +91,13 @@ jobs:
9191
pytest -sv tests/e2e/singlecard/test_completion_with_prompt_embeds.py
9292
pytest -sv tests/e2e/singlecard/test_aclgraph.py
9393
pytest -sv tests/e2e/singlecard/test_aclgraph_mem.py
94-
pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
9594
pytest -sv tests/e2e/singlecard/test_bge_model.py
9695
pytest -sv tests/e2e/singlecard/test_camem.py
97-
pytest -sv tests/e2e/singlecard/test_chunked.py
9896
pytest -sv tests/e2e/singlecard/test_embedding.py
9997
# pytest -sv tests/e2e/singlecard/test_embedding_aclgraph.py
10098
pytest -sv tests/e2e/singlecard/test_guided_decoding.py
101-
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
99+
# TODO: LoRA does not work with torch 2.8; re-enable this test once that is fixed
100+
#pytest -sv tests/e2e/singlecard/test_ilama_lora.py
102101
pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
103102
pytest -sv tests/e2e/singlecard/test_quantization.py
104103
pytest -sv tests/e2e/singlecard/test_sampler.py
@@ -134,15 +133,15 @@ jobs:
134133
apt install git -y
135134
136135
- name: Checkout vllm-project/vllm-ascend repo
137-
uses: actions/checkout@v6
136+
uses: actions/checkout@v6.0.0
138137

139138
- name: Install system dependencies
140139
run: |
141140
apt-get -y install `cat packages.txt`
142141
apt-get -y install gcc g++ cmake libnuma-dev
143142
144143
- name: Checkout vllm-project/vllm repo
145-
uses: actions/checkout@v6
144+
uses: actions/checkout@v6.0.0
146145
with:
147146
repository: vllm-project/vllm
148147
ref: ${{ inputs.vllm }}
@@ -179,6 +178,7 @@ jobs:
179178
VLLM_USE_MODELSCOPE: True
180179
if: ${{ inputs.type == 'full' }}
181180
run: |
181+
pytest -sv tests/e2e/multicard/test_quantization.py
182182
pytest -sv tests/e2e/multicard/test_aclgraph_capture_replay.py
183183
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
184184
pytest -sv tests/e2e/multicard/test_full_graph_mode.py
@@ -187,7 +187,8 @@ jobs:
187187
pytest -sv tests/e2e/multicard/test_external_launcher.py
188188
pytest -sv tests/e2e/multicard/test_single_request_aclgraph.py
189189
pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
190-
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
190+
# TODO: LoRA does not work with torch 2.8; re-enable this test once that is fixed
191+
#pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
191192
192193
# To avoid oom, we need to run the test in a single process.
193194
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
@@ -211,7 +212,7 @@ jobs:
211212
if: ${{ needs.e2e.result == 'success' && needs.e2e-2-cards.result == 'success' && inputs.type == 'full' }}
212213
runs-on: linux-aarch64-a3-4
213214
container:
214-
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
215+
image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
215216
env:
216217
VLLM_LOGGING_LEVEL: ERROR
217218
VLLM_USE_MODELSCOPE: True
@@ -230,7 +231,7 @@ jobs:
230231
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
231232
232233
- name: Checkout vllm-project/vllm-ascend repo
233-
uses: actions/checkout@v6
234+
uses: actions/checkout@v6.0.0
234235
with:
235236
path: ./vllm-ascend
236237

@@ -240,7 +241,7 @@ jobs:
240241
apt-get -y install gcc g++ cmake libnuma-dev
241242
242243
- name: Checkout vllm-project/vllm repo
243-
uses: actions/checkout@v6
244+
uses: actions/checkout@v6.0.0
244245
with:
245246
repository: vllm-project/vllm
246247
ref: ${{ inputs.vllm }}
@@ -265,20 +266,17 @@ jobs:
265266
VLLM_WORKER_MULTIPROC_METHOD: spawn
266267
VLLM_USE_MODELSCOPE: True
267268
run: |
268-
pytest -sv \
269-
tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe \
270-
tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
271-
# tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP \
272-
# tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_W8A8_WITH_EP
269+
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
270+
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
271+
# pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
272+
# pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_W8A8_WITH_EP
273+
pytest -sv tests/e2e/multicard/test_data_parallel_tp2.py
273274
274275
- name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
275276
shell: bash -l {0}
276277
run: |
277-
wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/Ascend-BiSheng-toolkit_aarch64.run -O /tmp/Ascend-BiSheng-toolkit_aarch64.run
278-
chmod a+x /tmp/Ascend-BiSheng-toolkit_aarch64.run
279-
/tmp/Ascend-BiSheng-toolkit_aarch64.run --install
280-
. /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
281-
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev20250914-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
278+
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
279+
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
282280
283281
- name: Run vllm-project/vllm-ascend Qwen3 Next test
284282
working-directory: ./vllm-ascend
@@ -287,5 +285,5 @@ jobs:
287285
VLLM_WORKER_MULTIPROC_METHOD: spawn
288286
VLLM_USE_MODELSCOPE: True
289287
run: |
290-
. /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
288+
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
291289
pytest -sv tests/e2e/multicard/test_qwen3_next.py

.github/workflows/_nightly_image_build.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
image-tag: ${{ steps.build-image.outputs.image-tag }}
2020

2121
steps:
22-
- uses: actions/checkout@v6
22+
- uses: actions/checkout@v6.0.0
2323

2424
- name: Show build target
2525
run: |
@@ -45,7 +45,7 @@ jobs:
4545
--network host \
4646
--platform linux/arm64 \
4747
-f .github/Dockerfile.nightly.${TARGET} \
48-
--build-arg CANN_VERSION="8.3.rc1" \
48+
--build-arg CANN_VERSION="8.3.rc2" \
4949
--build-arg UBUNTU_VERSION="22.04" \
5050
--build-arg PYTHON_VERSION="3.11" \
5151
-t "$IMAGE_TAG" .

.github/workflows/format_pr_body.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,14 @@ jobs:
3636

3737
- name: Get vLLM version
3838
run: |
39-
VLLM_COMMIT=v0.11.2
39+
VLLM_COMMIT=86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
4040
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
4141
4242
- name: Checkout repository
43-
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.2.2
43+
uses: actions/checkout@c2d88d3ecc89a9ef08eebf45d9637801dcee7eb5 # v4.2.2
4444

4545
- name: Set up Python
46-
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
46+
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
4747

4848
- name: Get vLLM release version
4949
run: |

.github/workflows/image_310p_openeuler.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
}}
6363
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
6464
steps:
65-
- uses: actions/checkout@v6
65+
- uses: actions/checkout@v6.0.0
6666
with:
6767
fetch-depth: 0
6868
persist-credentials: false

.github/workflows/image_310p_ubuntu.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ jobs:
5858
runs-on: ubuntu-latest
5959
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
6060
steps:
61-
- uses: actions/checkout@v6
61+
- uses: actions/checkout@v6.0.0
6262
with:
6363
fetch-depth: 0
6464
persist-credentials: false

.github/workflows/image_a3_openeuler.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
}}
6363
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
6464
steps:
65-
- uses: actions/checkout@v6
65+
- uses: actions/checkout@v6.0.0
6666
with:
6767
fetch-depth: 0
6868
persist-credentials: false

0 commit comments

Comments
 (0)