Skip to content

Commit 8d950cc

Browse files
committed
[TEST] add xlite e2e test
Signed-off-by: lulina <[email protected]>
1 parent b3ac128 commit 8d950cc

File tree

4 files changed

+144
-1
lines changed

4 files changed

+144
-1
lines changed

.github/workflows/_e2e_test.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ jobs:
103103
pytest -sv tests/e2e/singlecard/test_sampler.py
104104
pytest -sv tests/e2e/singlecard/test_vlm.py
105105
pytest -sv tests/e2e/singlecard/multi-modal/test_internvl.py
106+
pytest -sv tests/e2e/singlecard/test_xlite.py
106107
107108
# ------------------------------------ v1 spec decode test ------------------------------------ #
108109
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py

requirements-dev.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,5 @@ soundfile
2020
pytest_mock
2121
msserviceprofiler>=1.2.2
2222
mindstudio-probe>=8.3.0
23-
arctic-inference==0.1.1
23+
arctic-inference==0.1.1
24+
xlite

tests/e2e/singlecard/test_xlite.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
#
2+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3+
# Copyright 2023 The vLLM team.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
"""
18+
Compare the outputs of vLLM with and without xlite.
19+
20+
Run `pytest tests/e2e/singlecard/test_xlite.py`.
21+
"""
22+
23+
import pytest
24+
from vllm import SamplingParams
25+
26+
from tests.e2e.conftest import VllmRunner
27+
from tests.e2e.model_utils import check_outputs_equal
28+
29+
# Models exercised by the xlite e2e comparison tests below.
MODELS = ["Qwen/Qwen3-0.6B"]
32+
33+
34+
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("max_tokens", [32])
def test_models_with_xlite_decode_only(
    model: str,
    max_tokens: int,
) -> None:
    """Greedy outputs with the xlite graph enabled must match eager mode.

    Runs the same prompts twice — once with ``xlite_graph_config.enabled``
    and once fully eager — and asserts the (index, text) pairs are equal.
    """
    prompts = [
        "Hello, my name is", "The president of the United States is",
        "The capital of France is", "The future of AI is"
    ]

    # temperature=0.0 makes decoding deterministic so the two runs are
    # directly comparable.
    params = SamplingParams(max_tokens=max_tokens, temperature=0.0)

    # Run with the xlite graph turned on.
    with VllmRunner(
            model,
            block_size=128,
            max_model_len=1024,
            enforce_eager=False,
            additional_config={"xlite_graph_config": {
                "enabled": True
            }},
    ) as vllm_model:
        xlite_results = vllm_model.model.generate(prompts, params)

    # Reference run: plain eager execution, no graph.
    with VllmRunner(
            model,
            block_size=128,
            max_model_len=1024,
            enforce_eager=True,
    ) as vllm_model:
        eager_results = vllm_model.model.generate(prompts, params)

    xlite_pairs = [(res.outputs[0].index, res.outputs[0].text)
                   for res in xlite_results]
    eager_pairs = [(res.outputs[0].index, res.outputs[0].text)
                   for res in eager_results]

    check_outputs_equal(
        outputs_0_lst=eager_pairs,
        outputs_1_lst=xlite_pairs,
        name_0="vllm_eager_outputs",
        name_1="vllm_xlite_outputs",
    )
80+
81+
82+
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("max_tokens", [32])
def test_models_with_xlite_full_mode(
    model: str,
    max_tokens: int,
) -> None:
    """Greedy outputs with xlite full mode enabled must match eager mode.

    Same comparison as the decode-only test, but with
    ``xlite_graph_config.full_mode`` additionally set to ``True``.
    """
    prompts = [
        "Hello, my name is", "The president of the United States is",
        "The capital of France is", "The future of AI is"
    ]

    # temperature=0.0 makes decoding deterministic so the two runs are
    # directly comparable.
    params = SamplingParams(max_tokens=max_tokens, temperature=0.0)

    # Run with the xlite graph in full mode.
    with VllmRunner(
            model,
            block_size=128,
            max_model_len=1024,
            enforce_eager=False,
            additional_config={
                "xlite_graph_config": {
                    "enabled": True,
                    "full_mode": True
                }
            },
    ) as vllm_model:
        xlite_results = vllm_model.model.generate(prompts, params)

    # Reference run: plain eager execution, no graph.
    with VllmRunner(
            model,
            block_size=128,
            max_model_len=1024,
            enforce_eager=True,
    ) as vllm_model:
        eager_results = vllm_model.model.generate(prompts, params)

    xlite_pairs = [(res.outputs[0].index, res.outputs[0].text)
                   for res in xlite_results]
    eager_pairs = [(res.outputs[0].index, res.outputs[0].text)
                   for res in eager_results]

    check_outputs_equal(
        outputs_0_lst=eager_pairs,
        outputs_1_lst=xlite_pairs,
        name_0="vllm_eager_outputs",
        name_1="vllm_xlite_outputs",
    )

tests/ut/test_platform.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def mock_vllm_config():
3232
def mock_vllm_ascend_config():
3333
mock_ascend_config = MagicMock()
3434
mock_ascend_config.torchair_graph_config.enabled = False
35+
mock_ascend_config.xlite_graph_config.enabled = False
3536
mock_ascend_config.enable_shared_expert_dp = False
3637
return mock_ascend_config
3738

@@ -512,6 +513,16 @@ def test_check_and_update_config_v1_worker_class_selection(
512513
"vllm_ascend.torchair.torchair_worker.NPUTorchairWorker",
513514
)
514515

516+
test_ascend_config = TestNPUPlatform.mock_vllm_ascend_config()
517+
test_ascend_config.xlite_graph_config.enabled = True
518+
mock_init_ascend.return_value = test_ascend_config
519+
vllm_config.parallel_config.worker_cls = "auto"
520+
self.platform.check_and_update_config(vllm_config)
521+
self.assertEqual(
522+
vllm_config.parallel_config.worker_cls,
523+
"vllm_ascend.xlite.xlite_worker.XliteWorker",
524+
)
525+
515526
@patch("vllm_ascend.ascend_config.check_ascend_config")
516527
@patch("vllm_ascend.ascend_config.init_ascend_config")
517528
@patch('vllm_ascend.utils.get_ascend_device_type',

0 commit comments

Comments
 (0)