Skip to content

Commit 39835cb

Browse files
author
tanqingshan (A)
committed
Fix eplb device transfer loader issues (ref vllm-project#4490)
Signed-off-by: tanqingshan (A) <[email protected]>
1 parent b32ef53 commit 39835cb

File tree

8 files changed

+1839
-441
lines changed

8 files changed

+1839
-441
lines changed
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
#
2+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
from vllm.config import SchedulerConfig
17+
18+
from tests.ut.base import TestBase
19+
from vllm_ascend.core.schedule_config import AscendSchedulerConfig
20+
21+
22+
class TestAscendSchedulerConfig(TestBase):
    """Unit tests for ``AscendSchedulerConfig.initialize_from_config``.

    Each test feeds a vLLM ``SchedulerConfig`` plus an override (either an
    empty dict or an ``AscendSchedulerConfig`` instance) into
    ``initialize_from_config`` and checks attributes of the returned object
    or the exception that is raised.
    """

    def setUp(self):
        # Baseline vLLM scheduler config reused by most of the tests below.
        base_kwargs = {
            "max_num_batched_tokens": 8192,
            "max_model_len": 8192,
            "is_multimodal_model": False,
            "send_delta_data": False,
            "is_encoder_decoder": False,
        }
        self.basic_scheduler_config = SchedulerConfig(**base_kwargs)

    def test_initialize_from_config_with_default(self):
        # Empty override mapping: the values checked here are the ones the
        # resulting config reports when nothing is overridden.
        result = AscendSchedulerConfig.initialize_from_config(
            self.basic_scheduler_config, {})
        expected_attrs = (
            ("enable_chunked_prefill", False),
            ("policy", "fcfs"),
            ("scheduler_cls", "vllm_ascend.core.scheduler.AscendScheduler"),
            ("max_num_encoder_input_tokens", 8192),
            ("encoder_cache_size", 8192),
        )
        for attr_name, expected in expected_attrs:
            self.assertEqual(getattr(result, attr_name), expected)

    def test_initialize_from_config_with_override(self):
        # Override supplied as an AscendSchedulerConfig instance.
        override = AscendSchedulerConfig(
            enable_chunked_prefill=False,
            policy="fcfs",
            scheduler_cls="vllm_ascend.core.scheduler.AscendScheduler",
            max_num_batched_tokens=8192,
            max_model_len=2048,
            max_long_partial_prefills=1,
            long_prefill_token_threshold=512,
        )
        result = AscendSchedulerConfig.initialize_from_config(
            self.basic_scheduler_config, override)
        expected_attrs = (
            ("enable_chunked_prefill", False),
            ("policy", "fcfs"),
            ("scheduler_cls", "vllm_ascend.core.scheduler.AscendScheduler"),
            ("max_num_batched_tokens", 8192),
            ("encoder_cache_size", 8192),
            ("max_long_partial_prefills", 1),
            ("long_prefill_token_threshold", 512),
        )
        for attr_name, expected in expected_attrs:
            self.assertEqual(getattr(result, attr_name), expected)

    def test_not_implemented_policy(self):
        # A policy other than "fcfs" is expected to raise NotImplementedError.
        unsupported_kwargs = {
            "policy": "custom_policy",
            "max_num_batched_tokens": 8192,
            "max_model_len": 2048,
        }
        with self.assertRaises(NotImplementedError) as raised:
            # The override object is built inside the ``with`` block on
            # purpose, so an error raised during its construction is
            # captured as well.
            AscendSchedulerConfig.initialize_from_config(
                self.basic_scheduler_config,
                AscendSchedulerConfig(**unsupported_kwargs),
            )
        message = str(raised.exception)
        self.assertIn(
            "currently AscendScheduler only supports fcfs policy",
            message,
        )

    def test_no_override(self):
        # With an empty override, both encoder-related sizes are 8192.
        result = AscendSchedulerConfig.initialize_from_config(
            self.basic_scheduler_config, {})
        for attr_name in ("max_num_encoder_input_tokens",
                          "encoder_cache_size"):
            self.assertEqual(getattr(result, attr_name), 8192)

    def test_valid_config_with_multimodal(self):
        # The multimodal flag on the source config is visible on the result.
        multimodal_source = SchedulerConfig(
            is_multimodal_model=True,
            max_num_batched_tokens=8192,
        )
        config = AscendSchedulerConfig.initialize_from_config(
            multimodal_source, {})
        self.assertTrue(config.is_multimodal_model)

    def test_valid_config_with_chunked_prefill(self):
        # Chunked prefill enabled through the override object.
        override = AscendSchedulerConfig(
            enable_chunked_prefill=True,
            max_num_batched_tokens=8192,
            max_model_len=8192,
        )
        result = AscendSchedulerConfig.initialize_from_config(
            self.basic_scheduler_config, override)
        self.assertEqual(result.max_num_batched_tokens, 8192)
        self.assertEqual(result.max_model_len, 8192)
        self.assertTrue(result.enable_chunked_prefill)

    def test_invalid_config_without_chunked_prefill(self):
        # Chunked prefill disabled while max_num_batched_tokens (2048) is
        # smaller than max_model_len (8192): a ValueError is expected.
        invalid_kwargs = {
            "enable_chunked_prefill": False,
            "max_num_batched_tokens": 2048,
            "max_model_len": 8192,
        }
        with self.assertRaises(ValueError) as raised:
            # Constructed inside the ``with`` block so that an error raised
            # by the constructor itself is captured too.
            AscendSchedulerConfig.initialize_from_config(
                self.basic_scheduler_config,
                AscendSchedulerConfig(**invalid_kwargs),
            )
        message = str(raised.exception)
        expected_fragments = (
            "Ascend scheduler is enabled without chunked prefill feature",
            "max_num_batched_tokens (2048)",
            "max_model_len (8192)",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, message)

    def test_initialize_from_config_with_pd_transfer(self):
        # PD-transfer fields given in the override appear on the result.
        override = AscendSchedulerConfig(
            enable_pd_transfer=True,
            decode_max_num_seqs=48,
            max_num_batched_tokens=8192,
            max_model_len=4096,
        )
        result = AscendSchedulerConfig.initialize_from_config(
            self.basic_scheduler_config, override)
        self.assertEqual(result.enable_pd_transfer, True)
        self.assertEqual(result.decode_max_num_seqs, 48)

0 commit comments

Comments
 (0)