Skip to content

Commit 9af9207

Browse files
zzy-ContiLearngemini-code-assist[bot]LCAIZJ
authored and committed
bugfix: fix initialization error for mooncake in k8s (vllm-project#2541)
### What this PR does / why we need it? The details are explained in this issue: vllm-project#2557 ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? Easy to test because we just need to echo the variable - vLLM version: v0.10.1.1 - vLLM main: vllm-project/vllm@6997a25 --------- Signed-off-by: zzy-ContiLearn <[email protected]> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: LCAIZJ <[email protected]>
1 parent e5bdfad commit 9af9207

File tree

4 files changed

+25
-3
lines changed

4 files changed

+25
-3
lines changed

examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ export GLOO_SOCKET_IFNAME="xxxxxx"
3232
export TP_SOCKET_IFNAME="xxxxxx"
3333
export HCCL_SOCKET_IFNAME="xxxxxx"
3434
export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3
35+
export PHYSICAL_DEVICES=$(ls /dev/davinci* 2>/dev/null | grep -o '[0-9]\+' | sort -n | paste -sd',' -)
3536
3637
vllm serve "/xxxxx/DeepSeek-V2-Lite-Chat" \
3738
--host localhost \
@@ -100,6 +101,7 @@ export GLOO_SOCKET_IFNAME="xxxxxx"
100101
export TP_SOCKET_IFNAME="xxxxxx"
101102
export HCCL_SOCKET_IFNAME="xxxxxx"
102103
export ASCEND_RT_VISIBLE_DEVICES=4,5,6,7
104+
export PHYSICAL_DEVICES=$(ls /dev/davinci* 2>/dev/null | grep -o '[0-9]\+' | sort -n | paste -sd',' -)
103105
104106
vllm serve "/xxxxx/DeepSeek-V2-Lite-Chat" \
105107
--host localhost \

tests/ut/kv_connector/test_mooncake_connector.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1094,6 +1094,7 @@ def register_memory(self, *args, **kwargs):
10941094

10951095
class MockEnvsAscend:
10961096
MOONCAKE_CONNECTOR_PROTOCOL = "mock_protocol"
1097+
PHYSICAL_DEVICES = "10,11"
10971098

10981099

10991100
def mock_get_tensor_model_parallel_rank():
@@ -1122,7 +1123,7 @@ def setUp(self):
11221123
self.mock_transfer_engine.register_memory.return_value = 0
11231124

11241125
self.patches = [
1125-
patch('os.getenv', return_value="0,1"),
1126+
patch('os.getenv', return_value="10,11"),
11261127
patch('torch.Tensor.size', return_value=(10, 16, 8, 16)),
11271128
patch('torch.Tensor.element_size', return_value=4),
11281129
patch('torch.Tensor.data_ptr', return_value=0x1000),
@@ -1191,6 +1192,12 @@ def test_register_kv_caches_mla_case(self):
11911192
self.assertTrue(worker.use_mla)
11921193
self.assertEqual(len(worker.block_len), 2)
11931194

1195+
def test_device_id_selection_with_physical_devices(self):
1196+
# Test with physical devices set
1197+
worker = MooncakeConnectorWorker(self.vllm_config, self.engine_id)
1198+
# Default tp_rank is 0, so device_id should be 10
1199+
self.assertEqual(worker.device_id, 10)
1200+
11941201

11951202
if __name__ == '__main__':
11961203
unittest.main()

vllm_ascend/distributed/mooncake_connector.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import contextlib
33
import hashlib
44
import math
5-
import os
65
import queue
76
import random
87
import struct
@@ -29,6 +28,8 @@
2928
from vllm.v1.core.sched.output import SchedulerOutput
3029
from vllm.v1.request import RequestStatus
3130

31+
import vllm_ascend.envs as envs_ascend
32+
3233
if TYPE_CHECKING:
3334
from vllm.attention.backends.abstract import AttentionMetadata
3435
from vllm.forward_context import ForwardContext
@@ -758,13 +759,21 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str):
758759
# get tp device id
759760
# TODO(kw): https://github.com/vllm-project/vllm-ascend/pull/940
760761
# introducing some changes
761-
device_ids_str = os.getenv("ASCEND_RT_VISIBLE_DEVICES", None)
762+
device_ids_str = envs_ascend.PHYSICAL_DEVICES
762763
if device_ids_str is None:
763764
device_ids = list(
764765
range(self.dp_rank * self.tp_size,
765766
(self.dp_rank + 1) * self.tp_size))
766767
else:
767768
device_ids = list(map(int, device_ids_str.split(',')))
769+
start_index = self.dp_rank * self.tp_size
770+
end_index = start_index + self.tp_size
771+
if len(device_ids) < end_index:
772+
raise ValueError(
773+
f"Not enough physical devices available for DP rank {self.dp_rank}. "
774+
f"Expected at least {end_index} devices, but found {len(device_ids)} "
775+
"in PHYSICAL_DEVICES.")
776+
device_ids = device_ids[start_index:end_index]
768777
assert len(device_ids) > self.tp_rank # type: ignore
769778
self.device_id = device_ids[self.tp_rank] # type: ignore
770779

vllm_ascend/envs.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@
135135
# this feature in eager mode will get better performance.
136136
"VLLM_ASCEND_ENABLE_MLP_OPTIMIZE":
137137
lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_MLP_OPTIMIZE", '0'))),
138+
# Comma-separated list of physical device IDs (e.g. "0,1,2,3"). The Mooncake
139+
# connector reads it at initialization to select its device in a non-full-use scenario.
140+
"PHYSICAL_DEVICES":
141+
lambda: os.getenv("PHYSICAL_DEVICES", None),
138142
}
139143

140144
# end-env-vars-definition

0 commit comments

Comments
 (0)