Skip to content

Commit 0fe0140

Browse files
authored
[KV offload] Enable CPU KV offload on CUDA-alike Platforms (vllm-project#27770)
Signed-off-by: zhewenli <[email protected]>
1 parent 4e68cc9 commit 0fe0140

File tree

2 files changed

+2
-6
lines changed

2 files changed

+2
-6
lines changed

tests/v1/kv_offload/test_cpu_offloading.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from vllm import LLM, SamplingParams, TokensPrompt
1313
from vllm.config import KVEventsConfig, KVTransferConfig
1414
from vllm.distributed.kv_events import BlockStored, KVEventBatch
15-
from vllm.platforms import current_platform
1615

1716
CPU_BLOCK_SIZES = [16, 48]
1817

@@ -64,9 +63,6 @@ def close(self):
6463
self.sub.close()
6564

6665

67-
@pytest.mark.skipif(
68-
not current_platform.is_cuda(), reason="CPU offloading only supported on CUDA"
69-
)
7066
@pytest.mark.parametrize("cpu_block_size", CPU_BLOCK_SIZES)
7167
def test_cpu_offloading(cpu_block_size: int) -> None:
7268
"""

vllm/v1/kv_offload/cpu.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ def get_handlers(
5151
self, kv_caches: dict[str, torch.Tensor]
5252
) -> Iterator[tuple[type[LoadStoreSpec], type[LoadStoreSpec], OffloadingHandler]]:
5353
if not self._handler:
54-
if not current_platform.is_cuda():
54+
if not current_platform.is_cuda_alike():
5555
raise Exception(
56-
"CPU Offloading is currently only supported on CUDA GPUs"
56+
"CPU Offloading is currently only supported on CUDA-alike GPUs"
5757
)
5858

5959
layer_names = list(kv_caches.keys())

0 commit comments

Comments (0)