
Commit be93a0c

yao-matrix and kashif authored

enable vllm c-s tests on XPU (#3445)

Signed-off-by: Matrix Yao <[email protected]>
Co-authored-by: Kashif Rasul <[email protected]>

1 parent f9fbd91 · commit be93a0c

File tree

2 files changed: +24 −17 lines changed


tests/test_vllm_client_server.py

Lines changed: 17 additions & 14 deletions

@@ -20,12 +20,12 @@
 import psutil
 import pytest
 from transformers import AutoModelForCausalLM
-from transformers.testing_utils import require_torch_multi_gpu
+from transformers.testing_utils import require_torch_multi_accelerator, torch_device

 from trl.extras.vllm_client import VLLMClient
 from trl.scripts.vllm_serve import chunk_list

-from .testing_utils import require_3_gpus
+from .testing_utils import require_3_accelerators


 class TestChunkList(unittest.TestCase):
@@ -55,15 +55,16 @@ def test_any_dtype(self):


 @pytest.mark.slow
-@require_torch_multi_gpu
+@require_torch_multi_accelerator
 class TestVLLMClientServer(unittest.TestCase):
     model_id = "Qwen/Qwen2.5-1.5B"

     @classmethod
     def setUpClass(cls):
-        # We want the server to run on GPU 1, so we set CUDA_VISIBLE_DEVICES to "1"
+        # We want the server to run on accelerator 1, so we set VISIBLE_DEVICES to "1"
         env = os.environ.copy()
-        env["CUDA_VISIBLE_DEVICES"] = "1"  # Restrict to GPU 1
+        VISIBLE_DEVICES = "ZE_AFFINITY_MASK" if torch_device == "xpu" else "CUDA_VISIBLE_DEVICES"
+        env[VISIBLE_DEVICES] = "1"  # Restrict to accelerator 1

         # Start the server process
         cls.server_process = subprocess.Popen(
@@ -107,7 +108,7 @@ def test_generate_with_params(self):
         self.assertLessEqual(len(seq), 32)

     def test_update_model_params(self):
-        model = AutoModelForCausalLM.from_pretrained(self.model_id, device_map="cuda")
+        model = AutoModelForCausalLM.from_pretrained(self.model_id, device_map=torch_device)
         self.client.update_model_params(model)

     def test_reset_prefix_cache(self):
@@ -132,15 +133,16 @@ def tearDownClass(cls):


 @pytest.mark.slow
-@require_3_gpus
+@require_3_accelerators
 class TestVLLMClientServerTP(unittest.TestCase):
     model_id = "Qwen/Qwen2.5-1.5B"

     @classmethod
     def setUpClass(cls):
-        # We want the server to run on GPU 1 and 2, so we set CUDA_VISIBLE_DEVICES to "1,2"
+        # We want the server to run on accelerator 1 and 2, so we set VISIBLE_DEVICES to "1,2"
         env = os.environ.copy()
-        env["CUDA_VISIBLE_DEVICES"] = "1,2"  # Restrict to GPU 1 and 2
+        VISIBLE_DEVICES = "ZE_AFFINITY_MASK" if torch_device == "xpu" else "CUDA_VISIBLE_DEVICES"
+        env[VISIBLE_DEVICES] = "1,2"  # Restrict to accelerator 1 and 2

         # Start the server process
         cls.server_process = subprocess.Popen(
@@ -169,7 +171,7 @@ def test_generate(self):
         self.assertTrue(all(isinstance(tok, int) for tok in seq))

     def test_update_model_params(self):
-        model = AutoModelForCausalLM.from_pretrained(self.model_id, device_map="cuda")
+        model = AutoModelForCausalLM.from_pretrained(self.model_id, device_map=torch_device)
         self.client.update_model_params(model)

     def test_reset_prefix_cache(self):
@@ -194,15 +196,16 @@ def tearDownClass(cls):


 @pytest.mark.slow
-@require_3_gpus
+@require_3_accelerators
 class TestVLLMClientServerDP(unittest.TestCase):
     model_id = "Qwen/Qwen2.5-1.5B"

     @classmethod
     def setUpClass(cls):
-        # We want the server to run on GPU 1 and 2, so we set CUDA_VISIBLE_DEVICES to "1,2"
+        # We want the server to run on accelerator 1 and 2, so we set VISIBLE_DEVICES to "1,2"
         env = os.environ.copy()
-        env["CUDA_VISIBLE_DEVICES"] = "1,2"  # Restrict to GPU 1 and 2
+        VISIBLE_DEVICES = "ZE_AFFINITY_MASK" if torch_device == "xpu" else "CUDA_VISIBLE_DEVICES"
+        env[VISIBLE_DEVICES] = "1,2"  # Restrict to accelerator 1 and 2

         # Start the server process
         cls.server_process = subprocess.Popen(
@@ -230,7 +233,7 @@ def test_generate(self):
         self.assertTrue(all(isinstance(tok, int) for tok in seq))

     def test_update_model_params(self):
-        model = AutoModelForCausalLM.from_pretrained(self.model_id, device_map="cuda")
+        model = AutoModelForCausalLM.from_pretrained(self.model_id, device_map=torch_device)
         self.client.update_model_params(model)

     def test_reset_prefix_cache(self):
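The recurring pattern in this file is backend dispatch keyed on `torch_device`: XPU restricts device visibility through `ZE_AFFINITY_MASK`, while CUDA uses `CUDA_VISIBLE_DEVICES`. A minimal standalone sketch of the same idea, assuming the `launch_on_devices` helper and the command line shown in the comment, which are illustrative rather than the exact invocation used by these tests:

```python
import os
import subprocess

from transformers.testing_utils import torch_device  # e.g. "cuda" or "xpu"


def launch_on_devices(cmd: list[str], devices: str) -> subprocess.Popen:
    """Launch `cmd` in a child process that only sees the given accelerator ids.

    Hypothetical helper mirroring the setUpClass logic in the diff above.
    """
    env = os.environ.copy()
    # XPU selects devices via ZE_AFFINITY_MASK; CUDA via CUDA_VISIBLE_DEVICES.
    visible_devices = "ZE_AFFINITY_MASK" if torch_device == "xpu" else "CUDA_VISIBLE_DEVICES"
    env[visible_devices] = devices
    return subprocess.Popen(cmd, env=env)


# e.g. run a server on accelerators 1 and 2 (command shown for illustration only):
# launch_on_devices(["trl", "vllm-serve", "--model", "Qwen/Qwen2.5-1.5B"], "1,2")
```

On the client side, the same dispatch falls out of replacing the hard-coded `device_map="cuda"` with `device_map=torch_device`, so `test_update_model_params` loads the model onto whichever backend is active without any extra branching.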

tests/testing_utils.py

Lines changed: 7 additions & 3 deletions

@@ -17,6 +17,7 @@

 import torch
 from transformers import is_bitsandbytes_available, is_comet_available, is_sklearn_available, is_wandb_available
+from transformers.testing_utils import torch_device
 from transformers.utils import is_rich_available

 from trl import BaseBinaryJudge, BasePairwiseJudge
@@ -94,11 +95,14 @@ def require_no_wandb(test_case):
     return unittest.skipUnless(not is_wandb_available(), "test requires no wandb")(test_case)


-def require_3_gpus(test_case):
+def require_3_accelerators(test_case):
     """
-    Decorator marking a test that requires at least num_gpus GPUs. Skips the test if num_gpus is not available.
+    Decorator marking a test that requires at least 3 accelerators. Skips the test if 3 accelerators are not available.
     """
-    return unittest.skipUnless(torch.cuda.device_count() > 3, "test requires at least 3 GPUs")(test_case)
+    torch_accelerator_module = getattr(torch, torch_device, torch.cuda)
+    return unittest.skipUnless(
+        torch_accelerator_module.device_count() > 3, f"test requires at least 3 {torch_device}s"
+    )(test_case)


 class RandomBinaryJudge(BaseBinaryJudge):
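The `getattr(torch, torch_device, torch.cuda)` lookup works because `torch.cuda` and `torch.xpu` expose a matching `device_count()` API, so the decorator needs no per-backend branching. A sketch of how a test would consume it; the class and test names below are hypothetical:

```python
import unittest

import pytest

from .testing_utils import require_3_accelerators


@pytest.mark.slow
@require_3_accelerators  # skipped when the active backend reports too few devices
class TestNeedsManyAccelerators(unittest.TestCase):  # hypothetical example class
    def test_tensor_parallel_generate(self):
        ...  # body elided; the real tests talk to the vLLM server started in setUpClass
```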
