Skip to content

Commit 3b3fc0b

Browse files
authored
detection of Intel GPU drivers corrected - 2.13 (#1532)
1 parent a6c8fc7 commit 3b3fc0b

File tree

3 files changed

+195
-42
lines changed

3 files changed

+195
-42
lines changed

platform/services/installer/app/checks/resources.py

Lines changed: 62 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import logging
99
import os
10+
import re
1011
import subprocess
1112
from subprocess import CalledProcessError, TimeoutExpired
1213

@@ -160,7 +161,33 @@ def check_gpu_driver_version(config: InstallationConfig | UpgradeConfig) -> None
160161
logger.debug("GPU driver version matched.")
161162

162163

163-
def _get_intel_gpus() -> str:
164+
def _check_intel_gpu_driver(env: dict[str, str]) -> bool:
    """
    Tell whether clinfo reports an Intel device.

    The output of ``clinfo`` is filtered through the shell down to the lines that contain both
    the device-name marker (``ResourcesChecksTexts.intel_gpu_arc_device_name``) and ``Intel``.

    Returns True when the filtered output contains the device-name marker. Returns False when
    it does not, or when the command exits with an error, times out or cannot be started.
    """
    device_marker = ResourcesChecksTexts.intel_gpu_arc_device_name
    command = 'clinfo|grep "' + device_marker + '"|grep Intel'
    logger.debug(f"Getting the list of Intel GPU drivers with {command}")

    try:
        raw_output = subprocess.check_output(  # noqa: S602 # nosec: B602
            command,
            stderr=subprocess.STDOUT,
            shell=True,
            timeout=5,
            env=env,
        )
        clinfo_output = raw_output.decode("utf-8")
    except (CalledProcessError, TimeoutExpired, FileNotFoundError) as err:
        logger.debug(f"Checking the installed Intel GPU driver failed with {err}")
        return False

    logger.debug(clinfo_output)
    return device_marker in clinfo_output
188+
189+
190+
def _get_intel_gpus() -> tuple[str, bool]: # noqa: C901
164191
"""
165192
MAX cards:
166193
Attempt to get Intel GPUs with xpu-smi
@@ -182,33 +209,49 @@ def _get_intel_gpus() -> str:
182209
logger.debug(xpu_output)
183210
if ResourcesChecksTexts.intel_gpu_no_devices in xpu_output:
184211
logger.debug("No devices")
185-
return ""
212+
return "", True
186213
if ResourcesChecksTexts.intel_gpu_max_card in xpu_output:
187214
logger.debug("Max 1100 found")
188-
return xpu_output
215+
return GPU_PROVIDER_INTEL_MAX, True
189216
except (CalledProcessError, TimeoutExpired, FileNotFoundError) as err:
190217
logger.debug(f"Getting the list of Intel GPU failed with {err}")
191218

192219
# Only valid for ARC cards
220+
if not _check_intel_gpu_driver(env):
221+
return "", False
193222
try:
194-
command = 'clinfo|grep "' + ResourcesChecksTexts.intel_gpu_arc_device_name + '"|grep Intel'
223+
command = "lspci -nnk | grep -iA3 'VGA\|3D\|Display'"
195224
logger.debug(f"Getting the list of Intel ARC with {command}")
196225

197-
clinfo_output = subprocess.check_output( # noqa: S602 # nosec: B602
226+
lspci_output = subprocess.check_output( # noqa: S602 # nosec: B602
198227
command,
199228
stderr=subprocess.STDOUT,
200229
shell=True,
201230
timeout=5,
202231
env=env,
203232
).decode("utf-8")
204-
logger.debug(clinfo_output)
205-
if ResourcesChecksTexts.intel_gpu_arc_device_name in clinfo_output:
206-
logger.debug("ARC found")
207-
return clinfo_output
233+
driver = ""
234+
cards = lspci_output.split("--\n")
235+
for card in cards:
236+
drivers = re.findall(r"Kernel driver in use:\s*([^\s]+)", card)
237+
238+
if ResourcesChecksTexts.intel_gpu_i915_driver in drivers:
239+
driver = GPU_PROVIDER_INTEL_ARC_A
240+
elif ResourcesChecksTexts.intel_gpu_xe_driver in drivers:
241+
driver = GPU_PROVIDER_INTEL_ARC
242+
243+
if driver:
244+
first_line = card.split("\n")[0]
245+
if "Intel Corporation Device" in first_line:
246+
logger.debug(f"Intel dGPU found: {driver}")
247+
return driver, True
248+
249+
logger.debug(f"Intel iGPU found: {driver}")
250+
return driver, False
208251
except (CalledProcessError, TimeoutExpired, FileNotFoundError) as err:
209252
logger.debug(f"Getting the list of Intel ARC failed with {err}")
210253

211-
return ""
254+
return "", False
212255

213256

214257
def _get_nvidia_gpus():
@@ -258,22 +301,17 @@ def check_local_gpu(config: InstallationConfig): # noqa: ANN201
258301

259302
# If Nvidia not found, let's look for Intel GPU
260303
# We prefer Intel GPU, so ignoring Nvidia if Intel GPU found
261-
intel_gpus = _get_intel_gpus()
262-
if not intel_gpus and not nvidia_gpus:
304+
intel_gpu, isdGPU = _get_intel_gpus()
305+
if not intel_gpu and not nvidia_gpus:
263306
raise ResourcesCheckWarning(ResourcesChecksTexts.gpu_requirements_check_error)
264-
if intel_gpus:
265-
if ResourcesChecksTexts.intel_gpu_max_card in intel_gpus:
266-
config.gpu_provider.value = GPU_PROVIDER_INTEL_MAX
267-
elif ResourcesChecksTexts.intel_gpu_arc_a_card in intel_gpus:
268-
config.gpu_provider.value = GPU_PROVIDER_INTEL_ARC_A
269-
else:
270-
config.gpu_provider.value = GPU_PROVIDER_INTEL_ARC
271-
logger.info(f"GPU provider: {config.gpu_provider.value}")
307+
if intel_gpu and isdGPU:
308+
config.gpu_provider.value = intel_gpu
309+
logger.info(f"GPU provider (Intel dGPU): {config.gpu_provider.value}")
272310
elif nvidia_gpus:
273311
config.gpu_provider.value = GPU_PROVIDER_NVIDIA
274312
logger.info(f"GPU provider: {config.gpu_provider.value}")
275313
found_gpus = [f"{local_gpu['name']}, mem={str(local_gpu['memory_total'])}MiB" for local_gpu in nvidia_gpus]
276-
logger.debug(f"Found GPUs: {', '.join(found_gpus)}")
314+
logger.debug(f"Found nVidia GPUs: {', '.join(found_gpus)}")
277315

278316
unsupported_gpus = [gpu for gpu in nvidia_gpus if gpu["memory_total"] < SUPPORTED_GPUS_MEMORY]
279317
if unsupported_gpus:
@@ -282,6 +320,9 @@ def check_local_gpu(config: InstallationConfig): # noqa: ANN201
282320
raise UnsupportedGpuWarning(
283321
ResourcesChecksTexts.gpu_requirements_check_memory.format(gpus=unsupported_gpus_str)
284322
)
323+
elif intel_gpu:
324+
config.gpu_provider.value = intel_gpu
325+
logger.info(f"GPU provider (Intel iGPU): {config.gpu_provider.value}")
285326

286327

287328
def check_local_mem(): # noqa: ANN201

platform/services/installer/app/texts/checks.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,4 +214,5 @@ class ResourcesChecksTexts:
214214
intel_gpu_no_devices = "No device discovered"
215215
intel_gpu_max_card = "Data Center GPU Max 1100"
216216
intel_gpu_arc_device_name = "Device Name"
217-
intel_gpu_arc_a_card = "Arc(TM) A"
217+
intel_gpu_i915_driver = "i915"
218+
intel_gpu_xe_driver = "xe"

platform/services/installer/tests/unit/checks/test_resources_checks.py

Lines changed: 131 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@
1616
UnsupportedGpuWarning,
1717
)
1818
from checks.resources import (
19+
GPU_PROVIDER_INTEL_ARC,
20+
GPU_PROVIDER_INTEL_ARC_A,
21+
GPU_PROVIDER_INTEL_MAX,
1922
SUPPORTED_NVIDIA_DRIVER_VERSION,
23+
_get_intel_gpus,
2024
check_gpu_driver_version,
2125
check_local_cpu,
2226
check_local_disk,
@@ -29,6 +33,34 @@
2933
from configuration_models.upgrade_config import UpgradeConfig
3034
from texts.checks import ResourcesChecksTexts
3135

36+
arc_xe_description = """ 03:00.0 Display controller [0380]: Intel Corporation Device [8086:e216]
37+
Subsystem: Intel Corporation Device [8086:1500]
38+
Kernel driver in use: xe
39+
Kernel modules: xe"""
40+
41+
arc_i915_description = """ 03:00.0 Display controller [0380]: Intel Corporation Device [8086:e216]
42+
Subsystem: Intel Corporation Device [8086:1500]
43+
Kernel driver in use: i915
44+
Kernel modules: i915"""
45+
46+
igpu_description = """00:02.0 VGA compatible controller [0300]: Intel Corporation Raptor Lake-S GT1 [UHD Graphics 770] [8086:a780] (rev 04)
47+
DeviceName: Onboard IGD
48+
Subsystem: ASUSTeK Computer Inc. Raptor Lake-S GT1 [UHD Graphics 770] [1043:8882]
49+
Kernel driver in use: i915"""
50+
51+
nvidia_description = """08:00.0 VGA compatible controller [0300]: NVIDIA Corporation GA102 [GeForce RTX 3090] [10de:2204] (rev a1)
52+
Subsystem: Gigabyte Technology Co., Ltd GA102 [GeForce RTX 3090] [1458:4043]
53+
Kernel driver in use: nouveau
54+
Kernel modules: nvidiafb, nouveau"""
55+
56+
arc_xe_igpu_description = arc_xe_description + "\n--\n" + igpu_description
57+
58+
arc_i915_igpu_description = arc_i915_description + "\n--\n" + igpu_description
59+
60+
arc_xe_nvidia_description = arc_xe_description + "\n--\n" + nvidia_description
61+
62+
nvidia_igpu_description = nvidia_description + "\n--\n" + igpu_description
63+
3264

3365
def test_check_local_cpu(mocker):
3466
"""Check if the requirement for 12 physical cores passes successfully"""
@@ -91,36 +123,115 @@ def test_check_local_nvidia_gpu_ok(get_gpus_mock):
91123
assert install_config_mock.gpu_provider.value == "nvidia"
92124

93125

94-
def test_check_local_intel_gpu_ok(get_gpus_mock, get_intel_gpus_mock):
95-
get_gpus_mock.return_value = []
96-
get_intel_gpus_mock.return_value = "Device Name: Intel(R) Data Center GPU Max 1100"
126+
def test_get_intel_gpus_max_card(mocker):
    """Output naming a Max 1100 card yields the Intel Max provider, flagged as a discrete GPU."""
    # Every subprocess call returns the Max card marker, so a single probe is expected.
    check_output_mock = mocker.patch(
        "subprocess.check_output", return_value=ResourcesChecksTexts.intel_gpu_max_card.encode("utf-8")
    )

    provider, is_dgpu = _get_intel_gpus()

    # Compare for equality: a substring test would also accept any longer provider
    # name that merely contains the expected one.
    assert provider == GPU_PROVIDER_INTEL_MAX
    assert is_dgpu is True
    assert check_output_mock.call_count == 1
134+
135+
136+
def test_get_intel_gpus_arc_xe_card(mocker):
    """A discrete Intel card bound to the xe kernel driver is reported as the ARC provider."""
    check_output_mock = mocker.patch("subprocess.check_output", return_value=arc_xe_description.encode("utf-8"))
    # The driver probe is stubbed out so only the card listing drives the result.
    driver_check_mock = mocker.patch("checks.resources._check_intel_gpu_driver", return_value=True)

    provider, is_dgpu = _get_intel_gpus()

    # Compare for equality: a substring test cannot tell this provider apart from
    # another provider whose name happens to contain it.
    assert provider == GPU_PROVIDER_INTEL_ARC
    assert is_dgpu is True
    assert driver_check_mock.call_count == 1
    assert check_output_mock.call_count == 2
146+
147+
148+
def test_get_intel_gpus_arc_i915_card(mocker):
    """A discrete Intel card bound to the i915 kernel driver is reported as the ARC-A provider."""
    check_output_mock = mocker.patch("subprocess.check_output", return_value=arc_i915_description.encode("utf-8"))
    # The driver probe is stubbed out so only the card listing drives the result.
    driver_check_mock = mocker.patch("checks.resources._check_intel_gpu_driver", return_value=True)

    provider, is_dgpu = _get_intel_gpus()

    # Compare for equality so that only the exact provider constant is accepted.
    assert provider == GPU_PROVIDER_INTEL_ARC_A
    assert is_dgpu is True
    assert driver_check_mock.call_count == 1
    assert check_output_mock.call_count == 2
158+
159+
160+
def test_get_intel_gpus_arc_igpu_card(mocker):
    """With a discrete i915 card listed ahead of an integrated GPU, the discrete card is reported."""
    check_output_mock = mocker.patch(
        "subprocess.check_output", return_value=arc_i915_igpu_description.encode("utf-8")
    )
    # The driver probe is stubbed out so only the card listing drives the result.
    driver_check_mock = mocker.patch("checks.resources._check_intel_gpu_driver", return_value=True)

    provider, is_dgpu = _get_intel_gpus()

    # Compare for equality so that only the exact provider constant is accepted.
    assert provider == GPU_PROVIDER_INTEL_ARC_A
    assert is_dgpu is True
    assert driver_check_mock.call_count == 1
    assert check_output_mock.call_count == 2
170+
171+
172+
def test_get_intel_gpus_igpu_card(mocker):
    """A lone integrated Intel GPU on the i915 driver is reported as ARC-A but not as a discrete GPU."""
    check_output_mock = mocker.patch("subprocess.check_output", return_value=igpu_description.encode("utf-8"))
    # The driver probe is stubbed out so only the card listing drives the result.
    driver_check_mock = mocker.patch("checks.resources._check_intel_gpu_driver", return_value=True)

    provider, is_dgpu = _get_intel_gpus()

    # Compare for equality so that only the exact provider constant is accepted.
    assert provider == GPU_PROVIDER_INTEL_ARC_A
    assert is_dgpu is False
    assert driver_check_mock.call_count == 1
    assert check_output_mock.call_count == 2
182+
183+
184+
def test_check_local_nvidia_arc(mocker):
185+
get_intel_mock = mocker.patch("checks.resources._get_intel_gpus", return_value=(GPU_PROVIDER_INTEL_ARC, True))
186+
get_nvidia_mock = mocker.patch(
187+
"checks.resources._get_nvidia_gpus",
188+
return_value=[
189+
{
190+
"name": "NVIDIA GeForce RTX 3090",
191+
"memory_total": 24576,
192+
}
193+
],
194+
)
195+
97196
install_config_mock = InstallationConfig(interactive_mode=False, install_telemetry_stack=False)
98197
install_config_mock.gpu_support.value = True
99198
check_local_gpu(config=install_config_mock)
100-
assert get_gpus_mock.call_count == 1
101-
assert get_intel_gpus_mock.call_count == 1
102-
assert install_config_mock.gpu_provider.value == "intel-max"
103199

200+
assert get_intel_mock.call_count == 1
201+
assert get_nvidia_mock.call_count == 1
202+
assert install_config_mock.gpu_provider.value == GPU_PROVIDER_INTEL_ARC
203+
204+
205+
def test_check_local_nvidia_igpu(mocker):
206+
get_intel_mock = mocker.patch("checks.resources._get_intel_gpus", return_value=(GPU_PROVIDER_INTEL_ARC, False))
207+
get_nvidia_mock = mocker.patch(
208+
"checks.resources._get_nvidia_gpus",
209+
return_value=(
210+
[
211+
{
212+
"name": "NVIDIA GeForce RTX 3090",
213+
"memory_total": 24576,
214+
}
215+
]
216+
),
217+
)
104218

105-
def test_check_local_intel_gpu_arc_ok(get_gpus_mock, get_intel_gpus_mock):
106-
get_gpus_mock.return_value = []
107-
get_intel_gpus_mock.return_value = "Device Name Intel(R) Graphics"
108219
install_config_mock = InstallationConfig(interactive_mode=False, install_telemetry_stack=False)
109220
install_config_mock.gpu_support.value = True
110221
check_local_gpu(config=install_config_mock)
111-
assert get_gpus_mock.call_count == 1
112-
assert get_intel_gpus_mock.call_count == 1
113-
assert install_config_mock.gpu_provider.value == "intel-arc"
114222

223+
assert get_intel_mock.call_count == 1
224+
assert get_nvidia_mock.call_count == 1
225+
assert install_config_mock.gpu_provider.value == "nvidia"
115226

116-
def test_check_local_gpu_not_found(get_gpus_mock, get_intel_gpus_mock):
117-
get_gpus_mock.return_value = []
118-
get_intel_gpus_mock.return_value = ""
119-
with pytest.raises(ResourcesCheckWarning):
120-
install_config_mock = InstallationConfig(interactive_mode=False, install_telemetry_stack=False)
121-
check_local_gpu(config=install_config_mock)
122-
assert get_gpus_mock.call_count == 1
123-
assert get_intel_gpus_mock.call_count == 1
227+
228+
def test_get_intel_gpus_no_card(mocker):
    """Output that mentions no Intel device produces an empty provider after two probes."""
    check_output_mock = mocker.patch("subprocess.check_output", return_value=b"lack of Intel gpu")

    provider, _ = _get_intel_gpus()

    assert not provider
    assert check_output_mock.call_count == 2
124235

125236

126237
def test_check_local_gpu_not_supported(get_gpus_mock):

0 commit comments

Comments
 (0)