|
1 | 1 | /**
|
2 |
| - * Copyright 2024, XGBoost contributors |
| 2 | + * Copyright 2024-2025, XGBoost contributors |
3 | 3 | */
|
4 | 4 | #if defined(XGBOOST_USE_CUDA)
|
5 | 5 | #include "cuda_dr_utils.h"
|
|
10 | 10 | #include <memory> // for make_unique
|
11 | 11 | #include <mutex> // for call_once
|
12 | 12 | #include <sstream> // for stringstream
|
13 |
| -#include <string> // for string |
| 13 | +#include <string> // for string, stoi |
14 | 14 |
|
15 |
| -#include "common.h" // for safe_cuda |
| 15 | +#include "common.h" // for safe_cuda, TrimFirst, Split |
16 | 16 | #include "cuda_rt_utils.h" // for CurrentDevice
|
17 |
| -#include "xgboost/string_view.h" // for StringVie |
| 17 | +#include "io.h" // for CmdOutput |
| 18 | +#include "xgboost/string_view.h" // for StringView |
18 | 19 |
|
19 | 20 | namespace xgboost::cudr {
|
20 | 21 | CuDriverApi::CuDriverApi() {
|
@@ -104,5 +105,52 @@ void MakeCuMemLocation(CUmemLocationType type, CUmemLocation *loc) {
|
104 | 105 | MakeCuMemLocation(type, &prop.location);
|
105 | 106 | return prop;
|
106 | 107 | }
|
| 108 | + |
| 109 | +[[nodiscard]] bool GetVersionFromSmi(std::int32_t *p_major, std::int32_t *p_minor) { |
| 110 | + using ::xgboost::common::Split; |
| 111 | + using ::xgboost::common::TrimFirst; |
| 112 | + // `nvidia-smi --version` is not available for older versions, as a result, we can't query the |
| 113 | + // cuda driver version unless we want to parse the table output. |
| 114 | + |
| 115 | + // Example output on a 2-GPU system: |
| 116 | + // |
| 117 | + // $ nvidia-smi --query-gpu=driver_version --format=csv |
| 118 | + // |
| 119 | + // driver_version |
| 120 | + // 570.124.06 |
| 121 | + // 570.124.06 |
| 122 | + // |
| 123 | + auto cmd = "nvidia-smi --query-gpu=driver_version --format=csv"; |
| 124 | + auto smi_out_str = common::CmdOutput(StringView{cmd}); |
| 125 | + |
| 126 | + auto Invalid = [=] { |
| 127 | + *p_major = *p_minor = -1; |
| 128 | + return false; |
| 129 | + }; |
| 130 | + if (smi_out_str.empty()) { |
| 131 | + return Invalid(); |
| 132 | + } |
| 133 | + |
| 134 | + auto smi_split = Split(smi_out_str, '\n'); |
| 135 | + if (smi_split.size() < 2) { |
| 136 | + return Invalid(); |
| 137 | + } |
| 138 | + |
| 139 | + // Use the first GPU |
| 140 | + auto smi_ver = Split(TrimFirst(smi_split[1]), '.'); |
| 141 | + // 570.124.06 |
| 142 | + if (smi_ver.size() != 3) { |
| 143 | + return Invalid(); |
| 144 | + } |
| 145 | + try { |
| 146 | + *p_major = std::stoi(smi_ver[0]); |
| 147 | + *p_minor = std::stoi(smi_ver[1]); |
| 148 | + LOG(INFO) << "Driver version: `" << *p_major << "." << *p_minor << "`"; |
| 149 | + return true; |
| 150 | + } catch (std::exception const &) { |
| 151 | + } |
| 152 | + |
| 153 | + return Invalid(); |
| 154 | +} |
107 | 155 | } // namespace xgboost::cudr
|
108 | 156 | #endif
|
0 commit comments