Skip to content

Commit 6e45bd0

Browse files
authored
Merge pull request #3136 from ROCm/ci_cj-bp-gpu-multi-r2.20-rocm-enhanced
update for avoiding running gpu_multi on single-GPU nodes
2 parents d15ac8a + f1b9fc1 commit 6e45bd0

File tree

1 file changed

+11
-0
lines changed

1 file changed

+11
-0
lines changed

tensorflow/tools/ci_build/linux/rocm/run_gpu_multi.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,17 @@ set -e
1818
set -x
1919

2020
# Number of 'processor' entries listed in /proc/cpuinfo.
N_BUILD_JOBS="$(grep -c '^processor' /proc/cpuinfo)"
21+
# Skip the multi-GPU suite on hosts that do not have enough GPUs.
#
# `rocm-smi -i` is queried once and its output is reused for counting. The
# exit status is captured with `|| STATUS=$?` so that a missing or failing
# rocm-smi does not abort the script under `set -e`; in that case we fall
# back to assuming a single GPU.
STATUS=0
ROCM_SMI_OUTPUT=$(rocm-smi -i) || STATUS=$?
# Echo what rocm-smi reported so it still shows up in the CI log.
printf '%s\n' "${ROCM_SMI_OUTPUT}"
if (( STATUS != 0 )); then
  TF_GPU_COUNT=1
else
  # Every line mentioning both 'Device ID' and 'GPU' is counted as one GPU.
  # `n + 0` makes awk print 0 (not an empty string) when nothing matches.
  TF_GPU_COUNT=$(awk '/Device ID/ && /GPU/ { n++ } END { print n + 0 }' \
    <<<"${ROCM_SMI_OUTPUT}")
fi
if (( TF_GPU_COUNT < 4 )); then
  echo "Found only ${TF_GPU_COUNT} gpus, multi-gpu tests need atleast 4 gpus."
  # Too few GPUs means "skip", not "fail": exit with an explicit success code.
  exit 0
fi
2132
# Run tests serially: one test job at a time.
N_TEST_JOBS=1
2233

2334
echo ""

0 commit comments

Comments
 (0)