Skip to content

Commit 9a1e57e

Browse files
authored
[r2.20] Use timeout from bazelrc and add ROCM_PATH repo_env (#3125)
1 parent eca3801 commit 9a1e57e

File tree

3 files changed

+33
-114
lines changed

3 files changed

+33
-114
lines changed

tensorflow/tools/ci_build/linux/rocm/run_cpu.sh

Lines changed: 7 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -28,55 +28,17 @@ export PYTHON_BIN_PATH=`which python3`
2828
PYTHON_VERSION=`python3 -c "import sys;print(f'{sys.version_info.major}.{sys.version_info.minor}')"`
2929
export TF_PYTHON_VERSION=$PYTHON_VERSION
3030

31-
export TF_NEED_ROCM=0
31+
# Use the bazelrc files in /usertools if available
32+
if [ ! -d /tf ];then
33+
# The bazelrc files in /usertools expect /tf to exist
34+
mkdir /tf
35+
fi
3236

33-
if [ -f /usertools/cpu.bazelrc ]; then
34-
# Use the bazelrc files in /usertools if available
35-
if [ ! -d /tf ];then
36-
# The bazelrc files in /usertools expect /tf to exist
37-
mkdir /tf
38-
fi
39-
bazel \
40-
--bazelrc=/usertools/cpu.bazelrc \
41-
test \
37+
bazel --bazelrc=tensorflow/tools/tf_sig_build_dockerfiles/devel.usertools/cpu.bazelrc test \
4238
--config=sigbuild_local_cache \
43-
--verbose_failures \
39+
--verbose_failures \
4440
--config=pycpp \
45-
--test_env=HIP_VISIBLE_DEVICES=\"\" \
46-
--repo_env=USE_PYWRAP_RULES=${usePywrapRules} \
4741
--action_env=TF_NEED_ROCM=0 \
4842
--action_env=TF_PYTHON_VERSION=$PYTHON_VERSION \
4943
--local_test_jobs=${N_BUILD_JOBS} \
50-
--test_timeout 920,2400,7200,9600 \
5144
--jobs=${N_BUILD_JOBS}
52-
else
53-
yes "" | $PYTHON_BIN_PATH configure.py
54-
55-
56-
# Run bazel test command. Double test timeouts to avoid flakes.
57-
# xla/mlir_hlo/tests/Dialect/gml_st tests disabled in 09/08/22 sync
58-
bazel test \
59-
-k \
60-
--verbose_failures \
61-
--test_tag_filters=-no_oss,-oss_excluded,-oss_serial,-gpu,-multi_gpu,-multi_and_single_gpu,-tpu,-cuda-only,-benchmark-test,-v1only \
62-
--test_lang_filters=cc,py \
63-
--jobs=30 \
64-
--local_ram_resources=60000 \
65-
--local_cpu_resources=15 \
66-
--local_test_jobs=${N_BUILD_JOBS} \
67-
--test_timeout 920,2400,7200,9600 \
68-
--build_tests_only \
69-
--test_output=errors \
70-
--test_sharding_strategy=disabled \
71-
--test_size_filters=small,medium \
72-
--test_env=TF_PYTHON_VERSION=$PYTHON_VERSION \
73-
--test_env=HIP_VISIBLE_DEVICES=\"\" \
74-
--repo_env=USE_PYWRAP_RULES=${usePywrapRules} \
75-
--action_env=TF_NEED_ROCM=0 \
76-
-- \
77-
//tensorflow/... \
78-
-//tensorflow/compiler/tf2tensorrt/... \
79-
-//tensorflow/core/tpu/... \
80-
-//tensorflow/lite/... \
81-
-//tensorflow/tools/toolchains/...
82-
fi

tensorflow/tools/ci_build/linux/rocm/run_gpu_multi.sh

Lines changed: 15 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -43,67 +43,23 @@ export TF_PYTHON_VERSION=$PYTHON_VERSION
4343
export TF_NEED_ROCM=1
4444
export ROCM_PATH=$ROCM_INSTALL_DIR
4545

46-
if [ -f /usertools/rocm.bazelrc ]; then
47-
# Use the bazelrc files in /usertools if available
48-
if [ ! -d /tf ];then
49-
# The bazelrc files in /usertools expect /tf to exist
50-
mkdir /tf
51-
fi
52-
bazel \
53-
--bazelrc=/usertools/rocm.bazelrc \
54-
test \
55-
--local_test_jobs=${N_TEST_JOBS} \
56-
--jobs=30 \
57-
--local_ram_resources=60000 \
58-
--local_cpu_resources=15 \
59-
--config=sigbuild_local_cache \
60-
--config=rocm \
61-
--config=nonpip_multi_gpu \
62-
--action_env=TF_PYTHON_VERSION=$PYTHON_VERSION
63-
else
64-
# Legacy style: run configure then build
65-
yes "" | $PYTHON_BIN_PATH configure.py
66-
67-
# Run bazel test command. Double test timeouts to avoid flakes.
68-
bazel test \
69-
--config=rocm \
70-
-k \
71-
--test_tag_filters=-no_gpu,-cuda-only \
72-
--jobs=30 \
73-
--local_ram_resources=60000 \
74-
--local_cpu_resources=15 \
75-
--local_test_jobs=${N_TEST_JOBS} \
76-
--test_timeout 920,2400,7200,9600 \
77-
--build_tests_only \
78-
--test_output=errors \
79-
--test_sharding_strategy=disabled \
80-
--test_size_filters=small,medium,large \
81-
--cache_test_results=no \
82-
--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=2048 \
83-
--test_env=TF_PYTHON_VERSION=$PYTHON_VERSION \
84-
-- \
85-
//tensorflow/core/nccl:nccl_manager_test_2gpu \
86-
//tensorflow/python/distribute/integration_test:mwms_peer_failure_test_2gpu \
87-
//tensorflow/python/distribute:checkpoint_utils_test_2gpu \
88-
//tensorflow/python/distribute:checkpointing_test_2gpu \
89-
//tensorflow/python/distribute:collective_all_reduce_strategy_test_xla_2gpu \
90-
//tensorflow/python/distribute:custom_training_loop_gradient_test_2gpu \
91-
//tensorflow/python/distribute:custom_training_loop_input_test_2gpu \
92-
//tensorflow/python/distribute:distribute_utils_test_2gpu \
93-
//tensorflow/python/distribute:input_lib_test_2gpu \
94-
//tensorflow/python/distribute:input_lib_type_spec_test_2gpu \
95-
//tensorflow/python/distribute:metrics_v1_test_2gpu \
96-
//tensorflow/python/distribute:mirrored_variable_test_2gpu \
97-
//tensorflow/python/distribute:parameter_server_strategy_test_2gpu \
98-
//tensorflow/python/distribute:ps_values_test_2gpu \
99-
//tensorflow/python/distribute:random_generator_test_2gpu \
100-
//tensorflow/python/distribute:test_util_test_2gpu \
101-
//tensorflow/python/distribute:tf_function_test_2gpu \
102-
//tensorflow/python/distribute:vars_test_2gpu \
103-
//tensorflow/python/distribute:warm_starting_util_test_2gpu \
104-
//tensorflow/python/training:saver_test_2gpu
46+
if [ ! -d /tf ];then
47+
# The bazelrc files in /usertools expect /tf to exist
48+
mkdir /tf
10549
fi
10650

51+
bazel --bazelrc=tensorflow/tools/tf_sig_build_dockerfiles/devel.usertools/rocm.bazelrc test \
52+
--local_test_jobs=${N_TEST_JOBS} \
53+
--jobs=30 \
54+
--local_ram_resources=60000 \
55+
--local_cpu_resources=15 \
56+
--verbose_failures \
57+
--config=rocm \
58+
--config=nonpip_multi_gpu \
59+
--config=sigbuild_local_cache \
60+
--action_env=TF_PYTHON_VERSION=$PYTHON_VERSION \
61+
--repo_env="ROCM_PATH=$ROCM_PATH" \
62+
10763

10864
# Started failing with 210906 sync
10965
# FAILED : //tensorflow/core/kernels:collective_nccl_test_2gpu \

tensorflow/tools/ci_build/linux/rocm/run_gpu_single.sh

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,25 +59,26 @@ if [ -z "$TARGET_ARCHS" ]; then
5959
exit 1
6060
fi
6161

62+
if [ ! -d /tf ];then
63+
# The bazelrc files in /usertools expect /tf to exist
64+
mkdir /tf
65+
fi
66+
6267
# Run bazel test command. Double test timeouts to avoid flakes.
63-
bazel test \
68+
bazel --bazelrc=tensorflow/tools/tf_sig_build_dockerfiles/devel.usertools/rocm.bazelrc test \
6469
--config=rocm \
70+
--config=sigbuild_local_cache \
71+
--config=pycpp \
6572
-k \
66-
--test_tag_filters=gpu,-no_oss,-oss_excluded,-oss_serial,-no_gpu,-cuda-only,-benchmark-test,-rocm_multi_gpu,-tpu,-v1only \
6773
--jobs=${N_BUILD_JOBS} \
6874
--local_test_jobs=${N_TEST_JOBS} \
6975
--test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
7076
--test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
7177
--test_env=MIOPEN_DEBUG_CONV_WINOGRAD=0 \
72-
--test_timeout 600,900,2400,7200 \
7378
--repo_env="TF_ROCM_AMDGPU_TARGETS=$TARGET_ARCHS" \
79+
--repo_env="ROCM_PATH=$ROCM_PATH" \
7480
--build_tests_only \
7581
--test_output=errors \
82+
--verbose_failures \
7683
--test_sharding_strategy=disabled \
77-
--test_size_filters=small,medium,large \
78-
--run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
79-
-- \
80-
//tensorflow/... \
81-
-//tensorflow/core/tpu/... \
82-
-//tensorflow/lite/... \
83-
-//tensorflow/compiler/tf2tensorrt/... \
84+
--run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute

0 commit comments

Comments
 (0)