1414#
1515[% PROCESS common/template_disclaimer %]
1616
17- set -euxo pipefail
18-
1917[% INSERT common/util_functions %]
2018
19+ [% INSERT common/yarn_functions %]
20+
21+ [% INSERT gpu/mig_functions %]
22+
2123[% INSERT gpu/util_functions %]
2224
25+ set -euxo pipefail
26+
2327function main() {
28+ if [[ " ${nvsmi_works} " == " 1" ]] ; then
29+ # if this is called without the MIG script then the drivers are not installed
30+ query_nvsmi
31+ local xpath=' //nvidia_smi_log/*/mig_mode/current_mig/text()'
32+ set +e
33+ migquery_result=" $( " ${xmllint} " --xpath " ${xpath} " " ${nvsmi_query_xml} " | grep -v ' N/A' ) "
34+ set -e
35+ NUM_MIG_GPUS=" $( echo ${migquery_result} | uniq | wc -l) "
36+
37+ if [[ " ${NUM_MIG_GPUS} " -gt " 0" ]] ; then
38+ if [[ " ${NUM_MIG_GPUS} " -eq " 1" ]]; then
39+ if (echo " ${migquery_result} " | grep Enabled); then
40+ IS_MIG_ENABLED=1
41+ NVIDIA_SMI_PATH=' /usr/local/yarn-mig-scripts/'
42+ MIG_MAJOR_CAPS=` grep nvidia-caps /proc/devices | cut -d ' ' -f 1`
43+ fetch_mig_scripts
44+ fi
45+ fi
46+ fi
47+ fi
48+
49+ # if mig is enabled drivers would have already been installed
50+ if [[ $IS_MIG_ENABLED -eq 0 ]]; then
51+ install_nvidia_gpu_driver
52+ install_cuda
53+ load_kernel_module
54+
55+ # Install GPU metrics collection in Stackdriver if needed
56+ if [[ " ${INSTALL_GPU_AGENT} " == " true" ]]; then
57+ install_gpu_agent
58+ # install_gpu_monitoring_agent
59+ echo ' GPU metrics agent successfully deployed.'
60+ else
61+ echo ' GPU metrics agent has not been installed.'
62+ fi
63+ configure_gpu_exclusive_mode
64+ fi
65+
2466 setup_gpu_yarn
2567
2668 echo " yarn setup complete"
@@ -33,12 +75,15 @@ function main() {
3375
# Runs the per-component cleanup handlers, in a fixed order, when the
# script exits.
#
# Order: GPU, pip, YARN, then the common handler last.  Each handler is
# defined outside this block (presumably by the template fragments
# included at the top of the file -- confirm against those fragments).
#
# Returns:
#   0 once all four handlers have been called.
function exit_handler() {
  gpu_exit_handler
  pip_exit_handler
  yarn_exit_handler
  common_exit_handler
  return 0
}
3983
# Prepares the environment for installation and registers the cleanup trap.
#
# Calls the common, pip and GPU environment preparation steps in that
# order, then installs exit_handler as the EXIT trap.
#
# NOTE(review): the trap is registered only after all three preparation
# steps have run, so exit_handler is not yet installed by this function
# if the script exits during preparation.  The order is kept as-is
# because the handlers may depend on state the preparation steps create
# -- confirm before moving the trap earlier.
function prepare_to_install() {
  prepare_common_env
  prepare_pip_env
  prepare_gpu_env
  trap exit_handler EXIT
}
0 commit comments