1717
# Strict mode for the whole script:
#   -e          exit on an unhandled non-zero status
#   -u          treat expansion of an unset variable as an error
#   -x          trace every command to stderr
#   -o pipefail a pipeline fails if any stage fails
set -euxo pipefail
1919
# Print the value of one KEY=value entry of an os-release style file.
# Arguments: $1 - key name (e.g. ID)
#            $2 - file to read (optional, defaults to /etc/os-release)
# Outputs:   the value on stdout; 'xargs' strips surrounding quotes and blanks.
# The body is a subshell so that 'set +x' silences tracing here without
# changing the caller's shell options.
function _os_release_field() ( set +x ; grep "^${1}=" "${2:-/etc/os-release}" | cut -d= -f2 | xargs ; )

# Print the ID, VERSION_ID and VERSION_CODENAME fields respectively.
# Arguments: $1 - os-release file to read (optional, defaults to /etc/os-release)
function os_id()       ( set +x ; _os_release_field ID               "${1:-/etc/os-release}" ; )
function os_version()  ( set +x ; _os_release_field VERSION_ID       "${1:-/etc/os-release}" ; )
function os_codename() ( set +x ; _os_release_field VERSION_CODENAME "${1:-/etc/os-release}" ; )
23+
# Version (or real number) comparison helpers.
# Each takes two version strings and succeeds (status 0) when the first is,
# respectively, >=, >, <= or < the second, using 'sort -V' ordering.
#   ( version_ge 2.0 2.1 ) evaluates to false
#   ( version_ge 2.2 2.1 ) evaluates to true
# Bodies are subshells so 'set +x' silences tracing without touching the
# caller's options. printf '%s' is used so backslashes in the arguments are
# never interpreted.
function version_ge() ( set +x ; [[ "$1" == "$(printf '%s\n%s\n' "$1" "$2" | sort -V | tail -n1)" ]] ; )
function version_gt() ( set +x ; [[ "$1" != "$2" ]] && version_ge "$1" "$2" ; )
function version_le() ( set +x ; [[ "$1" == "$(printf '%s\n%s\n' "$1" "$2" | sort -V | head -n1)" ]] ; )
function version_lt() ( set +x ; [[ "$1" != "$2" ]] && version_le "$1" "$2" ; )
32+
# Dynamically define OS test helpers for the running system:
#   is_<os>              - running on <os> (rocky, ubuntu, debian)
#   is_<os><major>       - running on exactly that supported release
#   ge_<os><major>       - running on <os> at that release or newer
#   le_<os><major>       - running on <os> at that release or older
#   is_debuntu           - running on debian or ubuntu
# The detected OS id and version are baked into the generated functions when
# this runs, so the helpers do not re-read /etc/os-release.
# Globals:  supported_os (written once, readonly), _os_version (written)
# Requires: os_id, os_version, version_ge, version_le
function define_os_comparison_functions() {

  # Only declare the table once: re-assigning an existing readonly array
  # would make a second call of this function fail.
  if ! declare -p supported_os > /dev/null 2>&1 ; then
    readonly -A supported_os=(
      ['debian']="10 11 12"
      ['rocky']="8 9"
      ['ubuntu']="18.04 20.04 22.04"
    )
  fi

  local current_os_id
  current_os_id="$(os_id)"

  # Rocky reports major.minor; keep only the leading digits so the version
  # compares against the bare major numbers in supported_os.
  if [[ "${current_os_id}" == "rocky" ]] ; then
    _os_version=$(os_version | sed -e 's/[^0-9].*$//g')
  else
    _os_version="$(os_version)"
  fi

  local os_id_val osver major
  for os_id_val in 'rocky' 'ubuntu' 'debian' ; do
    eval "function is_${os_id_val}() ( set +x ; [[ \"${current_os_id}\" == '${os_id_val}' ]] ; )"

    # Intentionally unquoted: the table value is a space-separated list.
    for osver in ${supported_os["${os_id_val}"]} ; do
      # Function names carry only the major part (18.04 -> 18).
      major="${osver%%.*}"
      eval "function is_${os_id_val}${major}() ( set +x ; is_${os_id_val} && [[ \"${_os_version}\" == \"${osver}\" ]] ; )"
      eval "function ge_${os_id_val}${major}() ( set +x ; is_${os_id_val} && version_ge \"${_os_version}\" \"${osver}\" ; )"
      eval "function le_${os_id_val}${major}() ( set +x ; is_${os_id_val} && version_le \"${_os_version}\" \"${osver}\" ; )"
    done
  done
  eval "function is_debuntu() ( set +x ; is_debian || is_ubuntu ; )"
}
56+
# Print a timestamped error message to stderr and return failure.
# Arguments: the message words (joined with spaces)
# Outputs:   "[<ISO-8601 timestamp>]: <message>" on stderr
# Returns:   always 1, so 'cmd || err "msg"' yields a failing status
function err() {
  printf '[%s]: %s\n' "$(date +'%Y-%m-%dT%H:%M:%S%z')" "$*" >&2
  return 1
}
2461
2562function update_apt_get() {
2663 for (( i = 0 ; i < 10 ; i++ )) ; do
27- if apt-get update; then
64+ if apt-get update > /dev/null ; then
2865 return 0
2966 fi
3067 sleep 5
@@ -34,61 +71,93 @@ function update_apt_get() {
3471
# Install and configure the Ganglia collector (gmetad) and web frontend.
# Globals: master_hostname (read) - set by the caller
# Returns: 1 if the packages cannot be installed; otherwise the status of the
#          final service restart.
function setup_ganglia_host() {
  # Install dependencies needed for Ganglia host.
  # An explicit return is required: when this function is invoked as
  # 'setup_ganglia_host || ...', 'set -e' is suppressed inside it, so a bare
  # '|| err' would print the message and then carry on configuring.
  if ! apt-get install -qq -y -o DPkg::Lock::Timeout=60 \
    rrdtool \
    gmetad \
    ganglia-webfrontend > /dev/null ; then
    err 'Unable to install packages' || return 1
  fi

  # -f keeps a re-run from failing when the link already exists.
  ln -sf /etc/ganglia-webfrontend/apache.conf /etc/apache2/sites-enabled/ganglia.conf
  # Point gmetad's data_source line at this cluster's master.
  perl -pi -e "s:^data_source.*:data_source \"${master_hostname}\" localhost:g" /etc/ganglia/gmetad.conf
  # On line 26 of cluster_view.php, initialise $context_metrics as an array
  # instead of an empty string.
  sed -i '26s/ \$context_metrics \= \"\"\;/ \$context_metrics \= array\(\)\;/g' /usr/share/ganglia-webfrontend/cluster_view.php
  systemctl restart ganglia-monitor gmetad apache2
}
4784
48- function remove_old_backports {
# Configure ganglia-monitor (gmond) as a send-only node.
# main calls this on every non-master node, and also on the master when the
# cluster has no workers (single node).
function setup_ganglia_worker() {
  # One sed pass: flip 'deaf = no' to 'deaf = yes', and delete the whole
  # udp_recv_channel { ... } section.
  sed -i \
    -e "/deaf = no /s/no/yes/" \
    -e '/udp_recv_channel {/,/}/d' \
    /etc/ganglia/gmond.conf
  systemctl restart ganglia-monitor
}
91+
# Point apt at archive.debian.org for the oldoldstable backports repository.
#
# This script uses 'apt-get update' and is therefore potentially dependent on
# backports repositories which have been archived. In order to mitigate this
# problem, we will use archive.debian.org for the oldoldstable repo.
# https://github.com/GoogleCloudDataproc/initialization-actions/issues/1157
#
# Does nothing on non Debian/Ubuntu systems or when the oldoldstable codename
# cannot be determined.
# Requires: is_debuntu
function repair_old_backports {
  if ! is_debuntu ; then return ; fi

  local -r debdists="https://deb.debian.org/debian/dists"

  # Only the oldoldstable codename is needed for the rewrite below.
  local oldoldstable
  oldoldstable="$(curl -s "${debdists}/oldoldstable/Release" | awk '/^Codename/ {print $2}')"

  # Without a codename the pattern below cannot match a real suite name.
  if [[ -z "${oldoldstable}" ]] ; then return 0 ; fi

  # Collect apt source files that mention a backports suite, one path per
  # array element. grep finding nothing is not an error here.
  local -a matched_files=()
  if [[ -d /etc/apt ]] ; then
    mapfile -t matched_files < <(grep -rsil -- '-backports' /etc/apt/sources.list* || :)
  fi

  local filename
  for filename in "${matched_files[@]}" ; do
    # Fetch from archive.debian.org for ${oldoldstable}-backports.
    # The trailing blank inside both halves is part of the pattern: it anchors
    # on the separator that follows the suite name.
    perl -pi -e "s{^(deb[^\s]*) https?://[^/]+/debian ${oldoldstable}-backports }{\$1 https://archive.debian.org/debian ${oldoldstable}-backports }g" "${filename}"
  done
}
65112
# Entry point: install ganglia-monitor on every node, point it at the cluster
# master, and set up the collector/web frontend on the master itself.
# Globals: DEBIAN_FRONTEND (exported), OS, SPARK_VERSION (readonly),
#          DATAPROC_IMAGE_VERSION (written when not already set)
function main() {
  # Declared separately from the assignments so a failing metadata lookup is
  # not masked by the exit status of 'local'.
  local master_hostname cluster_name
  master_hostname="$(/usr/share/google/get_metadata_value attributes/dataproc-master)" \
    || err 'Unable to read the dataproc-master metadata value'
  cluster_name="$(/usr/share/google/get_metadata_value attributes/dataproc-cluster-name)" \
    || err 'Unable to read the dataproc-cluster-name metadata value'

  export DEBIAN_FRONTEND=noninteractive

  OS=$(. /etc/os-release && echo "${ID}")

  define_os_comparison_functions

  # Detect dataproc image version
  SPARK_VERSION="$(spark-submit --version 2>&1 | sed -n 's/.*version[[:blank:]]\+\([0-9]\+\.[0-9]\).*/\1/p' | head -n1)"
  readonly SPARK_VERSION

  # Prefer an explicit DATAPROC_IMAGE_VERSION, then DATAPROC_VERSION, and only
  # then infer the image version from the installed Spark version.
  if [[ ! -v DATAPROC_IMAGE_VERSION ]] ; then
    if [[ -v DATAPROC_VERSION ]] ; then
      DATAPROC_IMAGE_VERSION="${DATAPROC_VERSION}"
    elif version_lt "${SPARK_VERSION}" "3.2" ; then
      DATAPROC_IMAGE_VERSION="2.0"
    elif version_lt "${SPARK_VERSION}" "3.4" ; then
      DATAPROC_IMAGE_VERSION="2.1"
    elif version_lt "${SPARK_VERSION}" "3.6" ; then
      DATAPROC_IMAGE_VERSION="2.2"
    else
      echo "Unknown dataproc image version" >&2
      exit 1
    fi
  fi

  # version_le orders by version rather than as a decimal number.
  if [[ ${OS} == debian ]] && version_le "${DATAPROC_IMAGE_VERSION}" "2.1" ; then
    repair_old_backports
  fi

  update_apt_get > /dev/null || err 'Unable to update apt-get'
  apt-get install -qq -y -o DPkg::Lock::Timeout=60 ganglia-monitor > /dev/null

  # gmond.conf: resend metadata every 5s, set the cluster name, comment out
  # the mcast_join and bind lines, and send to the master host instead.
  sed -e "/send_metadata_interval = 0 /s/0/5/" -i /etc/ganglia/gmond.conf
  sed -e "/name = \"unspecified\" /s/unspecified/${cluster_name}/" -i /etc/ganglia/gmond.conf
  sed -e '/mcast_join /s/^ / #/' -i /etc/ganglia/gmond.conf
  sed -e '/bind /s/^ / #/' -i /etc/ganglia/gmond.conf
  sed -e "/udp_send_channel {/a\ host = ${master_hostname}" -i /etc/ganglia/gmond.conf

  # A failed lookup is tolerated here (it was previously masked by 'local');
  # an empty value simply means the single-node branch is not taken.
  # NOTE(review): the other lookups use the 'attributes/' prefix and this one
  # does not - confirm the key path against get_metadata_value.
  local worker_count
  worker_count="$(/usr/share/google/get_metadata_value dataproc-worker-count)" || worker_count=""

  if [[ "$(hostname -s)" == "${master_hostname}" ]] ; then
    # Setup Ganglia host only on the master node ("0"-master in HA mode)
    setup_ganglia_host || err 'Setting up Ganglia host failed'
    if [[ "${worker_count}" == "0" ]] ; then
      # on single node instances, also configure ganglia-monitor
      setup_ganglia_worker
    fi
  else
    # Configure non-host Ganglia nodes
    setup_ganglia_worker
  fi
}
94163
0 commit comments