Skip to content

Commit 45a0a24

Browse files
xby-G and cjac authored
[ganglia] maintenance release (#1289)
* Fixing ganglia.sh

  Fix the script to fix two scenarios:
  1. No source list file needs to be updated
  2. Multiple source list files need to be updated

* ganglia/BUILD:
  * using perl to verify http server instead of python

  ganglia/ganglia.sh:
  * export DEBIAN_FRONTEND=noninteractive
  * use latest implementation of repair_old_backports
  * include some utility functions to help check for running OS, and versions of spark, dataproc and OS versions
  * refactor ganglia worker setup into function
  * install worker on master when using single node configuration
  * reduce noise from apt and wait for lock release

  ganglia/test_ganglia.py:
  * increase test instance size to n1-standard-8
  * replaced complicated python test with simple perl
  * run test with conda python
  * corrected version nit about where ganglia ui is supported

  ganglia/verify_ganglia_running.py, ganglia/verify_ganglia_running.pl:
  * replaced complicated python with simple perl

---------

Co-authored-by: C.J. Collier <[email protected]>
1 parent 4a477e3 commit 45a0a24

File tree

5 files changed

+134
-130
lines changed

5 files changed

+134
-130
lines changed

ganglia/BUILD

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,12 @@ py_test(
66
srcs = ["test_ganglia.py"],
77
data = [
88
"ganglia.sh",
9+
"verify_ganglia_running.pl",
910
],
1011
local = True,
1112
shard_count = 4,
1213
deps = [
13-
":verify_ganglia_running",
1414
"//integration_tests:dataproc_test_case",
1515
"@io_abseil_py//absl/testing:parameterized",
1616
],
1717
)
18-
19-
py_library(
20-
name = "verify_ganglia_running",
21-
srcs = ["verify_ganglia_running.py"],
22-
)

ganglia/ganglia.sh

Lines changed: 93 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,51 @@
1717

1818
set -euxo pipefail
1919

20+
# Print the value of the ID= line of /etc/os-release.
# xargs strips any quoting and surrounding whitespace from the value.
function os_id() (
  set +x  # the body is a subshell, so tracing is silenced only in here
  grep '^ID=' /etc/os-release \
    | cut -d= -f2 \
    | xargs
)
21+
# Print the value of the VERSION_ID= line of /etc/os-release.
# xargs strips any quoting and surrounding whitespace from the value.
function os_version() (
  set +x  # the body is a subshell, so tracing is silenced only in here
  grep '^VERSION_ID=' /etc/os-release \
    | cut -d= -f2 \
    | xargs
)
22+
# Print the value of the VERSION_CODENAME= line of /etc/os-release.
# xargs strips any quoting and surrounding whitespace from the value.
function os_codename() (
  set +x  # the body is a subshell, so tracing is silenced only in here
  grep '^VERSION_CODENAME=' /etc/os-release \
    | cut -d= -f2 \
    | xargs
)
23+
24+
# For version (or real number) comparison
25+
# version_ge, version_gt, version_le and version_lt succeed when the first argument is, respectively, greater than or equal to, greater than, less than or equal to, or less than the second
26+
# ( version_ge 2.0 2.1 ) evaluates to false
27+
# ( version_ge 2.2 2.1 ) evaluates to true
28+
# version_ge A B: succeed when A >= B in `sort -V` (version) order.
#   $1 - left-hand version string
#   $2 - right-hand version string
# Returns 0 when $1 is the last of the two after a version sort, 1 otherwise.
function version_ge() (
  set +x
  # printf emits both operands verbatim, one per line; `echo -e` would
  # interpret backslash escapes that happen to appear inside an operand.
  [ "$1" = "$(printf '%s\n' "$1" "$2" | sort -V | tail -n1)" ]
)
29+
# version_gt A B: succeed when A is strictly greater than B.
#   $1 - left-hand version string
#   $2 - right-hand version string
# Returns 1 when the operands are equal, otherwise the status of version_ge.
function version_gt() (
  set +x
  # Equal operands are never "greater than".
  if [ "$1" = "$2" ]; then
    return 1
  fi
  # Quoted so that an empty operand keeps its position; unquoted, it would
  # vanish through word splitting and shift the other operand into its place.
  version_ge "$1" "$2"
)
30+
# version_le A B: succeed when A <= B in `sort -V` (version) order.
#   $1 - left-hand version string
#   $2 - right-hand version string
# Returns 0 when $1 is the first of the two after a version sort, 1 otherwise.
function version_le() (
  set +x
  # printf emits both operands verbatim, one per line; `echo -e` would
  # interpret backslash escapes that happen to appear inside an operand.
  [ "$1" = "$(printf '%s\n' "$1" "$2" | sort -V | head -n1)" ]
)
31+
# version_lt A B: succeed when A is strictly less than B.
#   $1 - left-hand version string
#   $2 - right-hand version string
# Returns 1 when the operands are equal, otherwise the status of version_le.
function version_lt() (
  set +x
  # Equal operands are never "less than".
  if [ "$1" = "$2" ]; then
    return 1
  fi
  # Quoted so that an empty operand keeps its position; unquoted, it would
  # vanish through word splitting and shift the other operand into its place.
  version_le "$1" "$2"
)
32+
33+
# Generate the OS predicate functions used by the rest of the script.
#
# For each OS id in rocky, ubuntu, debian this defines:
#   is_<os>          succeeds when os_id reported <os>
# and, for each release listed in supported_os (suffix = the part of the
# release before the first dot, e.g. 18 for 18.04):
#   is_<os><suffix>  succeeds on <os> at exactly that release
#   ge_<os><suffix>  succeeds on <os> at that release or newer
#   le_<os><suffix>  succeeds on <os> at that release or older
# Finally is_debuntu succeeds on either debian or ubuntu.
#
# The os id and version are expanded while the function text is being built,
# so the generated functions compare against values captured at this call.
# Sets the globals supported_os (readonly) and _os_version.
function define_os_comparison_functions() {

  readonly -A supported_os=(
    ['debian']="10 11 12"
    ['rocky']="8 9"
    ['ubuntu']="18.04 20.04 22.04"
  )

  # On rocky, cut the reported version at its first non-digit character so it
  # can be compared with the whole-number releases listed above.
  if [[ "$(os_id)" == "rocky" ]]; then
    _os_version=$(os_version | sed -e 's/[^0-9].*$//g')
  else
    _os_version="$(os_version)"
  fi

  local fn_suffix
  for os_id_val in 'rocky' 'ubuntu' 'debian' ; do
    eval "function is_${os_id_val}() ( set +x ; [[ \"$(os_id)\" == '${os_id_val}' ]] ; )"

    # Deliberately unquoted: the release list is split on whitespace.
    for osver in ${supported_os["${os_id_val}"]} ; do
      fn_suffix="${os_id_val}${osver%%.*}"
      eval "function is_${fn_suffix}() ( set +x ; is_${os_id_val} && [[ \"${_os_version}\" == \"${osver}\" ]] ; )"
      eval "function ge_${fn_suffix}() ( set +x ; is_${os_id_val} && version_ge \"${_os_version}\" \"${osver}\" ; )"
      eval "function le_${fn_suffix}() ( set +x ; is_${os_id_val} && version_le \"${_os_version}\" \"${osver}\" ; )"
    done
  done

  # Nothing to interpolate here, so a plain definition is enough.
  function is_debuntu() ( set +x ; is_debian || is_ubuntu ; )
}
56+
2057
# Write a timestamped message to stderr and report failure.
#   $* - words of the message, joined into one line
# Always returns 1, so `cmd || err '...'` fails the enclosing command list.
function err() {
  printf '[%s]: %s\n' "$(date +'%Y-%m-%dT%H:%M:%S%z')" "$*" >&2
  return 1
}
2461

2562
function update_apt_get() {
2663
for ((i = 0; i < 10; i++)); do
27-
if apt-get update; then
64+
if apt-get update > /dev/null ; then
2865
return 0
2966
fi
3067
sleep 5
@@ -34,61 +71,93 @@ function update_apt_get() {
3471

3572
function setup_ganglia_host() {
3673
# Install dependencies needed for Ganglia host
37-
DEBIAN_FRONTEND=noninteractive apt-get install -y \
74+
apt-get install -qq -y -o DPkg::Lock::Timeout=60 \
3875
rrdtool \
3976
gmetad \
40-
ganglia-webfrontend || err 'Unable to install packages'
77+
ganglia-webfrontend >/dev/null || err 'Unable to install packages'
4178

42-
ln -s /etc/ganglia-webfrontend/apache.conf /etc/apache2/sites-enabled/ganglia.conf
43-
sed -i "s/my cluster/${master_hostname}/" /etc/ganglia/gmetad.conf
79+
ln -sf /etc/ganglia-webfrontend/apache.conf /etc/apache2/sites-enabled/ganglia.conf
80+
perl -pi -e "s:^data_source.*:data_source \"${master_hostname}\" localhost:g" /etc/ganglia/gmetad.conf
4481
sed -i '26s/ \$context_metrics \= \"\"\;/ \$context_metrics \= array\(\)\;/g' /usr/share/ganglia-webfrontend/cluster_view.php
4582
systemctl restart ganglia-monitor gmetad apache2
4683
}
4784

48-
function remove_old_backports {
85+
# Adjust /etc/ganglia/gmond.conf and restart ganglia-monitor:
#   - flip `deaf = no` to `deaf = yes`
#   - delete the udp_recv_channel { ... } section
# main calls this on every node that is not the master, and also on the
# master when the cluster has zero workers (single node).
function setup_ganglia_worker() {
  local -r gmond_conf=/etc/ganglia/gmond.conf

  sed -i -e '/deaf = no /s/no/yes/' "${gmond_conf}"
  sed -i -e '/udp_recv_channel {/,/}/d' "${gmond_conf}"

  systemctl restart ganglia-monitor
}
91+
92+
function repair_old_backports {
93+
if ! is_debuntu ; then return ; fi
4994
# This script uses 'apt-get update' and is therefore potentially dependent on
5095
# backports repositories which have been archived. In order to mitigate this
51-
# problem, we will remove any reference to backports repos older than oldstable
96+
# problem, we will use archive.debian.org for the oldoldstable repo
5297

5398
# https://github.com/GoogleCloudDataproc/initialization-actions/issues/1157
54-
oldstable=$(curl -s https://deb.debian.org/debian/dists/oldstable/Release | awk '/^Codename/ {print $2}');
55-
stable=$(curl -s https://deb.debian.org/debian/dists/stable/Release | awk '/^Codename/ {print $2}');
56-
57-
matched_files="$(grep -rsil '\-backports' /etc/apt/sources.list*)"
58-
if [[ -n "$matched_files" ]]; then
59-
for filename in "$matched_files"; do
60-
grep -e "$oldstable-backports" -e "$stable-backports" "$filename" || \
61-
sed -i -e 's/^.*-backports.*$//' "$filename"
62-
done
63-
fi
99+
debdists="https://deb.debian.org/debian/dists"
100+
oldoldstable=$(curl -s "${debdists}/oldoldstable/Release" | awk '/^Codename/ {print $2}');
101+
oldstable=$( curl -s "${debdists}/oldstable/Release" | awk '/^Codename/ {print $2}');
102+
stable=$( curl -s "${debdists}/stable/Release" | awk '/^Codename/ {print $2}');
103+
104+
matched_files=( $(test -d /etc/apt && grep -rsil '\-backports' /etc/apt/sources.list*||:) )
105+
106+
for filename in "${matched_files[@]}"; do
107+
# Fetch from archive.debian.org for ${oldoldstable}-backports
108+
perl -pi -e "s{^(deb[^\s]*) https?://[^/]+/debian ${oldoldstable}-backports }
109+
{\$1 https://archive.debian.org/debian ${oldoldstable}-backports }g" "${filename}"
110+
done
64111
}
65112

66113
function main() {
67114
local master_hostname=$(/usr/share/google/get_metadata_value attributes/dataproc-master)
68115
local cluster_name=$(/usr/share/google/get_metadata_value attributes/dataproc-cluster-name)
69116

117+
export DEBIAN_FRONTEND=noninteractive
118+
70119
OS=$(. /etc/os-release && echo "${ID}")
120+
121+
define_os_comparison_functions
122+
123+
# Detect dataproc image version
124+
SPARK_VERSION="$(spark-submit --version 2>&1 | sed -n 's/.*version[[:blank:]]\+\([0-9]\+\.[0-9]\).*/\1/p' | head -n1)"
125+
readonly SPARK_VERSION
126+
127+
if (! test -v DATAPROC_IMAGE_VERSION) ; then
128+
if test -v DATAPROC_VERSION ; then
129+
DATAPROC_IMAGE_VERSION="${DATAPROC_VERSION}"
130+
else
131+
if version_lt "${SPARK_VERSION}" "3.2" ; then DATAPROC_IMAGE_VERSION="2.0"
132+
elif version_lt "${SPARK_VERSION}" "3.4" ; then DATAPROC_IMAGE_VERSION="2.1"
133+
elif version_lt "${SPARK_VERSION}" "3.6" ; then DATAPROC_IMAGE_VERSION="2.2"
134+
else echo "Unknown dataproc image version" ; exit 1 ; fi
135+
fi
136+
fi
137+
71138
if [[ ${OS} == debian ]] && [[ $(echo "${DATAPROC_IMAGE_VERSION} <= 2.1" | bc -l) == 1 ]]; then
72-
remove_old_backports
139+
repair_old_backports
73140
fi
74141

75-
update_apt_get || err 'Unable to update apt-get'
76-
apt-get install -y ganglia-monitor
142+
update_apt_get > /dev/null || err 'Unable to update apt-get'
143+
apt-get install -qq -y -o DPkg::Lock::Timeout=60 ganglia-monitor > /dev/null
77144

78145
sed -e "/send_metadata_interval = 0 /s/0/5/" -i /etc/ganglia/gmond.conf
79146
sed -e "/name = \"unspecified\" /s/unspecified/${cluster_name}/" -i /etc/ganglia/gmond.conf
80147
sed -e '/mcast_join /s/^ / #/' -i /etc/ganglia/gmond.conf
81148
sed -e '/bind /s/^ / #/' -i /etc/ganglia/gmond.conf
82149
sed -e "/udp_send_channel {/a\ host = ${master_hostname}" -i /etc/ganglia/gmond.conf
83150

151+
local worker_count=$(/usr/share/google/get_metadata_value dataproc-worker-count)
84152
if [[ "$(hostname -s)" == "${master_hostname}" ]]; then
85153
# Setup Ganglia host only on the master node ("0"-master in HA mode)
86154
setup_ganglia_host || err 'Setting up Ganglia host failed'
155+
if [[ "${worker_count}" == "0" ]] ; then
156+
# on single node instances, also configure ganglia-monitor
157+
setup_ganglia_worker
158+
fi
87159
else
88-
# Configure non-host Ganglia nodes
89-
sed -e "/deaf = no /s/no/yes/" -i /etc/ganglia/gmond.conf
90-
sed -i '/udp_recv_channel {/,/}/d' /etc/ganglia/gmond.conf
91-
systemctl restart ganglia-monitor
160+
setup_ganglia_worker
92161
fi
93162
}
94163

ganglia/test_ganglia.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,17 @@
66

77
from integration_tests.dataproc_test_case import DataprocTestCase
88

9-
109
class GangliaTestCase(DataprocTestCase):
1110
COMPONENT = 'ganglia'
1211
INIT_ACTIONS = ['ganglia/ganglia.sh']
13-
TEST_SCRIPT_FILE_NAME = 'verify_ganglia_running.py'
12+
TEST_SCRIPT_FILE_NAME = 'verify_ganglia_running.pl'
1413

1514
def verify_instance(self, name):
1615
test_script_path = os.path.join(
1716
os.path.dirname(os.path.abspath(__file__)),
1817
self.TEST_SCRIPT_FILE_NAME)
1918
self.upload_test_file(test_script_path, name)
20-
self.assert_instance_command(name,
21-
"yes | sudo apt-get install python3-pip libxml2-dev libxslt-dev")
22-
self.assert_instance_command(name, "sudo -H pip3 install --upgrade pip")
23-
self.assert_instance_command(name, "sudo pip3 install requests-html")
24-
self.assert_instance_command(name, "sudo pip install -U urllib3 requests")
25-
self.assert_instance_command(name, "pip install lxml[html_clean]")
26-
self.assert_instance_command(
27-
name, "python3 {}".format(self.TEST_SCRIPT_FILE_NAME))
19+
self.assert_instance_command(name,"/usr/bin/perl {}".format(self.TEST_SCRIPT_FILE_NAME))
2820
self.remove_test_script(self.TEST_SCRIPT_FILE_NAME, name)
2921

3022
@parameterized.parameters(
@@ -38,9 +30,13 @@ def test_ganglia(self, configuration, machine_suffixes):
3830
self.skipTest("Not supported in Rocky Linux-based images")
3931

4032
if self.getImageVersion() > pkg_resources.parse_version("2.0"):
41-
self.skipTest("Ganglia UI is not supported for 2.0+ versions")
33+
self.skipTest("Ganglia UI is not supported for 2.1+ versions")
4234

43-
self.createCluster(configuration, self.INIT_ACTIONS)
35+
self.createCluster(
36+
configuration,
37+
self.INIT_ACTIONS,
38+
machine_type="n1-standard-8",
39+
)
4440
for machine_suffix in machine_suffixes:
4541
self.verify_instance("{}-{}".format(self.getClusterName(),
4642
machine_suffix))

ganglia/verify_ganglia_running.pl

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/usr/bin/perl -w
# -*-CPerl-*-

# verify_ganglia_running.pl: Script for ganglia initialization action test.
#
# On the primary master this fetches http://localhost:80/ganglia/ and dies
# unless the request succeeds and the page title starts with the cluster
# name.  On any other node it only prints a note; nothing is probed there.

use strict;
use warnings;
use LWP::UserAgent;

# Run a shell command and return its output without the trailing newline.
# Dies when the command fails or prints nothing, so that a broken lookup
# cannot be mistaken for "this node is not the primary master" or turn the
# title check into a match-anything pattern.
sub command_output {
    my ($command) = @_;

    my $output = qx($command);
    die "Command failed (status $?): $command\n" if $? != 0;

    chomp $output;
    die "Command printed nothing: $command\n" if $output eq q{};

    return $output;
}

my $metadata_tool = '/usr/share/google/get_metadata_value';

my $hostname       = command_output('hostname -s');
my $primary_master = command_output("$metadata_tool attributes/dataproc-master");
my $cluster_name   = command_output("$metadata_tool attributes/dataproc-cluster-name");

if ( $hostname eq $primary_master ) {
    my $ui_host = 'localhost';
    my $ui_port = '80';

    my $ua       = LWP::UserAgent->new;
    my $response = $ua->get("http://${ui_host}:${ui_port}/ganglia/");

    die $response->status_line unless $response->is_success;

    # decoded_content can be undef when the body cannot be decoded.
    my $content = $response->decoded_content;
    $content = q{} unless defined $content;

    my ($page_title) = ( $content =~ m{<b id="page_title">([^>]+)</b>} );

    # \Q...\E: the cluster name is matched literally, not as a pattern.
    die 'Ganglia UI is not found on master node'
        unless defined $page_title
        && $page_title =~ /^\Q${cluster_name}\E/;

    print "Ganglia UI is running on this node.\n";
}
else {
    # Informational only: the node kind is read from the hostname and no
    # request is made.
    if ( $hostname =~ /-w-/ ) {
        print "Ganglia UI should not run on worker node\n";
    }
    elsif ( $hostname =~ /-m-/ ) {
        print "Ganglia UI should not run on additional master\n";
    }
}

ganglia/verify_ganglia_running.py

Lines changed: 0 additions & 88 deletions
This file was deleted.

0 commit comments

Comments
 (0)