4848. ${OCF_FUNCTIONS_DIR} /ocf-shellfuncs
4949
5050#
51- STORAGEMON=$HA_BIN /storage_mon
52- ATTRDUP=/usr/sbin/attrd_updater
51+ STORAGEMON=${HA_BIN} /storage_mon
52+ ATTRDUP=${HA_SBIN_DIR} /attrd_updater
53+ PIDFILE=${HA_VARRUN} /storage-mon-${OCF_RESOURCE_INSTANCE} .pid
54+ ATTRNAME=" #health-${OCF_RESOURCE_INSTANCE} "
5355
5456OCF_RESKEY_CRM_meta_interval_default=" 0"
5557OCF_RESKEY_io_timeout_default=" 10"
58+ OCF_RESKEY_check_interval_default=" 30"
5659OCF_RESKEY_inject_errors_default=" "
5760OCF_RESKEY_state_file_default=" ${HA_RSCTMP%%/ } /storage-mon-${OCF_RESOURCE_INSTANCE} .state"
61+ OCF_RESKEY_daemonize_default=" false"
5862
5963# Explicitly list all environment variables used, to make static analysis happy
6064: ${OCF_RESKEY_CRM_meta_interval:= ${OCF_RESKEY_CRM_meta_interval_default} }
6165: ${OCF_RESKEY_drives:= " " }
6266: ${OCF_RESKEY_io_timeout:= ${OCF_RESKEY_io_timeout_default} }
67+ : ${OCF_RESKEY_check_interval:= ${OCF_RESKEY_check_interval_default} }
6368: ${OCF_RESKEY_inject_errors:= ${OCF_RESKEY_inject_errors_default} }
6469: ${OCF_RESKEY_state_file:= ${OCF_RESKEY_state_file_default} }
70+ : ${OCF_RESKEY_daemonize:= ${OCF_RESKEY_daemonize_default} }
6571
6672# ######################################################################
6773
@@ -106,6 +112,14 @@ Specify disk I/O timeout in seconds. Minimum 1, recommended 10 (default).
106112<content type="integer" default="${OCF_RESKEY_io_timeout_default} " />
107113</parameter>
108114
115+ <parameter name="check_interval" unique="0">
116+ <longdesc lang="en">
117+ Specify interval between I/O checks in seconds. (Only supported with the daemonize option.)
118+ </longdesc>
119+ <shortdesc lang="en">I/O check interval</shortdesc>
120+ <content type="integer" default="${OCF_RESKEY_check_interval_default} " />
121+ </parameter>
122+
109123<parameter name="inject_errors" unique="0">
110124<longdesc lang="en">
111125Used only for testing! Specify % of I/O errors to simulate drives failures.
@@ -114,6 +128,14 @@ Used only for testing! Specify % of I/O errors to simulate drives failures.
114128<content type="integer" default="${OCF_RESKEY_inject_errors_default} " />
115129</parameter>
116130
131+ <parameter name="daemonize" unique="0">
132+ <longdesc lang="en">
133+ Specifies whether to start storage-mon as a daemon to check the devices.
134+ </longdesc>
135+ <shortdesc lang="en">Start storage-mon as a daemon</shortdesc>
136+ <content type="boolean" default="${OCF_RESKEY_daemonize_default} " />
137+ </parameter>
138+
117139</parameters>
118140
119141<actions>
@@ -146,6 +168,11 @@ storage-mon_init() {
146168 exit $OCF_ERR_INSTALLED
147169 fi
148170
171+ if [ ! -x " $ATTRDUP " ] ; then
172+ ocf_log err " ${ATTRDUP} not installed."
173+ exit $OCF_ERR_INSTALLED
174+ fi
175+
149176 i=0
150177 for DRIVE in ${OCF_RESKEY_drives} ; do
151178 if [ ! -e " $DRIVE " ] ; then
@@ -161,7 +188,12 @@ storage-mon_init() {
161188 fi
162189
163190 if [ " ${OCF_RESKEY_io_timeout} " -lt " 1" ]; then
164- ocf_log err " Minimum timeout is 1. Recommended 10 (default)."
191+ ocf_log err " Minimum timeout is 1. Recommended ${OCF_RESKEY_io_timeout_default} (default)."
192+ exit $OCF_ERR_CONFIGURED
193+ fi
194+
195+ if [ " ${OCF_RESKEY_check_interval} " -lt " 1" ]; then
196+ ocf_log err " Minimum interval to check is 1. default ${OCF_RESKEY_check_interval_default} ."
165197 exit $OCF_ERR_CONFIGURED
166198 fi
167199
@@ -173,77 +205,181 @@ storage-mon_init() {
173205 fi
174206}
175207
176- storage-mon_validate () {
177- storage-mon_init
208+ storage-mon_update_attribute () {
178209
179- # Is the state directory writable?
180- state_dir= $( dirname " $OCF_RESKEY_state_file " )
181- touch " $state_dir / $$ "
182- if [ $? -ne 0 ] ; then
183- return $OCF_ERR_CONFIGURED
184- fi
185- rm " $state_dir / $$ "
210+ while :
211+ do
212+ " $ATTRDUP " -n ${ATTRNAME} -U " $1 " -d " 5s "
213+ rc= $?
214+ if [ $rc -eq 0 ] ; then
215+ break
216+ fi
186217
218+ ocf_log debug " ${1} attribute by attrd_updater failed"
219+ if [ " $1 " = " red" ]; then
220+ # If attrd_updater fails while setting the red attribute, return an error so that pacemaker handles the failure immediately.
221+ return $OCF_ERR_GENERIC
222+ fi
223+ done
187224 return $OCF_SUCCESS
188225}
189226
190227storage-mon_monitor () {
191- storage-mon_init
228+ if ! ocf_is_true " $OCF_RESKEY_daemonize " ; then
229+ storage-mon_init
192230
193- # Monitor _MUST!_ differentiate correctly between running
194- # (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING).
195- # That is THREE states, not just yes/no.
231+ # Monitor _MUST!_ differentiate correctly between running
232+ # (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING).
233+ # That is THREE states, not just yes/no.
196234
197- if [ ! -f " ${OCF_RESKEY_state_file} " ]; then
198- return $OCF_NOT_RUNNING
199- fi
235+ if [ ! -f " ${OCF_RESKEY_state_file} " ]; then
236+ return $OCF_NOT_RUNNING
237+ fi
200238
201- # generate command line
202- cmdline=" "
203- for DRIVE in ${OCF_RESKEY_drives} ; do
204- cmdline=" $cmdline --device $DRIVE --score 1"
205- done
206- cmdline=" $cmdline --timeout ${OCF_RESKEY_io_timeout} "
207- if [ -n " ${OCF_RESKEY_inject_errors} " ]; then
208- cmdline=" $cmdline --inject-errors-percent ${OCF_RESKEY_inject_errors} "
209- fi
210- $STORAGEMON $cmdline
211- if [ $? -ne 0 ]; then
212- status=" red"
239+ # generate command line
240+ cmdline=" "
241+ for DRIVE in ${OCF_RESKEY_drives} ; do
242+ cmdline=" $cmdline --device $DRIVE --score 1"
243+ done
244+ cmdline=" $cmdline --timeout ${OCF_RESKEY_io_timeout} "
245+ if [ -n " ${OCF_RESKEY_inject_errors} " ]; then
246+ cmdline=" $cmdline --inject-errors-percent ${OCF_RESKEY_inject_errors} "
247+ fi
248+ $STORAGEMON $cmdline
249+ if [ $? -ne 0 ]; then
250+ status=" red"
251+ else
252+ status=" green"
253+ fi
254+
255+ storage-mon_update_attribute $status
256+ return " $? "
213257 else
214- status=" green"
215- fi
258+ ocf_pidfile_status " ${PIDFILE} " > /dev/null 2>&1
259+ case " $? " in
260+ 0) rc=$OCF_SUCCESS ;;
261+ 1|2) rc=$OCF_NOT_RUNNING ;;
262+ * ) rc=$OCF_ERR_GENERIC ;;
263+ esac
264+
265+ if [ $rc -ne $OCF_SUCCESS ]; then
266+ return " $rc "
267+ fi
268+ if [ " $1 " = " pid_check_only" ]; then
269+ return " $rc "
270+ fi
216271
217- " $ATTRDUP " -n " #health-${OCF_RESOURCE_INSTANCE} " -U " $status " -d " 5s"
218- return $OCF_SUCCESS
272+ # generate client command line
273+ cmdline=" "
274+ cmdline=" $cmdline --client --attrname ${ATTRNAME} "
275+ while :
276+ do
277+ # 0 : Normal.
278+ # greater than 0 : monitoring error.
279+ # 255(-1) : communication system error.
280+ # 254(-2) : The initial check of all devices has not completed yet in daemon mode.
281+ $STORAGEMON $cmdline
282+ rc=$?
283+ case " $rc " in
284+ 254|255)
285+ # If there is a communication error or the initial check of all devices has not been completed,
286+ # it will loop and try to reconnect.
287+ # If every attempt ends with a communication error, the monitor operation eventually times out.
288+ ocf_log debug " client monitor error : $rc "
289+ ;;
290+ 0)
291+ status=" green"
292+ break
293+ ;;
294+ * )
295+ status=" red"
296+ break
297+ ;;
298+ esac
299+ done
300+
301+ storage-mon_update_attribute $status
302+ return " $? "
303+ fi
219304}
220305
221306storage-mon_start () {
222- storage-mon_monitor
223- if [ $? -eq $OCF_SUCCESS ]; then
224- return $OCF_SUCCESS
307+ if ! ocf_is_true " $OCF_RESKEY_daemonize " ; then
308+ storage-mon_monitor
309+ if [ $? -eq $OCF_SUCCESS ]; then
310+ return $OCF_SUCCESS
311+ fi
312+ touch " ${OCF_RESKEY_state_file} "
313+ else
314+ storage-mon_init
315+ # generate command line
316+ cmdline=" "
317+ for DRIVE in ${OCF_RESKEY_drives} ; do
318+ cmdline=" $cmdline --device $DRIVE --score 1"
319+ done
320+ cmdline=" $cmdline --daemonize --timeout ${OCF_RESKEY_io_timeout} --interval ${OCF_RESKEY_check_interval} --pidfile ${PIDFILE} --attrname ${ATTRNAME} "
321+ if [ -n " ${OCF_RESKEY_inject_errors} " ]; then
322+ cmdline=" $cmdline --inject-errors-percent ${OCF_RESKEY_inject_errors} "
323+ fi
324+ $STORAGEMON $cmdline
325+ if [ " $? " -ne 0 ]; then
326+ return $OCF_ERR_GENERIC
327+ fi
225328 fi
226- touch " ${OCF_RESKEY_state_file} "
227329}
228330
229331storage-mon_stop () {
230332 storage-mon_monitor
231- if [ $? -eq $OCF_SUCCESS ]; then
232- rm " ${OCF_RESKEY_state_file} "
333+ rc=$?
334+
335+ if ! ocf_is_true " $OCF_RESKEY_daemonize " ; then
336+ if [ $rc -eq $OCF_SUCCESS ]; then
337+ rm " ${OCF_RESKEY_state_file} "
338+ fi
339+ else
340+ case " $rc " in
341+ $OCF_SUCCESS )
342+ ;;
343+ $OCF_NOT_RUNNING )
344+ return " $OCF_SUCCESS " ;;
345+ * )
346+ return " $rc " ;;
347+ esac
348+
349+ kill -TERM $( cat " ${PIDFILE} " )
350+ if [ " $? " -ne 0 ]; then
351+ return $OCF_ERR_GENERIC
352+ fi
353+
354+ while true ; do
355+ storage-mon_monitor pid_check_only
356+ rc=" $? "
357+ case " $rc " in
358+ $OCF_SUCCESS )
359+ ;;
360+ $OCF_NOT_RUNNING )
361+ return " $OCF_SUCCESS " ;;
362+ * )
363+ return " $rc " ;;
364+ esac
365+ sleep 1
366+ done
233367 fi
234368 return $OCF_SUCCESS
235369}
236370
237371storage-mon_validate () {
238372 storage-mon_init
239373
240- # Is the state directory writable?
241- state_dir=$( dirname " ${OCF_RESKEY_state_file} " )
242- touch " $state_dir /$$ "
243- if [ $? -ne 0 ]; then
244- return $OCF_ERR_CONFIGURED
374+ if ! ocf_is_true " $OCF_RESKEY_daemonize " ; then
375+ # Is the state directory writable?
376+ state_dir=$( dirname " ${OCF_RESKEY_state_file} " )
377+ touch " $state_dir /$$ "
378+ if [ $? -ne 0 ]; then
379+ return $OCF_ERR_CONFIGURED
380+ fi
381+ rm " $state_dir /$$ "
245382 fi
246- rm " $state_dir /$$ "
247383
248384 return $OCF_SUCCESS
249385}
0 commit comments