From c178f2eb7b74cf7219cb08c299eb5eb3d43036c5 Mon Sep 17 00:00:00 2001 From: m8in Date: Fri, 15 May 2026 16:21:10 +0200 Subject: [PATCH] Refactoring to remove printOwnDomain.sh in the future --- core/base.module.sh | 1 + script/monitor/check.sh | 19 +--- .../monitor/generic/COMPOSITION_SYNC_CHECK.sh | 104 ++++++++++++++++++ script/monitor/generic/ZFS_SYNC_CHECK.sh | 18 ++- 4 files changed, 119 insertions(+), 23 deletions(-) create mode 100755 script/monitor/generic/COMPOSITION_SYNC_CHECK.sh diff --git a/core/base.module.sh b/core/base.module.sh index 5d57427..3667f3c 100755 --- a/core/base.module.sh +++ b/core/base.module.sh @@ -110,6 +110,7 @@ function prepare.setCIS() { CIS[DOMAINSTATES]="${CIS[ROOT]}states/${CIS[DOMAIN]}/" CIS[COMPOSITIONS]="${CIS[DOMAINDEFINITIONS]:?"Missing DOMAINDEFINITIONS"}compositions/" + CIS[GENERICMONITORCHECKS]="${CIS[SCRIPTDIR]:?"Missing SCRIPTDIR"}monitor/generic/" CIS[SET]="normal" # Sets the write protection of array 'CIS' diff --git a/script/monitor/check.sh b/script/monitor/check.sh index 45035c0..b910cae 100755 --- a/script/monitor/check.sh +++ b/script/monitor/check.sh @@ -1,21 +1,14 @@ #!/bin/bash - -# Folders always ends with an tailing '/' -_SCRIPT="$(readlink -f "${0}" 2> /dev/null)" -_CIS_ROOT="${_SCRIPT%%/script/monitor/*}/" #Removes longest matching pattern '/script/monitor/*' from the end -_CORE_SCRIPTS="${_CIS_ROOT:?"Missing CIS_ROOT"}core/" -_CURRENT_DOMAIN="$("${_CORE_SCRIPTS:?"Missing CORE_SCRIPTS"}printOwnDomain.sh")" -_DEFINITIONS="${_CIS_ROOT:?"Missing CIS_ROOT"}definitions/${_CURRENT_DOMAIN:?"Missing CURRENT_DOMAIN"}/" - -# Checks for the entire domain -_DOMAIN_CHECKS="${_DEFINITIONS:?"Missing DEFINITIONS"}monitor/checks/" +source /cis/core/base.module.sh function doChecks(){ - local readonly _TMPDIR="${1:?"doChecks(): Missing parameter TMPDIR:"}" - - local _DATETIME=$(date +%H-%M-%S) + local _TMPDIR _DATETIME _DOMAIN_CHECKS + _TMPDIR="${1:?"doChecks(): Missing parameter TMPDIR:"}" + _DATETIME=$(date +%H-%M-%S) + 
_DOMAIN_CHECKS="${CIS[DOMAINDEFINITIONS]?"Missing CIS_DOMAINDEFINITIONS"}monitor/checks/" + readonly _TMPDIR _DATETIME _DOMAIN_CHECKS mkdir -p ${_TMPDIR} rm ${_TMPDIR}/* > /dev/null 2>&1 diff --git a/script/monitor/generic/COMPOSITION_SYNC_CHECK.sh b/script/monitor/generic/COMPOSITION_SYNC_CHECK.sh new file mode 100755 index 0000000..ec89dc6 --- /dev/null +++ b/script/monitor/generic/COMPOSITION_SYNC_CHECK.sh @@ -0,0 +1,104 @@ +#!/bin/bash +source /cis/core/base.module.sh + + + +base.set _REMOTE_HOST "${1:?"FQDN of server missing: e.g. host.example.net[:port]"}" '^([a-zA-Z0-9][a-zA-Z0-9.-]*)+(:[0-9]+)?$' +_REMOTE_HOSTNAME_FQDN="${_REMOTE_HOST%%:*}" #Removes longest matching pattern ':*' from the end +_REMOTE_HOSTNAME_SHORT="${_REMOTE_HOSTNAME_FQDN%%.*}" #Removes longest matching pattern '.*' from the end +_REMOTE_PORT="${_REMOTE_HOST}:" +_REMOTE_PORT="${_REMOTE_PORT#*:}" #Removes shortest matching pattern '*:' from the beginning +_REMOTE_PORT="${_REMOTE_PORT%%:*}" #Removes longest matching pattern ':*' from the end +_REMOTE_PORT="${_REMOTE_PORT:-"22"}" +_REMOTE_USER="monitoring" +_SOCKET='~/.ssh/%r@%h:%p' + +# This is crucial: +# - default value for the filter part is extracted from the first parameter (FQDN) +# - but you can override this part to adapt the test during a change of the domain. +# (e.g. 
the short hostname can be an option - or even a better default in the future) +base.set _GIVEN_REMOTE_HOSTNAME_FQDN "${2}" '^([a-zA-Z0-9][a-zA-Z0-9.-]*)?$' +_DEFINED_REMOTE_HOSTNAME_FQDN="${_GIVEN_REMOTE_HOSTNAME_FQDN:-"${_REMOTE_HOSTNAME_FQDN:?"Missing REMOTE_HOSTNAME_FQDN"}"}" +_ZFS_SNAPSHOT_FILTER="@SYNC_${_DEFINED_REMOTE_HOSTNAME_FQDN}" + +base.set _MODE "${3:-"normal"}" '^(debug|normal)$' +_NOW_UTC_UNIXTIME=$(date -u +%s) +_DEBUG_PATH="/tmp/monitor/" + + + +function checkOrStartSSHMaster() { + timeout --preserve-status 1 ssh -O check -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN} 2>&1 | grep -q -F 'Master running' \ + && return 0 + + ssh -O stop -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN} &> /dev/null + ssh -o ControlMaster=auto \ + -o ControlPath=${_SOCKET} \ + -o ControlPersist=65 \ + -p ${_REMOTE_PORT} \ + -f ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN} exit &> /dev/null \ + && return 0 + + echo "FAIL#SSH connection (setup ok?)" + return 1 +} + +function checkSync() { + checkOrStartSSHMaster \ + || return 1 + + [ "${_MODE}" == "debug" ] \ + && mkdir -p "${_DEBUG_PATH}" > /dev/null \ + && echo "Now: ${_NOW_UTC_UNIXTIME}" > ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOSTNAME_FQDN}.txt + + ! 
[ -d "${CIS[COMPOSITIONS]:?"Missing global parameter CIS_COMPOSITIONS"}" ] \ + && echo "WARN#no compositions" \ + && return 0 + + [ "${_MODE}" == "debug" ] \ + && echo "Snapshot filter: ${_ZFS_SNAPSHOT_FILTER}" >> ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOSTNAME_FQDN}.txt + + # This retrieves the list of the interesting snapshots including creation timestamp + _SNAPSHOTS="$(ssh -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN} zfs list -po creation,name -r -t snapshot zpool1/persistent | grep -F ${_ZFS_SNAPSHOT_FILTER})" + [ "${_MODE}" == "debug" ] \ + && echo "${_SNAPSHOTS}" > ${_DEBUG_PATH}SNAPSHOTS_${_REMOTE_HOSTNAME_FQDN}.txt + + [ -z "${_SNAPSHOTS}" ] \ + && echo "FAIL#no snapshots" \ + && return 1 + + echo "OK#Checks running" + + for _COMPOSITION_PATH in "${CIS[COMPOSITIONS]}"*; do + + # If remote host is found then it is responsible for this container-composition, otherwise skip + # (grep -E "^[[:blank:]]*something" means: the line has to start with "something", leading blank chars are ok.) 
+ grep -E "^[[:blank:]]*${_REMOTE_HOSTNAME_SHORT}" "${_COMPOSITION_PATH}/composition-sync-hosts" &> /dev/null \ + || continue; + + _COMPOSITION_NAME="${_COMPOSITION_PATH##*/}" #Removes longest matching pattern '*/' from the beginning + _LAST_SNAPSHOT_UNIXTIME="$(echo "${_SNAPSHOTS}" | grep ${_COMPOSITION_NAME} | tail -n 1 | cut -d' ' -f1)" + _SECONDS_BEHIND=$[ ${_NOW_UTC_UNIXTIME} - ${_LAST_SNAPSHOT_UNIXTIME} ] + + [ "${_MODE}" == "debug" ] \ + && echo "${_LAST_SNAPSHOT_UNIXTIME} ${_COMPOSITION_NAME} on ${_REMOTE_HOSTNAME_FQDN} behind: ${_SECONDS_BEHIND}s" >> ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOSTNAME_FQDN}.txt + + [ "${_SECONDS_BEHIND}" -lt 40 ] \ + && continue + + [ "${_SECONDS_BEHIND}" -lt 60 ] \ + && echo "ZFSSYNC_of_${_REMOTE_HOSTNAME_SHORT}_LAGGING?WARN#${_COMPOSITION_NAME} ${_SECONDS_BEHIND}s" \ + && continue + + echo "ZFSSYNC_of_${_REMOTE_HOSTNAME_SHORT}_LAGGING?FAIL#${_COMPOSITION_NAME} ${_SECONDS_BEHIND}s" + done +} + + + +RESULTS="$(checkSync)" + +[ "${_MODE}" == "debug" ] \ + && echo "$RESULTS" > ${_DEBUG_PATH}RESULTS_${_REMOTE_HOSTNAME_FQDN}.txt + +echo "$RESULTS" diff --git a/script/monitor/generic/ZFS_SYNC_CHECK.sh b/script/monitor/generic/ZFS_SYNC_CHECK.sh index b524050..6f6646a 100755 --- a/script/monitor/generic/ZFS_SYNC_CHECK.sh +++ b/script/monitor/generic/ZFS_SYNC_CHECK.sh @@ -1,13 +1,9 @@ #!/bin/bash +source /cis/core/base.module.sh -_SCRIPT="$(readlink -f "${0}" 2> /dev/null)" -# Folders always ends with an tailing '/' -_CIS_ROOT="${_SCRIPT%%/script/monitor/*}/" #Removes longest matching pattern '/script/monitor/*' from the end -_DOMAIN="$("${_CIS_ROOT:?"Missing CIS_ROOT"}core/printOwnDomain.sh")" -_COMPOSITIONS="${_CIS_ROOT:?"Missing CIS_ROOT"}definitions/${_DOMAIN:?"Missing DOMAIN"}/compositions/" -_REMOTE_HOST="${1:?"FQDN of server missing: e.g. host.example.net[:port]"}" +base.set _REMOTE_HOST "${1:?"FQDN of server missing: e.g. 
host.example.net[:port]"}" '^([a-zA-Z0-9][a-zA-Z0-9.-]*)+(:[0-9]+)?$' _REMOTE_HOSTNAME_FQDN="${_REMOTE_HOST%%:*}" #Removes longest matching pattern ':*' from the end _REMOTE_HOSTNAME_SHORT="${_REMOTE_HOSTNAME_FQDN%%.*}" #Removes longest matching pattern '.*' from the end _REMOTE_PORT="${_REMOTE_HOST}:" @@ -21,9 +17,11 @@ _SOCKET='~/.ssh/%r@%h:%p' # - default value for the filter part is extracted from the first parameter (FQDN) # - but you can override this part to to adapt the test during a change of the domain. # (e.g. the short hostname can be an option - or even a better default in the future) -_ZFS_SNAPSHOT_FILTER="@SYNC_${2:-"${_REMOTE_HOSTNAME_FQDN:?"Missing REMOTE_HOSTNAME_FQDN"}"}" +base.set _GIVEN_REMOTE_HOSTNAME_FQDN "${2}" '^([a-zA-Z0-9][a-zA-Z0-9.-]*)?$' +_DEFINED_REMOTE_HOSTNAME_FQDN="${_GIVEN_REMOTE_HOSTNAME_FQDN:-"${_REMOTE_HOSTNAME_FQDN:?"Missing REMOTE_HOSTNAME_FQDN"}"}" +_ZFS_SNAPSHOT_FILTER="@SYNC_${_DEFINED_REMOTE_HOSTNAME_FQDN}" -_MODE="${3:-"normal"}" +base.set _MODE "${3:-"normal"}" '^(debug|normal)$' _NOW_UTC_UNIXTIME=$(date -u +%s) _DEBUG_PATH="/tmp/monitor/" @@ -53,7 +51,7 @@ function checkSync() { && mkdir -p "${_DEBUG_PATH}" > /dev/null \ && echo "Now: ${_NOW_UTC_UNIXTIME}" > ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOSTNAME_FQDN}.txt - ! [ -d "${_COMPOSITIONS:?"Missing COMPOSITIONS"}" ] \ + ! [ -d "${CIS[COMPOSITIONS]:?"Missing global parameter CIS_COMPOSITIONS"}" ] \ && echo "WARN#no compositions" \ && return 0 @@ -71,7 +69,7 @@ function checkSync() { echo "OK#Checks running" - for _COMPOSITION_PATH in ${_COMPOSITIONS}*; do + for _COMPOSITION_PATH in "${CIS[COMPOSITIONS]}"*; do # If remote host is found than it is responsible for this container-composition, otherwise skip # (grep -E "^[[:blank:]]*something" means. Line has to start with "something", leading blank chars are ok.)