All monitoring checks are using ssh.module.sh

This commit is contained in:
m8in
2026-05-17 15:56:26 +02:00
parent bcf92589d3
commit b2107e3092
2 changed files with 72 additions and 108 deletions
@@ -1,67 +1,46 @@
#!/bin/bash
source /cis/core/base.module.sh
base.loadModule ssh
base.set _REMOTE_HOST "${1:?"FQDN of server missing: e.g. host.example.net[:port]"}" '^([a-zA-Z0-9][a-zA-Z0-9.-]*)+(:[0-9]+)?$'
function checkSync() {
local _REMOTE_HOST _MODE _GIVEN_REMOTE_HOSTNAME_FQDN
_REMOTE_HOST="${1:?"checkSync(): Missing first parameter REMOTE_HOST"}"
_MODE="${2:?"checkSync(): Missing second parameter MODE"}"
_GIVEN_REMOTE_HOSTNAME_FQDN="${3}"
readonly _REMOTE_HOST _MODE _GIVEN_REMOTE_HOSTNAME_FQDN
local _REMOTE_HOSTNAME_FQDN _REMOTE_HOSTNAME_SHORT _DEFINED_REMOTE_HOSTNAME_FQDN _ZFS_SNAPSHOT_FILTER _NOW_UTC_UNIXTIME _DEBUG_PATH
_REMOTE_HOSTNAME_FQDN="${_REMOTE_HOST%%:*}" #Removes longest matching pattern ':*' from the end
_REMOTE_HOSTNAME_SHORT="${_REMOTE_HOSTNAME_FQDN%%.*}" #Removes longest matching pattern '.*' from the end
_REMOTE_PORT="${_REMOTE_HOST}:"
_REMOTE_PORT="${_REMOTE_PORT#*:}" #Removes shortest matching pattern '*:' from the begin
_REMOTE_PORT="${_REMOTE_PORT%%:*}" #Removes longest matching pattern ':*' from the end
_REMOTE_PORT="${_REMOTE_PORT:-"22"}"
_REMOTE_USER="monitoring"
_SOCKET='~/.ssh/%r@%h:%p'
# This is crucial:
# - default value for the filter part is extracted from the first parameter (FQDN)
# - but you can override this part to adapt the test during a change of the domain.
# (e.g. the short hostname can be an option - or even a better default in the future)
base.set _GIVEN_REMOTE_HOSTNAME_FQDN "${2}" '^([a-zA-Z0-9][a-zA-Z0-9.-]*)?$'
_DEFINED_REMOTE_HOSTNAME_FQDN="${_GIVEN_REMOTE_HOSTNAME_FQDN:-"${_REMOTE_HOSTNAME_FQDN:?"Missing REMOTE_HOSTNAME_FQDN"}"}"
_ZFS_SNAPSHOT_FILTER="@SYNC_${_DEFINED_REMOTE_HOSTNAME_FQDN}"
base.set _MODE "${3:-"normal"}" '^(debug|normal)$'
_NOW_UTC_UNIXTIME=$(date -u +%s)
_DEBUG_PATH="/tmp/monitor/"
# Makes sure an SSH control master for the remote host is available.
#
# Reads (from the enclosing scope):
#   _SOCKET               - control socket path handed to ssh (-S / ControlPath);
#                           passed verbatim, so '~' and %-tokens are left to ssh
#   _REMOTE_PORT          - SSH port
#   _REMOTE_USER          - remote login name
#   _REMOTE_HOSTNAME_FQDN - remote host name
# Outputs:
#   "FAIL#SSH connection (setup ok?)" on stdout if no master could be started
# Returns:
#   0 if a master is already running or was started, 1 otherwise
function checkOrStartSSHMaster() {
  # Fast path: ask an existing master for its status (limited to 1 second).
  # The status text is merged into stdout so grep can see it.
  timeout --preserve-status 1 ssh -O check -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" 2>&1 | grep -q -F 'Master running' \
    && return 0

  # No usable master: ask a possibly stale one to stop (best effort, result ignored) ...
  ssh -O stop -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" &> /dev/null

  # ... and start a fresh one in the background (-f) that only runs 'exit' remotely.
  ssh -o ControlMaster=auto \
    -o "ControlPath=${_SOCKET}" \
    -o ControlPersist=65 \
    -p "${_REMOTE_PORT}" \
    -f "${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" exit &> /dev/null \
    && return 0

  echo "FAIL#SSH connection (setup ok?)"
  return 1
}
function checkSync() {
checkOrStartSSHMaster \
|| return 1
readonly _REMOTE_HOSTNAME_FQDN _REMOTE_HOSTNAME_SHORT _DEFINED_REMOTE_HOSTNAME_FQDN _ZFS_SNAPSHOT_FILTER _NOW_UTC_UNIXTIME _DEBUG_PATH
[ "${_MODE}" == "debug" ] \
&& mkdir -p "${_DEBUG_PATH}" > /dev/null \
&& echo "Now: ${_NOW_UTC_UNIXTIME}" > ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOSTNAME_FQDN}.txt
&& echo "Now: ${_NOW_UTC_UNIXTIME}" > ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOST}.txt
! [ -d "${CIS[COMPOSITIONS]:?"Missing global parameter CIS_COMPOSITIONS"}" ] \
&& echo "WARN#no compositions" \
&& return 0
[ "${_MODE}" == "debug" ] \
&& echo "Snapshot filter: ${_ZFS_SNAPSHOT_FILTER}" >> ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOSTNAME_FQDN}.txt
&& echo "Snapshot filter: ${_ZFS_SNAPSHOT_FILTER}" >> ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOST}.txt
# This retrieves the list of the interesting snapshots including creation timestamp
_SNAPSHOTS="$(ssh -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN} zfs list -po creation,name -r -t snapshot zpool1/persistent | grep -F ${_ZFS_SNAPSHOT_FILTER})"
local _SNAPSHOTS=$(ssh.onHostRun "monitoring@${_REMOTE_HOST}" 'zfs list -po creation,name -r -t snapshot zpool1/persistent' | grep -F ${_ZFS_SNAPSHOT_FILTER})
[ "${_MODE}" == "debug" ] \
&& echo "${_SNAPSHOTS}" > ${_DEBUG_PATH}SNAPSHOTS_${_REMOTE_HOSTNAME_FQDN}.txt
&& echo "${_SNAPSHOTS}" > ${_DEBUG_PATH}SNAPSHOTS_${_REMOTE_HOST}.txt
[ -z "${_SNAPSHOTS}" ] \
&& echo "FAIL#no snapshots" \
@@ -69,6 +48,7 @@ function checkSync() {
echo "OK#Checks running"
local _COMPOSITION_PATH
for _COMPOSITION_PATH in "${CIS[COMPOSITIONS]}"*; do
# If the remote host is found, then it is responsible for this container-composition; otherwise skip
@@ -76,12 +56,12 @@ function checkSync() {
grep -E "^[[:blank:]]*${_REMOTE_HOSTNAME_SHORT}" "${_COMPOSITION_PATH}/composition-sync-hosts" &> /dev/null \
|| continue;
_COMPOSITION_NAME="${_COMPOSITION_PATH##*/}" #Removes longest matching pattern '*/' from the begin
_LAST_SNAPSHOT_UNIXTIME="$(echo "${_SNAPSHOTS}" | grep ${_COMPOSITION_NAME} | tail -n 1 | cut -d' ' -f1)"
_SECONDS_BEHIND=$[ ${_NOW_UTC_UNIXTIME} - ${_LAST_SNAPSHOT_UNIXTIME} ]
local _COMPOSITION_NAME="${_COMPOSITION_PATH##*/}" #Removes longest matching pattern '*/' from the begin
local _LAST_SNAPSHOT_UNIXTIME="$(echo "${_SNAPSHOTS}" | grep ${_COMPOSITION_NAME} | tail -n 1 | cut -d' ' -f1)"
local _SECONDS_BEHIND=$[ ${_NOW_UTC_UNIXTIME} - ${_LAST_SNAPSHOT_UNIXTIME} ]
[ "${_MODE}" == "debug" ] \
&& echo "${_LAST_SNAPSHOT_UNIXTIME} ${_COMPOSITION_NAME} on ${_REMOTE_HOSTNAME_FQDN} behind: ${_SECONDS_BEHIND}s" >> ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOSTNAME_FQDN}.txt
&& echo "${_LAST_SNAPSHOT_UNIXTIME} ${_COMPOSITION_NAME} on ${_REMOTE_HOSTNAME_FQDN} behind: ${_SECONDS_BEHIND}s" >> ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOST}.txt
[ "${_SECONDS_BEHIND}" -lt 40 ] \
&& continue
@@ -94,11 +74,13 @@ function checkSync() {
done
}
base.set REMOTE_HOST "${1:?"FQDN of server missing: e.g. host.example.net[:port]"}" '^([a-zA-Z0-9][a-zA-Z0-9.-]*)+(:[0-9]+)?$'
base.set GIVEN_REMOTE_HOSTNAME_FQDN "${2}" '^([a-zA-Z0-9][a-zA-Z0-9.-]*)?$'
base.set MODE "${3:-"normal"}" '^(debug|normal)$'
RESULTS=$(checkSync "${REMOTE_HOST}" "${MODE}" "${GIVEN_REMOTE_HOSTNAME_FQDN}")
RESULTS="$(checkSync)"
[ "${_MODE}" == "debug" ] \
&& echo "$RESULTS" > ${_DEBUG_PATH}RESULTS_${_REMOTE_HOSTNAME_FQDN}.txt
[ "${MODE}" == "debug" ] \
&& echo "$RESULTS" > ${_DEBUG_PATH}RESULTS_${REMOTE_HOST}.txt
echo "$RESULTS"
+25 -43
View File
@@ -1,67 +1,46 @@
#!/bin/bash
source /cis/core/base.module.sh
base.loadModule ssh
base.set _REMOTE_HOST "${1:?"FQDN of server missing: e.g. host.example.net[:port]"}" '^([a-zA-Z0-9][a-zA-Z0-9.-]*)+(:[0-9]+)?$'
function checkSync() {
local _REMOTE_HOST _MODE _GIVEN_REMOTE_HOSTNAME_FQDN
_REMOTE_HOST="${1:?"checkSync(): Missing first parameter REMOTE_HOST"}"
_MODE="${2:?"checkSync(): Missing second parameter MODE"}"
_GIVEN_REMOTE_HOSTNAME_FQDN="${3}"
readonly _REMOTE_HOST _MODE _GIVEN_REMOTE_HOSTNAME_FQDN
local _REMOTE_HOSTNAME_FQDN _REMOTE_HOSTNAME_SHORT _DEFINED_REMOTE_HOSTNAME_FQDN _ZFS_SNAPSHOT_FILTER _NOW_UTC_UNIXTIME _DEBUG_PATH
_REMOTE_HOSTNAME_FQDN="${_REMOTE_HOST%%:*}" #Removes longest matching pattern ':*' from the end
_REMOTE_HOSTNAME_SHORT="${_REMOTE_HOSTNAME_FQDN%%.*}" #Removes longest matching pattern '.*' from the end
_REMOTE_PORT="${_REMOTE_HOST}:"
_REMOTE_PORT="${_REMOTE_PORT#*:}" #Removes shortest matching pattern '*:' from the begin
_REMOTE_PORT="${_REMOTE_PORT%%:*}" #Removes longest matching pattern ':*' from the end
_REMOTE_PORT="${_REMOTE_PORT:-"22"}"
_REMOTE_USER="monitoring"
_SOCKET='~/.ssh/%r@%h:%p'
# This is crucial:
# - default value for the filter part is extracted from the first parameter (FQDN)
# - but you can override this part to adapt the test during a change of the domain.
# (e.g. the short hostname can be an option - or even a better default in the future)
base.set _GIVEN_REMOTE_HOSTNAME_FQDN "${2}" '^([a-zA-Z0-9][a-zA-Z0-9.-]*)?$'
_DEFINED_REMOTE_HOSTNAME_FQDN="${_GIVEN_REMOTE_HOSTNAME_FQDN:-"${_REMOTE_HOSTNAME_FQDN:?"Missing REMOTE_HOSTNAME_FQDN"}"}"
_ZFS_SNAPSHOT_FILTER="@SYNC_${_DEFINED_REMOTE_HOSTNAME_FQDN}"
base.set _MODE "${3:-"normal"}" '^(debug|normal)$'
_NOW_UTC_UNIXTIME=$(date -u +%s)
_DEBUG_PATH="/tmp/monitor/"
# Makes sure an SSH control master for the remote host is available.
#
# Reads (from the enclosing scope):
#   _SOCKET               - control socket path handed to ssh (-S / ControlPath);
#                           passed verbatim, so '~' and %-tokens are left to ssh
#   _REMOTE_PORT          - SSH port
#   _REMOTE_USER          - remote login name
#   _REMOTE_HOSTNAME_FQDN - remote host name
# Outputs:
#   "FAIL#SSH connection (setup ok?)" on stdout if no master could be started
# Returns:
#   0 if a master is already running or was started, 1 otherwise
function checkOrStartSSHMaster() {
  # Fast path: ask an existing master for its status (limited to 1 second).
  # The status text is merged into stdout so grep can see it.
  timeout --preserve-status 1 ssh -O check -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" 2>&1 | grep -q -F 'Master running' \
    && return 0

  # No usable master: ask a possibly stale one to stop (best effort, result ignored) ...
  ssh -O stop -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" &> /dev/null

  # ... and start a fresh one in the background (-f) that only runs 'exit' remotely.
  ssh -o ControlMaster=auto \
    -o "ControlPath=${_SOCKET}" \
    -o ControlPersist=65 \
    -p "${_REMOTE_PORT}" \
    -f "${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" exit &> /dev/null \
    && return 0

  echo "FAIL#SSH connection (setup ok?)"
  return 1
}
function checkSync() {
checkOrStartSSHMaster \
|| return 1
readonly _REMOTE_HOSTNAME_FQDN _REMOTE_HOSTNAME_SHORT _DEFINED_REMOTE_HOSTNAME_FQDN _ZFS_SNAPSHOT_FILTER _NOW_UTC_UNIXTIME _DEBUG_PATH
[ "${_MODE}" == "debug" ] \
&& mkdir -p "${_DEBUG_PATH}" > /dev/null \
&& echo "Now: ${_NOW_UTC_UNIXTIME}" > ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOSTNAME_FQDN}.txt
&& echo "Now: ${_NOW_UTC_UNIXTIME}" > ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOST}.txt
! [ -d "${CIS[COMPOSITIONS]:?"Missing global parameter CIS_COMPOSITIONS"}" ] \
&& echo "WARN#no compositions" \
&& return 0
[ "${_MODE}" == "debug" ] \
&& echo "Snapshot filter: ${_ZFS_SNAPSHOT_FILTER}" >> ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOSTNAME_FQDN}.txt
&& echo "Snapshot filter: ${_ZFS_SNAPSHOT_FILTER}" >> ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOST}.txt
# This retrieves the list of the interesting snapshots including creation timestamp
_SNAPSHOTS="$(ssh -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN} zfs list -po creation,name -r -t snapshot zpool1/persistent | grep -F ${_ZFS_SNAPSHOT_FILTER})"
local _SNAPSHOTS=$(ssh.onHostRun "monitoring@${_REMOTE_HOST}" 'zfs list -po creation,name -r -t snapshot zpool1/persistent' | grep -F ${_ZFS_SNAPSHOT_FILTER})
[ "${_MODE}" == "debug" ] \
&& echo "${_SNAPSHOTS}" > ${_DEBUG_PATH}SNAPSHOTS_${_REMOTE_HOSTNAME_FQDN}.txt
&& echo "${_SNAPSHOTS}" > ${_DEBUG_PATH}SNAPSHOTS_${_REMOTE_HOST}.txt
[ -z "${_SNAPSHOTS}" ] \
&& echo "FAIL#no snapshots" \
@@ -69,6 +48,7 @@ function checkSync() {
echo "OK#Checks running"
local _COMPOSITION_PATH
for _COMPOSITION_PATH in "${CIS[COMPOSITIONS]}"*; do
# If the remote host is found, then it is responsible for this container-composition; otherwise skip
@@ -76,12 +56,12 @@ function checkSync() {
grep -E "^[[:blank:]]*${_REMOTE_HOSTNAME_SHORT}" "${_COMPOSITION_PATH}/zfssync-hosts" &> /dev/null \
|| continue;
_COMPOSITION_NAME="${_COMPOSITION_PATH##*/}" #Removes longest matching pattern '*/' from the begin
_LAST_SNAPSHOT_UNIXTIME="$(echo "${_SNAPSHOTS}" | grep ${_COMPOSITION_NAME} | tail -n 1 | cut -d' ' -f1)"
_SECONDS_BEHIND=$[ ${_NOW_UTC_UNIXTIME} - ${_LAST_SNAPSHOT_UNIXTIME} ]
local _COMPOSITION_NAME="${_COMPOSITION_PATH##*/}" #Removes longest matching pattern '*/' from the begin
local _LAST_SNAPSHOT_UNIXTIME="$(echo "${_SNAPSHOTS}" | grep ${_COMPOSITION_NAME} | tail -n 1 | cut -d' ' -f1)"
local _SECONDS_BEHIND=$[ ${_NOW_UTC_UNIXTIME} - ${_LAST_SNAPSHOT_UNIXTIME} ]
[ "${_MODE}" == "debug" ] \
&& echo "${_LAST_SNAPSHOT_UNIXTIME} ${_COMPOSITION_NAME} on ${_REMOTE_HOSTNAME_FQDN} behind: ${_SECONDS_BEHIND}s" >> ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOSTNAME_FQDN}.txt
&& echo "${_LAST_SNAPSHOT_UNIXTIME} ${_COMPOSITION_NAME} on ${_REMOTE_HOSTNAME_FQDN} behind: ${_SECONDS_BEHIND}s" >> ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOST}.txt
[ "${_SECONDS_BEHIND}" -lt 40 ] \
&& continue
@@ -94,11 +74,13 @@ function checkSync() {
done
}
base.set REMOTE_HOST "${1:?"FQDN of server missing: e.g. host.example.net[:port]"}" '^([a-zA-Z0-9][a-zA-Z0-9.-]*)+(:[0-9]+)?$'
base.set GIVEN_REMOTE_HOSTNAME_FQDN "${2}" '^([a-zA-Z0-9][a-zA-Z0-9.-]*)?$'
base.set MODE "${3:-"normal"}" '^(debug|normal)$'
RESULTS=$(checkSync "${REMOTE_HOST}" "${MODE}" "${GIVEN_REMOTE_HOSTNAME_FQDN}")
RESULTS="$(checkSync)"
[ "${_MODE}" == "debug" ] \
&& echo "$RESULTS" > ${_DEBUG_PATH}RESULTS_${_REMOTE_HOSTNAME_FQDN}.txt
[ "${MODE}" == "debug" ] \
&& echo "$RESULTS" > ${_DEBUG_PATH}RESULTS_${REMOTE_HOST}.txt
echo "$RESULTS"