diff --git a/script/monitor/checks/GENERIC_REMOTECHECK b/script/monitor/checks/GENERIC_REMOTECHECK deleted file mode 100755 index f3b69fc..0000000 --- a/script/monitor/checks/GENERIC_REMOTECHECK +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -# --connect-timeout SECONDS Maximum time allowed for connection -# -k Allow connections to SSL sites without certs (H) -# -L Follow redirects (H) -# --max-time SECONDS Maximum time allowed for the transfer -# -s Silent mode. Don't output anything -URL="${1:?"URL missing"}" -RESULTS="$(curl --connect-timeout 10 --max-time 10 -k -s "$URL" 2>/dev/null)" -CURTIME="$[ $(date +%s) - 10 * 60 ]" -TIME="$(echo "$RESULTS" | tail -n 1)" -if (echo $TIME | grep -E "[^0-9"] > /dev/null); then echo "FAIL"; exit; fi -RES="$(([ "$CURTIME" -gt "$TIME" ] && echo "TIMEOUT") || (echo "$RESULTS" | head -n 1))" -echo $RES -echo "$RESULTS" | tail -n +2 | head -n -1 - - diff --git a/script/monitor/checks/GENERIC_NGINX_CHECK.sh b/script/monitor/generic/NGINX_CHECK.sh similarity index 56% rename from script/monitor/checks/GENERIC_NGINX_CHECK.sh rename to script/monitor/generic/NGINX_CHECK.sh index b086410..d5e92b7 100755 --- a/script/monitor/checks/GENERIC_NGINX_CHECK.sh +++ b/script/monitor/generic/NGINX_CHECK.sh @@ -1,17 +1,36 @@ #!/bin/bash _REMOTE_HOST="${1:?"FQDN of server missing: e.g. host.example.net[:port]"}" +_REMOTE_HOSTNAME_FQDN="${_REMOTE_HOST%%:*}" #Removes longest matching pattern ':*' from the end +_REMOTE_HOSTNAME_SHORT="${_REMOTE_HOSTNAME_FQDN%%.*}" #Removes longest matching pattern '.*' from the end _REMOTE_PORT="${_REMOTE_HOST}:" -_REMOTE_PORT="${_REMOTE_PORT#*:}" -_REMOTE_PORT="${_REMOTE_PORT%%:*}" +_REMOTE_PORT="${_REMOTE_PORT#*:}" #Removes shortest matching pattern '*:' from the begin +_REMOTE_PORT="${_REMOTE_PORT%%:*}" #Removes longest matching pattern ':*' from the end _REMOTE_PORT="${_REMOTE_PORT:-"22"}" _REMOTE_USER="monitoring" _SOCKET='~/.ssh/%r@%h:%p' +function checkOrStartSSHMaster() { + timeout --preserve-status 1 "ssh -O check -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" &> /dev/null \ + && echo "master checked" \ + && return 0 + + ssh -O stop -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN} &> /dev/null + ssh -o ControlMaster=auto \ + -o ControlPath=${_SOCKET} \ + -o ControlPersist=65 \ + -p ${_REMOTE_PORT} \ + -f ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN} exit &> /dev/null \ + && return 0 + + echo "Fail: checkOrStartMaster()" + return 1 +} + function checkViaHTTP() { - _STATUS="$(curl -I http://${_REMOTE_HOST} 2>/dev/null | head -n 1 | cut -d$' ' -f2)" + _STATUS="$(curl -I http://${_REMOTE_HOSTNAME_FQDN} 2>/dev/null | head -n 1 | cut -d$' ' -f2)" [ "${_STATUS}" == "200" ] \ && echo "OK" \ && return 0 @@ -20,7 +39,7 @@ function checkViaHTTP() { } function checkViaHTTPS() { - _STATUS="$(curl -k -I https://${_REMOTE_HOST} 2>/dev/null | head -n 1 | cut -d$' ' -f2)" + _STATUS="$(curl -k -I https://${_REMOTE_HOSTNAME_FQDN} 2>/dev/null | head -n 1 | cut -d$' ' -f2)" [ "${_STATUS}" == "200" ] \ && echo "OK" \ && return 0 @@ -28,22 +47,6 @@ function checkViaHTTPS() { return 1 } -function checkOrStartSSHMaster() { - timeout --preserve-status 1 "ssh -O check -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOST}" &> /dev/null \ - && return 0 - - ssh -O stop -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOST} &> /dev/null - ssh -o ControlMaster=auto \ - -o ControlPath=${_SOCKET} \ - -o ControlPersist=65 \ - -p ${_REMOTE_PORT} \ - -f ${_REMOTE_USER}@${_REMOTE_HOST} exit &> /dev/null \ - && return 0 - - echo "Fail: checkOrStartMaster()" - return 1 -} - #grep: # -E Use regexp, '.*' => any chars between 'Active:' and '(running)', the round brackets are escaped. @@ -54,8 +57,8 @@ function checkViaSSH() { checkOrStartSSHMaster \ || return 1 - _RESULT=$(ssh -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOST} 'systemctl status nginx.service' | grep -E 'Active:.*\(running\)' | cut -d';' -f2) - ! [ -z "${_RESULT}" ] && echo "INFO#UPTIME:${_RESULT}" || echo "FAIL" + _RESULT=$(ssh -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN} 'systemctl status nginx.service' | grep -E 'Active:.*\(running\)' | cut -d';' -f2) + ! [ -z "${_RESULT}" ] && echo "OK#UPTIME:${_RESULT}" || echo "FAIL" } #checkViaHTTP && exit 0 diff --git a/script/monitor/checks/GENERIC_PING_CHECK.sh b/script/monitor/generic/PING_CHECK.sh similarity index 100% rename from script/monitor/checks/GENERIC_PING_CHECK.sh rename to script/monitor/generic/PING_CHECK.sh diff --git a/script/monitor/checks/GENERIC_URL_CHECK.sh b/script/monitor/generic/URL_CHECK.sh similarity index 100% rename from script/monitor/checks/GENERIC_URL_CHECK.sh rename to script/monitor/generic/URL_CHECK.sh diff --git a/script/monitor/generic/ZFS_SYNC_CHECK.sh b/script/monitor/generic/ZFS_SYNC_CHECK.sh new file mode 100755 index 0000000..9e61676 --- /dev/null +++ b/script/monitor/generic/ZFS_SYNC_CHECK.sh @@ -0,0 +1,96 @@ +#!/bin/bash + +_SCRIPT="$(readlink -f "${0}" 2> /dev/null)" + +# Folders always ends with an tailing '/' +_CIS_ROOT="${_SCRIPT%%/script/monitor/*}/" #Removes longest matching pattern '/script/monitor/*' from the end +_DOMAIN="$("${_CIS_ROOT:?"Missing CIS_ROOT"}core/printOwnDomain.sh")" +_COMPOSITIONS="${_CIS_ROOT:?"Missing CIS_ROOT"}definitions/${_DOMAIN:?"Missing DOMAIN"}/compositions/" + +_REMOTE_HOST="${1:?"FQDN of server missing: e.g. host.example.net[:port]"}" +_MODE="${2:-"normal"}" +_REMOTE_HOSTNAME_FQDN="${_REMOTE_HOST%%:*}" #Removes longest matching pattern ':*' from the end +_REMOTE_HOSTNAME_SHORT="${_REMOTE_HOSTNAME_FQDN%%.*}" #Removes longest matching pattern '.*' from the end +_REMOTE_PORT="${_REMOTE_HOST}:" +_REMOTE_PORT="${_REMOTE_PORT#*:}" #Removes shortest matching pattern '*:' from the begin +_REMOTE_PORT="${_REMOTE_PORT%%:*}" #Removes longest matching pattern ':*' from the end +_REMOTE_PORT="${_REMOTE_PORT:-"22"}" +_REMOTE_USER="monitoring" +_SOCKET='~/.ssh/%r@%h:%p' + +_NOW_UTC_UNIXTIME=$(date -u +%s) +_DEBUG_PATH="/tmp/monitor/" + + + +function checkOrStartSSHMaster() { + timeout --preserve-status 1 "ssh -O check -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" &> /dev/null \ + && echo "master checked" \ + && return 0 + + ssh -O stop -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN} &> /dev/null + ssh -o ControlMaster=auto \ + -o ControlPath=${_SOCKET} \ + -o ControlPersist=65 \ + -p ${_REMOTE_PORT} \ + -f ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN} exit &> /dev/null \ + && return 0 + + echo "Fail: checkOrStartMaster()" + return 1 +} + +function checkSync() { + checkOrStartSSHMaster \ + || return 1 + + [ "${_MODE}" == "debug" ] \ + && mkdir -p "${_DEBUG_PATH}" > /dev/null \ + && echo "Now: ${_NOW_UTC_UNIXTIME}" > ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOSTNAME_FQDN}.txt + + ! [ -d "${_COMPOSITIONS:?"Missing COMPOSITIONS"}" ] \ + && echo "OK#no compositions" \ + && return 0 + + _SNAPSHOTS="$(ssh -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN} zfs list -po creation,name -r -t snapshot zpool1/persistent | grep -F @SYNC_${_REMOTE_HOSTNAME_FQDN})" + [ "${_MODE}" == "debug" ] \ + && echo "${_SNAPSHOTS}" > ${_DEBUG_PATH}SNAPSHOTS_${_REMOTE_HOSTNAME_FQDN}.txt + + [ -z "${_SNAPSHOTS}" ] \ + && echo "FAIL#no snapshots" \ + && return 1 + + echo "OK#Checks running" + + for _COMPOSITION_PATH in ${_COMPOSITIONS}*; do + + # Skip if remote host is not responsible for this container-composition + grep -vF "${_REMOTE_HOST}" "${_COMPOSITION_PATH}/zfssync-hosts" &> /dev/null \ + && continue; + + _COMPOSITION_NAME="${_COMPOSITION_PATH##*/}" #Removes longest matching pattern '*/' from the begin + _LAST_SNAPSHOT_UNIXTIME="$(echo "${_SNAPSHOTS}" | grep ${_COMPOSITION_NAME} | tail -n 1 | cut -d' ' -f1)" + _SECONDS_BEHIND=$[ ${_NOW_UTC_UNIXTIME} - ${_LAST_SNAPSHOT_UNIXTIME} ] + + [ "${_MODE}" == "debug" ] \ + && echo "${_LAST_SNAPSHOT_UNIXTIME} ${_COMPOSITION_NAME} on ${_REMOTE_HOSTNAME_FQDN} behind: ${_SECONDS_BEHIND}s" >> ${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOSTNAME_FQDN}.txt + + [ "${_SECONDS_BEHIND}" -lt 40 ] \ + && continue + + [ "${_SECONDS_BEHIND}" -lt 60 ] \ + && echo "ZFSSYNC_of_${_REMOTE_HOSTNAME_SHORT}_LAGGING?WARN#${_COMPOSITION_NAME} ${_SECONDS_BEHIND}s" \ + && continue + + echo "ZFSSYNC_of_${_REMOTE_HOSTNAME_SHORT}_LAGGING?FAIL#${_COMPOSITION_NAME} ${_SECONDS_BEHIND}s" + done +} + + + +RESULTS="$(checkSync)" + +[ "${_MODE}" == "debug" ] \ + && echo "$RESULTS" > ${_DEBUG_PATH}RESULTS_${_REMOTE_HOSTNAME_FQDN}.txt + +echo "$RESULTS" diff --git a/script/monitor/checks/GENERIC_ZFSPOOL_USAGE_CHECK.sh b/script/monitor/generic/ZFS_USAGE_CHECK.sh similarity index 67% rename from script/monitor/checks/GENERIC_ZFSPOOL_USAGE_CHECK.sh rename to script/monitor/generic/ZFS_USAGE_CHECK.sh index c9a8fa5..685c9a7 100755 --- a/script/monitor/checks/GENERIC_ZFSPOOL_USAGE_CHECK.sh +++ b/script/monitor/generic/ZFS_USAGE_CHECK.sh @@ -1,24 +1,28 @@ #!/bin/bash _REMOTE_HOST="${1:?"FQDN of server missing: e.g. host.example.net[:port]"}" +_REMOTE_HOSTNAME_FQDN="${_REMOTE_HOST%%:*}" #Removes longest matching pattern ':*' from the end +_REMOTE_HOSTNAME_SHORT="${_REMOTE_HOSTNAME_FQDN%%.*}" #Removes longest matching pattern '.*' from the end _REMOTE_PORT="${_REMOTE_HOST}:" -_REMOTE_PORT="${_REMOTE_PORT#*:}" -_REMOTE_PORT="${_REMOTE_PORT%%:*}" +_REMOTE_PORT="${_REMOTE_PORT#*:}" #Removes shortest matching pattern '*:' from the begin +_REMOTE_PORT="${_REMOTE_PORT%%:*}" #Removes longest matching pattern ':*' from the end _REMOTE_PORT="${_REMOTE_PORT:-"22"}" _REMOTE_USER="monitoring" _SOCKET='~/.ssh/%r@%h:%p' + + function checkOrStartSSHMaster() { - timeout --preserve-status 1 "ssh -O check -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOST}" &> /dev/null \ + timeout --preserve-status 1 "ssh -O check -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" &> /dev/null \ && echo "master checked" \ && return 0 - ssh -O stop -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOST} &> /dev/null + ssh -O stop -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN} &> /dev/null ssh -o ControlMaster=auto \ -o ControlPath=${_SOCKET} \ -o ControlPersist=65 \ -p ${_REMOTE_PORT} \ - -f ${_REMOTE_USER}@${_REMOTE_HOST} exit &> /dev/null \ + -f ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN} exit &> /dev/null \ && return 0 echo "Fail: checkOrStartMaster()" @@ -29,7 +33,7 @@ function testSpace(){ checkOrStartSSHMaster \ || return 1 - local _RESULT="$(ssh -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOST} 'zpool list -H -o capacity,name')" + local _RESULT="$(ssh -S ${_SOCKET} -p ${_REMOTE_PORT} ${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN} 'zpool list -H -o capacity,name')" local _SPACE_USED=$(echo "${_RESULT}" | /usr/bin/tail -n 1 | /usr/bin/cut -f1) local _POOL=$(echo "${_RESULT}" | /usr/bin/tail -n 1 | /usr/bin/cut -f2)