mirror of
https://github.com/m8tin/cis.git
synced 2025-12-06 07:48:26 +01:00
Monitoring framework for minute-by-minute service testing, including live dashboard
This commit is contained in:
45
script/monitor/generic/CIS_OWN_DOMAIN_CHECK.sh
Executable file
45
script/monitor/generic/CIS_OWN_DOMAIN_CHECK.sh
Executable file
@@ -0,0 +1,45 @@
|
||||
#!/bin/bash

# Usage: CIS_OWN_DOMAIN_CHECK.sh host.example.net[:port]
#   $1  FQDN of the server to check, optionally followed by ':port' (SSH port, default 22).
_REMOTE_HOST="${1:?"FQDN of server missing: e.g. host.example.net[:port]"}"
_REMOTE_HOSTNAME_FQDN="${_REMOTE_HOST%%:*}"             #Removes longest matching pattern ':*' from the end
_REMOTE_HOSTNAME_SHORT="${_REMOTE_HOSTNAME_FQDN%%.*}"   #Removes longest matching pattern '.*' from the end

# A ':' is appended first, so a host given without port yields an empty port instead of the hostname.
_REMOTE_PORT="${_REMOTE_HOST}:"
_REMOTE_PORT="${_REMOTE_PORT#*:}"                       #Removes shortest matching pattern '*:' from the begin
_REMOTE_PORT="${_REMOTE_PORT%%:*}"                      #Removes longest matching pattern ':*' from the end
_REMOTE_PORT="${_REMOTE_PORT:-"22"}"                    #Falls back to the default SSH port
_REMOTE_USER="monitoring"

# Single quotes on purpose: the value is handed to ssh verbatim as control socket path,
# '~' and the tokens %r, %h and %p are meant to be resolved by ssh, not by the shell.
_SOCKET='~/.ssh/%r@%h:%p'
|
||||
|
||||
|
||||
|
||||
# Makes sure a multiplexed SSH master connection to the remote host is available.
# Globals (read): _SOCKET, _REMOTE_PORT, _REMOTE_USER, _REMOTE_HOSTNAME_FQDN
# Outputs:        "FAIL#SSH connection (setup ok?)" on stdout if no master could be started
# Returns:        0 if a master is running or was started, 1 otherwise
function checkOrStartSSHMaster() {
    local _TARGET="${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}"

    # Fast path: an existing master answers the check; 'timeout' limits the check to one second.
    timeout --preserve-status 1 ssh -O check -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_TARGET}" 2>&1 \
        | grep -q -F 'Master running' \
        && return 0

    # Ask a possibly stale master to stop; the result is ignored on purpose.
    ssh -O stop -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_TARGET}" &> /dev/null

    # Start a new master in the background (-f) that just runs 'exit' remotely.
    ssh -o ControlMaster=auto \
        -o ControlPath="${_SOCKET}" \
        -o ControlPersist=65 \
        -p "${_REMOTE_PORT}" \
        -f "${_TARGET}" exit &> /dev/null \
        && return 0

    echo "FAIL#SSH connection (setup ok?)"
    return 1
}
|
||||
|
||||
# Runs '/cis/core/printOwnDomain.sh' on the remote host and reports whether it wrote to stderr.
# Globals (read): _SOCKET, _REMOTE_PORT, _REMOTE_USER, _REMOTE_HOSTNAME_FQDN
# Outputs:        "OK" if the remote script printed nothing to stderr, otherwise a "WARNING#..." line
# Returns:        1 if the SSH master is not available, 0 otherwise (also for the warning)
function testDomain(){
    checkOrStartSSHMaster \
        || return 1

    # '2>&1 1>/dev/null' keeps only stderr: the printed domain itself is discarded.
    local _RESULT
    _RESULT="$(ssh -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" 'bash /cis/core/printOwnDomain.sh' 2>&1 1>/dev/null)"

    [ -z "${_RESULT}" ] \
        && echo "OK" \
        && return 0

    # NOTE(review): other checks emit 'WARN#' - confirm the dashboard also understands 'WARNING#'.
    echo "WARNING#Check hosts '/cis/core/printOwnDomain'"
    return 0
}
|
||||
|
||||
# Run the check; a failed check ends the script with exit code 1 (made explicit like in the sibling checks).
testDomain && exit 0

exit 1
|
||||
67
script/monitor/generic/NGINX_CHECK.sh
Executable file
67
script/monitor/generic/NGINX_CHECK.sh
Executable file
@@ -0,0 +1,67 @@
|
||||
#!/bin/bash

# Usage: NGINX_CHECK.sh host.example.net[:port]
#   $1  FQDN of the server to check, optionally followed by ':port' (SSH port, default 22).
_REMOTE_HOST="${1:?"FQDN of server missing: e.g. host.example.net[:port]"}"
_REMOTE_HOSTNAME_FQDN="${_REMOTE_HOST%%:*}"             #Removes longest matching pattern ':*' from the end
_REMOTE_HOSTNAME_SHORT="${_REMOTE_HOSTNAME_FQDN%%.*}"   #Removes longest matching pattern '.*' from the end

# A ':' is appended first, so a host given without port yields an empty port instead of the hostname.
_REMOTE_PORT="${_REMOTE_HOST}:"
_REMOTE_PORT="${_REMOTE_PORT#*:}"                       #Removes shortest matching pattern '*:' from the begin
_REMOTE_PORT="${_REMOTE_PORT%%:*}"                      #Removes longest matching pattern ':*' from the end
_REMOTE_PORT="${_REMOTE_PORT:-"22"}"                    #Falls back to the default SSH port
_REMOTE_USER="monitoring"

# Single quotes on purpose: the value is handed to ssh verbatim as control socket path,
# '~' and the tokens %r, %h and %p are meant to be resolved by ssh, not by the shell.
_SOCKET='~/.ssh/%r@%h:%p'
|
||||
|
||||
|
||||
|
||||
# Makes sure a multiplexed SSH master connection to the remote host is available.
# Globals (read): _SOCKET, _REMOTE_PORT, _REMOTE_USER, _REMOTE_HOSTNAME_FQDN
# Outputs:        "FAIL#SSH connection (setup ok?)" on stdout if no master could be started
# Returns:        0 if a master is running or was started, 1 otherwise
function checkOrStartSSHMaster() {
    local _TARGET="${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}"

    # Fast path: an existing master answers the check; 'timeout' limits the check to one second.
    timeout --preserve-status 1 ssh -O check -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_TARGET}" 2>&1 \
        | grep -q -F 'Master running' \
        && return 0

    # Ask a possibly stale master to stop; the result is ignored on purpose.
    ssh -O stop -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_TARGET}" &> /dev/null

    # Start a new master in the background (-f) that just runs 'exit' remotely.
    ssh -o ControlMaster=auto \
        -o ControlPath="${_SOCKET}" \
        -o ControlPersist=65 \
        -p "${_REMOTE_PORT}" \
        -f "${_TARGET}" exit &> /dev/null \
        && return 0

    echo "FAIL#SSH connection (setup ok?)"
    return 1
}
|
||||
|
||||
# Requests the headers of http://<host> and checks for status code 200.
# Globals (read): _REMOTE_HOSTNAME_FQDN
# Outputs:        "OK" if the status code is 200, nothing otherwise
# Returns:        0 on status 200, 1 otherwise
function checkViaHTTP() {
    # The status code is the second space separated field of the first response line.
    # The timeouts (same values as in URL_CHECK.sh) keep an unresponsive host from blocking the check.
    local _STATUS
    _STATUS="$(curl --connect-timeout 10 --max-time 10 -I "http://${_REMOTE_HOSTNAME_FQDN}" 2>/dev/null | head -n 1 | cut -d' ' -f2)"

    [ "${_STATUS}" == "200" ] \
        && echo "OK" \
        && return 0

    return 1
}
|
||||
|
||||
# Requests the headers of https://<host> (certificate not verified, -k) and checks for status code 200.
# Globals (read): _REMOTE_HOSTNAME_FQDN
# Outputs:        "OK" if the status code is 200, nothing otherwise
# Returns:        0 on status 200, 1 otherwise
function checkViaHTTPS() {
    # The status code is the second space separated field of the first response line.
    # The timeouts (same values as in URL_CHECK.sh) keep an unresponsive host from blocking the check.
    local _STATUS
    _STATUS="$(curl --connect-timeout 10 --max-time 10 -k -I "https://${_REMOTE_HOSTNAME_FQDN}" 2>/dev/null | head -n 1 | cut -d' ' -f2)"

    [ "${_STATUS}" == "200" ] \
        && echo "OK" \
        && return 0

    return 1
}
|
||||
|
||||
#grep:
# -E  Use regexp, '.*' => any chars between 'Active:' and '(running)', the round brackets are escaped.

#cut:
# -d  Delimiter, marker where to cut (here ;)
# -f  Index of column to show (One based, so there is no -f0)

# Asks systemd on the remote host for the state of nginx.service.
# Globals (read): _SOCKET, _REMOTE_PORT, _REMOTE_USER, _REMOTE_HOSTNAME_FQDN
# Outputs:        "OK#UPTIME:<text after the ';' of the 'Active:' line>" if nginx is running, "FAIL" otherwise
# Returns:        1 if the SSH master is not available, 0 otherwise (also for "FAIL")
function checkViaSSH() {
    checkOrStartSSHMaster \
        || return 1

    local _RESULT
    _RESULT="$(ssh -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" 'systemctl status nginx.service' | grep -E 'Active:.*\(running\)' | cut -d';' -f2)"

    if [ -n "${_RESULT}" ]; then
        echo "OK#UPTIME:${_RESULT}"
    else
        echo "FAIL"
    fi
    return 0
}
|
||||
|
||||
# Only the SSH based check is active; the HTTP(S) variants are kept as disabled alternatives.
#checkViaHTTP && exit 0
#checkViaHTTPS && exit 0
checkViaSSH && exit 0

exit 1
|
||||
9
script/monitor/generic/PING_CHECK.sh
Executable file
9
script/monitor/generic/PING_CHECK.sh
Executable file
@@ -0,0 +1,9 @@
|
||||
#!/bin/bash

# Usage: PING_CHECK.sh host.example.net
#   $1  FQDN of the server to ping.
# Prints "OK#RTT: <round trip time>" if the single ping was answered, otherwise "FAIL#PLEASE USE FALLBACK!".
_SERVER="${1:?"FQDN of server missing"}"

# -4              Use IPv4
# -W SECONDS      Wait seconds for an answer
# -c COUNT_VALUE  Count of pings being executed
# The reply line contains 'time=<value>'; the value is the fourth '=' separated field.
_RESULT="$(ping -4 -W 1 -c 1 "${_SERVER}" | grep "time=" | cut -d'=' -f4)"

if [ -n "${_RESULT}" ]; then
    echo "OK#RTT: ${_RESULT}"
else
    echo "FAIL#PLEASE USE FALLBACK!"
fi
|
||||
37
script/monitor/generic/POSTGRES_CERTIFICATE_CHECK.sh
Executable file
37
script/monitor/generic/POSTGRES_CERTIFICATE_CHECK.sh
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Reads the TLS certificate of a PostgreSQL server (port 5432, STARTTLS) and reports its remaining lifetime.
# Arguments: $1 - FQDN of the server
# Outputs:   "OK#<n> days remaining", "WARN#Only <n> days left" (30 days or less) or a "FAIL#..." line
# Returns:   0 if more than 30 days remain, 1 otherwise
function checkPostgresSSLCertificate() {
    local _SERVER
    _SERVER="${1:?"FQDN of server missing"}"
    readonly _SERVER

    # 'openssl x509 -enddate' prints 'notAfter=<date>', the date is the part after the '='.
    local _RESULT
    _RESULT="$(echo | openssl s_client -starttls postgres -connect "${_SERVER}":5432 -servername "${_SERVER}" 2> /dev/null | openssl x509 -noout -enddate 2> /dev/null | grep -F 'notAfter=' | cut -d'=' -f2)"
    readonly _RESULT

    [ -z "${_RESULT}" ] \
        && echo "FAIL#Unable to get cert's end date from ${_SERVER}:5432" \
        && return 1

    local _ENDDATE
    _ENDDATE="$(date --date="${_RESULT}" --utc +%s 2> /dev/null)"
    readonly _ENDDATE

    # '+' instead of '*': an empty value (date could not parse the input) must not pass as a number.
    ! echo "${_ENDDATE}" | grep -q -E "^[0-9]+$" \
        && echo "FAIL#Unable to parse end date of certificate" \
        && return 1

    local _NOW _REMAINING_DAYS
    _NOW="$(date --date now +%s)"
    _REMAINING_DAYS="$(( (_ENDDATE - _NOW) / 86400 ))"
    readonly _NOW _REMAINING_DAYS

    # 30 days or less remaining => should be warned (same threshold as in URL_CHECK.sh)
    [ "${_REMAINING_DAYS}" -le "30" ] \
        && echo "WARN#Only ${_REMAINING_DAYS} days left" \
        && return 1

    echo "OK#${_REMAINING_DAYS} days remaining"
    return 0
}
|
||||
|
||||
# Run the check with all command line arguments and map its result to the exit code of the script.
if checkPostgresSSLCertificate "$@"; then
    exit 0
fi
exit 1
|
||||
62
script/monitor/generic/URL_CHECK.sh
Executable file
62
script/monitor/generic/URL_CHECK.sh
Executable file
@@ -0,0 +1,62 @@
|
||||
#!/bin/bash
|
||||
|
||||
#curl:
|
||||
# --connect-timeout SECONDS Maximum time allowed for connection
|
||||
# -k Allow connections to SSL sites without certs (H)
|
||||
# -L Follow redirects (H)
|
||||
# --max-time SECONDS Maximum time allowed for the transfer
|
||||
# -s Silent mode. Don't output anything
|
||||
# --head Show head information only
|
||||
# --no-progress-meter Clean output for grep
|
||||
|
||||
#grep:
|
||||
# -q Quiet, no output just status codes
|
||||
# -F Interpret search term as plain text
|
||||
# Fetches a URL and checks status code, an optional search string and the certificate's remaining lifetime.
# Arguments: $1 - URL of the site
#            $2 - optional string that has to be part of the response (it is also inserted into an
#                 extended regexp, so it should not contain regexp meta characters)
# Outputs:   "OK#Certificate: <n> days remaining", "WARN#Certificate: only <n> days left" or a "FAIL#..." line
# Returns:   0 if everything is fine and more than 30 days remain, 1 otherwise
# NOTE(review): the status is detected via the text '200 OK'; responses without that reason
#               phrase (presumably HTTP/2) would be reported as failed - confirm against real targets.
function checkUrl() {
    local _URL _SEARCH_STRING
    _URL="${1:?"URL of site missing"}"
    _SEARCH_STRING="${2}"
    readonly _URL _SEARCH_STRING

    local _PATTERN="(expire.*|HTTP.*200 OK)"
    [ -n "${_SEARCH_STRING}" ] \
        && _PATTERN="(expire.*|HTTP.*200 OK|${_SEARCH_STRING})"
    readonly _PATTERN

    # The verbose output (stderr) carries the certificate and the status line, therefore '2>&1'.
    local _RESULT
    _RESULT="$(curl --connect-timeout 10 --max-time 10 --no-progress-meter --verbose "${_URL}" 2>&1 | grep -o -E "${_PATTERN}")"
    readonly _RESULT

    ! echo "${_RESULT}" | grep -q -F '200 OK' \
        && echo "FAIL#Status code 200 not found" \
        && return 1

    [ -n "${_SEARCH_STRING}" ] \
        && ! echo "${_RESULT}" | grep -q -F "${_SEARCH_STRING}" \
        && echo "FAIL#Search string not found" \
        && return 1

    # Only the first 'expire date:' line is the certificate's end date; other matches of
    # 'expire' (e.g. an 'expires:' response header or page content) must not be parsed.
    local _EXPIRE_DATE
    _EXPIRE_DATE="$(echo "${_RESULT}" | grep -F 'expire date:' | head -n 1 | cut -d':' -f2-)"
    readonly _EXPIRE_DATE

    # Without an end date there is nothing to parse; an empty date string must not reach 'date'.
    [ -z "${_EXPIRE_DATE// /}" ] \
        && echo "FAIL#Unable to parse end date of certificate" \
        && return 1

    local _ENDDATE
    _ENDDATE="$(date --date="${_EXPIRE_DATE}" --utc +%s 2> /dev/null)"
    readonly _ENDDATE

    # '+' instead of '*': an empty value (date could not parse the input) must not pass as a number.
    ! echo "${_ENDDATE}" | grep -q -E "^[0-9]+$" \
        && echo "FAIL#Unable to parse end date of certificate" \
        && return 1

    local _NOW _REMAINING_DAYS
    _NOW="$(date --date now +%s)"
    _REMAINING_DAYS="$(( (_ENDDATE - _NOW) / 86400 ))"
    readonly _NOW _REMAINING_DAYS

    # 30 days or less remaining => should be warned
    [ "${_REMAINING_DAYS}" -le "30" ] \
        && echo "WARN#Certificate: only ${_REMAINING_DAYS} days left" \
        && return 1

    echo "OK#Certificate: ${_REMAINING_DAYS} days remaining"
    return 0
}
|
||||
|
||||
# Former one-line variant of this check, kept for reference:
#((curl --connect-timeout 10 --max-time 10 -k -s --head --no-progress-meter "${_URL}" | grep -qF '200 OK') && echo OK) || echo FAIL

# Run the check and map its result to the exit code of the script.
checkUrl "${1}" "${2}" && exit 0 || exit 1
|
||||
50
script/monitor/generic/ZFS_POOL_CHECK.sh
Executable file
50
script/monitor/generic/ZFS_POOL_CHECK.sh
Executable file
@@ -0,0 +1,50 @@
|
||||
#!/bin/bash

# Usage: ZFS_POOL_CHECK.sh host.example.net[:port] zpool1
#   $1  FQDN of the server to check, optionally followed by ':port' (SSH port, default 22).
#   $2  Name of the zfs pool to check.
_REMOTE_HOST="${1:?"FQDN of server missing: e.g. host.example.net[:port]"}"
_ZFS_POOL="${2:?"Name of zfs pool missing: e.g. zpool1"}"
_REMOTE_HOSTNAME_FQDN="${_REMOTE_HOST%%:*}"             #Removes longest matching pattern ':*' from the end
_REMOTE_HOSTNAME_SHORT="${_REMOTE_HOSTNAME_FQDN%%.*}"   #Removes longest matching pattern '.*' from the end

# A ':' is appended first, so a host given without port yields an empty port instead of the hostname.
_REMOTE_PORT="${_REMOTE_HOST}:"
_REMOTE_PORT="${_REMOTE_PORT#*:}"                       #Removes shortest matching pattern '*:' from the begin
_REMOTE_PORT="${_REMOTE_PORT%%:*}"                      #Removes longest matching pattern ':*' from the end
_REMOTE_PORT="${_REMOTE_PORT:-"22"}"                    #Falls back to the default SSH port
_REMOTE_USER="monitoring"

# Single quotes on purpose: the value is handed to ssh verbatim as control socket path,
# '~' and the tokens %r, %h and %p are meant to be resolved by ssh, not by the shell.
_SOCKET='~/.ssh/%r@%h:%p'
|
||||
|
||||
|
||||
|
||||
# Makes sure a multiplexed SSH master connection to the remote host is available.
# Globals (read): _SOCKET, _REMOTE_PORT, _REMOTE_USER, _REMOTE_HOSTNAME_FQDN
# Outputs:        "FAIL#SSH connection (setup ok?)" on stdout if no master could be started
# Returns:        0 if a master is running or was started, 1 otherwise
function checkOrStartSSHMaster() {
    local _TARGET="${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}"

    # Fast path: an existing master answers the check; 'timeout' limits the check to one second.
    timeout --preserve-status 1 ssh -O check -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_TARGET}" 2>&1 \
        | grep -q -F 'Master running' \
        && return 0

    # Ask a possibly stale master to stop; the result is ignored on purpose.
    ssh -O stop -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_TARGET}" &> /dev/null

    # Start a new master in the background (-f) that just runs 'exit' remotely.
    ssh -o ControlMaster=auto \
        -o ControlPath="${_SOCKET}" \
        -o ControlPersist=65 \
        -p "${_REMOTE_PORT}" \
        -f "${_TARGET}" exit &> /dev/null \
        && return 0

    echo "FAIL#SSH connection (setup ok?)"
    return 1
}
|
||||
|
||||
# Checks the last scrub of the zfs pool ${_ZFS_POOL} on the remote host.
# Globals (read): _ZFS_POOL, _SOCKET, _REMOTE_PORT, _REMOTE_USER, _REMOTE_HOSTNAME_FQDN
# Outputs:        "OK#Scrubbed on <date>." if the scrub line reports 'repaired 0B' and 'with 0 errors',
#                 otherwise "FAIL#CHECK POOL: <pool>"
# Returns:        1 if the SSH master is not available, 0 otherwise (also for "FAIL")
function testPool(){
    checkOrStartSSHMaster \
        || return 1

    # The pool name has to be expanded locally (and quoted for the remote shell), otherwise
    # the remote side sees an unset variable and 'zpool status' reports all pools.
    local _REMOTE_COMMAND
    _REMOTE_COMMAND="zpool status $(printf '%q' "${_ZFS_POOL}") | grep -F scrub"

    local _RESPONSE
    _RESPONSE="$(ssh -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" "${_REMOTE_COMMAND}")"

    # 'with 0 errors' instead of '0 errors': the latter would also accept e.g. '10 errors'.
    local _RESULT
    _RESULT="$(echo "${_RESPONSE}" | grep -F 'scrub repaired 0B' | grep -F 'with 0 errors')"
    _RESULT="${_RESULT#*on}" #Removes shortest matching pattern '*on' from the begin

    [ -z "${_RESULT}" ] \
        && echo "FAIL#CHECK POOL: ${_ZFS_POOL}" \
        && return 0

    echo "OK#Scrubbed on ${_RESULT}."
    return 0
}
|
||||
|
||||
# Run the check; only a failing SSH connection makes testPool (and the script) fail.
testPool && exit 0

exit 1
|
||||
106
script/monitor/generic/ZFS_SYNC_CHECK.sh
Executable file
106
script/monitor/generic/ZFS_SYNC_CHECK.sh
Executable file
@@ -0,0 +1,106 @@
|
||||
#!/bin/bash

# Usage: ZFS_SYNC_CHECK.sh host.example.net[:port] [snapshot filter] [mode]
#   $1  FQDN of the server to check, optionally followed by ':port' (SSH port, default 22).
#   $2  Optional: replaces the FQDN in the snapshot filter '@SYNC_<value>'.
#   $3  Optional: mode, 'debug' writes additional files to /tmp/monitor/ (default 'normal').
_SCRIPT="$(readlink -f "${0}" 2> /dev/null)"

# Folders always end with a trailing '/'
_CIS_ROOT="${_SCRIPT%%/script/monitor/*}/" #Removes longest matching pattern '/script/monitor/*' from the end
_DOMAIN="$("${_CIS_ROOT:?"Missing CIS_ROOT"}core/printOwnDomain.sh")"
_COMPOSITIONS="${_CIS_ROOT:?"Missing CIS_ROOT"}definitions/${_DOMAIN:?"Missing DOMAIN"}/compositions/"

_REMOTE_HOST="${1:?"FQDN of server missing: e.g. host.example.net[:port]"}"
_REMOTE_HOSTNAME_FQDN="${_REMOTE_HOST%%:*}"             #Removes longest matching pattern ':*' from the end
_REMOTE_HOSTNAME_SHORT="${_REMOTE_HOSTNAME_FQDN%%.*}"   #Removes longest matching pattern '.*' from the end

# A ':' is appended first, so a host given without port yields an empty port instead of the hostname.
_REMOTE_PORT="${_REMOTE_HOST}:"
_REMOTE_PORT="${_REMOTE_PORT#*:}"                       #Removes shortest matching pattern '*:' from the begin
_REMOTE_PORT="${_REMOTE_PORT%%:*}"                      #Removes longest matching pattern ':*' from the end
_REMOTE_PORT="${_REMOTE_PORT:-"22"}"                    #Falls back to the default SSH port
_REMOTE_USER="monitoring"

# Single quotes on purpose: the value is handed to ssh verbatim as control socket path,
# '~' and the tokens %r, %h and %p are meant to be resolved by ssh, not by the shell.
_SOCKET='~/.ssh/%r@%h:%p'

# This is crucial:
#  - default value for the filter part is extracted from the first parameter (FQDN)
#  - but you can override this part to adapt the test during a change of the domain.
#    (e.g. the short hostname can be an option - or even a better default in the future)
_ZFS_SNAPSHOT_FILTER="@SYNC_${2:-"${_REMOTE_HOSTNAME_FQDN:?"Missing REMOTE_HOSTNAME_FQDN"}"}"

_MODE="${3:-"normal"}"
_NOW_UTC_UNIXTIME=$(date -u +%s)
_DEBUG_PATH="/tmp/monitor/"
|
||||
|
||||
|
||||
|
||||
# Makes sure a multiplexed SSH master connection to the remote host is available.
# Globals (read): _SOCKET, _REMOTE_PORT, _REMOTE_USER, _REMOTE_HOSTNAME_FQDN
# Outputs:        "FAIL#SSH connection (setup ok?)" on stdout if no master could be started
# Returns:        0 if a master is running or was started, 1 otherwise
function checkOrStartSSHMaster() {
    local _TARGET="${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}"

    # Fast path: an existing master answers the check; 'timeout' limits the check to one second.
    timeout --preserve-status 1 ssh -O check -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_TARGET}" 2>&1 \
        | grep -q -F 'Master running' \
        && return 0

    # Ask a possibly stale master to stop; the result is ignored on purpose.
    ssh -O stop -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_TARGET}" &> /dev/null

    # Start a new master in the background (-f) that just runs 'exit' remotely.
    ssh -o ControlMaster=auto \
        -o ControlPath="${_SOCKET}" \
        -o ControlPersist=65 \
        -p "${_REMOTE_PORT}" \
        -f "${_TARGET}" exit &> /dev/null \
        && return 0

    echo "FAIL#SSH connection (setup ok?)"
    return 1
}
|
||||
|
||||
# Checks for every composition the remote host is responsible for how old its latest sync snapshot is.
# Globals (read): _COMPOSITIONS, _ZFS_SNAPSHOT_FILTER, _NOW_UTC_UNIXTIME, _MODE, _DEBUG_PATH,
#                 _REMOTE_HOSTNAME_SHORT, _SOCKET, _REMOTE_PORT, _REMOTE_USER, _REMOTE_HOSTNAME_FQDN
# Outputs:        "OK#Checks running" followed by one 'ZFSSYNC_of_<host>_LAGGING?<WARN|FAIL>#...' line
#                 per composition that is 40 seconds or more behind (or has no snapshot at all);
#                 "WARN#no compositions" / "FAIL#no snapshots" if there is nothing to check.
#                 In mode 'debug' details are written to files below ${_DEBUG_PATH}.
# Returns:        1 if the SSH master is not available or no snapshot was found, 0 otherwise
function checkSync() {
    checkOrStartSSHMaster \
        || return 1

    local _DEBUG_FILE="${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOSTNAME_FQDN}.txt"

    [ "${_MODE}" == "debug" ] \
        && mkdir -p "${_DEBUG_PATH}" > /dev/null \
        && echo "Now: ${_NOW_UTC_UNIXTIME}" > "${_DEBUG_FILE}"

    ! [ -d "${_COMPOSITIONS:?"Missing COMPOSITIONS"}" ] \
        && echo "WARN#no compositions" \
        && return 0

    [ "${_MODE}" == "debug" ] \
        && echo "Snapshot filter: ${_ZFS_SNAPSHOT_FILTER}" >> "${_DEBUG_FILE}"

    # This retrieves the list of the interesting snapshots including creation timestamp
    local _SNAPSHOTS
    _SNAPSHOTS="$(ssh -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" zfs list -po creation,name -r -t snapshot zpool1/persistent | grep -F -- "${_ZFS_SNAPSHOT_FILTER}")"

    [ "${_MODE}" == "debug" ] \
        && echo "${_SNAPSHOTS}" > "${_DEBUG_PATH}SNAPSHOTS_${_REMOTE_HOSTNAME_FQDN}.txt"

    [ -z "${_SNAPSHOTS}" ] \
        && echo "FAIL#no snapshots" \
        && return 1

    echo "OK#Checks running"

    local _COMPOSITION_PATH _COMPOSITION_NAME _LAST_SNAPSHOT_UNIXTIME _SECONDS_BEHIND
    for _COMPOSITION_PATH in "${_COMPOSITIONS}"*; do

        # If remote host is found then it is responsible for this container-composition, otherwise skip
        # (grep -E "^[[:blank:]]*something" means: Line has to start with "something", leading blank chars are ok.)
        grep -E "^[[:blank:]]*${_REMOTE_HOSTNAME_SHORT}" "${_COMPOSITION_PATH}/zfssync-hosts" &> /dev/null \
            || continue

        _COMPOSITION_NAME="${_COMPOSITION_PATH##*/}" #Removes longest matching pattern '*/' from the begin
        _LAST_SNAPSHOT_UNIXTIME="$(echo "${_SNAPSHOTS}" | grep -F -- "${_COMPOSITION_NAME}" | tail -n 1 | cut -d' ' -f1)"

        # Without a numeric timestamp the arithmetic below would be a syntax error,
        # so a composition without any sync snapshot is reported explicitly.
        if ! [[ "${_LAST_SNAPSHOT_UNIXTIME}" =~ ^[0-9]+$ ]]; then
            echo "ZFSSYNC_of_${_REMOTE_HOSTNAME_SHORT}_LAGGING?FAIL#${_COMPOSITION_NAME} no snapshot"
            continue
        fi

        _SECONDS_BEHIND=$(( _NOW_UTC_UNIXTIME - _LAST_SNAPSHOT_UNIXTIME ))

        [ "${_MODE}" == "debug" ] \
            && echo "${_LAST_SNAPSHOT_UNIXTIME} ${_COMPOSITION_NAME} on ${_REMOTE_HOSTNAME_FQDN} behind: ${_SECONDS_BEHIND}s" >> "${_DEBUG_FILE}"

        # Less than 40 seconds behind => fine, nothing is reported
        [ "${_SECONDS_BEHIND}" -lt 40 ] \
            && continue

        # 40 to 59 seconds behind => warning
        [ "${_SECONDS_BEHIND}" -lt 60 ] \
            && echo "ZFSSYNC_of_${_REMOTE_HOSTNAME_SHORT}_LAGGING?WARN#${_COMPOSITION_NAME} ${_SECONDS_BEHIND}s" \
            && continue

        echo "ZFSSYNC_of_${_REMOTE_HOSTNAME_SHORT}_LAGGING?FAIL#${_COMPOSITION_NAME} ${_SECONDS_BEHIND}s"
    done

    return 0
}
|
||||
|
||||
|
||||
|
||||
# Collect the complete output first, so it can be stored for debugging before it is printed.
RESULTS="$(checkSync)"

[ "${_MODE}" == "debug" ] \
    && echo "$RESULTS" > "${_DEBUG_PATH}RESULTS_${_REMOTE_HOSTNAME_FQDN}.txt"

echo "$RESULTS"
|
||||
57
script/monitor/generic/ZFS_USAGE_CHECK.sh
Executable file
57
script/monitor/generic/ZFS_USAGE_CHECK.sh
Executable file
@@ -0,0 +1,57 @@
|
||||
#!/bin/bash

# Usage: ZFS_USAGE_CHECK.sh host.example.net[:port]
#   $1  FQDN of the server to check, optionally followed by ':port' (SSH port, default 22).
_REMOTE_HOST="${1:?"FQDN of server missing: e.g. host.example.net[:port]"}"
_REMOTE_HOSTNAME_FQDN="${_REMOTE_HOST%%:*}"             #Removes longest matching pattern ':*' from the end
_REMOTE_HOSTNAME_SHORT="${_REMOTE_HOSTNAME_FQDN%%.*}"   #Removes longest matching pattern '.*' from the end

# A ':' is appended first, so a host given without port yields an empty port instead of the hostname.
_REMOTE_PORT="${_REMOTE_HOST}:"
_REMOTE_PORT="${_REMOTE_PORT#*:}"                       #Removes shortest matching pattern '*:' from the begin
_REMOTE_PORT="${_REMOTE_PORT%%:*}"                      #Removes longest matching pattern ':*' from the end
_REMOTE_PORT="${_REMOTE_PORT:-"22"}"                    #Falls back to the default SSH port
_REMOTE_USER="monitoring"

# Single quotes on purpose: the value is handed to ssh verbatim as control socket path,
# '~' and the tokens %r, %h and %p are meant to be resolved by ssh, not by the shell.
_SOCKET='~/.ssh/%r@%h:%p'
|
||||
|
||||
|
||||
|
||||
# Makes sure a multiplexed SSH master connection to the remote host is available.
# Globals (read): _SOCKET, _REMOTE_PORT, _REMOTE_USER, _REMOTE_HOSTNAME_FQDN
# Outputs:        "FAIL#SSH connection (setup ok?)" on stdout if no master could be started
# Returns:        0 if a master is running or was started, 1 otherwise
function checkOrStartSSHMaster() {
    local _TARGET="${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}"

    # Fast path: an existing master answers the check; 'timeout' limits the check to one second.
    timeout --preserve-status 1 ssh -O check -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_TARGET}" 2>&1 \
        | grep -q -F 'Master running' \
        && return 0

    # Ask a possibly stale master to stop; the result is ignored on purpose.
    ssh -O stop -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_TARGET}" &> /dev/null

    # Start a new master in the background (-f) that just runs 'exit' remotely.
    ssh -o ControlMaster=auto \
        -o ControlPath="${_SOCKET}" \
        -o ControlPersist=65 \
        -p "${_REMOTE_PORT}" \
        -f "${_TARGET}" exit &> /dev/null \
        && return 0

    echo "FAIL#SSH connection (setup ok?)"
    return 1
}
|
||||
|
||||
# Compares the used capacity of the last pool listed by 'zpool list' on the remote host with two thresholds.
# Arguments: $1 - OK threshold in percent (usage up to this value is OK)
#            $2 - INFO threshold in percent (usage up to this value is INFO, above it is FAIL)
# Globals (read): _SOCKET, _REMOTE_PORT, _REMOTE_USER, _REMOTE_HOSTNAME_FQDN
# Outputs:   one "OK#...", "INFO#..." or "FAIL#..." line
# Returns:   1 if the SSH master is not available, 0 otherwise (also for "FAIL")
function testSpace(){
    checkOrStartSSHMaster \
        || return 1

    # -H prints tab separated columns without header: <capacity>%<TAB><pool name>
    local _RESULT
    _RESULT="$(ssh -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" 'zpool list -H -o capacity,name')"

    # Only the last listed pool is evaluated.
    local _LAST_POOL_LINE _SPACE_USED _POOL
    _LAST_POOL_LINE="$(echo "${_RESULT}" | tail -n 1)"
    _SPACE_USED="$(echo "${_LAST_POOL_LINE}" | cut -f1)"
    _POOL="$(echo "${_LAST_POOL_LINE}" | cut -f2)"

    [ -z "${_SPACE_USED}" ] \
        && echo "FAIL#NO value" \
        && return 0

    # '%\%*' strips the percent sign, so the value can be compared as a number
    [ "${1:?"Missing OK_THRESHOLD"}" -ge "${_SPACE_USED%\%*}" ] \
        && echo "OK#${_SPACE_USED} used ${_POOL}." \
        && return 0

    [ "${2:?"Missing INFO_THRESHOLD"}" -ge "${_SPACE_USED%\%*}" ] \
        && echo "INFO#${_SPACE_USED} already used ${_POOL}." \
        && return 0

    echo "FAIL#${_SPACE_USED} used ${_POOL}!"
    return 0
}
|
||||
|
||||
# Usage up to 80% is OK, up to 90% is INFO, above that FAIL.
# Only a failing SSH connection makes testSpace (and the script) fail.
testSpace 80 90 && exit 0

exit 1
|
||||
Reference in New Issue
Block a user