diff --git a/script/monitor/README.md b/script/monitor/README.md new file mode 100644 index 0000000..818af5f --- /dev/null +++ b/script/monitor/README.md @@ -0,0 +1,33 @@ + +Monitoring - How it works +========================= + +Basics +------ + +You have to set up the monitoring host first. That host will monitor your other machines. +Execute `/cis/script/monitor/setupMonitoringHost.sh` to start the process. + +As usual, you can configure this feature via definitions. +``` +# Path of this feature's scripts : '/cis/script /monitor' +# Path of the corresponding definitions: '/cis/definitions/YOUR.DOMAIN/monitor' +ls -lha '/cis/script/monitor' +ls -lha '/cis/definitions/YOUR.DOMAIN/monitor' +``` + +You can modify the appearance and place your own `check.css` or `logo.png` into the definitions folder: + + - /cis/definitions/YOUR.DOMAIN/monitor/check.css + - /cis/definitions/YOUR.DOMAIN/monitor/logo.png + +After the change, you have to call `/cis/script/monitor/setupMonitoringHost.sh` again, +because it creates links in '/var/www/html/' and gives the definitions priority over the script. +Additionally, you need to configure a webserver to publish the site. + + + +Dashboard +--------- + +You can set up a dashboard by following this manual: [SETUP_DASHBOARD.md](SETUP_DASHBOARD.md) diff --git a/script/monitor/SETUP_DASHBOARD.md b/script/monitor/SETUP_DASHBOARD.md new file mode 100644 index 0000000..55a46d4 --- /dev/null +++ b/script/monitor/SETUP_DASHBOARD.md @@ -0,0 +1,126 @@ + +How to set up a monitoring dashboard +==================================== + +Inspired by: https://pimylifeup.com/ubuntu-chromium-kiosk/ + +Steps +----- + + + +### 1.) Install Ubuntu Server (no desktop) on your computer, then set the hostname and timezone. + +```sh +hostnamectl set-hostname check.local +timedatectl set-timezone Europe/Berlin +``` + + + +### 2.) Install minimal GUI and Tools. 
+ +```sh +apt install ubuntu-desktop-minimal +apt install language-pack-gnome-de +apt install xdotool +apt install dbus-x11 +``` + + + +### 3.) Create a kiosk user with a home directory. + +```sh +useradd -m kiosk +``` + +and disable the welcome screen +```sh +echo "yes" > /home/kiosk/.config/gnome-initial-setup-done +``` + + + +### 4.) Edit the following file `nano /etc/gdm3/custom.conf` to turn off Wayland and turn on autologin for user 'kiosk'. + +``` +[daemon] +# Uncomment the line below to force the login screen to use Xorg +#WaylandEnable=false + +WaylandEnable=false + +# Enabling automatic login +# AutomaticLoginEnable = true +# AutomaticLogin = user1 + +AutomaticLoginEnable = true +AutomaticLogin = kiosk +``` + + + +### 5.) Configure the GUI of user kiosk to prevent the monitor from sleeping + +```sh +#gsettings list-recursively + +# Does not work +#sudo -u kiosk gsettings set org.gnome.desktop.session idle-delay 0 + +# Set idle-delay from "uint32 300" to "uint32 0", needs 'apt install dbus-x11' +# You can check the value in "GUI-Session of kiosk -> Settings -> Power" +sudo -u kiosk dbus-launch dconf write /org/gnome/desktop/session/idle-delay "uint32 0" +``` + + + +### 6.) Create a custom service that starts Firefox and loads the page. 
+ +To do so, create the file `/etc/systemd/system/kiosk.service` with this content: + +``` +[Unit] +Description=Firefox Kiosk +Wants=graphical.target +After=graphical.target + +[Service] +Environment=DISPLAY=:0 +# Set firefox language, needs 'apt install language-pack-gnome-de' +Environment=LANG=de_DE.UTF-8 +Type=simple +# Always a fresh firefox ('-' allow error if common does not exist) +ExecStartPre=-/usr/bin/rm -r /home/kiosk/snap/firefox/common +# Move Mouse (should also work on small screens), needs 'apt install dbus-x11' +ExecStartPre=/usr/bin/xdotool mousemove 4096 2160 +# See: https://wiki.mozilla.org/Firefox/CommandLineOptions (just -kiosk URL => Start-Assistant, so use -url too) +ExecStart=/usr/bin/firefox -fullscreen -kiosk -url http://monitor.example.net/check.html +Restart=always +RestartSec=30 +User=kiosk +Group=kiosk + +[Install] +WantedBy=graphical.target +``` + + + +### 7.) Enable the service and reboot + +```sh +systemctl enable kiosk +reboot +``` + + + +Troubleshooting +--------------- + +``` +systemctl disable pd-mapper.service +apt purge cloud-init -y && apt autoremove --purge -y +``` diff --git a/script/monitor/check.css b/script/monitor/check.css new file mode 100644 index 0000000..f1f5ba3 --- /dev/null +++ b/script/monitor/check.css @@ -0,0 +1,77 @@ +html, body { + --background-theme-color: #001EA0; + --cell-space: 20px; + --logo-height: 50px; + + background-color: #cccccc; + font-family: Verdana; + font-size: 14pt; + color: #ffffff; + height: 100%; + margin: 0; +} +@media screen and (orientation: portrait) { + body { + zoom: 200% + } +} +#header { + background-color: var(--background-theme-color); + position: sticky; + top: 0; + height: calc(var(--logo-height) + (2 * var(--cell-space))); + width: 100%; +} +#header img { + height: var(--logo-height); + margin: var(--cell-space); + vertical-align: middle; +} +#header h1 { + display: inline; + font-weight: normal; + vertical-align: middle; +} +#content { + min-height: 100%; +} +#footer { + 
background-color: var(--background-theme-color); + position: sticky; + bottom: 0px; + padding: var(--cell-space); + text-align: center; + vertical-align: middle; + font-size: 22pt; +} +#checks { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(350px, 1fr)); + padding: var(--cell-space); + grid-gap: var(--cell-space); +} +#checks > div { + border: 1px solid black; + border-radius: 10px; + padding: 10px; + text-align: center; + box-shadow: 0 2px 5px 0 rgba(0, 0, 0, 0.3), 0 2px 10px 0 rgba(0, 0, 0, 0.2); +} +#checks > div.ok { + background-color: #66aa22; + color: #222222; +} +#checks > div.info { + background-color: #88cc44; + color: #222222; +} +#checks > div.warn { + background-color: #ffdd00; + color: #222222; +} +#checks > div.fail { + background-color: #ff0000; +} +#checks > div.timeout { + background-color: var(--background-theme-color); +} diff --git a/script/monitor/check.html b/script/monitor/check.html new file mode 100644 index 0000000..df93be0 --- /dev/null +++ b/script/monitor/check.html @@ -0,0 +1,122 @@ + + + + Monitoring Dashboard + + + + +
+
+
Loading...
+
+
# Runs every enabled check ('*.on') found in ${_DOMAIN_CHECKS} in parallel
# and prints the collected report to stdout.
#
# Globals:   _DOMAIN_CHECKS (read) - folder with the checks, ends with '/'
# Arguments: $1 - working directory for the intermediate result files;
#                 it is created if missing and REMOVED again at the end
# Outputs:   one 'CHECK?RESULT[#MESSAGE]' line per check, followed by the
#            summary line 'MISSED?<count of results containing FAIL>#<HH-MM-SS>'
# Returns:   0 after the report was written, 1 if TMPDIR cannot be created
function doChecks(){
    local -r _TMPDIR="${1:?"doChecks(): Missing parameter TMPDIR:"}"

    local _DATETIME
    _DATETIME="$(date +%H-%M-%S)"

    mkdir -p "${_TMPDIR}" \
        || return 1
    # ':?' guards against ever expanding to 'rm -f /*'
    rm -f "${_TMPDIR:?}"/* > /dev/null 2>&1

    local check _CHECK_NAME _RESULT_FILE
    for check in "${_DOMAIN_CHECKS:?"Missing DOMAIN_CHECKS"}"*.on
    do
        # Without any '*.on' file the pattern stays literal: nothing to run
        [ -f "${check}" ] \
            || continue

        _RESULT_FILE="${_TMPDIR}/${check##*/}"
        _CHECK_NAME="${check##*/}"
        _CHECK_NAME="${_CHECK_NAME%%.on}"

        # Each check runs in its own background subshell, so a slow check
        # does not delay the others.
        (
            _OUTPUT="$(timeout -k 10s 20s bash "${check}" 2> /dev/null)"
            _STATUS=$?
            {
                printf '%s?' "${_CHECK_NAME}"
                # timeout(1) exits with 124 when the time limit was hit and
                # with 137 when it had to send KILL. A check that fails
                # without printing anything is still reported as TIMEOUT (as
                # before), but a check that printed its own result and then
                # exited non-zero no longer gets a second, malformed line.
                if [ "${_STATUS}" -eq 124 ] || [ "${_STATUS}" -eq 137 ] \
                    || { [ "${_STATUS}" -ne 0 ] && [ -z "${_OUTPUT}" ]; }; then
                    [ -z "${_OUTPUT}" ] || printf '%s\n' "${_OUTPUT}"
                    printf '%s\n' "TIMEOUT#Timeout"
                else
                    printf '%s\n' "${_OUTPUT}"
                fi
            } > "${_RESULT_FILE}"
        ) &
    done
    wait

    local _FAILED=0 resultFile
    echo "CHECK?RESULT[#MESSAGE]:"
    echo "-----------------------"
    for resultFile in "${_TMPDIR}"/*
    do
        [ -f "${resultFile}" ] \
            || continue
        cat "${resultFile}"
        grep -q "FAIL" "${resultFile}" && _FAILED=$(( _FAILED + 1 ))
    done
    echo "MISSED?${_FAILED}#${_DATETIME}"

    rm -r "${_TMPDIR:?}" > /dev/null 2>&1
    return 0
}
# Entry point: dispatches the command given on the command line.
#
# Arguments: $1 - command: 'all' or 'auto' (anything else prints the usage)
#            $2 - out_file, required for 'auto'
# Outputs:   'all' prints the report to stdout, 'auto' writes it to out_file
# Returns:   0 on success (also after printing the usage), 1 otherwise
main(){
    local _WORKDIR

    case "${1:-""}" in
        all)
            # A private directory per run: doChecks empties and removes its
            # working directory, so it must never be shared between runs.
            _WORKDIR="$(mktemp -d /tmp/checks.XXXXXX)" \
                || return 1
            echo "Checks werden ausgeführt..." \
            && echo \
            && doChecks "${_WORKDIR}" \
            && echo \
            && echo "Success" \
            && return 0
            ;;
        auto)
            # Without out_file the report would end up in '/tmp/.new'
            if [ -z "${2:-""}" ]; then
                echo "Missing parameter out_file." >&2
                usage >&2
                return 1
            fi

            _WORKDIR="$(mktemp -d /tmp/checks.XXXXXX)" \
                || return 1

            # If just a filename is given it is created in /tmp, because of 'cd /tmp'
            cd /tmp \
                || return 1

            # Write to a side file first and rename it afterwards, so that a
            # reader of out_file never sees a half written report.
            if doChecks "${_WORKDIR}" > "${2}.new" && mv -f -- "${2}.new" "${2}"; then
                return 0
            fi
            rm -f -- "${2}.new"
            return 1
            ;;
        *)
            [ "${1:+isset}" == "isset" ] \
            && echo "Parameter '${1}' ist kein gültiger Befehl."
            usage
            return 0
            ;;
    esac

    return 1
}
# Runs '/cis/core/printOwnDomain.sh' on the remote host through the shared
# SSH master connection and reports whether it wrote anything to stderr.
#
# Globals:   _SOCKET, _REMOTE_PORT, _REMOTE_USER, _REMOTE_HOSTNAME_FQDN (read)
# Outputs:   'OK' if stderr stayed empty, otherwise a 'WARN#...' line
#            (or the FAIL line of checkOrStartSSHMaster)
# Returns:   1 if no SSH connection is available, 0 otherwise
function testDomain(){
    checkOrStartSSHMaster \
    || return 1

    # '2>&1 1>/dev/null' keeps stderr only: stdout of the remote script is
    # dropped, any error message ends up in _RESULT.
    local _RESULT
    _RESULT="$(ssh -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" 'bash /cis/core/printOwnDomain.sh' 2>&1 1>/dev/null)"

    if [ -z "${_RESULT}" ]; then
        echo "OK"
        return 0
    fi

    # 'WARN' is the keyword the other checks emit and check.css styles
    # ('div.warn'); the former 'WARNING' matched neither.
    echo "WARN#Check hosts '/cis/core/printOwnDomain'"
    return 0
}
# Makes sure a multiplexed SSH master connection to the remote host exists,
# so that the actual check can reuse it via 'ssh -S'.
#
# Globals:   _SOCKET, _REMOTE_PORT, _REMOTE_USER, _REMOTE_HOSTNAME_FQDN (read)
# Outputs:   'FAIL#SSH connection (setup ok?)' if no master could be started
# Returns:   0 if a master is running (or was started), 1 otherwise
function checkOrStartSSHMaster() {
    local -r _TARGET="${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}"

    # Ask an existing master for its state, but never wait longer than 1s
    timeout --preserve-status 1 ssh -O check -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_TARGET}" 2>&1 | grep -q -F 'Master running' \
    && return 0

    # Tell a possibly stale master to stop, then start a fresh one.
    # ConnectTimeout lets an unreachable host end in the FAIL line below
    # instead of blocking until the caller gives up on the whole check.
    ssh -O stop -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_TARGET}" &> /dev/null
    ssh -o ControlMaster=auto \
        -o ControlPath="${_SOCKET}" \
        -o ControlPersist=65 \
        -o ConnectTimeout=10 \
        -p "${_REMOTE_PORT}" \
        -f "${_TARGET}" exit &> /dev/null \
    && return 0

    echo "FAIL#SSH connection (setup ok?)"
    return 1
}
# Reads the TLS certificate a PostgreSQL server presents on port 5432 and
# reports how many days are left until it expires.
#
# Arguments: $1 - FQDN of the server
# Outputs:   'OK#<n> days remaining', 'WARN#Only <n> days left' or 'FAIL#...'
# Returns:   0 for OK, 1 for WARN and FAIL
function checkPostgresSSLCertificate() {
    local -r _SERVER="${1:?"FQDN of server missing"}"
    # Same limit as URL_CHECK.sh: 30 days or less remaining => warn
    local -r _WARN_BELOW_DAYS=30

    # openssl prints 'notAfter=<date>'; keep the part behind the '='
    local _RESULT
    _RESULT="$(echo \
        | openssl s_client -starttls postgres -connect "${_SERVER}":5432 -servername "${_SERVER}" 2> /dev/null \
        | openssl x509 -noout -enddate 2> /dev/null \
        | grep -F 'notAfter=' \
        | cut -d'=' -f2)"

    if [ -z "${_RESULT}" ]; then
        echo "FAIL#Unable to get cert's end date from ${_SERVER}:5432"
        return 1
    fi

    local _ENDDATE
    _ENDDATE="$(date --date="${_RESULT}" --utc +%s 2> /dev/null)"

    # '+' instead of '*': an empty value (date could not parse) must fail too
    if ! echo "${_ENDDATE}" | grep -q -E "^[0-9]+$"; then
        echo "FAIL#Unable to parse end date of certificate"
        return 1
    fi

    local _NOW _REMAINING_DAYS
    _NOW="$(date --date now +%s)"
    _REMAINING_DAYS="$(( (_ENDDATE - _NOW) / 86400 ))"

    # The former test '[ -z "${_REMAINING_DAYS}" ]' could never be true,
    # because the arithmetic above always yields a number.
    if [ "${_REMAINING_DAYS}" -le "${_WARN_BELOW_DAYS}" ]; then
        echo "WARN#Only ${_REMAINING_DAYS} days left"
        return 1
    fi

    echo "OK#${_REMAINING_DAYS} days remaining"
    return 0
}
echo "${_RESULT}" | grep -q -F "${_SEARCH_STRING}" \ + && echo "FAIL#Search string not found" \ + && return 1 + + local _ENDDATE + _ENDDATE="$(echo "${_RESULT}" | grep -F 'expire' | cut -d':' -f2-)" + _ENDDATE="$(date --date="${_ENDDATE}" --utc +%s)" + readonly _ENDDATE + + ! echo "${_ENDDATE}" | grep -q -E "^[0-9]*$" \ + && echo "FAIL#Unable to parse end date of certificate" \ + && return 1 + + local _NOW _REMAINING_DAYS + _NOW="$(date --date now +%s)" + _REMAINING_DAYS="$(( (_ENDDATE - _NOW) / 86400 ))" + readonly _NOW _REMAINING_DAYS + + # less than 30 days remaining => should be warned + [ "${_REMAINING_DAYS}" -le "30" ] \ + && echo "WARN#Certificate: only ${_REMAINING_DAYS} days left" \ + && return 1 + + echo "OK#Certificate: ${_REMAINING_DAYS} days remaining" + return 0 +} + +#((curl --connect-timeout 10 --max-time 10 -k -s --head --no-progress-meter "${_URL}" | grep -qF '200 OK') && echo OK) || echo FAIL +checkUrl "${1}" "${2}" && exit 0 || exit 1 diff --git a/script/monitor/generic/ZFS_POOL_CHECK.sh b/script/monitor/generic/ZFS_POOL_CHECK.sh new file mode 100755 index 0000000..164e2da --- /dev/null +++ b/script/monitor/generic/ZFS_POOL_CHECK.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +_REMOTE_HOST="${1:?"FQDN of server missing: e.g. host.example.net[:port]"}" +_ZFS_POOL="${2:?"Name of zfs pool missing: e.g. 
# Checks the result of the last scrub of one ZFS pool on the remote host.
#
# Globals:   _ZFS_POOL, _SOCKET, _REMOTE_PORT, _REMOTE_USER,
#            _REMOTE_HOSTNAME_FQDN (read)
# Outputs:   'OK#Scrubbed on <date>.' if the scrub repaired 0B with 0 errors,
#            otherwise 'FAIL#CHECK POOL: <pool>'
#            (or the FAIL line of checkOrStartSSHMaster)
# Returns:   1 if no SSH connection is available, 0 otherwise
function testPool(){
    checkOrStartSSHMaster \
    || return 1

    # The pool name has to be inserted on THIS side: inside single quotes
    # '${_ZFS_POOL}' was sent literally and expanded to nothing on the remote
    # host, so the scrub lines of all pools were evaluated.
    # '%q' quotes the name for the remote shell.
    local _REMOTE_COMMAND
    printf -v _REMOTE_COMMAND 'zpool status %q | grep -F scrub' "${_ZFS_POOL:?"Missing ZFS_POOL"}"

    local _RESPONSE _RESULT
    _RESPONSE="$(ssh -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" "${_REMOTE_COMMAND}")"
    # 'with 0 errors' instead of '0 errors', which also matched '10 errors'
    _RESULT="$(echo "${_RESPONSE}" | grep -F 'scrub repaired 0B' | grep -F 'with 0 errors')"
    _RESULT="${_RESULT#* on }"  #Removes shortest matching pattern '* on ' from the begin

    if [ -z "${_RESULT}" ]; then
        echo "FAIL#CHECK POOL: ${_ZFS_POOL}"
        return 0
    fi

    echo "OK#Scrubbed on ${_RESULT}."
    return 0
}
# Compares the creation time of the latest matching ZFS snapshot on the
# remote host with 'now', for every composition the host is listed for.
#
# Globals:   _COMPOSITIONS, _ZFS_SNAPSHOT_FILTER, _NOW_UTC_UNIXTIME, _MODE,
#            _DEBUG_PATH, _SOCKET, _REMOTE_PORT, _REMOTE_USER,
#            _REMOTE_HOSTNAME_FQDN, _REMOTE_HOSTNAME_SHORT (read)
# Outputs:   'OK#Checks running' plus one 'ZFSSYNC_of_<host>_LAGGING?...' line
#            per composition that is 40s or more behind (WARN below 60s, FAIL
#            otherwise) or has no snapshot at all; 'WARN#no compositions' or
#            'FAIL#no snapshots' if there is nothing to compare.
#            With _MODE 'debug' details are written to files in _DEBUG_PATH.
# Returns:   1 without SSH connection or without any snapshot, 0 otherwise
function checkSync() {
    checkOrStartSSHMaster \
    || return 1

    local -r _DEBUG_FILE="${_DEBUG_PATH}SECONDS_BEHIND_${_REMOTE_HOSTNAME_FQDN}.txt"

    [ "${_MODE}" == "debug" ] \
    && mkdir -p "${_DEBUG_PATH}" > /dev/null \
    && echo "Now: ${_NOW_UTC_UNIXTIME}" > "${_DEBUG_FILE}"

    if ! [ -d "${_COMPOSITIONS:?"Missing COMPOSITIONS"}" ]; then
        echo "WARN#no compositions"
        return 0
    fi

    [ "${_MODE}" == "debug" ] \
    && echo "Snapshot filter: ${_ZFS_SNAPSHOT_FILTER}" >> "${_DEBUG_FILE}"

    # This retrieves the list of the interesting snapshots including creation timestamp
    local _SNAPSHOTS
    _SNAPSHOTS="$(ssh -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" zfs list -po creation,name -r -t snapshot zpool1/persistent | grep -F -- "${_ZFS_SNAPSHOT_FILTER}")"
    [ "${_MODE}" == "debug" ] \
    && echo "${_SNAPSHOTS}" > "${_DEBUG_PATH}SNAPSHOTS_${_REMOTE_HOSTNAME_FQDN}.txt"

    if [ -z "${_SNAPSHOTS}" ]; then
        echo "FAIL#no snapshots"
        return 1
    fi

    echo "OK#Checks running"

    local _COMPOSITION_PATH _COMPOSITION_NAME _LAST_SNAPSHOT_UNIXTIME _SECONDS_BEHIND
    for _COMPOSITION_PATH in "${_COMPOSITIONS}"*; do

        # If remote host is found than it is responsible for this container-composition, otherwise skip
        # (grep -E "^[[:blank:]]*something" means. Line has to start with "something", leading blank chars are ok.)
        # NOTE(review): this is a prefix match, 'web1' also matches a line 'web10' - confirm that is intended.
        grep -q -E "^[[:blank:]]*${_REMOTE_HOSTNAME_SHORT}" "${_COMPOSITION_PATH}/zfssync-hosts" 2> /dev/null \
        || continue

        _COMPOSITION_NAME="${_COMPOSITION_PATH##*/}"  #Removes longest matching pattern '*/' from the begin
        # -F: the name is plain text, not a regular expression
        _LAST_SNAPSHOT_UNIXTIME="$(echo "${_SNAPSHOTS}" | grep -F -- "${_COMPOSITION_NAME}" | tail -n 1 | cut -d' ' -f1)"

        # No (numeric) timestamp means there is no snapshot of this
        # composition; formerly this ended in an arithmetic syntax error.
        case "${_LAST_SNAPSHOT_UNIXTIME}" in
            ''|*[!0-9]*)
                [ "${_MODE}" == "debug" ] \
                && echo "${_COMPOSITION_NAME} on ${_REMOTE_HOSTNAME_FQDN}: no snapshot" >> "${_DEBUG_FILE}"
                echo "ZFSSYNC_of_${_REMOTE_HOSTNAME_SHORT}_LAGGING?FAIL#${_COMPOSITION_NAME} no snapshot"
                continue
                ;;
        esac

        _SECONDS_BEHIND=$(( _NOW_UTC_UNIXTIME - _LAST_SNAPSHOT_UNIXTIME ))

        [ "${_MODE}" == "debug" ] \
        && echo "${_LAST_SNAPSHOT_UNIXTIME} ${_COMPOSITION_NAME} on ${_REMOTE_HOSTNAME_FQDN} behind: ${_SECONDS_BEHIND}s" >> "${_DEBUG_FILE}"

        # Less than 40s behind: in sync, nothing to report
        [ "${_SECONDS_BEHIND}" -lt 40 ] \
        && continue

        [ "${_SECONDS_BEHIND}" -lt 60 ] \
        && echo "ZFSSYNC_of_${_REMOTE_HOSTNAME_SHORT}_LAGGING?WARN#${_COMPOSITION_NAME} ${_SECONDS_BEHIND}s" \
        && continue

        echo "ZFSSYNC_of_${_REMOTE_HOSTNAME_SHORT}_LAGGING?FAIL#${_COMPOSITION_NAME} ${_SECONDS_BEHIND}s"
    done

    return 0
}
# Reports how full a ZFS pool of the remote host is.
#
# Globals:   _SOCKET, _REMOTE_PORT, _REMOTE_USER, _REMOTE_HOSTNAME_FQDN (read)
# Arguments: $1 - OK_THRESHOLD:   up to this percentage the result is OK
#            $2 - INFO_THRESHOLD: up to this percentage the result is INFO,
#                                 above it FAIL
# Outputs:   'OK#...', 'INFO#...' or 'FAIL#...'
#            (or the FAIL line of checkOrStartSSHMaster)
# Returns:   1 if no SSH connection is available, 0 otherwise
function testSpace(){
    # Validate the parameters before anything is sent to the remote host
    local -r _OK_THRESHOLD="${1:?"Missing OK_THRESHOLD"}"
    local -r _INFO_THRESHOLD="${2:?"Missing INFO_THRESHOLD"}"

    checkOrStartSSHMaster \
    || return 1

    # '-H' prints one line per pool, the columns are separated by a tab
    local _RESULT
    _RESULT="$(ssh -S "${_SOCKET}" -p "${_REMOTE_PORT}" "${_REMOTE_USER}@${_REMOTE_HOSTNAME_FQDN}" 'zpool list -H -o capacity,name')"

    # NOTE(review): only the LAST listed pool is evaluated - confirm that
    # hosts with several pools are meant to be checked like this.
    local _LAST_LINE _SPACE_USED _POOL _PERCENT
    _LAST_LINE="${_RESULT##*$'\n'}"         #Removes longest matching pattern '*<newline>' from the begin
    _SPACE_USED="${_LAST_LINE%%$'\t'*}"     #Removes longest matching pattern '<tab>*' from the end
    _POOL="${_LAST_LINE#*$'\t'}"            #Removes shortest matching pattern '*<tab>' from the begin
    _POOL="${_POOL%%$'\t'*}"
    _PERCENT="${_SPACE_USED%\%*}"           #Removes shortest matching pattern '%*' from the end

    # Anything but a plain number (nothing, '-', ...) cannot be compared
    case "${_PERCENT}" in
        ''|*[!0-9]*)
            echo "FAIL#NO value"
            return 0
            ;;
    esac

    if [ "${_OK_THRESHOLD}" -ge "${_PERCENT}" ]; then
        echo "OK#${_SPACE_USED} used ${_POOL}."
        return 0
    fi

    if [ "${_INFO_THRESHOLD}" -ge "${_PERCENT}" ]; then
        echo "INFO#${_SPACE_USED} already used ${_POOL}."
        return 0
    fi

    echo "FAIL#${_SPACE_USED} used ${_POOL}!"
    return 0
}
# Publishes one file of the monitoring site by linking it into the web root.
# The link target is whatever printSelectedDefinition prints for the file.
#
# Arguments: $1 - name of the file, e.g. 'check.css'
#            $2 - web root (optional, default: '/var/www/html')
# Outputs:   a message telling whether the link existed or was created,
#            or why it could not be created
# Returns:   0 if the link points to the selected file, non-zero otherwise
function setupPublicFile() {
    local -r _FILENAME="${1:?"Missing filename"}"
    local -r _WEBROOT="${2:-"/var/www/html"}"
    local -r _LINK="${_WEBROOT}/${_FILENAME}"

    if ! [ -d "${_WEBROOT}" ]; then
        echo "Missing folder '${_WEBROOT}'. Is a webserver installed?"
        return 1
    fi

    # Resolve the target once. An empty result means no definition was
    # printed for this file; linking would only create a broken call
    # 'ln -s ""'.
    local _TARGET
    _TARGET="$(printSelectedDefinition "${_FILENAME}")"
    if [ -z "${_TARGET}" ]; then
        echo "No definition found for '${_FILENAME}'."
        return 1
    fi

    if [ -L "${_LINK}" ] && [ "$(readlink -f "${_LINK}")" == "${_TARGET}" ]; then
        echo "Link '${_LINK}' already exists pointing to the expected file:"
        echo " - '$(readlink -f "${_LINK}")'"
        return 0
    fi

    # '-f' replaces an existing entry, e.g. a link to the former selection
    ln -f -s "${_TARGET}" "${_LINK}" \
        || return 1

    echo "Link '${_LINK}' created successfully:"
    echo " - '$(readlink -f "${_LINK}")'"
    return 0
}
" \ + && checkPreconditions \ + && setupPublicFile "check.html" \ + && setupPublicFile "check.css" \ + && setupPublicFile "logo.png" \ + && exit 0 + +exit 1 diff --git a/script/monitor/setupServiceProvidingHost.sh b/script/monitor/setupServiceProvidingHost.sh new file mode 100755 index 0000000..ea34daf --- /dev/null +++ b/script/monitor/setupServiceProvidingHost.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +[ "$(id -u)" != "0" ] \ + && sudo "${0}" \ + && exit 0 + + + +_SETUP="$(readlink -f "${0}" 2> /dev/null)" + +# Folders always ends with an tailing '/' +_CIS_ROOT="${_SETUP%%/script/monitor/*}/" #Removes longest matching pattern '/script/monitor/*' from the end +_CORE_SCRIPTS="${_CIS_ROOT:?"Missing CIS_ROOT"}core/" +_DOMAIN="$("${_CIS_ROOT:?"Missing CIS_ROOT"}core/printOwnDomain.sh")" +_DEFINITIONS="${_CIS_ROOT:?"Missing CIS_ROOT"}definitions/${_DOMAIN:?"Missing DOMAIN"}/" + + + +echo "Setup the user and permission to enable the monitoring this host ... " \ + && "${_CORE_SCRIPTS:?"Missing CORE_SCRIPTS"}addNormalUser.sh" monitoring \ + && echo \ + && "${_CORE_SCRIPTS:?"Missing CORE_SCRIPTS"}defineAuthorizedKeysOfUser.sh" "${_DEFINITIONS}" monitoring \ + && exit 0 + +exit 1