From 699a2e16e8a83b0962f3265eb3cde6b446c81565 Mon Sep 17 00:00:00 2001 From: Robi Meier Date: Tue, 19 Mar 2024 22:02:37 +0100 Subject: [PATCH] Move and rename files and add state machine --- .gitignore | 3 + README.md | 56 +++++++------ .dyndns_env.EXAMPLE => config/dyndns.EXAMPLE | 0 .../file_monitor.EXAMPLE | 0 .../monitoring.EXAMPLE | 0 .../post_startup.EXAMPLE | 0 config/state_machine.EXAMPLE | 2 + .../system_health_check.EXAMPLE | 0 .../tg_notify.EXAMPLE | 0 .../zfs_health_check.EXAMPLE | 0 dyndns.sh | 17 ++-- file_monitor.sh | 22 +++--- functions/logging.sh | 19 +++++ helpers/state_machine.sh | 78 +++++++++++++++++++ .../tg_notify.sh | 18 ++--- monitoring.sh | 22 ++---- run-before-shutdown.service | 2 +- system_health_check.sh | 29 ++++--- zfs_health_check.sh | 23 +++--- 19 files changed, 189 insertions(+), 102 deletions(-) rename .dyndns_env.EXAMPLE => config/dyndns.EXAMPLE (100%) rename .file_monitor_env.EXAMPLE => config/file_monitor.EXAMPLE (100%) rename .monitoring_env.EXAMPLE => config/monitoring.EXAMPLE (100%) rename .post_startup_env.EXAMPLE => config/post_startup.EXAMPLE (100%) create mode 100644 config/state_machine.EXAMPLE rename .system_health_check_env.EXAMPLE => config/system_health_check.EXAMPLE (100%) rename .telegram_notification_env.EXAMPLE => config/tg_notify.EXAMPLE (100%) rename .zfs_health_check_env.EXAMPLE => config/zfs_health_check.EXAMPLE (100%) create mode 100755 functions/logging.sh create mode 100755 helpers/state_machine.sh rename telegram_notification.sh => helpers/tg_notify.sh (72%) diff --git a/.gitignore b/.gitignore index b9f89cb..dc8881f 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,9 @@ .last_changelog_read storage/ +log/ +config/* +!config/*.EXAMPLE # This file is unused atm docker_health_check.sh diff --git a/README.md b/README.md index 5f8154e..23cf09b 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,8 @@ The covered tasks range from file change tracking via http/ssh monitoring to zfs ## Installation The scripts 
in this repo ***must*** be checked out into `/root/scripts`. +This is subject to change. +The scripts should be installable wherever. ```bash cd /root @@ -30,21 +32,25 @@ This will be helpful when updating the admin scripts later on. ### Config Files -For each script there is a `.[script_name]_env.EXAMPLE` file, which you must copy (remove `.EXAMPLE` part) and edit while providing your own information. +Config files are located in the `config/` directory. +Each script has its own configuration file; this repo only ships the `config/[script_name].EXAMPLE` example configuration files. +For each script you want to use, you must copy the example and fill in your own data. +For example (`monitoring.sh`): ```bash -SCRIPT_NAME=zfs_health_check -cp /root/scripts/.${SCRIPT_NAME}_env.EXAMPLE /root/scripts/.${SCRIPT_NAME}_env -vim /root/scripts/.${SCRIPT_NAME}_env +cd /root/scripts +cp config/monitoring.EXAMPLE config/monitoring +vim config/monitoring ``` -If you want to use the example configuration, you could symbolic link the files instead of just copying them. -This really only makes sense for `.system_health_check`. +If you want to use the provided example configuration, you could symlink the files instead of just copying them. +This really only makes sense for `system_health_check`. 
The command for this is: ```bash cd /root/scripts -ln -s .system_health_check.EXAMPLE .system_health_check +ln -s system_health_check.EXAMPLE config/system_health_check ``` ### Shutdown Notification @@ -64,7 +70,7 @@ To install the [startup helper script](#using-startup-helper) into the regular u ```bash USRNAME=radioelephant cp /root/scripts/post_startup.sh /home/$USRNAME/post_startup.sh -cp /root/scripts/.post_startup_env.EXAMPLE /home/$USRNAME/.post_startup_env +cp /root/scripts/config/post_startup.EXAMPLE /home/$USRNAME/.post_startup_env chown $USRNAME:$USRNAME /home/$USRNAME/post_startup.sh chown $USRNAME:$USRNAME /home/$USRNAME/.post_startup_env vim /home/$USRNAME/.post_startup_env @@ -83,10 +89,10 @@ cd /root/scripts git pull ``` -For most of the scripts you only need to check if the `.[script_name]_env.EXAMPLE` has changed and contains different keys than your copied `.[script_name]_env` file. -For your convenience, changes to environment variable files will be documented in the [CHANGELOG](CHANGELOG.md). -If you followed the instructions in this README, then you will find the last time you updated this repository in the `.last_changelog_read` file. -Read it with `cat /root/scripts/.last_changelog_read`. +For most of the scripts you only need to check if the example configuration file (e.g. `config/monitoring.EXAMPLE`) has changed and contains different keys than your copy (e.g. `config/monitoring`). +For your convenience, changes to configuration files will be documented in the [CHANGELOG](CHANGELOG.md). +If you followed the instructions in this README, then you will find the last time you pulled this repository and read the [CHANGELOG](CHANGELOG.md) in the `.last_changelog_read` file. +Get the value with `cat /root/scripts/.last_changelog_read`. **Make sure to update the last reading time file after reading the CHANGELOG with `date > /root/scripts/.last_changelog_read`** @@ -123,9 +129,11 @@ This will be noted in the CHANGELOG for your convenience. 
The check and monitoring scripts in this repo can be run periodically be run and if any problems are detected, they produce output. The output of these scripts can be redirected and used however you like. -Typically I redirect the output to the `telegram_notification.sh` script which notifies me of any noisy scripts. +Typically I redirect the output to the `helpers/tg_notify.sh` script which notifies me of any noisy scripts. +In case of expected repeating failures, I first redirect the output to `helpers/state_machine.sh "keyword"` which silences repeated messages. +The "state machine" only saves a copy of the last message per keyword and compares it to the next message. -Regardless of any problems each script also logs its executions in `/root/logs`. +Regardless of any problems each script also logs its executions under `log/`. Make sure you created this folder during [installation](#installation). ### Crontab Scheduling @@ -137,20 +145,20 @@ If you are unsure about the cron schedule, use [Crontab Guru](https://crontab.gu My current crontab looks like this: ```crontab -* * * * * bash -c '/root/scripts/file_monitor.sh | /root/scripts/telegram_notification.sh' -*/2 * * * * bash -c '/root/scripts/monitoring.sh | /root/scripts/telegram_notification.sh' -*/4 * * * * bash -c '/root/scripts/dyndns.sh | /root/scripts/telegram_notification.sh' -*/3 * * * * bash -c '/root/scripts/system_health_check.sh | /root/scripts/telegram_notification.sh' -15 * * * * bash -c '/root/scripts/docker_health_check.sh | /root/scripts/telegram_notification.sh' -*/15 * * * * bash -c '/root/scripts/zfs_health_check.sh | /root/scripts/telegram_notification.sh' +* * * * * bash -c 'cd /root/scripts && ./file_monitor.sh | ./helpers/tg_notify.sh' +*/2 * * * * bash -c 'cd /root/scripts && ./monitoring.sh | ./helpers/state_machine.sh "monitoring" | ./helpers/tg_notify.sh' +*/4 * * * * bash -c 'cd /root/scripts && ./dyndns.sh | ./helpers/tg_notify.sh' +*/3 * * * * bash -c 'cd /root/scripts && 
./system_health_check.sh | ./helpers/state_machine.sh "system" | ./helpers/tg_notify.sh' +15 * * * * bash -c 'cd /root/scripts && ./docker_health_check.sh | ./helpers/state_machine.sh "docker" | ./helpers/tg_notify.sh' +*/15 * * * * bash -c 'cd /root/scripts && ./zfs_health_check.sh | ./helpers/state_machine.sh "zfs" | ./helpers/tg_notify.sh' -@reboot sleep 10 && /root/scripts/telegram_notification.sh '[STARTUP] System just booted' -@reboot sleep 30 && bash -c '/root/scripts/zfs_health_check.sh | /root/scripts/telegram_notification.sh' +@reboot sleep 10 && /root/scripts/helpers/tg_notify.sh '[STARTUP] System just booted' +@reboot sleep 30 && bash -c 'cd /root/scripts && ./zfs_health_check.sh | ./helpers/state_machine.sh "zfs" | ./helpers/tg_notify.sh' ``` -Adapt this to your needs, you might also implement other checks and only use the `telegram_notification.sh` script from this repo. +Adapt this to your needs, you might also implement other checks and only use the `helpers/tg_notify.sh` script from this repo. Or you might implement your own notification script to notify you via another service. -The `telegram_notification.sh` can easily be adapted (just remove comment) to forward all notifications to `STDOUT` which typically makes cron send a mail. +The `helpers/tg_notify.sh` can easily be adapted (just remove comment) to forward all notifications to `STDOUT` which typically makes cron send a mail. 
### Using Startup Helper diff --git a/.dyndns_env.EXAMPLE b/config/dyndns.EXAMPLE similarity index 100% rename from .dyndns_env.EXAMPLE rename to config/dyndns.EXAMPLE diff --git a/.file_monitor_env.EXAMPLE b/config/file_monitor.EXAMPLE similarity index 100% rename from .file_monitor_env.EXAMPLE rename to config/file_monitor.EXAMPLE diff --git a/.monitoring_env.EXAMPLE b/config/monitoring.EXAMPLE similarity index 100% rename from .monitoring_env.EXAMPLE rename to config/monitoring.EXAMPLE diff --git a/.post_startup_env.EXAMPLE b/config/post_startup.EXAMPLE similarity index 100% rename from .post_startup_env.EXAMPLE rename to config/post_startup.EXAMPLE diff --git a/config/state_machine.EXAMPLE b/config/state_machine.EXAMPLE new file mode 100644 index 0000000..d0eda51 --- /dev/null +++ b/config/state_machine.EXAMPLE @@ -0,0 +1,2 @@ +STORAGE_PATH=/root/scripts/storage/state_machine # NO trailing slash +RENOTIFY_AGE_SEC=7200 # In seconds (2h) diff --git a/.system_health_check_env.EXAMPLE b/config/system_health_check.EXAMPLE similarity index 100% rename from .system_health_check_env.EXAMPLE rename to config/system_health_check.EXAMPLE diff --git a/.telegram_notification_env.EXAMPLE b/config/tg_notify.EXAMPLE similarity index 100% rename from .telegram_notification_env.EXAMPLE rename to config/tg_notify.EXAMPLE diff --git a/.zfs_health_check_env.EXAMPLE b/config/zfs_health_check.EXAMPLE similarity index 100% rename from .zfs_health_check_env.EXAMPLE rename to config/zfs_health_check.EXAMPLE diff --git a/dyndns.sh b/dyndns.sh index 485399b..ede5e12 100755 --- a/dyndns.sh +++ b/dyndns.sh @@ -1,18 +1,15 @@ #!/bin/bash -logfile=/root/logs/dyndns.log -log_identifier="[DNS]" -log() { - echo -e $@ | ts "[%Y-%m-%d %H:%M:%S] $log_identifier" >> $logfile -} -log_echo() { - echo -e $@ | ts "[%Y-%m-%d %H:%M:%S] $log_identifier" | tee -a $logfile -} - +# Load configuration set -o allexport -source /root/scripts/.dyndns_env +source /root/scripts/config/dyndns set +o allexport +# 
Import logging functionality +logfile=/root/scripts/log/dyndns.log +log_identifier="DNS" +source /root/scripts/functions/logging.sh + url="https://${USERNAME}:${PASSWORD}@infomaniak.com/nic/update?hostname=" log "Updating DynDNS for ${MAIN_DOMAIN}" diff --git a/file_monitor.sh b/file_monitor.sh index d4c7c6f..3d6063a 100755 --- a/file_monitor.sh +++ b/file_monitor.sh @@ -8,30 +8,28 @@ # Author: Robin Meier - robin@meier.si ################################################################################ -logfile=/root/logs/file_monitor.log -log_identifier="[FILE]" -log() { - echo -e $@ | ts "[%Y-%m-%d %H:%M:%S] $log_identifier" >> $logfile -} -log_echo() { - echo -e $@ | ts "[%Y-%m-%d %H:%M:%S] $log_identifier" | tee -a $logfile -} - +# Load configuration set -o allexport -source /root/scripts/.file_monitor_env +source /root/scripts/config/file_monitor set +o allexport +# Import logging functionality +logfile=/root/scripts/log/file_monitor.log +log_identifier="FILE" +source /root/scripts/functions/logging.sh + +# Make sure directory exists mkdir -p /root/scripts/storage/file_monitor for file in $FILES do # Touch storage file if not existing if [ ! 
-f /root/scripts/storage/file_monitor/${file//\//_} ]; then - touch /root/scripts/storage/file_monitor/${file//\//_} ]; + touch /root/scripts/storage/file_monitor/${file//\//_} fi if [ "$file" -nt "/root/scripts/storage/file_monitor/${file//\//_}" ]; then log_echo "[CHANGE] $file" - touch /root/scripts/storage/file_monitor/${file//\//_} ]; + touch /root/scripts/storage/file_monitor/${file//\//_} fi done diff --git a/functions/logging.sh b/functions/logging.sh new file mode 100755 index 0000000..435bc26 --- /dev/null +++ b/functions/logging.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +if [ -z "$logfile" ]; then + echo "logfile variable missing" + exit 1 +fi +if [ -z "$log_identifier" ]; then + echo "log_identifier variable missing" + exit 1 +fi + +log() { + echo -e "$@" | ts "[%Y-%m-%d %H:%M:%S] [$log_identifier]" >> "$logfile" +} + +log_echo() { + log "$@" + echo -e "[$log_identifier] $*" +} diff --git a/helpers/state_machine.sh b/helpers/state_machine.sh new file mode 100755 index 0000000..97c2bb7 --- /dev/null +++ b/helpers/state_machine.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +################################################################################ +# STATE_MACHINE.SH +# ---------------- +# This script saves the last message for a certain key and compares the next +# message for the same key to not have any repeating notifications. +# +# Author: Robin Meier - robin@meier.si +################################################################################ + +set -o allexport +source /root/scripts/config/state_machine +set +o allexport + +mkdir -p "$STORAGE_PATH" # Make sure STORAGE_PATH exists + +# Get input from standard input or via first parameter +if [[ $# -eq 0 ]]; then + echo "[ERROR] Not enough arguments!" + exit 1 +elif [[ $# -eq 1 ]]; then + MESSAGE=$(timeout 32 cat) + KEY=$1 +elif [[ $# -eq 2 ]]; then + KEY=$1 + MESSAGE=$2 +else + echo "[ERROR] Too many arguments!" 
+ exit 1 +fi + +# Check if KEY is empty +if [[ -z "${KEY}" ]]; then + echo "[ERROR] KEY argument is missing!" + exit 1 +fi + +KEY_FILE="${STORAGE_PATH}/${KEY}.txt" + +if [[ -f "$KEY_FILE" && -z "${MESSAGE}" ]]; then + # Previous message present and empty message now + OLD_MESSAGE=$(cat "$KEY_FILE") + echo "✅ Resolved" + echo "$OLD_MESSAGE" + rm "$KEY_FILE" + exit 0 +elif [[ -f "$KEY_FILE" ]]; then + # Message and previous message present + OLD_MESSAGE=$(cat "$KEY_FILE") + # Compare contents + if [[ "$OLD_MESSAGE" == "$MESSAGE" ]]; then + # Check last notification + if [ "$(( $(date +"%s") - $(stat -c "%Y" "$KEY_FILE") ))" -gt "$RENOTIFY_AGE_SEC" ]; then + touch "$KEY_FILE" + echo "‼️Renotify" + else + exit 0 + fi + else + echo "$MESSAGE" > "$KEY_FILE" + echo "⁉️Changed" + fi +else + if [[ -z "${MESSAGE}" ]]; then + # No message present + exit 0 + fi + + # New message present, create KEY_FILE, continue to relaying + echo "$MESSAGE" > "$KEY_FILE" + echo "❗New" +fi + +# Relay message if made it until here (Quotes are important here, so lines dont get .join(' ')-ed) +echo "$MESSAGE" + +exit 0 diff --git a/telegram_notification.sh b/helpers/tg_notify.sh similarity index 72% rename from telegram_notification.sh rename to helpers/tg_notify.sh index 9130181..dc08084 100755 --- a/telegram_notification.sh +++ b/helpers/tg_notify.sh @@ -1,24 +1,23 @@ #!/bin/bash ################################################################################ -# TELEGRAM_NOTIFICATION.SH -# ------------------------ -# This script takes input via stdin or parameters, removes timestamps from each -# line and replaces newlines with telegram compatible ones and then sends the -# message to a chat +# TG_NOTIFY.SH +# ------------ +# This script takes input via stdin or parameters, replaces newlines with +# telegram compatible ones and then sends the message to a chat # # Author: Robin Meier - robin@meier.si ################################################################################ set -o allexport -source 
/root/scripts/.telegram_notification_env +source /root/scripts/config/tg_notify set +o allexport BOT_API_URL=https://api.telegram.org/bot${BOT_TOKEN} # Get input from standard input or via first parameter if [[ $# -eq 0 ]]; then - MESSAGE=$(timeout 30 cat) + MESSAGE=$(timeout 32 cat) elif [[ $# -eq 1 ]]; then MESSAGE=$1 elif [[ $# -eq 2 ]]; then @@ -33,11 +32,6 @@ if [[ -z "${MESSAGE}" ]]; then exit 0 fi -# Strip timestamps from message -if [ "${MESSAGE:0:12}" == "$(echo '' | ts "[%Y-%m-%d")" ]; then - MESSAGE=$(echo -e "$MESSAGE" | cut -c 23-) -fi - # Replace newlines in message for telegram TG_MESSAGE=${MESSAGE//$'\n'/\%0A} diff --git a/monitoring.sh b/monitoring.sh index e8cd89e..e05ebe4 100755 --- a/monitoring.sh +++ b/monitoring.sh @@ -1,18 +1,14 @@ #!/bin/bash -logfile=/root/logs/monitoring.log -log_identifier="[MON]" -log() { - echo -e $@ | ts "[%Y-%m-%d %H:%M:%S] $log_identifier" >> $logfile -} -log_echo() { - echo -e $@ | ts "[%Y-%m-%d %H:%M:%S] $log_identifier" | tee -a $logfile -} - +# Load configuration set -o allexport -source /root/scripts/.monitoring_env +source /root/scripts/config/monitoring set +o allexport +# Import logging functionality +logfile=/root/scripts/log/monitoring.log +log_identifier="MON" +source /root/scripts/functions/logging.sh problems=0 @@ -23,28 +19,24 @@ do if [[ $(nc -w 2 ${ssh_host//:/ } <<< "\0" ) =~ "OpenSSH" ]] ; then log "[SSH] [OK] ${ssh_host} is reachable" else - # TODO: Rate limit fail messages, also add is back up message log_echo "[SSH] [FAIL] ${ssh_host} not reachable" problems=1 fi done -# TODO: HTTP Status Code 200 Monitoring for http_host in $HTTP_MONITORING do status_code=$(curl --write-out %{http_code} --silent --output /dev/null $http_host) if [[ "$status_code" -eq 200 ]] ; then log "[WEB] [OK] ${http_host}" else - # TODO: Rate limit fail messages, also add is back up message log_echo "[WEB] [FAIL] ${http_host} status code is ${status_code}" problems=1 fi done - if [[ "$problems" -eq "0" ]]; then log 
"Monitoring Run Successful" else - log_echo "Monitoring Run Failed" + log "Monitoring Run Failed" fi diff --git a/run-before-shutdown.service b/run-before-shutdown.service index 14911e2..06d80c0 100644 --- a/run-before-shutdown.service +++ b/run-before-shutdown.service @@ -5,7 +5,7 @@ Before=shutdown.target [Service] Type=oneshot -ExecStart=/root/scripts/telegram_notification.sh "[SHUTDOWN] System going down" +ExecStart=/root/scripts/helpers/tg_notify.sh "[SHUTDOWN] System going down" TimeoutStartSec=0 [Install] diff --git a/system_health_check.sh b/system_health_check.sh index d11bd79..3b5c701 100755 --- a/system_health_check.sh +++ b/system_health_check.sh @@ -8,19 +8,15 @@ # Author: Robin Meier - robin@meier.si ################################################################################ -logfile=/root/logs/system_health_check.log -log_identifier="[SYS]" -log() { - echo -e $@ | ts "[%Y-%m-%d %H:%M:%S] $log_identifier" >> $logfile -} -log_echo() { - echo -e $@ | ts "[%Y-%m-%d %H:%M:%S] $log_identifier" | tee -a $logfile -} - +# Load configuration set -o allexport -source /root/scripts/.system_health_check_env +source /root/scripts/config/system_health_check set +o allexport +# Import logging functionality +logfile=/root/scripts/log/system_health_check.log +log_identifier="SYS" +source /root/scripts/functions/logging.sh problems=0 @@ -28,8 +24,9 @@ log "Starting System Health Check" # RAM usage percentage ram=$(free | awk '/Mem/{printf("%.2f"), $3/$2*100}') -if [ $(echo "$ram > $RAM_LIMT" | bc -l) -eq 1 ]; then - log_echo "[RAM] usage is ${ram}%! (Limit: $RAM_LIMIT)" +if [ $(echo "$ram > $RAM_LIMIT" | bc -l) -eq 1 ]; then + log_echo "[RAM] usage is above limit of ${RAM_LIMIT}%!" + log "[RAM] usage is ${ram}%! (Limit: $RAM_LIMIT)" problems=1 else log "[RAM] usage is ${ram}%" @@ -38,7 +35,8 @@ fi # CPU usage percentage cpu=$(top -bn1 | grep "Cpu(s)" | awk '{print $2 + $4}') if [ $(echo "$cpu > $CPU_LIMIT" | bc -l) -eq 1 ]; then - log_echo "[CPU] load is ${cpu}%! 
(Limit: $CPU_LIMIT)" + log_echo "[CPU] load is above limit of ${CPU_LIMIT}%!" + log "[CPU] load is ${cpu}%! (Limit: $CPU_LIMIT)" problems=1 else log "[CPU] load is ${cpu}%" @@ -50,7 +48,8 @@ fi # Temperature avg_cpu_temp=$(sensors | awk '/^Core /{++r; gsub(/[^[:digit:]]+/, "", $3); s+=$3} END{print s/(10*r)}') if [ $(echo "$avg_cpu_temp > $TEMP_LIMIT" | bc -l) -eq 1 ]; then - log_echo "[TEMP] is ${avg_cpu_temp}°C! (Limit: $TEMP_LIMIT)" + log_echo "[TEMP] is above limit of ${TEMP_LIMIT}°C!" + log "[TEMP] is ${avg_cpu_temp}°C! (Limit: $TEMP_LIMIT)" problems=1 else log "[TEMP] is ${avg_cpu_temp}°C" @@ -71,5 +70,5 @@ fi if [ ${problems} -eq 0 ]; then log "System Health Check Successful" else - log_echo "System Health Check Found Problems" + log "System Health Check Found Problems" fi diff --git a/zfs_health_check.sh b/zfs_health_check.sh index 6b5f676..fc348b4 100755 --- a/zfs_health_check.sh +++ b/zfs_health_check.sh @@ -10,21 +10,18 @@ # Author: Robin Meier - robin@meier.si ################################################################################ -logfile=/root/logs/zfs_health_check.log -log_identifier="[ZFS]" -log() { - echo -e $@ | ts "[%Y-%m-%d %H:%M:%S] $log_identifier" >> $logfile -} -log_echo() { - echo -e $@ | ts "[%Y-%m-%d %H:%M:%S] $log_identifier" | tee -a $logfile -} +# Load configuration +set -o allexport +source /root/scripts/config/zfs_health_check +set +o allexport + +# Import logging functionality +logfile=/root/scripts/log/zfs_health_check.log +log_identifier="ZFS" +source /root/scripts/functions/logging.sh problems=0 -set -o allexport -source /root/scripts/.zfs_health_check_env -set +o allexport - log "Starting ZFS Health Check" # Pool Status @@ -123,5 +120,5 @@ done if [ ${problems} -eq 0 ]; then log "ZFS Health Check Successful" else - log_echo "ZFS Health Check Found Problems" + log "ZFS Health Check Found Problems" fi