#!/bin/bash
################################################################################
# SYSTEM_HEALTH_CHECK.SH
# ----------------------
# This script checks the system health and reports any problems on stdout
#
# Author: Robin Meier - robin@meier.si
################################################################################

# Directory that contains this script; config, functions and log paths are
# resolved relative to it.
script_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

# Abort early when one of the files sourced below is missing or unreadable.
# Presumably they provide the *_LIMIT thresholds and the log/log_echo helpers
# used by the checks, so continuing without them would only yield broken output.
for required_file in \
  "${script_dir}/config/system_health_check" \
  "${script_dir}/functions/logging.sh"; do
  if [[ ! -r "${required_file}" ]]; then
    printf 'system_health_check: cannot read %s\n' "${required_file}" >&2
    exit 1
  fi
done
unset required_file

# Load configuration
# allexport marks every variable assigned by the config file for export.
set -o allexport
# shellcheck source=/dev/null
source "${script_dir}/config/system_health_check"
set +o allexport

# Import logging functionality
# logfile and log_identifier are set before sourcing logging.sh
# (presumably it reads them -- confirm against functions/logging.sh).
logfile="${script_dir}/log/system_health_check.log"
log_identifier="SYS"
# shellcheck source=/dev/null
source "${script_dir}/functions/logging.sh"

# Set to 1 by any check that detects a problem.
problems=0

log "Starting System Health Check"

# RAM usage percentage
# Column 3 divided by column 2 of the "Mem:" row (used / total in procps free).
# LC_ALL=C pins the untranslated "Mem:" label and the '.' decimal separator.
ram=$(LC_ALL=C free | LC_ALL=C awk '/^Mem:/ { printf "%.2f", $3 / $2 * 100 }')

# bc prints 1 when the comparison holds and 0 when it does not; it prints
# nothing on a syntax error (e.g. empty measurement or unset RAM_LIMIT).
ram_above_limit=$(bc -l <<<"${ram} > ${RAM_LIMIT:-}")
case "${ram_above_limit}" in
  1)
    log_echo "[RAM] usage is above limit of ${RAM_LIMIT}%!"
    log "[RAM] usage is ${ram}%! (Limit: $RAM_LIMIT)"
    problems=1
    ;;
  0)
    log "[RAM] usage is ${ram}%"
    ;;
  *)
    # A check that cannot be evaluated must not be reported as healthy.
    log_echo "[RAM] usage could not be checked (usage: '${ram}', limit: '${RAM_LIMIT:-}')"
    problems=1
    ;;
esac

# CPU usage percentage
# Sum of the 1st and 3rd values on top's "Cpu(s)" summary line (user and
# system time in the current procps-ng layout). The label is stripped up to
# the colon first, so "%Cpu(s):100.0 us" (no space after the colon) parses the
# same way as "%Cpu(s):  1.2 us". Only the first matching line is used.
cpu=$(LC_ALL=C top -bn1 \
  | LC_ALL=C awk '!seen && /Cpu\(s\)/ { sub(/^[^:]*:/, ""); print $1 + $3; seen = 1 }')

# bc prints 1 when the comparison holds and 0 when it does not; it prints
# nothing on a syntax error (e.g. empty measurement or unset CPU_LIMIT).
cpu_above_limit=$(bc -l <<<"${cpu} > ${CPU_LIMIT:-}")
case "${cpu_above_limit}" in
  1)
    log_echo "[CPU] load is above limit of ${CPU_LIMIT}%!"
    log "[CPU] load is ${cpu}%! (Limit: $CPU_LIMIT)"
    problems=1
    ;;
  0)
    log "[CPU] load is ${cpu}%"
    ;;
  *)
    # A check that cannot be evaluated must not be reported as healthy.
    log_echo "[CPU] load could not be checked (load: '${cpu}', limit: '${CPU_LIMIT:-}')"
    problems=1
    ;;
esac

# Load?
# TODO: Maybe also check the system load average (e.g. from `uptime` or /proc/loadavg)

# Temperature
# Average over all "Core N:" lines printed by sensors. Stripping every
# non-digit from the third field turns a reading such as "+45.0°C" into 450
# (tenths of a degree), hence the division by 10 * number of cores.
# Nothing is printed when no "Core" line exists, which avoids a division by
# zero in awk (e.g. sensors missing or a chip that labels readings differently).
avg_cpu_temp=$(sensors | awk '
  /^Core / { ++cores; gsub(/[^[:digit:]]+/, "", $3); tenths += $3 }
  END { if (cores > 0) print tenths / (10 * cores) }')

# bc prints 1 when the comparison holds and 0 when it does not; it prints
# nothing on a syntax error (e.g. empty measurement or unset TEMP_LIMIT).
temp_above_limit=$(bc -l <<<"${avg_cpu_temp} > ${TEMP_LIMIT:-}")
case "${temp_above_limit}" in
  1)
    log_echo "[TEMP] is above limit of ${TEMP_LIMIT}°C!"
    log "[TEMP] is ${avg_cpu_temp}°C! (Limit: $TEMP_LIMIT)"
    problems=1
    ;;
  0)
    log "[TEMP] is ${avg_cpu_temp}°C"
    ;;
  *)
    # A check that cannot be evaluated must not be reported as healthy.
    log_echo "[TEMP] could not be checked (temperature: '${avg_cpu_temp}', limit: '${TEMP_LIMIT:-}')"
    problems=1
    ;;
esac

# Failed Services
# "systemctl status" prints a summary line of the form "Failed: <n> units";
# the count is the second field of the first line matching that pattern
# (case-insensitive). All input is consumed so systemctl never sees a closed pipe.
failed_services=$(systemctl status \
  | awk '!seen && tolower($0) ~ /failed:[[:space:]].*[[:space:]]units/ { print $2; seen = 1 }')

case "${failed_services}" in
  0)
    log "[SRV] No failed services"
    ;;
  '' | *[!0-9]*)
    # No usable count: report that explicitly instead of claiming failures.
    log_echo "[SRV] [ERROR] Could not determine the number of failed services"
    problems=1
    ;;
  *)
    log_echo "[SRV] [ERROR] Failed Services:"
    # ts (moreutils) prefixes each line with a timestamp; tee prints the list
    # on stdout and appends it to the log file.
    systemctl --failed \
      | ts "[%Y-%m-%d %H:%M:%S] $log_identifier [SRV]" \
      | tee -a "$logfile"
    log_echo "[SRV] You should run \"systemctl status/start <servicename>\" to find out more or start the unit."
    problems=1
    ;;
esac

# Finish
# Write the overall verdict to the log: success only when no check raised
# the problems flag.
case "${problems}" in
  0)
    log "System Health Check Successful"
    ;;
  *)
    log "System Health Check Found Problems"
    ;;
esac