#!/bin/bash
# Title: cpu-load-calc.sh
# Description: Calculate CPU load warning/critical thresholds for each Xymon client from its processor count (run periodically on the Xymon server with cron)
# Dependency: Requires 'bc' package
# Last Change: 2018-05-22
# Recent Changes:-Updated awk search to look for [nproc] at the beginning of the line
#=======================
# Customize Here
#=======================
# Threshold multipliers: a node's processor count is multiplied by each of
# these to obtain its warning and critical load levels
load_warn_multiplier="1.0"
load_crit_multiplier="1.5"
# Directory that receives the generated per-node load threshold files
auto_load_dir='/etc/xymon/analysis.d/auto-cpuload.d'
# Hostdata directory on the Xymon server
xymon_hostdata_dir='/var/lib/xymon/hostdata'
# Main analysis configuration file of the Xymon server
xymon_analysis_cfg='/etc/xymon/analysis.cfg'
#=======================
# End of Customize
#=======================
#=======================
# Pre-Run Error Checking
#=======================
## Dependency Check ##
# 'command -v' is a shell builtin, so the check cannot pass by accident when
# 'which' itself is missing or returns a status other than 1.
if ! command -v bc > /dev/null 2>&1; then
  echo ">> Error! Dependent package 'bc' (arbitrary precision calculator) not detected. Exiting..." >&2
  exit 1
fi
## Does the Auto Load Directory exist?
if [[ ! -d "${auto_load_dir}" ]]; then
  echo ">> Error! The directory (${auto_load_dir}) does not exist or is not a directory. Exiting..." >&2
  # Explicit non-zero status: a bare 'exit' would report success to cron
  exit 1
fi
## Write Access Check
# Any non-zero status from touch means the probe file could not be created
if ! touch "${auto_load_dir}/testfile" &> /dev/null; then
  echo ">> Error! User '$(whoami)' does not have write access to ${auto_load_dir}! Exiting..." >&2
  exit 1
else
  rm -f -- "${auto_load_dir}/testfile" &> /dev/null
fi
## Check if the auto_load_dir is included in main analysis config file
# grep exits 1 on "no match" and 2 on errors (e.g. missing config file);
# both cases mean the include line was not found, so warn on any failure.
if ! grep -q -- "^directory ${auto_load_dir}" "${xymon_analysis_cfg}" 2> /dev/null; then
  echo -e ">> Warning! Auto load directory (${auto_load_dir}) is not included in ${xymon_analysis_cfg}. Continuing, but auto CPU load settings will not take effect until 'directory ${auto_load_dir}' is added to ${xymon_analysis_cfg}.\n" >&2
fi
#=======================
# End of Pre-Run Error Checking
#=======================
#===============================
# Functions; Main starts after
#===============================
# Print the usage/help text.
# Arguments: none
# Outputs:   usage text on stdout; "$0" is shown as the command name
function show_usage
{
  # printf with a fixed format string prints "$0" verbatim; 'echo -e' would
  # interpret any backslash sequences contained in the script path.
  printf '\n####==== Xymon Client Auto Load Thresholds ====####\n'
  printf "\nDescription: Calculate a xymon client's cpu load.\n"
  printf '\n--Usage\n'
  printf '%s => No arguments, configure with no verbosity.\n' "$0"
  printf '%s -v => Verbose output.\n' "$0"
  printf '%s -r => Refresh CPU load data (force hostdata update).\n' "$0"
  printf '%s -h => Display usage.\n' "$0"
}
# Force a snapshot of hostdata for one node.
# Arguments: $1 - node name to report the fake cpu statuses for
# Returns:   exit status of the last xymon call
function force_hostdata
{
  # Keep the node name local so the function does not leak a global variable
  local node_name="${1}"
  # Lie to Xymon that the node's cpu is green, then yellow, forcing a hostdata snapshot
  xymon 127.0.0.1 "status ${node_name}.cpu green $(date)"
  xymon 127.0.0.1 "status ${node_name}.cpu yellow $(date)"
}
#=======================
# Get Script Arguments
#=======================
# Reset getopts' option index in case it has been used previously in this shell
OPTIND=1
# Defaults: no verbose output, no forced hostdata refresh
verbose_output="no"
refresh_cpus="no"
while getopts "hrv" opt; do
  case "${opt}" in
    h) # -h (help) argument
      show_usage
      exit 0
      ;;
    r) # -r (refresh cpus) argument
      refresh_cpus="yes"
      ;;
    v) # -v (verbose) argument
      verbose_output="yes"
      ;;
    *) # invalid argument: show usage on stderr and report failure to the caller
      show_usage >&2
      exit 1
      ;;
  esac
done
#=======================
# Main Program
#=======================
echo -e "== Xymon Client Auto Load Thresholds =="
echo -e "Load Warning Multiplier: ${load_warn_multiplier}"
echo -e "Load Critical Multiplier: ${load_crit_multiplier}"
echo -e "Saving configs to: ${auto_load_dir}"
# A usable processor count is a positive integer (leading zeros tolerated).
# Anchored on both ends so values that merely contain a digit are rejected.
nproc_regex='^0*[1-9][0-9]*$'
# For each node reporting host data.
# Globbing replaces parsing 'ls' output; ':?' aborts instead of globbing '/'
# should the hostdata directory variable ever be empty.
for node_dir in "${xymon_hostdata_dir:?}"/*; do
  # Skip stray non-directories and the literal pattern left by an empty glob
  [[ -d "${node_dir}" ]] || continue
  node="${node_dir##*/}"
  if [[ ${verbose_output} == "yes" ]]; then
    echo -e "\n>> Working on node: ${node}"
  fi
  if [[ ${refresh_cpus} == "yes" ]]; then
    if [[ ${verbose_output} == "yes" ]]; then
      echo -e "\n-> Refreshing hostdata..."
    fi
    # Force an update of hostdata
    force_hostdata "${node}"
  fi
  # Find the node's most recently modified host data file
  latest_file=""
  for candidate in "${node_dir}"/*; do
    [[ -f "${candidate}" ]] || continue
    if [[ -z "${latest_file}" || "${candidate}" -nt "${latest_file}" ]]; then
      latest_file="${candidate}"
    fi
  done
  # Get the number of procs: the line following the first [nproc] header,
  # with surrounding blanks/carriage returns stripped
  node_num_procs=""
  if [[ -n "${latest_file}" ]]; then
    node_num_procs="$(awk '/^\[nproc\]/ {
        if ((getline) > 0) { gsub(/^[ \t\r]+|[ \t\r]+$/, ""); print }
        exit
      }' "${latest_file}")"
  fi
  # If node_num_procs is empty or not a positive number, move to the next node
  if [[ ! "${node_num_procs}" =~ ${nproc_regex} ]]; then
    # Did not find 'nproc' in the host data file or no number from nproc returned
    if [[ ${verbose_output} == "yes" ]]; then
      echo "-> Warning! Could not find 'nproc' in ${node}'s host data file or no number returned. Skipping..."
    fi
    continue
  fi
  # Calculate the warning and critical load thresholds (normalize as a floating point with bc)
  load_warning="$(bc <<< "${node_num_procs} * ${load_warn_multiplier}")"
  load_critical="$(bc <<< "${node_num_procs} * ${load_crit_multiplier}")"
  # Never write a config with missing thresholds if bc produced no output
  if [[ -z "${load_warning}" || -z "${load_critical}" ]]; then
    echo "-> Error! Could not calculate load thresholds for ${node}. Skipping..." >&2
    continue
  fi
  if [[ ${verbose_output} == "yes" ]]; then
    echo -e "-> Number of Procs: ${node_num_procs}"
    echo -e "-> Warning at: ${load_warning}"
    echo -e "-> Critical at: ${load_critical}"
    echo -e "-> Creating node analysis drop in file..."
  fi
  # Create analysis drop in file with a single redirect so the file is
  # opened (and truncated) only once
  cfg_file="${auto_load_dir}/${node}.cfg"
  if ! {
    printf "# %s's CPU Load Thresholds (Warning Critical)\n" "${node}"
    printf 'HOST=%s\n' "${node}"
    printf ' LOAD %s %s\n' "${load_warning}" "${load_critical}"
  } > "${cfg_file}"; then
    echo "-> Error! Could not write ${cfg_file}." >&2
  fi
done
echo -e "\n== Auto Load Thresholds Complete =="
exit 0