monitors:hardware_sensors

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
monitors:hardware_sensors [2022/12/11 11:05] – [Source] doktoil_makresh | monitors:hardware_sensors [2022/12/11 11:12] (current) – [Source] doktoil_makresh
Line 18: Line 18:
 Add hardware to your $XYMONHOME/server/hosts line for the host running this script Add hardware to your $XYMONHOME/server/hosts line for the host running this script
  
 +===== Source =====
 +=== hobbit-hardware.sh ===
 +<hidden onHidden="Show Code ⇲" onVisible="Hide Code ⇱">
 +<code bash>
 #!/bin/bash #!/bin/bash
  
Line 24: Line 28:
 # Title:     xymon-hardware # Title:     xymon-hardware
 # Author:    Damien Martins  ( doctor |at| makelofine |dot| org) # Author:    Damien Martins  ( doctor |at| makelofine |dot| org)
-# Date:      2018-11-01+# Date:      2022-07-13
 # Purpose:   Check Uni* hardware sensors # Purpose:   Check Uni* hardware sensors
 # Platforms: Uni* having lm-sensor and hddtemp utilities # Platforms: Uni* having lm-sensor and hddtemp utilities
 # Tested:    Xymon 4.3.4 / hddtemp version 0.3-beta15 (Debian Lenny and Etch packages) / sensors version 3.0.2 with libsensors version 3.0.2 (Debian Lenny package) / sensors version 3.0.1 with libsensors version 3.0.1 (Debian Etch package) # Tested:    Xymon 4.3.4 / hddtemp version 0.3-beta15 (Debian Lenny and Etch packages) / sensors version 3.0.2 with libsensors version 3.0.2 (Debian Lenny package) / sensors version 3.0.1 with libsensors version 3.0.1 (Debian Etch package)
-  
-#TODO for v0.7 
-#       -To be independent of /etc/sensors.conf -> we get raw values, and we set right ones from those, and define thresolds in xymon-hardware.cfg file 
-# -Support for multiples sensors 
-# -Support for independant temperatures thresolds for each disk 
-# 
-# History : 
-# 01 nov 2018 - Steffan ?? 
-# v0.5.1 : Adds support for spare drive (not reported as failed anymore) 
-# 27 sep 2013 - Damien Martins 
-# v0.5 : Adds support for HP monitoring tools (hpacucli) 
-# 27 jun 2013 - Damien Martins and Xavier Carol i Rosell 
-# v0.4 : Fixes hddtemp output handling (print last field instead of field N)  
-# 09 sep 2011 - Damien Martins 
-# v0.3 : Adds support for OpenManage Physical disks, temps 
-# 17 feb 2010 - Damien Martins 
-# v0.2.2 : Minor code optimizations 
-# 22 jan 2010 - Damien Martins 
-# v0.2.1 : Minor bug fix 
-# 14 nov 2009 - Damien Martins 
-# v0.2 : -Getting sensor probe no more hard coded 
-# -More verbosity when commands fail 
-# -Disk temperature thresolds in xymon-hardware.cfg file. 
-# -Support smartctl to replace hddtemp (if needed) 
-# -Possibility to disable lm-sensors 
-# -Possibility to choose smartctl chipset 
-# 25 jun 2009 - Damien Martins 
-#       v0.1.2 : -New error messages (more verbose, more accurate) 
-# 18 jun 2009 - Damien Martins 
-#       v0.1.1 : -Bug fixes 
-# 15 jan 2009 - Damien Martins 
-#        v0.1 : First lines, trying to get : 
-#       -temperatures value, and defined thresolds 
-#       -fan rotation speed and thresold 
-#       -voltages and thresolds 
-#       -HDD temperature (thresold is not include, so we set it in this file) 
    
 ################################################################################# #################################################################################
Line 140: Line 108:
 DISK_WARNING_TEMP=$($GREP ^DISK_WARNING_TEMP= $CONFIG_FILE | $SED s/^DISK_WARNING_TEMP=//) DISK_WARNING_TEMP=$($GREP ^DISK_WARNING_TEMP= $CONFIG_FILE | $SED s/^DISK_WARNING_TEMP=//)
 DISK_PANIC_TEMP=$($GREP ^DISK_PANIC_TEMP= $CONFIG_FILE | $SED s/^DISK_PANIC_TEMP=//) DISK_PANIC_TEMP=$($GREP ^DISK_PANIC_TEMP= $CONFIG_FILE | $SED s/^DISK_PANIC_TEMP=//)
- 
-function set_disk_entries_values() 
-{ 
-  ENTRIES=$1 
-  if [ "$(echo $ENTRIES | "$AWK" -F, '{print NF}')" -eq 1 ] ; then 
-     LOCAL_DISK_WARNING_TEMP=$DISK_WARNING_TEMP 
-     LOCAL_DISK_PANIC_TEMP=$DISK_PANIC_TEMP 
-  elif [ "$(echo $ENTRIES | "$AWK" -F, '{print NF}')" -eq 2 ] ; then 
-    LOCAL_DISK_WARNING_TEMP=$DISK_WARNING_TEMP 
-    LOCAL_DISK_PANIC_TEMP=$(echo $ENTRIES | "$AWK" -F, '{print $2}') 
-  elif [ "$(echo $ENTRIES | "$AWK" -F, '{print NF}')" -eq 3 ] ; then 
-    LOCAL_DISK_WARNING_TEMP=$(echo $ENTRIES | "$AWK" -F, '{print $2}') 
-    LOCAL_DISK_PANIC_TEMP=$(echo $ENTRIES | "$AWK" -F, '{print $3}') 
-  fi 
-} 
  
 function use_hddtemp () function use_hddtemp ()
 { {
-  for ENTRIES in $("$GREP" "^DISK=" "$CONFIG_FILE" | "$SED" s/^DISK=// ) ; do +for DISK in $("$GREP" "^DISK=" "$CONFIG_FILE" | "$SED" s/^DISK=//) ; do
-  DISK=$(echo $ENTRIES | "$AWK" -F, '{print $1}'+
- set_disk_entries_values $ENTRIES+
  HDD_TEMP="$($CMD_HDDTEMP $DISK | $SED s/..$// | $AWK '{print $NF}')"  HDD_TEMP="$($CMD_HDDTEMP $DISK | $SED s/..$// | $AWK '{print $NF}')"
  if [ ! "$(echo $HDD_TEMP | grep "^[ [:digit:] ]*$")" ] ; then  if [ ! "$(echo $HDD_TEMP | grep "^[ [:digit:] ]*$")" ] ; then
Line 166: Line 117:
  LINE="&red Disk $DISK temperature is UNKNOWN (HDD_TEMP VALUE IS : $HDD_TEMP).  LINE="&red Disk $DISK temperature is UNKNOWN (HDD_TEMP VALUE IS : $HDD_TEMP).
 It seems S.M.A.R.T. is no more responding !!!" It seems S.M.A.R.T. is no more responding !!!"
- echo "La température de $DISK n'est pas un nombre :/+ echo "La température de $DISK n'est pas un nombre :/
 HDD_TEMP : $HDD_TEMP" HDD_TEMP : $HDD_TEMP"
- elif [ "$HDD_TEMP" -ge "$LOCAL_DISK_PANIC_TEMP" ] ; then+ elif [ "$HDD_TEMP" -ge "$DISK_PANIC_TEMP" ] ; then
  RED=1  RED=1
- LINE="&red Disk temperature is CRITICAL (Panic is $LOCAL_DISK_PANIC_TEMP) :+ LINE="&red Disk temperature is CRITICAL (Panic is $DISK_PANIC_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
- elif [ "$HDD_TEMP" -ge "$LOCAL_DISK_WARNING_TEMP" ] ; then+ elif [ "$HDD_TEMP" -ge "$DISK_WARNING_TEMP" ] ; then
  YELLOW="1"  YELLOW="1"
- LINE="&yellow Disk temperature is HIGH (Warning is $LOCAL_DISK_WARNING_TEMP) :+ LINE="&yellow Disk temperature is HIGH (Warning is $DISK_WARNING_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
- elif [ "$HDD_TEMP" -lt "$LOCAL_DISK_WARNING_TEMP" ] ; then + elif [ "$HDD_TEMP" -lt "$DISK_WARNING_TEMP" ] ; then 
- LINE="&green Disk temperature is OK (Warning is $LOCAL_DISK_WARNING_TEMP) :+ LINE="&green Disk temperature is OK (Warning is $DISK_WARNING_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
  fi  fi
Line 192: Line 143:
  SMARTCTL_ARGS="-A"  SMARTCTL_ARGS="-A"
 fi fi
-for ENTRIES in $("$GREP" "^DISK=" "$CONFIG_FILE" | "$SED" s/^DISK=//) ; do +for DISK in $("$GREP" "^DISK=" "$CONFIG_FILE" | "$SED" s/^DISK=//) ; do
- DISK=$(echo $ENTRIES | "$AWK" -F, '{print $1}'+
- set_disk_entries_values $ENTRIES+
  HDD_TEMP="$($SMARTCTL $SMARTCTL_ARGS $DISK | $GREP "^194" | $AWK '{print $10}')"  HDD_TEMP="$($SMARTCTL $SMARTCTL_ARGS $DISK | $GREP "^194" | $AWK '{print $10}')"
         if [ ! "$(echo $HDD_TEMP | grep "^[ [:digit:] ]*$")" ] ; then         if [ ! "$(echo $HDD_TEMP | grep "^[ [:digit:] ]*$")" ] ; then
Line 200: Line 149:
                 LINE="&red Disk $DISK temperature is UNKNOWN (HDD_TEMP VALUE IS : $HDD_TEMP).                 LINE="&red Disk $DISK temperature is UNKNOWN (HDD_TEMP VALUE IS : $HDD_TEMP).
 It seems S.M.A.R.T. is no more responding !!!" It seems S.M.A.R.T. is no more responding !!!"
-        echo "La température de $DISK n'est pas un nombre :/+        echo "La température de $DISK n'est pas un nombre :/
 HDD_TEMP : $HDD_TEMP" HDD_TEMP : $HDD_TEMP"
-        elif [ "$HDD_TEMP" -ge "$LOCAL_DISK_PANIC_TEMP" ] ; then+        elif [ "$HDD_TEMP" -ge "$DISK_PANIC_TEMP" ] ; then
                 RED=1                 RED=1
-                LINE="&red Disk temperature is CRITICAL (Panic is $LOCAL_DISK_PANIC_TEMP) :+                LINE="&red Disk temperature is CRITICAL (Panic is $DISK_PANIC_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
-        elif [ "$HDD_TEMP" -ge "$LOCAL_DISK_WARNING_TEMP" ] ; then+        elif [ "$HDD_TEMP" -ge "$DISK_WARNING_TEMP" ] ; then
                 YELLOW="1"                 YELLOW="1"
-                LINE="&yellow Disk temperature is HIGH (Warning is $LOCAL_DISK_WARNING_TEMP) :+                LINE="&yellow Disk temperature is HIGH (Warning is $DISK_WARNING_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
-        elif [ "$HDD_TEMP" -lt "$LOCAL_DISK_WARNING_TEMP" ] ; then +        elif [ "$HDD_TEMP" -lt "$DISK_WARNING_TEMP" ] ; then 
-                LINE="&green Disk temperature is OK (Warning is $LOCAL_DISK_WARNING_TEMP) :+                LINE="&green Disk temperature is OK (Warning is $DISK_WARNING_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
         fi         fi
Line 245: Line 194:
 unset MIN MAX PANIC VALUE WARNING unset MIN MAX PANIC VALUE WARNING
 } }
- 
 function test_fan () function test_fan ()
 { {
Line 495: Line 443:
         SLOT=$(echo $OUTPUT | awk '{print $2}')         SLOT=$(echo $OUTPUT | awk '{print $2}')
         STATUS=$(echo $OUTPUT | awk '{print $NF}' | sed s/\)//)         STATUS=$(echo $OUTPUT | awk '{print $NF}' | sed s/\)//)
- if [ "$STATUS" == "spare" ] ; then 
-                STATUS=$(echo $OUTPUT | cut -d',' -f4 | sed 's/ //g') 
-        fi 
         if [ $TYPE == "logical" ] ; then         if [ $TYPE == "logical" ] ; then
                 RAID=$(echo $OUTPUT | awk '{print $6}')                 RAID=$(echo $OUTPUT | awk '{print $6}')
Line 554: Line 499:
 fi fi
 "$BB" "$BBDISP" "status "$MACHINE"."$TEST" "$FINAL_STATUS" $("$DATE") "$BB" "$BBDISP" "status "$MACHINE"."$TEST" "$FINAL_STATUS" $("$DATE")
 +
 $("$CAT" "$MSG_FILE") $("$CAT" "$MSG_FILE")
 " "
 +</code>
 +</hidden>
 +
 ===== Known  Bugs and Issues ===== ===== Known  Bugs and Issues =====
 None None
Line 593: Line 542:
   * **2022-07-13 v0.6**   * **2022-07-13 v0.6**
     * Add support for disks independent temperatures     * Add support for disks independent temperatures
 +</code>
  
  • monitors/hardware_sensors.txt
  • Last modified: 2022/12/11 11:12
  • by doktoil_makresh