monitors:hardware_sensors

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
Next revision
Both sides next revision
monitors:hardware_sensors [2013/09/27 12:22] – [Installation] doctor_madness | monitors:hardware_sensors [2022/12/11 11:05] – [Source] doktoil_makresh
Line 1: Line 1:
 ====== Hardware monitoring ====== ====== Hardware monitoring ======
  
-^ Author | [[ doctor@makelofine.org | Damien Martins ]] | +^ Author         | [[doctor@makelofine.org| Damien Martins ]]                    |
-^ Compatibility | Xymon 4.2.2/4.3.12 | +^ Compatibility  | Xymon 4.2.2/4.3.12                                            |
-^ Requirements | sh (or bash), hddtemp, smartmontools | +^ Requirements   | sh (or bash), hddtemp, smartmontools                          |
-^ Download | https://www.makelofine.org/xymon-plugins/xymon-hardware-v0.5.tar.bz2 | +^ Download       | Part of https://github.com/doktoil-makresh/xymon-plugins.git  |
-^ Last Update | 2013-09-27 | +^ Last Update    | 2022-07-13                                                    |
  
 ===== Description ===== ===== Description =====
Line 18: Line 18:
 Add hardware to your $XYMONHOME/server/hosts line for the host running this script Add hardware to your $XYMONHOME/server/hosts line for the host running this script
  
-===== Source ===== 
-=== hobbit-hardware.sh === 
-<hidden onHidden="Show Code ⇲" onVisible="Hide Code ⇱"> 
-<code bash> 
 #!/bin/bash #!/bin/bash
  
 # ALL THIS SCRIPT IS UNDER GPL LICENSE # ALL THIS SCRIPT IS UNDER GPL LICENSE
-# Version 0.5+# Version 0.6
 # Title:     xymon-hardware # Title:     xymon-hardware
 # Author:    Damien Martins  ( doctor |at| makelofine |dot| org) # Author:    Damien Martins  ( doctor |at| makelofine |dot| org)
-# Date:      2013-09-27+# Date:      2018-11-01
 # Purpose:   Check Uni* hardware sensors # Purpose:   Check Uni* hardware sensors
 # Platforms: Uni* having lm-sensor and hddtemp utilities # Platforms: Uni* having lm-sensor and hddtemp utilities
 # Tested:    Xymon 4.3.4 / hddtemp version 0.3-beta15 (Debian Lenny and Etch packages) / sensors version 3.0.2 with libsensors version 3.0.2 (Debian Lenny package) / sensors version 3.0.1 with libsensors version 3.0.1 (Debian Etch package) # Tested:    Xymon 4.3.4 / hddtemp version 0.3-beta15 (Debian Lenny and Etch packages) / sensors version 3.0.2 with libsensors version 3.0.2 (Debian Lenny package) / sensors version 3.0.1 with libsensors version 3.0.1 (Debian Etch package)
    
-#TODO for v0.6+#TODO for v0.7
 #       -To be independent of /etc/sensors.conf -> we get raw values, and we set right ones from those, and define thresholds in xymon-hardware.cfg file #       -To be independent of /etc/sensors.conf -> we get raw values, and we set right ones from those, and define thresholds in xymon-hardware.cfg file
 # -Support for multiple sensors # -Support for multiple sensors
Line 39: Line 35:
 # #
 # History : # History :
 +# 01 nov 2018 - Steffan ??
 +# v0.5.1 : Adds support for spare drive (not reported as failed anymore)
 # 27 sep 2013 - Damien Martins # 27 sep 2013 - Damien Martins
-# v0.5 : Add support for HP monitoring tools (hpacucli)+# v0.5 : Adds support for HP monitoring tools (hpacucli)
 # 27 jun 2013 - Damien Martins and Xavier Carol i Rosell # 27 jun 2013 - Damien Martins and Xavier Carol i Rosell
-# v0.4 : Fix hddtemp output handling (print last field instead of field N) +# v0.4 : Fixes hddtemp output handling (print last field instead of field N) 
 # 09 sep 2011 - Damien Martins # 09 sep 2011 - Damien Martins
-# v0.3 : Add support for OpenManage Physical disks, temps+# v0.3 : Adds support for OpenManage Physical disks, temps
 # 17 feb 2010 - Damien Martins # 17 feb 2010 - Damien Martins
 # v0.2.2 : Minor code optimizations # v0.2.2 : Minor code optimizations
Line 142: Line 140:
 DISK_WARNING_TEMP=$($GREP ^DISK_WARNING_TEMP= $CONFIG_FILE | $SED s/^DISK_WARNING_TEMP=//) DISK_WARNING_TEMP=$($GREP ^DISK_WARNING_TEMP= $CONFIG_FILE | $SED s/^DISK_WARNING_TEMP=//)
 DISK_PANIC_TEMP=$($GREP ^DISK_PANIC_TEMP= $CONFIG_FILE | $SED s/^DISK_PANIC_TEMP=//) DISK_PANIC_TEMP=$($GREP ^DISK_PANIC_TEMP= $CONFIG_FILE | $SED s/^DISK_PANIC_TEMP=//)
 +
# Resolve the temperature thresholds that apply to one DISK= configuration entry.
#
# Argument:
#   $1 - comma-separated entry, interpreted by its number of fields:
#          1 field  : disk only                -> default warning, default panic
#          2 fields : disk,panic               -> default warning, field 2 as panic
#          3 fields : disk,warning,panic       -> field 2 as warning, field 3 as panic
#        Any other field count (including an empty entry) uses both defaults.
# Globals read:
#   DISK_WARNING_TEMP, DISK_PANIC_TEMP - default thresholds
# Globals written:
#   ENTRIES                  - set to $1 (kept global, as the function has always done)
#   LOCAL_DISK_WARNING_TEMP  - warning threshold for this entry
#   LOCAL_DISK_PANIC_TEMP    - panic threshold for this entry
function set_disk_entries_values()
{
  ENTRIES=$1
  local separators field_count second_field third_field

  # Count fields as "number of commas + 1", so a trailing empty field still counts.
  separators=${ENTRIES//[^,]/}
  field_count=$(( ${#separators} + 1 ))

  # Split on commas only; the first field (disk name) and any surplus are discarded.
  IFS=',' read -r _ second_field third_field _ <<< "$ENTRIES"

  # Reset to the defaults on every call: without this, an entry with an
  # unexpected number of fields would silently keep the previous disk's thresholds.
  LOCAL_DISK_WARNING_TEMP=$DISK_WARNING_TEMP
  LOCAL_DISK_PANIC_TEMP=$DISK_PANIC_TEMP

  # An empty field falls back to the matching default, so the numeric
  # comparisons done by the callers never receive an empty operand.
  case "$field_count" in
    2)
      LOCAL_DISK_PANIC_TEMP=${second_field:-$DISK_PANIC_TEMP}
      ;;
    3)
      LOCAL_DISK_WARNING_TEMP=${second_field:-$DISK_WARNING_TEMP}
      LOCAL_DISK_PANIC_TEMP=${third_field:-$DISK_PANIC_TEMP}
      ;;
  esac
}
  
 function use_hddtemp () function use_hddtemp ()
 { {
-for DISK in $("$GREP" "^DISK=" "$CONFIG_FILE" | "$SED" s/^DISK=//) ; do+  for ENTRIES in $("$GREP" "^DISK=" "$CONFIG_FILE" | "$SED" s/^DISK=// ) ; do 
 +  DISK=$(echo $ENTRIES | "$AWK" -F, '{print $1}'
 + set_disk_entries_values $ENTRIES
  HDD_TEMP="$($CMD_HDDTEMP $DISK | $SED s/..$// | $AWK '{print $NF}')"  HDD_TEMP="$($CMD_HDDTEMP $DISK | $SED s/..$// | $AWK '{print $NF}')"
  if [ ! "$(echo $HDD_TEMP | grep "^[ [:digit:] ]*$")" ] ; then  if [ ! "$(echo $HDD_TEMP | grep "^[ [:digit:] ]*$")" ] ; then
Line 151: Line 166:
  LINE="&red Disk $DISK temperature is UNKNOWN (HDD_TEMP VALUE IS : $HDD_TEMP).  LINE="&red Disk $DISK temperature is UNKNOWN (HDD_TEMP VALUE IS : $HDD_TEMP).
 It seems S.M.A.R.T. is no more responding !!!" It seems S.M.A.R.T. is no more responding !!!"
- echo "La temp�rature de $DISK n'est pas un nombre :/+ echo "La température de $DISK n'est pas un nombre :/
 HDD_TEMP : $HDD_TEMP" HDD_TEMP : $HDD_TEMP"
- elif [ "$HDD_TEMP" -ge "$DISK_PANIC_TEMP" ] ; then+ elif [ "$HDD_TEMP" -ge "$LOCAL_DISK_PANIC_TEMP" ] ; then
  RED=1  RED=1
- LINE="&red Disk temperature is CRITICAL (Panic is $DISK_PANIC_TEMP) :+ LINE="&red Disk temperature is CRITICAL (Panic is $LOCAL_DISK_PANIC_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
- elif [ "$HDD_TEMP" -ge "$DISK_WARNING_TEMP" ] ; then+ elif [ "$HDD_TEMP" -ge "$LOCAL_DISK_WARNING_TEMP" ] ; then
  YELLOW="1"  YELLOW="1"
- LINE="&yellow Disk temperature is HIGH (Warning is $DISK_WARNING_TEMP) :+ LINE="&yellow Disk temperature is HIGH (Warning is $LOCAL_DISK_WARNING_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
- elif [ "$HDD_TEMP" -lt "$DISK_WARNING_TEMP" ] ; then + elif [ "$HDD_TEMP" -lt "$LOCAL_DISK_WARNING_TEMP" ] ; then 
- LINE="&green Disk temperature is OK (Warning is $DISK_WARNING_TEMP) :+ LINE="&green Disk temperature is OK (Warning is $LOCAL_DISK_WARNING_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
  fi  fi
Line 177: Line 192:
  SMARTCTL_ARGS="-A"  SMARTCTL_ARGS="-A"
 fi fi
-for DISK in $("$GREP" "^DISK=" "$CONFIG_FILE" | "$SED" s/^DISK=//) ; do+for ENTRIES in $("$GREP" "^DISK=" "$CONFIG_FILE" | "$SED" s/^DISK=//) ; do 
 + DISK=$(echo $ENTRIES | "$AWK" -F, '{print $1}'
 + set_disk_entries_values $ENTRIES
  HDD_TEMP="$($SMARTCTL $SMARTCTL_ARGS $DISK | $GREP "^194" | $AWK '{print $10}')"  HDD_TEMP="$($SMARTCTL $SMARTCTL_ARGS $DISK | $GREP "^194" | $AWK '{print $10}')"
         if [ ! "$(echo $HDD_TEMP | grep "^[ [:digit:] ]*$")" ] ; then         if [ ! "$(echo $HDD_TEMP | grep "^[ [:digit:] ]*$")" ] ; then
Line 183: Line 200:
                 LINE="&red Disk $DISK temperature is UNKNOWN (HDD_TEMP VALUE IS : $HDD_TEMP).                 LINE="&red Disk $DISK temperature is UNKNOWN (HDD_TEMP VALUE IS : $HDD_TEMP).
 It seems S.M.A.R.T. is no more responding !!!" It seems S.M.A.R.T. is no more responding !!!"
-        echo "La temp�rature de $DISK n'est pas un nombre :/+        echo "La température de $DISK n'est pas un nombre :/
 HDD_TEMP : $HDD_TEMP" HDD_TEMP : $HDD_TEMP"
-        elif [ "$HDD_TEMP" -ge "$DISK_PANIC_TEMP" ] ; then+        elif [ "$HDD_TEMP" -ge "$LOCAL_DISK_PANIC_TEMP" ] ; then
                 RED=1                 RED=1
-                LINE="&red Disk temperature is CRITICAL (Panic is $DISK_PANIC_TEMP) :+                LINE="&red Disk temperature is CRITICAL (Panic is $LOCAL_DISK_PANIC_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
-        elif [ "$HDD_TEMP" -ge "$DISK_WARNING_TEMP" ] ; then+        elif [ "$HDD_TEMP" -ge "$LOCAL_DISK_WARNING_TEMP" ] ; then
                 YELLOW="1"                 YELLOW="1"
-                LINE="&yellow Disk temperature is HIGH (Warning is $DISK_WARNING_TEMP) :+                LINE="&yellow Disk temperature is HIGH (Warning is $LOCAL_DISK_WARNING_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
-        elif [ "$HDD_TEMP" -lt "$DISK_WARNING_TEMP" ] ; then +        elif [ "$HDD_TEMP" -lt "$LOCAL_DISK_WARNING_TEMP" ] ; then 
-                LINE="&green Disk temperature is OK (Warning is $DISK_WARNING_TEMP) :+                LINE="&green Disk temperature is OK (Warning is $LOCAL_DISK_WARNING_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
         fi         fi
Line 228: Line 245:
 unset MIN MAX PANIC VALUE WARNING unset MIN MAX PANIC VALUE WARNING
 } }
 +
 function test_fan () function test_fan ()
 { {
Line 477: Line 495:
         SLOT=$(echo $OUTPUT | awk '{print $2}')         SLOT=$(echo $OUTPUT | awk '{print $2}')
         STATUS=$(echo $OUTPUT | awk '{print $NF}' | sed s/\)//)         STATUS=$(echo $OUTPUT | awk '{print $NF}' | sed s/\)//)
 + if [ "$STATUS" == "spare" ] ; then
 +                STATUS=$(echo $OUTPUT | cut -d',' -f4 | sed 's/ //g')
 +        fi
         if [ $TYPE == "logical" ] ; then         if [ $TYPE == "logical" ] ; then
                 RAID=$(echo $OUTPUT | awk '{print $6}')                 RAID=$(echo $OUTPUT | awk '{print $6}')
Line 533: Line 554:
 fi fi
 "$BB" "$BBDISP" "status "$MACHINE"."$TEST" "$FINAL_STATUS" $("$DATE") "$BB" "$BBDISP" "status "$MACHINE"."$TEST" "$FINAL_STATUS" $("$DATE")
- 
 $("$CAT" "$MSG_FILE") $("$CAT" "$MSG_FILE")
 " "
-</code> 
-</hidden> 
- 
 ===== Known  Bugs and Issues ===== ===== Known  Bugs and Issues =====
 None None
Line 574: Line 591:
   * **2013-09-27 v0.5**   * **2013-09-27 v0.5**
     * Add support for HP monitoring tool (hpacucli)     * Add support for HP monitoring tool (hpacucli)
 +  * **2022-07-13 v0.6**
 +    * Add support for independent per-disk temperature thresholds
 +
 +
  • monitors/hardware_sensors.txt
  • Last modified: 2022/12/11 11:12
  • by doktoil_makresh