monitors:hardware_sensors

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision Both sides next revision
monitors:hardware_sensors [2022/12/11 11:04] – [Changelog] doktoil_makresh
monitors:hardware_sensors [2022/12/11 11:05] – [Source] doktoil_makresh
Line 18: Line 18:
 Add hardware to your $XYMONHOME/server/hosts line for the host running this script Add hardware to your $XYMONHOME/server/hosts line for the host running this script
  
-===== Source ===== 
-=== hobbit-hardware.sh === 
-<hidden onHidden="Show Code ⇲" onVisible="Hide Code ⇱"> 
-<code bash> 
 #!/bin/bash #!/bin/bash
  
 # ALL THIS SCRIPT IS UNDER GPL LICENSE # ALL THIS SCRIPT IS UNDER GPL LICENSE
-# Version 0.5 +# Version 0.6
 # Title:     xymon-hardware # Title:     xymon-hardware
 # Author:    Damien Martins  ( doctor |at| makelofine |dot| org) # Author:    Damien Martins  ( doctor |at| makelofine |dot| org)
-# Date:      2013-09-27 +# Date:      2018-11-01
 # Purpose:   Check Uni* hardware sensors # Purpose:   Check Uni* hardware sensors
 # Platforms: Uni* having lm-sensor and hddtemp utilities # Platforms: Uni* having lm-sensor and hddtemp utilities
 # Tested:    Xymon 4.3.4 / hddtemp version 0.3-beta15 (Debian Lenny and Etch packages) / sensors version 3.0.2 with libsensors version 3.0.2 (Debian Lenny package) / sensors version 3.0.1 with libsensors version 3.0.1 (Debian Etch package) # Tested:    Xymon 4.3.4 / hddtemp version 0.3-beta15 (Debian Lenny and Etch packages) / sensors version 3.0.2 with libsensors version 3.0.2 (Debian Lenny package) / sensors version 3.0.1 with libsensors version 3.0.1 (Debian Etch package)
    
-#TODO for v0.6 +#TODO for v0.7
 #       -To be independent of /etc/sensors.conf -> we get raw values, and we set right ones from those, and define thresholds in xymon-hardware.cfg file #       -To be independent of /etc/sensors.conf -> we get raw values, and we set right ones from those, and define thresholds in xymon-hardware.cfg file
 # -Support for multiple sensors # -Support for multiple sensors
Line 39: Line 35:
 # #
 # History : # History :
 +# 01 nov 2018 - Steffan ??
 +# v0.5.1 : Adds support for spare drive (not reported as failed anymore)
 # 27 sep 2013 - Damien Martins # 27 sep 2013 - Damien Martins
-# v0.5 : Add support for HP monitoring tools (hpacucli) +# v0.5 : Adds support for HP monitoring tools (hpacucli)
 # 27 jun 2013 - Damien Martins and Xavier Carol i Rosell # 27 jun 2013 - Damien Martins and Xavier Carol i Rosell
-# v0.4 : Fix hddtemp output handling (print last field instead of field N) +# v0.4 : Fixes hddtemp output handling (print last field instead of field N) 
 # 09 sep 2011 - Damien Martins # 09 sep 2011 - Damien Martins
-# v0.3 : Add support for OpenManage Physical disks, temps +# v0.3 : Adds support for OpenManage Physical disks, temps
 # 17 feb 2010 - Damien Martins # 17 feb 2010 - Damien Martins
 # v0.2.2 : Minor code optimizations # v0.2.2 : Minor code optimizations
Line 142: Line 140:
 DISK_WARNING_TEMP=$($GREP ^DISK_WARNING_TEMP= $CONFIG_FILE | $SED s/^DISK_WARNING_TEMP=//) DISK_WARNING_TEMP=$($GREP ^DISK_WARNING_TEMP= $CONFIG_FILE | $SED s/^DISK_WARNING_TEMP=//)
 DISK_PANIC_TEMP=$($GREP ^DISK_PANIC_TEMP= $CONFIG_FILE | $SED s/^DISK_PANIC_TEMP=//) DISK_PANIC_TEMP=$($GREP ^DISK_PANIC_TEMP= $CONFIG_FILE | $SED s/^DISK_PANIC_TEMP=//)
 +
 +function set_disk_entries_values()
 +{
 +  ENTRIES=$1
 +  if [ "$(echo $ENTRIES | "$AWK" -F, '{print NF}')" -eq 1 ] ; then
 +     LOCAL_DISK_WARNING_TEMP=$DISK_WARNING_TEMP
 +     LOCAL_DISK_PANIC_TEMP=$DISK_PANIC_TEMP
 +  elif [ "$(echo $ENTRIES | "$AWK" -F, '{print NF}')" -eq 2 ] ; then
 +    LOCAL_DISK_WARNING_TEMP=$DISK_WARNING_TEMP
 +    LOCAL_DISK_PANIC_TEMP=$(echo $ENTRIES | "$AWK" -F, '{print $2}')
 +  elif [ "$(echo $ENTRIES | "$AWK" -F, '{print NF}')" -eq 3 ] ; then
 +    LOCAL_DISK_WARNING_TEMP=$(echo $ENTRIES | "$AWK" -F, '{print $2}')
 +    LOCAL_DISK_PANIC_TEMP=$(echo $ENTRIES | "$AWK" -F, '{print $3}')
 +  fi
 +}
  
 function use_hddtemp () function use_hddtemp ()
 { {
-for DISK in $("$GREP" "^DISK=" "$CONFIG_FILE" | "$SED" s/^DISK=//) ; do+  for ENTRIES in $("$GREP" "^DISK=" "$CONFIG_FILE" | "$SED" s/^DISK=// ) ; do 
 +  DISK=$(echo $ENTRIES | "$AWK" -F, '{print $1}'
 + set_disk_entries_values $ENTRIES
  HDD_TEMP="$($CMD_HDDTEMP $DISK | $SED s/..$// | $AWK '{print $NF}')"  HDD_TEMP="$($CMD_HDDTEMP $DISK | $SED s/..$// | $AWK '{print $NF}')"
  if [ ! "$(echo $HDD_TEMP | grep "^[ [:digit:] ]*$")" ] ; then  if [ ! "$(echo $HDD_TEMP | grep "^[ [:digit:] ]*$")" ] ; then
Line 151: Line 166:
  LINE="&red Disk $DISK temperature is UNKNOWN (HDD_TEMP VALUE IS : $HDD_TEMP).  LINE="&red Disk $DISK temperature is UNKNOWN (HDD_TEMP VALUE IS : $HDD_TEMP).
 It seems S.M.A.R.T. is no more responding !!!" It seems S.M.A.R.T. is no more responding !!!"
- echo "La temp�rature de $DISK n'est pas un nombre :/+ echo "La température de $DISK n'est pas un nombre :/
 HDD_TEMP : $HDD_TEMP" HDD_TEMP : $HDD_TEMP"
- elif [ "$HDD_TEMP" -ge "$DISK_PANIC_TEMP" ] ; then+ elif [ "$HDD_TEMP" -ge "$LOCAL_DISK_PANIC_TEMP" ] ; then
  RED=1  RED=1
- LINE="&red Disk temperature is CRITICAL (Panic is $DISK_PANIC_TEMP) :+ LINE="&red Disk temperature is CRITICAL (Panic is $LOCAL_DISK_PANIC_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
- elif [ "$HDD_TEMP" -ge "$DISK_WARNING_TEMP" ] ; then+ elif [ "$HDD_TEMP" -ge "$LOCAL_DISK_WARNING_TEMP" ] ; then
  YELLOW="1"  YELLOW="1"
- LINE="&yellow Disk temperature is HIGH (Warning is $DISK_WARNING_TEMP) :+ LINE="&yellow Disk temperature is HIGH (Warning is $LOCAL_DISK_WARNING_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
- elif [ "$HDD_TEMP" -lt "$DISK_WARNING_TEMP" ] ; then + elif [ "$HDD_TEMP" -lt "$LOCAL_DISK_WARNING_TEMP" ] ; then 
- LINE="&green Disk temperature is OK (Warning is $DISK_WARNING_TEMP) :+ LINE="&green Disk temperature is OK (Warning is $LOCAL_DISK_WARNING_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
  fi  fi
Line 177: Line 192:
  SMARTCTL_ARGS="-A"  SMARTCTL_ARGS="-A"
 fi fi
-for DISK in $("$GREP" "^DISK=" "$CONFIG_FILE" | "$SED" s/^DISK=//) ; do+for ENTRIES in $("$GREP" "^DISK=" "$CONFIG_FILE" | "$SED" s/^DISK=//) ; do 
 + DISK=$(echo $ENTRIES | "$AWK" -F, '{print $1}'
 + set_disk_entries_values $ENTRIES
  HDD_TEMP="$($SMARTCTL $SMARTCTL_ARGS $DISK | $GREP "^194" | $AWK '{print $10}')"  HDD_TEMP="$($SMARTCTL $SMARTCTL_ARGS $DISK | $GREP "^194" | $AWK '{print $10}')"
         if [ ! "$(echo $HDD_TEMP | grep "^[ [:digit:] ]*$")" ] ; then         if [ ! "$(echo $HDD_TEMP | grep "^[ [:digit:] ]*$")" ] ; then
Line 183: Line 200:
                 LINE="&red Disk $DISK temperature is UNKNOWN (HDD_TEMP VALUE IS : $HDD_TEMP).                 LINE="&red Disk $DISK temperature is UNKNOWN (HDD_TEMP VALUE IS : $HDD_TEMP).
 It seems S.M.A.R.T. is no more responding !!!" It seems S.M.A.R.T. is no more responding !!!"
-        echo "La temp�rature de $DISK n'est pas un nombre :/+        echo "La température de $DISK n'est pas un nombre :/
 HDD_TEMP : $HDD_TEMP" HDD_TEMP : $HDD_TEMP"
-        elif [ "$HDD_TEMP" -ge "$DISK_PANIC_TEMP" ] ; then+        elif [ "$HDD_TEMP" -ge "$LOCAL_DISK_PANIC_TEMP" ] ; then
                 RED=1                 RED=1
-                LINE="&red Disk temperature is CRITICAL (Panic is $DISK_PANIC_TEMP) :+                LINE="&red Disk temperature is CRITICAL (Panic is $LOCAL_DISK_PANIC_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
-        elif [ "$HDD_TEMP" -ge "$DISK_WARNING_TEMP" ] ; then+        elif [ "$HDD_TEMP" -ge "$LOCAL_DISK_WARNING_TEMP" ] ; then
                 YELLOW="1"                 YELLOW="1"
-                LINE="&yellow Disk temperature is HIGH (Warning is $DISK_WARNING_TEMP) :+                LINE="&yellow Disk temperature is HIGH (Warning is $LOCAL_DISK_WARNING_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
-        elif [ "$HDD_TEMP" -lt "$DISK_WARNING_TEMP" ] ; then +        elif [ "$HDD_TEMP" -lt "$LOCAL_DISK_WARNING_TEMP" ] ; then 
-                LINE="&green Disk temperature is OK (Warning is $DISK_WARNING_TEMP) :+                LINE="&green Disk temperature is OK (Warning is $LOCAL_DISK_WARNING_TEMP) :
 "$DISK"_temperature: ${HDD_TEMP}" "$DISK"_temperature: ${HDD_TEMP}"
         fi         fi
Line 228: Line 245:
 unset MIN MAX PANIC VALUE WARNING unset MIN MAX PANIC VALUE WARNING
 } }
 +
 function test_fan () function test_fan ()
 { {
Line 477: Line 495:
         SLOT=$(echo $OUTPUT | awk '{print $2}')         SLOT=$(echo $OUTPUT | awk '{print $2}')
         STATUS=$(echo $OUTPUT | awk '{print $NF}' | sed s/\)//)         STATUS=$(echo $OUTPUT | awk '{print $NF}' | sed s/\)//)
 + if [ "$STATUS" == "spare" ] ; then
 +                STATUS=$(echo $OUTPUT | cut -d',' -f4 | sed 's/ //g')
 +        fi
         if [ $TYPE == "logical" ] ; then         if [ $TYPE == "logical" ] ; then
                 RAID=$(echo $OUTPUT | awk '{print $6}')                 RAID=$(echo $OUTPUT | awk '{print $6}')
Line 533: Line 554:
 fi fi
 "$BB" "$BBDISP" "status "$MACHINE"."$TEST" "$FINAL_STATUS" $("$DATE") "$BB" "$BBDISP" "status "$MACHINE"."$TEST" "$FINAL_STATUS" $("$DATE")
- 
 $("$CAT" "$MSG_FILE") $("$CAT" "$MSG_FILE")
 " "
-</code> 
-</hidden> 
- 
 ===== Known  Bugs and Issues ===== ===== Known  Bugs and Issues =====
 None None
  • monitors/hardware_sensors.txt
  • Last modified: 2022/12/11 11:12
  • by doktoil_makresh