#!/bin/ksh
# HBA monitoring script
# Author : Vernon Everett - everett.vernon(at)gmaildotcom
# Development History
# Date | Author | Summary
#---------------------------------------------------------------
# 10/08/2010 | Vernon Everett | Initial release.
# 11/08/2010 | Vernon Everett | Added override variables
# | Added multi-path, SCSI details, remote port info
# |
#---------------------------------------------------------------
# Zone guard: when /usr/bin/zonename is available, carry on only if it
# reports "global"; in any other zone exit quietly with status 0.
# Hosts without the zonename binary skip the check and fall through.
if [ -x /usr/bin/zonename ]
then
# "=" is the portable string comparison for "[", and the quotes keep the
# test well-formed even if zonename prints nothing.
[ "$(/usr/bin/zonename)" = "global" ] || exit 0 # I only run on global zones
fi
# ---- Scratch file and external commands ----
# Prefix for every work file; the process id keeps separate runs apart.
TEMPFILE="${BBTMP}/hba.$$"
# fcinfo is invoked through sudo.
FCINFO="/opt/csw/bin/sudo /usr/sbin/fcinfo"
MPATHADM="/usr/sbin/mpathadm" # sudo probably not needed
# Overall status colour; starts green and is raised by the checks below.
COLOUR="green"

# ---- Which checks run and which extra listings are included ----
CHECKONLINE="true"
CHECKSPEED="true"
CHECKERRS="true"
CHECKMPATH="true"
LIST_MPATH="true"
LIST_SCSI="true"
LIST_REMOTE="true"

# ---- Default thresholds ----
# An error count at or above these values colours its line yellow / red.
ERR_YELLOW="3"
ERR_RED="100"
# Colour used when a device has fewer operational paths than total paths.
MPATHFAILCOL="yellow"
# The defaults are defined; now let clientlocal.cfg override them.
# Add lines like these to clientlocal.cfg to override a default:
# HBA:export CHECKERRS=false
# HBA:export ERR_RED=20

# load_hba_overrides FILE
# Run every "HBA:" line of FILE as a command in the current shell.
# Everything after the "HBA:" prefix is used, so a value may itself contain
# a colon. A missing FILE is not an error. Always returns 0.
# NOTE(review): the lines are executed as-is, so FILE must be trusted.
load_hba_overrides() {
  [ -f "$1" ] || return 0
  # The loop is fed from a here-document rather than a pipeline: some shells
  # (e.g. bash, pdksh) run a piped loop in a subshell, which would silently
  # discard every override when the loop ends.
  while read -r NEW_DEF
  do
    if [ -n "$NEW_DEF" ]
    then
      $NEW_DEF
    fi
  done <<EOF
$(sed -n 's/^HBA://p' "$1")
EOF
  return 0
}

LOGFETCH=${BBTMP}/logfetch.$(uname -n).cfg
load_hba_overrides "$LOGFETCH"
# ---------------------------------------------------------------------------
# Build the status report in $TEMPFILE.out and work out the overall status
# colour in $COLOUR.
#
# $TEMPFILE.out starts with the date and the one-line warnings raised by the
# checks, followed by the annotated HBA port listing and the optional
# multi-path, remote-port and SCSI listings. Each check rewrites $TEMPFILE,
# prefixing the lines it inspects with an "&colour" tag.
#
# Every loop that changes COLOUR or a flag reads from a file by redirection
# instead of a pipeline, so the change is still visible after the loop in
# any shell.
# ---------------------------------------------------------------------------

# worst_colour SAVED CURRENT
# Print the more severe of two status colours: red beats yellow, and yellow
# beats anything else. When neither is red or yellow, CURRENT is printed.
worst_colour() {
  if [ "$1" = "red" ] || [ "$2" = "red" ]
  then
    echo red
  elif [ "$1" = "yellow" ] || [ "$2" = "yellow" ]
  then
    echo yellow
  else
    echo "$2"
  fi
}

# Separator between report sections. Single quotes keep the attribute quotes
# in the emitted HTML.
HBA_HR='<hr width="50%" size="3" />'

date > "$TEMPFILE.out"
# Ask for the adapter list once; the WWN list below is taken from the same
# answer.
HBA_PORTS=$($FCINFO hba-port)
if echo "$HBA_PORTS" | grep "No Adapters Found" > /dev/null
then
  # There are no adapters to work with, so the report ends here.
  echo "No Adapters Found" >> "$TEMPFILE.out"
else
  # One WWN per "HBA Port WWN:" line; the list is split on whitespace on purpose.
  HBA_WWNS=$(echo "$HBA_PORTS" | grep "HBA Port WWN:" | cut -d":" -f2)
  for WWN in $HBA_WWNS
  do
    $FCINFO hba-port -l "$WWN"
  done > "$TEMPFILE"

  # --- Port state: tag every "State:" line, red unless it reads "online" ---
  if [ "$CHECKONLINE" = "true" ]
  then
    while read -r LINE
    do
      ONLINE=""
      case "$LINE" in
        *State:*)
          ONLINE=$(echo "$LINE" | cut -d":" -f2 | sed 's/^[ ]*//;s/[ ]*$//')
          ;;
      esac
      if [ -z "$ONLINE" ]
      then
        echo "$LINE"
      elif [ "$ONLINE" = "online" ]
      then
        echo "&green $LINE"
      else
        echo "&red $LINE"
        COLOUR=red
      fi
    done < "$TEMPFILE" > "$TEMPFILE.online"
    [ "$COLOUR" = "red" ] && echo "&red HBA not online" >> "$TEMPFILE.out"
    mv "$TEMPFILE.online" "$TEMPFILE"
  fi

  # --- Link speed: the current speed should equal the last (highest listed)
  # entry of the port's "Supported Speeds:" line ---
  if [ "$CHECKSPEED" = "true" ]
  then
    SLOWHBA=false
    while read -r LINE
    do
      CURRSPEED=""
      case "$LINE" in
        HBA*)
          # A new port starts here; forget the previous port's maximum.
          MAXSPEED=""
          ;;
      esac
      case "$LINE" in
        *"Supported Speeds:"*)
          MAXSPEED=$(echo "$LINE" | awk '{ print $NF }')
          ;;
      esac
      case "$LINE" in
        *"Current Speed:"*)
          CURRSPEED=$(echo "$LINE" | awk '{ print $NF }')
          ;;
      esac
      SPEEDCOL=green
      if [ -n "$CURRSPEED" ] && [ "$CURRSPEED" != "$MAXSPEED" ]
      then
        SPEEDCOL=yellow
        SLOWHBA=true
        [ "$COLOUR" != "red" ] && COLOUR="yellow"
      fi
      case "$LINE" in
        *Current*)
          echo "$LINE" | sed "s/Current/\&$SPEEDCOL Current/g"
          ;;
        *)
          echo "$LINE"
          ;;
      esac
    done < "$TEMPFILE" > "$TEMPFILE.speed"
    # Warn once, however many ports are slow.
    [ "$SLOWHBA" = "true" ] && echo "&yellow Some HBAs not at optimal speed" >> "$TEMPFILE.out"
    mv "$TEMPFILE.speed" "$TEMPFILE"
  fi

  # --- Error counters: judged on their own colour so the warning text below
  # reflects only the counters, then merged with the colour so far ---
  TCOLOUR=$COLOUR
  COLOUR=green
  if [ "$CHECKERRS" = "true" ]
  then
    while read -r LINE
    do
      case "$LINE" in
        *Count:*)
          ERRCOUNT=$(echo "$LINE" | cut -d":" -f2 | sed 's/^[ ]*//;s/[ ]*$//')
          LCOL=green
          if [ "$ERRCOUNT" -ge "$ERR_RED" ]
          then
            LCOL=red
            COLOUR=red
          elif [ "$ERRCOUNT" -ge "$ERR_YELLOW" ]
          then
            LCOL=yellow
            [ "$COLOUR" != "red" ] && COLOUR=yellow
          fi
          echo "&$LCOL $LINE"
          ;;
        *)
          echo "$LINE"
          ;;
      esac
    done < "$TEMPFILE" > "$TEMPFILE.err"
    [ "$COLOUR" = "red" ] && echo "&red Critical error count detected" >> "$TEMPFILE.out"
    [ "$COLOUR" = "yellow" ] && echo "&yellow High error count detected" >> "$TEMPFILE.out"
    mv "$TEMPFILE.err" "$TEMPFILE"
  fi
  # Fold the state/speed colour back in. (The original wrote
  # COLOUR= "yellow" here, which runs a command called "yellow" instead of
  # assigning, so a speed warning never raised the status.)
  COLOUR=$(worst_colour "$TCOLOUR" "$COLOUR")

  # --- Multi-pathing: list the logical units and optionally flag any whose
  # operational path count differs from its total path count ---
  if [ "$CHECKMPATH" = "true" ] || [ "$LIST_MPATH" = "true" ]
  then
    {
      echo "$HBA_HR"
      echo ""
      echo "<b>Multi-Pathing</b>"
      echo ""
    } > "$TEMPFILE.path.out"
    $MPATHADM list lu > "$TEMPFILE.path"
    if [ "$CHECKMPATH" = "true" ]
    then
      MPATH=""
      cp "$TEMPFILE.path" "$TEMPFILE.path.colours"
      # Join each group of three lines (device, total count, operational
      # count) into one "DEV TOTAL OPERATIONAL" line.
      # assumes mpathadm prints exactly three lines per logical unit
      nawk -F: '{ printf "%s%s", $NF, (NR % 3 ? " " : "\n") }' "$TEMPFILE.path" \
        > "$TEMPFILE.path.lus"
      while read -r DEV TOTPATH OPERPATH
      do
        LCOL=green
        if [ "$TOTPATH" -ne "$OPERPATH" ]
        then
          LCOL=$MPATHFAILCOL
          MPATH=bad
          [ "$COLOUR" != "red" ] && COLOUR=$MPATHFAILCOL
        fi
        # "|" as the delimiter avoids escaping the slashes in the device path.
        sed "s|$DEV|\&$LCOL&|g" "$TEMPFILE.path.colours" > "$TEMPFILE.path.tmp"
        mv "$TEMPFILE.path.tmp" "$TEMPFILE.path.colours"
      done < "$TEMPFILE.path.lus"
      rm -f "$TEMPFILE.path.lus"
      mv "$TEMPFILE.path.colours" "$TEMPFILE.path"
      [ "$MPATH" = "bad" ] && echo "&$MPATHFAILCOL Multipath error detected" >> "$TEMPFILE.out"
    fi
    cat "$TEMPFILE.path" >> "$TEMPFILE.path.out"
    mv "$TEMPFILE.path.out" "$TEMPFILE.path"
    echo >> "$TEMPFILE.path"
  fi

  # --- HBA listing: every "HBA Port" line is set in bold ---
  while read -r LINE
  do
    case "$LINE" in
      *"HBA Port"*)
        echo "<b>"
        echo "$LINE</b>"
        ;;
      *)
        echo "$LINE"
        ;;
    esac
  done < "$TEMPFILE" >> "$TEMPFILE.out"

  # The multi-path section goes after the HBA listing.
  if [ "$CHECKMPATH" = "true" ] || [ "$LIST_MPATH" = "true" ]
  then
    cat "$TEMPFILE.path" >> "$TEMPFILE.out"
  fi
  rm -f "$TEMPFILE.path"

  # --- Remote ports seen from each local port ---
  if [ "$LIST_REMOTE" = "true" ]
  then
    {
      echo "$HBA_HR"
      echo ""
      echo "<b>Remote Port Listing</b>"
      echo ""
      for WWN in $HBA_WWNS
      do
        $FCINFO remote-port -p "$WWN"
      done
    } >> "$TEMPFILE.out"
  fi

  # --- SCSI details, with a rule between consecutive remote ports ---
  if [ "$LIST_SCSI" = "true" ]
  then
    {
      echo "$HBA_HR"
      echo "<b>SCSI Device Information</b>"
      echo ""
    } >> "$TEMPFILE.out"
    for WWN in $HBA_WWNS
    do
      $FCINFO remote-port -s -p "$WWN"
    done > "$TEMPFILE"
    FIRST=true
    while read -r LINE
    do
      case "$LINE" in
        "Remote Port WWN"*)
          # No rule before the first remote port, one before each later one.
          if [ "$FIRST" = "true" ]
          then
            FIRST=false
          else
            echo "$HBA_HR"
          fi
          ;;
      esac
      echo "$LINE"
    done < "$TEMPFILE" >> "$TEMPFILE.out"
  fi
fi
# Send the report as the "hba" status of $MACHINE with the final colour.
# $BB and $BBDISP come from the environment (presumably the monitoring
# client command and its display server -- confirm) and are left unquoted
# so that either may carry more than one word.
$BB $BBDISP "status $MACHINE.hba $COLOUR $(cat "$TEMPFILE.out")"
# Remove the work files. $TEMPFILE does not exist when no adapters were
# found, hence -f.
rm -f "$TEMPFILE" "$TEMPFILE.out" 2> /dev/null