Differences
This shows you the differences between two versions of the page.
— |
monitors:netbackup [2009/11/23 06:04] (current) |
||
---|---|---|---|
Line 1: | Line 1: | ||
+ | ====== netbackup ====== | ||
+ | |||
+ | ^ Author | [[ grossi@acantho.net | Gianluca Rossi ]] | | ||
+ | ^ Compatibility | Xymon 4.2 | | ||
+ | ^ Requirements | Netbackup 4.5 and Netbackup 6, sudo | | ||
+ | ^ Download | None | | ||
+ | ^ Last Update | 2007-02-15 | | ||
+ | |||
+ | ===== Description ===== | ||
+ | |||
+ | Monitor Veritas NetBackup status. | ||
+ | |||
+ | ===== Installation ===== | ||
+ | |||
+ | ==== Client Configuration ==== | ||
+ | - Add the following to sudoers on the NetBackup Master Server.<code> | ||
+ | Cmnd_Alias NETBACKUP = /opt/openv/netbackup/bin/admincmd/bpdbjobs, \ | ||
+ | /opt/openv/netbackup/bin/admincmd/bperror, \ | ||
+ | /opt/openv/netbackup/bin/admincmd/bpmedialist, \ | ||
+ | /opt/openv/volmgr/bin/vmquery, \ | ||
+ | /usr/openv/volmgr/bin/vmdareq | ||
+ | |||
+ | hobbit ALL=NOPASSWD:NETBACKUP | ||
+ | </code> | ||
+ | - Add nb* scripts to client's ext folder | ||
+ | * nb.cfg : common config file. you must edit this file first | ||
+ | * nb-media.sh : check available media, DOWN drives and frozen media, you should not edit this file | ||
+ | * nb.sh : check netbackup messages, you should not edit this file | ||
+ | - Add the following to client hobbitlaunch.cfg <code> | ||
+ | [nbmedia] | ||
+ | ENVFILE $HOBBITCLIENTHOME/etc/hobbitclient.cfg | ||
+ | CMD $HOBBITCLIENTHOME/ext/nb-media.sh | ||
+ | LOGFILE $HOBBITCLIENTHOME/logs/nb-media.log | ||
+ | INTERVAL 5m | ||
+ | |||
+ | [nblog] | ||
+ | ENVFILE $HOBBITCLIENTHOME/etc/hobbitclient.cfg | ||
+ | CMD $HOBBITCLIENTHOME/ext/nb.sh | ||
+ | LOGFILE $HOBBITCLIENTHOME/logs/nb.log | ||
+ | INTERVAL 5m | ||
+ | </code> | ||
+ | |||
+ | ==== Server Configuration ==== | ||
+ | |||
+ | None. | ||
+ | |||
+ | ===== Source ===== | ||
+ | |||
+ | ==== nb.sh ==== | ||
+ | <hidden onHidden="Show Code ⇲" onVisible="Hide Code ⇱"> | ||
+ | <code bash> | ||
+ | #!/bin/bash | ||
+ | # Netbackup messages monitoring script | ||
+ | # Gianluca Rossi - grossi@acantho.net | ||
+ | |||
+ | # TEST NAME: THIS WILL BECOME A COLUMN ON THE DISPLAY | ||
+ | # IT SHOULD BE AS SHORT AS POSSIBLE TO SAVE SPACE... | ||
+ | # NOTE YOU CAN ALSO CREATE A HELP FILE FOR YOUR TEST | ||
+ | # WHICH SHOULD BE PUT IN www/help/$TEST.html. IT WILL | ||
+ | # BE LINKED INTO THE DISPLAY AUTOMATICALLY. | ||
+ | # | ||
+ | TEST="nbackup" | ||
+ | |||
+ | # | ||
+ | # BBHOME CAN BE SET MANUALLY WHEN TESTING. | ||
+ | # OTHERWISE IT SHOULD BE SET FROM THE BB ENVIRONMENT | ||
+ | # | ||
+ | # BBHOME=/home/sean/bb; export BBHOME # FOR TESTING | ||
+ | |||
+ | if test "$BBHOME" = "" | ||
+ | then | ||
+ | echo "BBHOME is not set... exiting" | ||
+ | exit 1 | ||
+ | fi | ||
+ | |||
+ | if test ! "$BBTMP" # GET DEFINITIONS IF NEEDED | ||
+ | then | ||
+ | # echo "*** LOADING BBDEF ***" | ||
+ | . $BBHOME/etc/bbdef.sh # INCLUDE STANDARD DEFINITIONS | ||
+ | fi | ||
+ | |||
+ | |||
+ | # | ||
+ | # include Netbackup configuration | ||
+ | # | ||
+ | . $BBHOME/ext/nb.cfg | ||
+ | |||
+ | |||
+ | WARNING="`$BPERROR -U -s WARNING -hoursago 1 2>/dev/null | sed '1d'| head -10`" | ||
+ | ERROR="`$BPERROR -U -s ERROR -hoursago 1 2>/dev/null | sed '1d'| head -30`" | ||
+ | CRITICAL="`$BPERROR -U -s CRITICAL -hoursago 1 2>/dev/null | sed '1d'| head -30`" | ||
+ | INFO="`$BPERROR -U -s INFO -hoursago 1 2>/dev/null | sed '1d' | head -10`" | ||
+ | LINE="" | ||
+ | |||
+ | COLOR="green" | ||
+ | STATUS="Netbackup OK" | ||
+ | |||
+ | if [ ! -z "$INFO" ] | ||
+ | then | ||
+ | LINE=`printf "\n - - - INFO MESSAGES- - -\n${INFO}\n${LINE}"` | ||
+ | fi | ||
+ | |||
+ | if [ ! -z "$WARNING" ] | ||
+ | then | ||
+ | LINE=`printf "\n - - - WARNING MESSAGES - - -\n${WARNING}\n${LINE}"` | ||
+ | COLOR="yellow" | ||
+ | STATUS="Netbackup - Warning" | ||
+ | fi | ||
+ | |||
+ | if [ ! -z "$ERROR" ] | ||
+ | then | ||
+ | LINE=`printf "\n - - - ERROR MESSAGES - - -\n${ERROR}\n${LINE}"` | ||
+ | COLOR="red" | ||
+ | STATUS="Netbackup - Error" | ||
+ | fi | ||
+ | |||
+ | |||
+ | if [ ! -z "$CRITICAL" ] | ||
+ | then | ||
+ | LINE=`printf "\n - - - CRITICAL MESSAGES - - -\n${CRITICAL}\n${LINE}"` | ||
+ | COLOR="red" | ||
+ | STATUS="Netbackup - Critical" | ||
+ | fi | ||
+ | |||
+ | |||
+ | |||
+ | # | ||
+ | # AT THIS POINT WE HAVE OUR RESULTS. NOW WE HAVE TO SEND IT TO | ||
+ | # THE BBDISPLAY TO BE DISPLAYED... | ||
+ | # | ||
+ | $BB $BBDISP "status $MACHINE.$TEST $COLOR `date` - $STATUS $LINE" | ||
+ | |||
+ | </code> | ||
+ | </hidden> | ||
+ | |||
+ | ==== nb-media.sh ==== | ||
+ | <hidden onHidden="Show Code ⇲" onVisible="Hide Code ⇱"> | ||
+ | <code bash> | ||
+ | #!/bin/bash | ||
+ | # | ||
+ | # Veritas NetBackup hobbit plugin | ||
+ | # Gianluca Rossi - grossi@acantho.net | ||
+ | # 1 - check available media | ||
+ | # 2 - check DOWNed drive | ||
+ | # 3 - check FROZEN media | ||
+ | # | ||
+ | # TEST NAME: THIS WILL BECOME A COLUMN ON THE DISPLAY | ||
+ | # IT SHOULD BE AS SHORT AS POSSIBLE TO SAVE SPACE... | ||
+ | # NOTE YOU CAN ALSO CREATE A HELP FILE FOR YOUR TEST | ||
+ | # WHICH SHOULD BE PUT IN www/help/$TEST.html. IT WILL | ||
+ | # BE LINKED INTO THE DISPLAY AUTOMATICALLY. | ||
+ | # | ||
+ | TEST="nbmedia" | ||
+ | |||
+ | # | ||
+ | # BBHOME CAN BE SET MANUALLY WHEN TESTING. | ||
+ | # OTHERWISE IT SHOULD BE SET FROM THE BB ENVIRONMENT | ||
+ | # | ||
+ | # BBHOME=/home/sean/bb; export BBHOME # FOR TESTING | ||
+ | |||
+ | if test "$BBHOME" = "" | ||
+ | then | ||
+ | echo "BBHOME is not set... exiting" | ||
+ | exit 1 | ||
+ | fi | ||
+ | |||
+ | if test ! "$BBTMP" # GET DEFINITIONS IF NEEDED | ||
+ | then | ||
+ | # echo "*** LOADING BBDEF ***" | ||
+ | . $BBHOME/etc/bbdef.sh # INCLUDE STANDARD DEFINITIONS | ||
+ | fi | ||
+ | |||
+ | # | ||
+ | # include Netbackup configuration | ||
+ | # | ||
+ | . $BBHOME/ext/nb.cfg | ||
+ | |||
+ | |||
+ | LINE=" | ||
+ | " | ||
+ | ERR=0 | ||
+ | #set -x | ||
+ | # | ||
+ | # how many media are available on the scratch pool? | ||
+ | # | ||
+ | $VMQUERY -pn $SCRATCH_POOL | egrep 'robot number:' | sort >$BBTMP/vmquery.$$ | ||
+ | uniq < $BBTMP/vmquery.$$ > $BBTMP/vmqueryuniq.$$ | ||
+ | while read line | ||
+ | do | ||
+ | set $line | ||
+ | ROBOT_NUMBER=$3 | ||
+ | NUM_MEDIA=`grep $ROBOT_NUMBER $BBTMP/vmquery.$$ | wc -l` | ||
+ | COLOR="green" | ||
+ | if [ $NUM_MEDIA -lt $WARN_MEDIA ] ; then | ||
+ | ERR=`expr $ERR + 1` | ||
+ | COLOR="yellow" | ||
+ | fi | ||
+ | if [ $NUM_MEDIA -lt $ERR_MEDIA ] ; then | ||
+ | ERR=1000 | ||
+ | COLOR="red" | ||
+ | fi | ||
+ | LINE="${LINE} | ||
+ | &${COLOR}${NUM_MEDIA} Media available on robot $ROBOT_NUMBER" | ||
+ | done < $BBTMP/vmqueryuniq.$$ | ||
+ | rm -f $BBTMP/vmquery.$$ $BBTMP/vmqueryuniq.$$ | ||
+ | |||
+ | |||
+ | # | ||
+ | # check for DOWN drive | ||
+ | # | ||
+ | $VMDAREQ -a >$BBTMP/vmdareq.$$ | ||
+ | # | ||
+ | NUMDRIVE=`cat $BBTMP/vmdareq.$$ | egrep '^[0-9a-zA-Z]' | wc -l` | ||
+ | NUMDOWN=`cat $BBTMP/vmdareq.$$ | grep 'DOWN$' | wc -l` | ||
+ | VMSTATUS=`cat $BBTMP/vmdareq.$$ | egrep '^[0-9a-zA-Z]|DOWN$'` | ||
+ | if [ $NUMDOWN -gt $WARN_DOWN ]; then | ||
+ | ERR=`expr $ERR + 1` | ||
+ | COLOR="yellow" | ||
+ | fi | ||
+ | if [ $NUMDOWN -gt $ERR_DOWN ]; then | ||
+ | ERR=1000 | ||
+ | COLOR="red" | ||
+ | fi | ||
+ | |||
+ | LINE="${LINE} | ||
+ | &${COLOR}${NUMDRIVE} Total drive. ${NUMDOWN} are down. | ||
+ | ${VMSTATUS} | ||
+ | " | ||
+ | rm -f $BBTMP/vmdareq.$$ | ||
+ | |||
+ | |||
+ | # | ||
+ | # check for frozen media on every media server | ||
+ | # | ||
+ | for host in $MEDIA_SERVERS | ||
+ | do | ||
+ | $BPMEDIA -summary -h $host | sed '1d' | head -6 > $BBTMP/media.$$ | ||
+ | FROZEN=`cat $BBTMP/media.$$ | tail -1 | awk '{ print $4}'` | ||
+ | COLOR="green" | ||
+ | if [ $FROZEN -gt $WARN_FROZEN ] ; then | ||
+ | COLOR="yellow" | ||
+ | ERR=`expr $ERR + 1` | ||
+ | fi | ||
+ | if [ $FROZEN -gt $ERR_FROZEN ] ; then | ||
+ | COLOR="red" | ||
+ | ERR=1000 | ||
+ | fi | ||
+ | O=`tail +2 $BBTMP/media.$$ | sed "s/^MEDIA/\&$COLOR/"` | ||
+ | LINE="${LINE} | ||
+ | ******************************************************************************* | ||
+ | $O" | ||
+ | done | ||
+ | |||
+ | rm -f $BBTMP/media.$$ | ||
+ | |||
+ | if [ $ERR -ge 1000 ] ; then | ||
+ | ERR="red" | ||
+ | STATUS="Media Error" | ||
+ | elif [ "$ERR" -gt 0 ] ; then | ||
+ | ERR="yellow" | ||
+ | STATUS="Media Warning" | ||
+ | else | ||
+ | ERR="green" | ||
+ | STATUS="Media OK" | ||
+ | fi | ||
+ | |||
+ | |||
+ | # | ||
+ | # AT THIS POINT WE HAVE OUR RESULTS. NOW WE HAVE TO SEND IT TO | ||
+ | # THE BBDISPLAY TO BE DISPLAYED... | ||
+ | # | ||
+ | $BB $BBDISP "status $MACHINE.$TEST $ERR `date` - $STATUS | ||
+ | $LINE" | ||
+ | #echo $BBDISP "status $MACHINE.$TEST $ERR `date` - $STATUS $LINE" | ||
+ | </code> | ||
+ | </hidden> | ||
+ | |||
+ | ==== nb.cfg ==== | ||
+ | <hidden onHidden="Show Code ⇲" onVisible="Hide Code ⇱"> | ||
+ | <code bash> | ||
+ | # NetBackup plugin configuration | ||
+ | # | ||
+ | MASTER_SERVER="back-ser" | ||
+ | MEDIA_SERVERS="back-ser mediasrv0 mediasrv1" | ||
+ | |||
+ | # change following commands to the correct path | ||
+ | SUDO="/usr/local/bin/sudo" | ||
+ | BPERROR="$SUDO /opt/openv/netbackup/bin/admincmd/bperror" | ||
+ | VMQUERY="$SUDO /opt/openv/volmgr/bin/vmquery" | ||
+ | BPMEDIA="$SUDO /opt/openv/netbackup/bin/admincmd/bpmedialist" | ||
+ | VMDAREQ="$SUDO /usr/openv/volmgr/bin/vmdareq" | ||
+ | BPDBJOBS="$SUDO /usr/openv/netbackup/bin/admincmd/bpdbjobs" | ||
+ | |||
+ | # | ||
+ | # minimum number of media available on the scratch pool | ||
+ | # | ||
+ | WARN_MEDIA=7 | ||
+ | ERR_MEDIA=5 | ||
+ | |||
+ | # name of the scratch pool | ||
+ | SCRATCH_POOL=Scratch_pool | ||
+ | |||
+ | # alert if there are too many frozen media | ||
+ | WARN_FROZEN=5 | ||
+ | ERR_FROZEN=7 | ||
+ | |||
+ | # alert if there are DOWN drives | ||
+ | ERR_DOWN=2 | ||
+ | WARN_DOWN=1 | ||
+ | |||
+ | # | ||
+ | # not yet used | ||
+ | # | ||
+ | IGNORE_CODES="0 150" | ||
+ | WARNING_CODES="1" | ||
+ | </code> | ||
+ | </hidden> | ||
+ | |||
+ | ===== Known Bugs and Issues ===== | ||
+ | |||
+ | * No known bugs | ||
+ | |||
+ | ===== To Do ===== | ||
+ | |||
+ | ===== Credits ===== | ||
+ | |||
+ | ===== Changelog ===== | ||
+ | |||
+ | * **2007-02-15** | ||
+ | * Initial release | ||