Jobs
job-ID prior name user state submit/start at queue slots ja-task-ID
-----------------------------------------------------------------------------------------------------------------
4839010 0.60000 ovl_p.huma bwalenz r 06/02/2009 13:18:39 medium.q@dell-1-0-6.tigr.org 2 10
4839988 0.59902 ovl_d.anan bwalenz r 06/02/2009 15:32:37 medium.q@dell-1-0-6.tigr.org 2 17
4839988 0.59902 ovl_d.anan bwalenz r 06/02/2009 15:37:25 medium.q@dell-1-0-6.tigr.org 2 21
4835225 0.50000 blastp djohnson r 06/02/2009 09:56:29 default.q@dell-1-0-6.tigr.org 1
4835351 0.50000 blastp djohnson r 06/02/2009 10:11:21 default.q@dell-1-0-6.tigr.org 1
4835471 0.50000 blastp djohnson r 06/02/2009 11:11:05 default.q@dell-1-0-6.tigr.org 1
4835507 0.50000 blastp djohnson r 06/02/2009 11:16:08 default.q@dell-1-0-6.tigr.org 1
Host
HOSTNAME ARCH NCPU LOAD MEMTOT MEMUSE SWAPTO SWAPUS
-------------------------------------------------------------------------------
dell-1-0-6 lx26-eon64 8 11.60 15.7G 6.5G 16.0G 51.2M
Queue Instance Status Report
yellow default.q@dell-1-0-6 is in ALARM
green fast.q@dell-1-0-6 is OK
green medium.q@dell-1-0-6 is OK
green troubleshooting.q@dell-1-0-6 is OK
===== Installation =====
=== Client side ===
Edit sge.sh and set SGEBIN path.
Add this to clientlaunch.cfg :
[sge]
ENVFILE $HOBBITCLIENTHOME/etc/hobbitclient.cfg
CMD $HOBBITCLIENTHOME/ext/sge.sh
LOGFILE $HOBBITCLIENTHOME/logs/sge.log
INTERVAL 5m
=== Server side ===
===== Source =====
==== sge.sh ====
#!/bin/sh
#
# SGE: Sun Grid Engine check - BB external script test
#
##### Purpose is to report back to a central server, all Sun
##### Grid Engine software faults.
#####
#
# version 0.3e
#
# BIG BROTHER / XXXXXXXXXXXXXXXX status
#
# Written by Butch Deal
# Daniel Gomez
#
# v0.3e 10/14/08 cut down on the number of qhost runs
# v0.3d 03/31/06 added alarm/suspend state identification
# v0.3c 03/01/06 propagated yellow state upon UNAVAILABLE queue instances
# v0.3b 01/31/06 fixed yellow warning queue status for ambiguous config test
# v0.3a 01/31/06 added unknown queue status and ambiguous config test
# v0.3 01/26/06 fixed status reporting and optimized job status
# v0.2 08/03/05 flag disabled queues as clear
# v0.1 07/28/05 authored
########################################
# NOTE
# This has been tested with BB 1.9e and Xymon 4.2.x
#
# The mapping from queue state to color status is arbitrary and should be
# reviewed for your particular environment.
#
# Tested on :
# Solaris & Linux
########################################
########################################
# INSTALLATION
# step 1 - update bb-bbexttab to include this script
#
# step 4 - restart Big Brother
#
# NOTE - the TEST variable in the configuration section sets the name used
# as the column header.
########################################
##################################
# CONFIGURE IT HERE
##################################
# Name of this test; per the installation note it is used as the column header.
TEST="sge"

# Path this script was invoked as, exported as BBPROG.
BBPROG="$0"
export BBPROG

# Location of the Grid Engine client commands. Edit SGEBIN for your site;
# the individual command paths are derived from it.
SGEBIN=/usr/local/bin
QSTAT="$SGEBIN/qstat"
QHOST="$SGEBIN/qhost"
QSELECT="$SGEBIN/qselect"
export SGEBIN
export QSTAT
export QHOST
export QSELECT

# define colours for graphics
# Comment these out if using older BB versions
GREEN_PIC="&green"
YELLOW_PIC="&yellow"
RED_PIC="&red"
CLEAR_PIC="&clear"
UNKNOWN_PIC="&purple"
##################################
# Start of script
##################################
# Locate the Big Brother / Xymon installation.
# A BBHOME already present in the environment is kept; /var/BB/bb is only the
# fallback. (Previously the value was overwritten unconditionally, which made
# the "not set" check unreachable and broke hosts installed elsewhere.)
BBHOME="${BBHOME:-/var/BB/bb}"; export BBHOME
if test ! -d "$BBHOME"
then
    echo "template: BBHOME is invalid"
    exit 1
fi
# BBTMP being empty is taken to mean the standard definitions have not been
# loaded yet, so pull them in from the installation directory.
if test ! "$BBTMP" # GET DEFINITIONS IF NEEDED
then
    # echo "*** LOADING BBDEF ***"
    . "$BBHOME/etc/bbdef.sh" # INCLUDE STANDARD DEFINITIONS
fi
#####
##### get_header TITLE [COMMAND]
#####   Print a section heading: an empty line, TITLE, and another empty line.
#####   COMMAND ($2) is accepted for compatibility with existing callers but
#####   is not printed.
#####
##### NOTE(review): the markup that originally wrapped the title (and the
##### commented-out alternative layouts) was lost from this copy and left
##### unbalanced quotes behind; only the text that survived is emitted.
##### Restore the markup from an upstream copy if formatted headers are wanted.
#####
get_header()
{
    echo ""
    printf '%s\n' "$1"
    echo ""
}
#####
##### get_header_small TITLE [COMMAND]
#####   Print a section heading: an empty line, TITLE, and another empty line.
#####   COMMAND ($2) is accepted but not printed.
#####
##### NOTE(review): the markup that presumably distinguished this from
##### get_header was lost from this copy and left unbalanced quotes behind;
##### only the surviving text is emitted. Restore the markup from an upstream
##### copy if a different rendering is wanted.
#####
get_header_small()
{
    echo ""
    printf '%s\n' "$1"
    echo ""
}
#####
##### get_footer
#####   Close a report section by printing one empty line.
#####
##### NOTE(review): a commented-out alternative footer existed here, but its
##### markup was lost from this copy and left an unbalanced quote that
##### swallowed the closing brace; it has been removed.
#####
get_footer()
{
    echo ""
}
#####
##### Get Status proc - used to get all responses
#####
get_status()
{
    #####
    ##### Writes the report body to stdout in three sections (running jobs,
    ##### host summary, queue instance states) and leaves the overall status
    ##### in the exported variable COLOR.
    #####
    ##### Reads:  SGEBIN QSTAT QHOST AWK GREP MACHINEDOTS HOST and the *_PIC
    #####         colour tokens.
    ##### Writes: COLOR (exported), queueMsg, queueTriggered.
    #####
    ##### Backticks and plain test/case are used because the script runs
    ##### under /bin/sh.
    #####
    COLOR="green"
    queueMsg=""
    # queueTriggered is set for compatibility; nothing in this script reads it.
    queueTriggered=false

    # Check defaults have been set
    if [ "$SGEBIN" = "" ]; then
        SGEBIN=/usr/local/bin
        echo ""
        echo "$YELLOW_PIC SGEBIN command is not defined in etc/bbsys.local - using default: $SGEBIN"
    fi
    if [ "$QSTAT" = "" ]; then
        QSTAT=${SGEBIN}/qstat
        echo ""
        echo "$YELLOW_PIC QSTAT command is not defined in etc/bbsys.local - using default: $QSTAT"
    fi
    if [ "$QHOST" = "" ]; then
        QHOST=${SGEBIN}/qhost
        echo ""
        echo "$YELLOW_PIC QHOST command is not defined in etc/bbsys.local - using default: $QHOST"
    fi

    ###
    ### Check the jobs
    ###
    get_header "Jobs" "$QSTAT -l hostname=$MACHINEDOTS"
    # qstat is run once and its captured output is printed, so the emptiness
    # test and the listing always agree.
    sge_jobs=`${QSTAT} -l hostname="${MACHINEDOTS}" -s r`
    if [ -z "$sge_jobs" ]; then
        echo "No Running Jobs"
    else
        printf '%s\n' "$sge_jobs"
    fi
    get_footer

    ###
    ### Check the host
    ###
    get_header "Host" "$METAHS -i"
    ${QHOST} -h "${MACHINEDOTS}" | ${GREP} -v "global"
    get_footer

    ###
    ### Check queue instance states
    ###
    # One qhost run, one awk pass: skip the first four lines (as the original
    # "tail +5" did, without relying on that obsolescent syntax) and print
    # "<queue> <state>" for every remaining non-blank line. The fourth field
    # is what this script treats as the state flags; "-" stands in for an
    # empty state so the output always splits into pairs.
    set -- `${QHOST} -h "${MACHINEDOTS}" -q | ${AWK} 'NR >= 5 && NF > 0 { s = $4; if (s == "") s = "-"; print $1, s }'`

    while [ $# -ge 2 ]; do
        Qset=$1
        qstate=$2
        shift 2
        if [ "$qstate" = "-" ]; then
            qstate=""
        fi

        # Order determines more significant color status
        case "$qstate" in
            *d*)
                _sge_raise_color clear
                queueLine="$CLEAR_PIC $Qset@$HOST is DISABLED"
                queueTriggered=true
                ;;
            *E*)
                _sge_raise_color red
                queueLine="$RED_PIC $Qset@$HOST is in ERROR!"
                queueTriggered=true
                ;;
            *c*)
                _sge_raise_color yellow
                queueLine="$YELLOW_PIC $Qset@$HOST has an ambigious configuration!"
                queueTriggered=true
                ;;
            *[aA]*)
                # Alarm is flagged on the line only; COLOR is left untouched,
                # as before.
                queueLine="$YELLOW_PIC $Qset@$HOST is in ALARM"
                ;;
            *[sS]*)
                # Suspend is likewise reported without changing COLOR.
                queueLine="$YELLOW_PIC $Qset@$HOST is SUSPENDED"
                ;;
            *u*)
                _sge_raise_color yellow
                queueLine="$YELLOW_PIC $Qset@$HOST is UNAVAILABLE!"
                queueTriggered=true
                ;;
            "")
                queueLine="$GREEN_PIC $Qset@$HOST is OK"
                ;;
            *)
                queueLine="$UNKNOWN_PIC $Qset@$HOST is UNKNOWN"
                queueTriggered=true
                ;;
        esac

        # Each entry is preceded by a newline, so the report starts with an
        # empty line exactly as it did before.
        queueMsg="$queueMsg
$queueLine"
    done

    get_header "Queue Instance Status Report"
    printf '%s\n' "$queueMsg"
    get_footer

    #####
    ##### Make sure to export COLOR so that it gets back to "central"
    #####
    export COLOR
}

#####
##### _sge_raise_color NEW
#####   Helper for get_status: set COLOR to NEW only when NEW is more severe
#####   than the current value (green < clear < yellow < red), so the final
#####   status does not depend on the order in which queues are listed.
#####
_sge_raise_color()
{
    case "$COLOR/$1" in
        red/*)                  ;;
        yellow/red)             COLOR=$1 ;;
        yellow/*)               ;;
        clear/red|clear/yellow) COLOR=$1 ;;
        clear/*)                ;;
        *)                      COLOR=$1 ;;
    esac
}
#####
##### Main body
#####
# Collect the report text in a scratch file; get_status also sets COLOR.
report_file=$BBTMP/$MACHINE.$TEST
get_status > $report_file

# NOW USE THE BB COMMAND TO SEND THE DATA ACROSS
# The timestamp and the report body are gathered first, then sent as a single
# status message.
report_stamp=`$DATE`
report_body=`$CAT $report_file`
$BB $BBDISP "status $MACHINE.$TEST $COLOR $report_stamp $report_body"

#For testing only
# echo $BB $BBDISP "status $BBTMP/$MACHINE.$TEST $COLOR `$DATE` `$CAT $BBTMP/$MACHINE.$TEST` ">/tmp/qtmp

# Clean up our mess
# The scratch file is removed only if it exists, since the whole test may be
# optional and may not actually run on every client.
if test -f $report_file
then
    $RM $report_file
fi
##############################################
# end of script
##############################################