no way to compare when less than two revisions
Differences
This shows you the differences between two versions of the page.
— | monitors:diskstat.ksh [2010/10/12 03:43] (current) – created - external edit 127.0.0.1 | ||
---|---|---|---|
Line 1: | Line 1: | ||
+ | ====== DiskStat ====== | ||
+ | ^ Author | [[ everett.vernon@gmail.com | Vernon Everett ]] | | ||
+ | ^ Compatibility | Tested on Solaris 10 | | ||
+ | ^ Requirements | Nothing special | | ||
+ | ^ Download | None | | ||
+ | ^ Last Update | 2010-09-21 | | ||
+ | ===== Description ===== | ||
+ | Graphs of iostat output designed to appear on the trends page. | ||
+ | Really useful for seeing which disks are being hit hard, and for getting an idea of where your bottlenecks are. | ||
+ | |||
+ | I called it diskstat, instead of iostat, for two reasons. | ||
+ | |||
+ | 1. There was already an iostat graph definition, and I had no idea what it was for | ||
+ | |||
+ | 2. Since it appears in the trends, it really makes no difference what it's called. | ||
+ | |||
+ | |||
+ | By default, it ignores NFS disks, but you can change that with the following in the appropriate section of clientlocal.cfg (or just hack the code) | ||
+ | DISKSTAT: | ||
+ | |||
+ | | ||
+ | |||
+ | ===== Installation ===== | ||
+ | === Client side === | ||
+ | 1. Copy diskstat.ksh to ~$HOME/ | ||
+ | |||
+ | 2. Edit the client/ | ||
+ | [diskstat] | ||
+ | ENVFILE $HOBBITCLIENTHOME/ | ||
+ | CMD $HOBBITCLIENTHOME/ | ||
+ | LOGFILE $HOBBITCLIENTHOME/ | ||
+ | INTERVAL 5m | ||
+ | |||
+ | === Server side === | ||
+ | 3. Add this to TEST2RRD= in hobbitserver.cfg | ||
+ | diskstat-reads=ncv, | ||
+ | |||
+ | 4. Add this to GRAPHS= in hobbitserver.cfg | ||
+ | diskstat-reads:: | ||
+ | # ::7 indicates the number of lines per graph. (Default 4) Flavour to taste | ||
+ | |||
+ | 5. Add this to hobbitserver.cfg | ||
+ | SPLITNCV_diskstat-pb=" | ||
+ | SPLITNCV_diskstat-reads=" | ||
+ | SPLITNCV_diskstat-writes=" | ||
+ | SPLITNCV_diskstat-kreads=" | ||
+ | SPLITNCV_diskstat-kwrites=" | ||
+ | SPLITNCV_diskstat-wait=" | ||
+ | SPLITNCV_diskstat-actv=" | ||
+ | SPLITNCV_diskstat-wsvc=" | ||
+ | SPLITNCV_diskstat-svct=" | ||
+ | SPLITNCV_diskstat-pw=" | ||
+ | |||
+ | 6. Add this to hobbitgraph.cfg | ||
+ | [diskstat-reads] | ||
+ | FNPATTERN diskstat-reads, | ||
+ | TITLE Disk Reads per Second | ||
+ | YAXIS Reads | ||
+ | -l 0 | ||
+ | DEF: | ||
+ | LINE2: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | | ||
+ | [diskstat-writes] | ||
+ | FNPATTERN diskstat-writes, | ||
+ | TITLE Disk Writes per Second | ||
+ | YAXIS Writes | ||
+ | -l 0 | ||
+ | DEF: | ||
+ | LINE2: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | | ||
+ | [diskstat-kreads] | ||
+ | FNPATTERN diskstat-kreads, | ||
+ | TITLE Disk Reads per Second in Kb | ||
+ | YAXIS Kb | ||
+ | -l 0 | ||
+ | DEF: | ||
+ | LINE2: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | | ||
+ | [diskstat-kwrites] | ||
+ | FNPATTERN diskstat-kwrites, | ||
+ | TITLE Disk Writes per Second in Kb | ||
+ | YAXIS Kb | ||
+ | -l 0 | ||
+ | DEF: | ||
+ | LINE2: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | | ||
+ | [diskstat-wait] | ||
+ | FNPATTERN diskstat-wait, | ||
+ | TITLE Average Number of Transactions Waiting | ||
+ | YAXIS Total | ||
+ | -l 0 | ||
+ | DEF: | ||
+ | LINE2: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | | ||
+ | [diskstat-actv] | ||
+ | FNPATTERN diskstat-actv, | ||
+ | TITLE Average Number of Transactions Active | ||
+ | YAXIS Total | ||
+ | -l 0 | ||
+ | DEF: | ||
+ | LINE2: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | | ||
+ | [diskstat-svct] | ||
+ | FNPATTERN diskstat-svct, | ||
+ | TITLE Average Response Time of Transaction | ||
+ | YAXIS Milliseconds | ||
+ | -l 0 | ||
+ | DEF: | ||
+ | LINE2: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | | ||
+ | [diskstat-wsvc] | ||
+ | FNPATTERN diskstat-wsvc, | ||
+ | TITLE Average Service Time in Wait Queue | ||
+ | YAXIS Milliseconds | ||
+ | -l 0 | ||
+ | DEF: | ||
+ | LINE2: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | | ||
+ | [diskstat-pw] | ||
+ | FNPATTERN diskstat-pw, | ||
+ | TITLE Percent of Time Waiting | ||
+ | YAXIS % | ||
+ | -l 0 | ||
+ | -u 100 | ||
+ | DEF: | ||
+ | LINE2: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | | ||
+ | [diskstat-pb] | ||
+ | FNPATTERN diskstat-pb, | ||
+ | TITLE Percent of Time Disk Busy | ||
+ | YAXIS % | ||
+ | -l 0 | ||
+ | -u 100 | ||
+ | DEF: | ||
+ | LINE2: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | GPRINT: | ||
+ | |||
+ | |||
+ | |||
+ | ===== Source ===== | ||
+ | ==== diskstat.ksh ==== | ||
+ | |||
+ | <hidden onHidden=" | ||
+ | < | ||
+ | #!/bin/ksh | ||
+ | TEMPFILE=$BBTMP/ | ||
+ | SHOW_NFS=no | ||
+ | # DISKSTAT: | ||
+ | DURATION=10 | ||
+ | # This can be updated in the same way as above | ||
+ | |||
+ | # Now we redefine some variables, if they are set in clientlocal | ||
+ | LOGFETCH=${BBTMP}/ | ||
+ | if [ -f $LOGFETCH ] | ||
+ | then | ||
+ | grep " | ||
+ | | while read NEW_DEF | ||
+ | do | ||
+ | $NEW_DEF | ||
+ | done | ||
+ | fi | ||
+ | |||
+ | > $TEMPFILE | ||
+ | / | ||
+ | # We have to collect 2 sets, because the first set is the average since boot. | ||
+ | |||
+ | # Define where the second set of data starts | ||
+ | LINE=$(cat $TEMPFILE.raw | grep -n ", | ||
+ | # take the second set, and massage it into usable data | ||
+ | cat $TEMPFILE.raw | awk " | ||
+ | | sed "s/,/ /g" \ | ||
+ | | awk '{ print $NF" "$0 }' \ | ||
+ | | awk '{ $NF=""; | ||
+ | rm $TEMPFILE.raw | ||
+ | count=1 | ||
+ | # Now we format the data and send it off to the server | ||
+ | for subtest in reads writes kreads kwrites wait actv wsvc svct pw pb | ||
+ | do | ||
+ | | ||
+ | echo "" | ||
+ | cat $TEMPFILE.data | cut -d" " -f1,$count \ | ||
+ | | while read DEVICE VAL | ||
+ | do | ||
+ | echo " | ||
+ | if [ $? -eq 0 -a " | ||
+ | then | ||
+ | break | ||
+ | else | ||
+ | DEVICE=$(echo $DEVICE | tr : - ) | ||
+ | fi | ||
+ | echo " | ||
+ | done | ||
+ | echo "" | ||
+ | $BB $BBDISP "data $MACHINE.diskstat-${subtest} $(echo; cat $TEMPFILE ;echo "" | ||
+ | # Without the last echo " | ||
+ | # Odd really, but that seems to fix it. | ||
+ | rm $TEMPFILE | ||
+ | done | ||
+ | rm $TEMPFILE.data | ||
+ | |||
+ | </ | ||
+ | </ | ||
+ | |||
+ | ===== Known Bugs and Issues ===== | ||
+ | 2010-09-21 - Found and fixed a bug. (Left out the wsvc_t stat.) | ||
+ | |||
+ | |||
+ | All bugs are currently unknown. | ||
+ | |||
+ | If you find any, let me know, and I will see what I can do to fix them. | ||
+ | |||
+ | ===== To Do ===== | ||
+ | Was toying with the idea of having some of the values appear as alerts, with standard red/ | ||
+ | |||
+ | Might be useful to watch the average service time? | ||
+ | |||
+ | However, to be of concern, high iostat figures need to be sustained. Disk usage is expected to peak from time to time, so is it really suitable for alerts? | ||
+ | And even if the high usage is sustained, what exactly can you do about it? | ||
+ | |||
+ | Your comments on the back of $100 bills only. | ||
+ | |||
+ | ===== Credits ===== | ||
+ | This all started because a piece of software is crashing on one of my servers every month or so. The application admin is blaming me (and my server) | ||
+ | |||
+ | I said it's not the server, but after some constructive googling, I found a link which hinted that it might be disk performance. | ||
+ | |||
+ | I decided to monitor disk performance, | ||
+ | |||
+ | So all credit for this goes to a really poorly written mail server that doesn' | ||
+ | ===== Changelog ===== | ||
+ | |||
+ | * **2010-09-09** | ||
+ | * Initial release | ||
+ | |||
+ | * **2010-09-21** | ||
+ | * Fairly major bug fix. (Left out the wsvc_t stats) |