no way to compare when less than two revisions
Differences
This shows you the differences between two versions of the page.
Next revision | |||
— | addons:remrrds [2020/01/28 11:27] – created wnelis | ||
---|---|---|---|
Line 1: | Line 1: | ||
+ | ====== REMove_RRD_Spikes ====== | ||
+ | |||
+ | ^ Author | [[ wim.nelis@ziggo.nl | Wim Nelis ]] | | ||
+ | ^ Compatibility | Xymon 4.2 | | ||
+ | ^ Requirements | Perl | | ||
+ | ^ Download | None | | ||
+ | ^ Last Update | 2020-01-28 | | ||
+ | |||
+ | ===== Description ===== | ||
+ | |||
+ | Perl script remrrds.pl is yet another script to remove spikes from a Round Robin Database (RRD). Hence its name: remrrds is short for REMove_Round_Robin_Database_Spikes. (Some people may recognize the naming convention of NOS/VE.) | ||
+ | |||
+ | A unique feature of this script is, as far as I know, that if a spike is located, that measurement is removed completely, that is replaced by value ' | ||
+ | |||
+ | Additionally, | ||
+ | |||
+ | The script also supports removing all measurements before a given time stamp. This feature is included, as extending an RRD with another level of aggregation, | ||
+ | |||
+ | If an RRD is modified, the original RRD is still available. However, its name has an additional extension ' | ||
+ | |||
+ | ===== Installation ===== | ||
+ | |||
+ | The installation is very site specific. Copy the script to a convenient location. It might be necessary to adjust the names of the temporary files which are defined at the beginning of the script. Those files will hold an XML representation of the RRD. | ||
+ | |||
+ | ===== Source ===== | ||
+ | |||
+ | <hidden onHidden=" | ||
+ | < | ||
+ | # | ||
+ | # | ||
+ | # remrrds, REMove_Round_Robin_Database_Spikes: | ||
+ | # This script removes spikes in an RRD. The lower bound and/or the upper bound | ||
+ | # for the legal values, that is the values to retain, can be specified. If any | ||
+ | # number in a row exceeds the boundaries, all values in that row will be set to | ||
+ | # ' | ||
+ | # | ||
+ | # This script is based on script removespikes.pl, | ||
+ | # are outside the range of common values, which cover 0.99 of all values. | ||
+ | # | ||
+ | # An essential extension on the original script is that in the analysis phase | ||
+ | # also the consolidation functions are considered. A lower bound is applied to | ||
+ | # the RRA using consolidation function MIN, and any row to be deleted is also | ||
+ | # deleted in the RRA's with another consolidation function. The same goes for | ||
+ | # the upper bound and the RRA using consolidation function MAX. | ||
+ | # | ||
+ | # Another extension is the possibility to define one or more names of data sets. | ||
+ | # Only those data sets will be checked, the others will be skipped. By default | ||
+ | # all data sets will be checked. | ||
+ | # | ||
+ | # remrrds.pl [-dhv] [-l <X>] [-u <X>] [-s <N>] [-b <T>] < | ||
+ | # | ||
+ | # Written by W.J.M. Nelis, wim.nelis@ziggo.nl, | ||
+ | # | ||
+ | # To do: | ||
+ | # - Check size of newly created RRD, and restore old one if the new one is | ||
+ | # | ||
+ | # | ||
+ | use strict ; | ||
+ | use Getopt::Std ; | ||
+ | |||
+ | # Command line parameters. | ||
+ | # | ||
+ | my %opt= () ; # All command line parameters | ||
+ | my $DEBUG | ||
+ | my $VERBOSE= undef ; # Flag: verbose mode | ||
+ | my $LOWTIM = undef ; # Time before which all rows are deleted | ||
+ | my $LOWBND = undef ; # Lower bound of correct values | ||
+ | my $UPPBND = undef ; # Upper bound of correct values | ||
+ | my @DATASET= | ||
+ | my $RRDFIL = undef ; # Name of RRD file | ||
+ | |||
+ | # | ||
+ | # Define the names of the temporary files containing the XML version of the RRD. | ||
+ | # | ||
+ | my $XMLOLD= "/ | ||
+ | my $XMLNEW= "/ | ||
+ | |||
+ | # | ||
+ | # In case multiple consolidation functions are used in the RRD, define the | ||
+ | # preferred list of consolidated data to look for out-of-bounds values. Note | ||
+ | # that it is not trivial to determine the preference of consolidation function | ||
+ | # LAST, as its average value is AVERAGE but its fluctuations will be larger. | ||
+ | # | ||
+ | my @pcfLOW= ( ' | ||
+ | my @pcfUPP= ( ' | ||
+ | |||
+ | # Global variables. | ||
+ | # | ||
+ | my $RrdMod= 0 ; # Flag: RRD is modified | ||
+ | my @xml= () ; # Save area XML file | ||
+ | my @ds = () ; # List of data sets used in RRD | ||
+ | my %ds = () ; # List of data sets to check | ||
+ | my %cf = () ; # Consolidation functions used in RRD | ||
+ | my $lbcf= undef ; # CF to use for lower bound, if specified | ||
+ | my $ubcf= undef ; # CF to use for upper bound, if specified | ||
+ | |||
+ | |||
+ | # | ||
+ | # Function AnalyseRrd takes the XML dump of an RRD and determines the data sets | ||
+ | # (DS's) in use and the consolidation functions (CF's) in use. | ||
+ | # | ||
+ | sub AnalyseRrd() { | ||
+ | # | ||
+ | # Determine the list of data sets as well as the consolidation functions in use | ||
+ | # in this RRD. | ||
+ | # | ||
+ | @ds= map { $1 if m/< | ||
+ | %cf= map { $1 => 0 if m/< | ||
+ | |||
+ | # | ||
+ | # Build a list of indices of the data sets to check. If the data sets to check | ||
+ | # are not specified, using parameter -s, all data sets will be checked. | ||
+ | # | ||
+ | @DATASET= @ds unless @DATASET ; | ||
+ | my $ds= ' | ||
+ | for ( my $i= 0 ; $i < @ds ; $i++ ) { | ||
+ | next unless index($ds, | ||
+ | $ds{$i}= $ds[$i] ; | ||
+ | } # of for | ||
+ | |||
+ | # | ||
+ | # Determine the CF to use when checking the lower bound (if specified) and the | ||
+ | # upper bound (if specified). | ||
+ | # | ||
+ | $lbcf= ( map { exists $cf{$_} ? $_ : () } @pcfLOW )[0] ; | ||
+ | $ubcf= ( map { exists $cf{$_} ? $_ : () } @pcfUPP )[0] ; | ||
+ | } # of AnalyseRrd | ||
+ | |||
+ | # | ||
+ | # Function CrackParameters extracts all parameters from the command line. The | ||
+ | # function returns a true value upon successful completion. | ||
+ | # | ||
+ | sub CrackParameters() { | ||
+ | return 0 unless getopts( " | ||
+ | |||
+ | # Check for the flags in the command line. | ||
+ | ShowHelp(), exit if $opt{h} || ($#ARGV < 0) ; | ||
+ | if ( $opt{d} ) { | ||
+ | $DEBUG | ||
+ | $VERBOSE= 1 ; | ||
+ | print " | ||
+ | } # of if | ||
+ | if ( $opt{v} ) { | ||
+ | $VERBOSE= 1 ; | ||
+ | print " | ||
+ | } # of if | ||
+ | |||
+ | # Check for the parameters with an associated value in the command line. | ||
+ | if ( $opt{b} ) { | ||
+ | $LOWTIM= $opt{b} ; | ||
+ | print " Time boundary set to $LOWTIM\n" | ||
+ | } # of if | ||
+ | if ( $opt{l} ) { | ||
+ | $LOWBND= $opt{l} ; | ||
+ | print " Lower bound set to $LOWBND\n" | ||
+ | } # of if | ||
+ | if ( $opt{s} ) { | ||
+ | @DATASET= split( /,/, $opt{s} ) ; | ||
+ | for ( my $i= $#DATASET ; $i >= 0 ; $i-- ) { | ||
+ | splice( @DATASET, $i ), next if $DATASET[$i]=~ m/^\s*$/ ; | ||
+ | $DATASET[$i]=~ s/\s//g ; # Remove blank spaces | ||
+ | } # of for | ||
+ | if ( scalar(@DATASET) == 1 ) { | ||
+ | print " Data set to check is $DATASET[0]\n" | ||
+ | } else { | ||
+ | print " Data sets to check are (" . join( ',', | ||
+ | } # of else | ||
+ | } # of if | ||
+ | if ( $opt{u} ) { | ||
+ | $UPPBND= $opt{u} ; | ||
+ | print " Upper bound set to $UPPBND\n" | ||
+ | } # of if | ||
+ | |||
+ | # One parameter should be left, the name of the file containing the RRD. | ||
+ | if ( $#ARGV == 0 ) { | ||
+ | if ( substr($ARGV[0], | ||
+ | if ( -f $ARGV[0] ) { | ||
+ | $RRDFIL= $ARGV[0] ; # Save name of file | ||
+ | print " Source is $RRDFIL\n" | ||
+ | } # of if | ||
+ | } # of if | ||
+ | } # of if | ||
+ | |||
+ | # Check for parameter consistency. At least one boundary needs to be defined. | ||
+ | # The name of the RRD must be defined. | ||
+ | return 0 unless defined $LOWBND or defined $UPPBND or defined $LOWTIM ; | ||
+ | return 0 unless defined $RRDFIL ; | ||
+ | die " | ||
+ | return 1 # All is well | ||
+ | } # of CrackParameters | ||
+ | |||
+ | # | ||
+ | # Function DumpRrd dumps the content of an RRD into an XML file. As this XML | ||
+ | # file will be read multiple times, it reads the file and stores the lines in | ||
+ | # array @xml. | ||
+ | # | ||
+ | sub DumpRrd() { | ||
+ | print " Dumping $RRDFIL to an XML file...\n" | ||
+ | system( " | ||
+ | |||
+ | open ( FH, '<', | ||
+ | chomp( @xml= <FH> ) ; # Read entire file | ||
+ | close( FH ) ; | ||
+ | } # of DumpRrd | ||
+ | |||
+ | # | ||
+ | # Function InstallNewRrd takes the new version of the XML dump and creates an | ||
+ | # updated version of the RRD. The original one is renamed with extension ' | ||
+ | # | ||
+ | sub InstallNewRrd() { | ||
+ | my $RRDFILOLD= " | ||
+ | |||
+ | print " Installing new version of RRD...\n" | ||
+ | rename( $RRDFIL, $RRDFILOLD ) ; # Rename original RRD file | ||
+ | # | ||
+ | system( " | ||
+ | |||
+ | my($mode, | ||
+ | chmod $mode, $RRDFIL ; | ||
+ | chown $uid, $gid, $RRDFIL ; | ||
+ | } # of InstallNewRrd | ||
+ | |||
+ | # | ||
+ | # Function SaveXml writes the in-memory copy of the XML file to disk. | ||
+ | # | ||
+ | sub SaveXml() { | ||
+ | open ( FH, '>', | ||
+ | print FH " | ||
+ | close( FH ) ; | ||
+ | } # of SaveXml | ||
+ | |||
+ | # | ||
+ | # Function ShowHelp shows a short help screen on standard output. | ||
+ | # | ||
+ | sub ShowHelp() { | ||
+ | print " | ||
+ | print " | ||
+ | print "$0 [-dhv] [-l number] [-u number] [-b timestamp] [-s name[, | ||
+ | print " | ||
+ | print " | ||
+ | print " | ||
+ | print " | ||
+ | print " | ||
+ | print " | ||
+ | print " | ||
+ | print " | ||
+ | print " | ||
+ | } # of ShowHelp | ||
+ | |||
+ | # | ||
+ | # Function StripSpikesOnePass scans the XML version of the RRD, and removes any | ||
+ | # spikes it finds. The result is saved in the in-memory copy of the XML file. | ||
+ | # The result of this function is true if the XML file is modified, and false | ||
+ | # otherwise. This function is used whenever there is only one consolidation | ||
+ | # function in use, thus if an out-of-bound value is found, the only line to be | ||
+ | # cleared is the one being processed. | ||
+ | # | ||
+ | sub StripSpikesOnePass() { | ||
+ | my $result= 0 ; # Function result, default to no change | ||
+ | my $modif ; # Flag: line image is modified | ||
+ | my $modcnt= 0 ; # Number of rows modified | ||
+ | my $tsom ; # Time stamp of measurement | ||
+ | my $i ; # Index of data set in row | ||
+ | |||
+ | print " Removing spikes in one pass...\n" | ||
+ | |||
+ | foreach ( @xml ) { | ||
+ | $modif= | ||
+ | |||
+ | if ( m/< | ||
+ | if ( defined $LOWTIM ) { | ||
+ | $tsom= $1 if m/ | ||
+ | $modif= 1 if $tsom < $LOWTIM ; | ||
+ | } # of if | ||
+ | |||
+ | unless ( $modif ) { | ||
+ | $i= -1 ; # Preset data set index | ||
+ | foreach my $val ( m/ | ||
+ | # See if this value (data set) needs to be checked. | ||
+ | $i++ ; | ||
+ | next if $val eq ' | ||
+ | next unless exists $ds{$i} ; | ||
+ | |||
+ | if ( defined $LOWBND ) { | ||
+ | $modif= 1 if $val < $LOWBND ; | ||
+ | } # of if | ||
+ | if ( defined $UPPBND ) { | ||
+ | $modif= 1 if $val > $UPPBND ; | ||
+ | } # of if | ||
+ | last if $modif ; # Small optimization | ||
+ | } # of foreach | ||
+ | } # of unless | ||
+ | |||
+ | if ( $modif ) { | ||
+ | s/> | ||
+ | $modcnt++ ; # Update modification count | ||
+ | $result= 1 ; # Function result | ||
+ | if ( $DEBUG ) { | ||
+ | $tsom= $1 if m/< | ||
+ | print " | ||
+ | } # of if | ||
+ | } # of if | ||
+ | } # of if | ||
+ | } # of foreach | ||
+ | |||
+ | return $result ; | ||
+ | } # of StripSpikesOnePass | ||
+ | |||
+ | # | ||
+ | # Function StripSpikesTwoPasses scans the XML version of the RRD, and removes | ||
+ | # any spikes it finds. The result is saved in the in-memory copy of the XML | ||
+ | # file. The result of this function is true if the XML file is modified, and | ||
+ | # false otherwise. This function is called if the RRD uses two or more | ||
+ | # consolidation functions. In the first pass the list of measurements to strip | ||
+ | # is determined using the preferred CF only. In the next pass, these | ||
+ | # measurements, | ||
+ | # time stamp, are cleared in each CF. | ||
+ | # | ||
+ | sub StripSpikesTwoPasses() { | ||
+ | my $pdppr= undef ; # Primary data points per row in RRA | ||
+ | my $cf = undef ; # Consolidation function in RRA | ||
+ | my $inrra= 0 ; # Flag: current line inside RRA def | ||
+ | my $skrra= 0 ; # Flag: skip current RRA | ||
+ | my $indb = 0 ; # Flag: current line in database | ||
+ | |||
+ | my $result= 0 ; # Function result, default to no change | ||
+ | my $modif ; # Flag: line image is modified | ||
+ | my $modcnt= 0 ; # Number of rows modified | ||
+ | my $tsom ; # Time stamp of measurement | ||
+ | my $i ; # Index of data set in row | ||
+ | |||
+ | print " Removing spikes in two passes...\n" | ||
+ | |||
+ | # Prepare a hash to store the identification of the measurements to be | ||
+ | # stripped. Such a measurement is identified by two numbers, the number of | ||
+ | # primary data points per row and the time stamp of the measurement. | ||
+ | # | ||
+ | my %sl= map { $1 => {} if m/ | ||
+ | |||
+ | # | ||
+ | # Phase A: Scan the appropriate RRA's for out-of-bound values. If one is found, | ||
+ | # the pair (pdp_per_row, | ||
+ | # | ||
+ | foreach ( @xml ) { | ||
+ | $modif= 0 ; # Clear flag | ||
+ | unless ( $inrra ) { # If not in an RRA definition, | ||
+ | if ( m/ | ||
+ | $inrra= 1 ; # Set flag | ||
+ | $skrra= 0 ; # Reset skip flag | ||
+ | $indb = 0 ; # Reset in-database flag | ||
+ | next ; # Line done | ||
+ | } # of if | ||
+ | } # of unless | ||
+ | next unless $inrra ; | ||
+ | $inrra= 0, next if m/ | ||
+ | next if $skrra ; # Done with line if skip flag set | ||
+ | |||
+ | unless ( $indb ) { | ||
+ | if ( m/ | ||
+ | $cf= $1 ; # Save consolidation function name | ||
+ | $skrra= 1 ; # Assume this RRA does not need to be examined | ||
+ | $skrra= 0 if defined $LOWBND | ||
+ | $skrra= 0 if defined $UPPBND | ||
+ | next ; # Line done | ||
+ | } # of if | ||
+ | $pdppr= $1, next if m/ | ||
+ | $indb = 1, next if m/ | ||
+ | } # of unless | ||
+ | next unless $indb ; | ||
+ | $indb= 0, next if m/ | ||
+ | |||
+ | if ( defined $LOWTIM ) { | ||
+ | $tsom= $1 if m/ | ||
+ | if ( $tsom < $LOWTIM ){ | ||
+ | $modif= 1 if m/ | ||
+ | } # of if | ||
+ | } # of if | ||
+ | |||
+ | unless ( $modif ) { | ||
+ | $i= -1 ; # Preset data set index | ||
+ | foreach my $val ( m/ | ||
+ | # See if this value (data set) needs to be checked. | ||
+ | $i++ ; | ||
+ | next if $val eq ' | ||
+ | next unless exists $ds{$i} ; | ||
+ | |||
+ | if ( defined $LOWBND ) { | ||
+ | $modif= 1 if $val < $LOWBND ; | ||
+ | } # of if | ||
+ | if ( defined $UPPBND ) { | ||
+ | $modif= 1 if $val > $UPPBND ; | ||
+ | } # of if | ||
+ | last if $modif ; # Small optimization | ||
+ | } # of foreach | ||
+ | } # of unless | ||
+ | |||
+ | if ( $modif ) { | ||
+ | $tsom= $1 if m/ | ||
+ | $sl{$pdppr}{$tsom}= 0 ; # Clear these measurements | ||
+ | if ( $DEBUG ) { | ||
+ | $tsom= $1 if m/< | ||
+ | print " | ||
+ | } # of if | ||
+ | } # of if | ||
+ | } # of foreach | ||
+ | |||
+ | # | ||
+ | # Phase B: Scan the XML file again, and if in a database the time stamp of a | ||
+ | # row is marked in hash %sl, the row is cleared. | ||
+ | # | ||
+ | # $result= 0 ; # Preset function result | ||
+ | foreach ( @xml ) { | ||
+ | $pdppr= $1, next if m/ | ||
+ | next unless m/< | ||
+ | $tsom= $1 if m/ | ||
+ | next unless exists $sl{$pdppr}{$tsom} ; | ||
+ | |||
+ | s/> | ||
+ | $result= 1 ; # Update function result | ||
+ | if ( $DEBUG ) { | ||
+ | $tsom= $1 if m/< | ||
+ | print " | ||
+ | } # of if | ||
+ | } # of foreach | ||
+ | |||
+ | return $result ; | ||
+ | } # of StripSpikesTwoPasses | ||
+ | |||
+ | |||
+ | # | ||
+ | # MAIN PROGRAM. | ||
+ | # ============= | ||
+ | # | ||
+ | unless ( CrackParameters ) { # Interpret the command line parameters | ||
+ | ShowHelp ; # Show help information | ||
+ | exit 0 ; # Stop this script | ||
+ | } # of unless | ||
+ | |||
+ | DumpRrd ; # Dump the RRD to a temporary XML file | ||
+ | AnalyseRrd ; # Determine consolidation functions in use | ||
+ | if ( defined $LOWTIM and not defined $LOWBND and not defined $UPPBND ) { | ||
+ | $RrdMod= 1 if StripSpikesOnePass ; # Strip spikes | ||
+ | } elsif ( keys %cf == 1 ) { | ||
+ | $RrdMod= 1 if StripSpikesOnePass ; # Strip spikes | ||
+ | } else { | ||
+ | $RrdMod= 1 if StripSpikesTwoPasses ; # Strip spikes | ||
+ | } # of else | ||
+ | if ( $RrdMod ) { | ||
+ | SaveXml ; # Write new XML file | ||
+ | InstallNewRrd ; # Create new version of RRD file | ||
+ | } # of if | ||
+ | |||
+ | END { | ||
+ | unlink $XMLOLD ; # Remove intermediate files | ||
+ | unlink $XMLNEW ; | ||
+ | # if $RRDFILOLD exists, but RRDFIL does not (or is shorter), remove $RRDFIL and | ||
+ | # rename $RRDFILOLD to $RRDFIL. | ||
+ | } | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | ===== Known Bugs and Issues ===== | ||
+ | |||
+ | There are no known bugs. | ||
+ | This script has been tested and used on Raspberry Pi (RPi) computers. Even on an RPi 0W with a slow SD card, removing a spike from an RRD with 3 data sets, 3 consolidation functions and in total 15 RRA's containing 576 points each takes less than 6 seconds. | ||
+ | |||
+ | ===== To Do ===== | ||
+ | |||
+ | On the to do list is a check on the newly created RRD: it should have the same size as the original RRD. | ||
+ | |||
+ | ===== Changelog ===== | ||
+ | |||
+ | * **2020-01-28** | ||
+ | * Initial release | ||