I had an extremely embarrassing moment this morning when I discovered that our MySQL replication wasn’t working, and hadn’t been working all weekend. We had some mission-critical services that failed miserably because we didn’t bring the systems back up correctly.
I scribbled this together to hopefully warn me when it happens again.
[code]
use strict;
use Getopt::Long;
use vars qw($opt_V $opt_w $opt_c $opt_h $PROGNAME);
use lib "/usr/lib64/nagios/plugins" ;
use utils qw(%ERRORS &print_revision &support &usage);
use warnings;

# Nagios plugin body: runs "show slave status" through the mysql client,
# extracts the replication lag, and reports OK / WARNING / CRITICAL both as
# text on stdout and as the process exit code (taken from %ERRORS).

$PROGNAME = "check_simplereplication.pl";

sub print_help ();
sub print_usage ();

# Blank out the environment variables a shell consults, so the command run
# in backticks below cannot be influenced by the caller's environment.
$ENV{'PATH'}     = '';
$ENV{'BASH_ENV'} = '';
$ENV{'ENV'}      = '';

Getopt::Long::Configure('bundling');
GetOptions(
    "V"   => \$opt_V, "version"    => \$opt_V,
    "h"   => \$opt_h, "help"       => \$opt_h,
    "w=s" => \$opt_w, "warning=s"  => \$opt_w,
    "c=s" => \$opt_c, "critical=s" => \$opt_c,
);

if ($opt_V) {
    print_revision($PROGNAME, '$Revision: 0.5 $');
    exit $ERRORS{'OK'};
}

if ($opt_h) {
    print_help();
    exit $ERRORS{'OK'};
}

# Thresholds are whole numbers of seconds.  The patterns are anchored so
# that "500" is not silently read as "50", and definedness (not truth) is
# tested so that a threshold of 0 is accepted.
# NOTE(review): usage() from the Nagios utils module is expected to treat its
# first argument as a printf format and exit UNKNOWN, hence the %s form
# instead of interpolating user input into the format -- confirm against the
# installed utils.pm.
defined $opt_w or usage("Warning threshold not specified\n");
my ($warning) = $opt_w =~ /\A([0-9]+)\z/;
defined $warning or usage("Invalid warning threshold: %s\n", $opt_w);

defined $opt_c or usage("Critical threshold not specified\n");
my ($critical) = $opt_c =~ /\A([0-9]+)\z/;
defined $critical or usage("Invalid critical threshold: %s\n", $opt_c);

my @rawinput = `echo "show slave status\\G" | /usr/bin/mysql -uUSER -pPASS `;
my $query_failed = ( $? != 0 );    # $? is the status of mysql, the last command in the pipe
chomp(@rawinput);

# If the server cannot be queried, replication cannot be confirmed healthy.
if ($query_failed) {
    print "CRITICAL| unable to run 'show slave status' through mysql\n";
    exit $ERRORS{'CRITICAL'};
}

# The \G output is one "   Field_Name: value" pair per line.  Keys are
# indented, and values may be empty or contain colons themselves, so match
# explicitly instead of splitting; the "*** 1. row ***" banner is skipped
# because it does not match.
my %status_of;
for my $line (@rawinput) {
    next unless $line =~ /\A\s*(\w+):\s*(.*?)\s*\z/;
    $status_of{$1} = $2;
}

my $lag = $status_of{'Seconds_Behind_Master'};

# No such field means the server returned no slave status row at all.
if ( !defined $lag ) {
    print "CRITICAL| no slave status returned by mysql\n";
    exit $ERRORS{'CRITICAL'};
}

# Bytes of master log that have been read but not yet executed.
my $read_pos = $status_of{'Read_Master_Log_Pos'};
my $exec_pos = $status_of{'Exec_Master_Log_Pos'};
my $missing  = 'unknown';
if (   defined $read_pos
    && defined $exec_pos
    && $read_pos =~ /\A[0-9]+\z/
    && $exec_pos =~ /\A[0-9]+\z/ )
{
    $missing = $read_pos - $exec_pos;
}

# Most severe condition first, so a later test never downgrades the result.
# A non-numeric lag (mysql prints NULL) is treated as CRITICAL.
my $msg = 'OK';
if ( $lag !~ /\A[0-9]+\z/ ) {
    $msg = 'CRITICAL';
}
elsif ( $lag > $critical ) {
    $msg = 'CRITICAL';
}
elsif ( $lag > $warning ) {
    $msg = 'WARNING';
}

print "$msg| lag:$lag missed:$missing\n";

# Nagios reads the state from the exit code, not from the printed text.
exit $ERRORS{$msg};
# Print a one-line synopsis of the command line.
# Both -w and -c take a value (declared as "w=s" / "c=s" in GetOptions), so
# the synopsis shows a placeholder for each, named as in the help text.
sub print_usage () {
    print "Usage: $PROGNAME -w INTEGER -c INTEGER\n";
}
# Print the full help screen: revision banner, copyright and description,
# the usage synopsis, the option descriptions, and finally the support text.
sub print_help () {
    print_revision($PROGNAME, '$Revision: 0.5 $');

    # Single-quoted heredocs: the text is emitted verbatim, no interpolation.
    print <<'END_DESCRIPTION';
Copyright (c) 2005 Russell Gilman-Hunt
This plugin uses the 'show slave status' command in mysql to check replication status.
END_DESCRIPTION

    print_usage();

    # The leading blank line separates the usage synopsis from the options.
    print <<'END_OPTIONS';

-w, --warning=INTEGER
Number of seconds before a warning status will result
-c, --critical=INTEGER
Number of seconds before a critical status will result
END_OPTIONS

    support();
}
[/code]