# Install the NRPE daemon and the standard Nagios plugin collection.
apt-get install nagios-nrpe-server nagios-plugins
# Download and unpack the command line tools archive from lsi.com.
wget http://www.lsi.com/downloads/Public/SATA/SATA%20Common%20Files/cli_linux_10.2.1_9.5.4.zip
unzip cli_linux_10.2.1_9.5.4.zip
# Put the x86_64 tw_cli binary at /usr/local/sbin/tw_cli, the path check_3ware.pl calls.
mv x86_64/tw_cli /usr/local/sbin/
root@vz2010:~# cat /etc/nagios/nrpe_local.cfg
command[check_load]=/usr/lib/nagios/plugins/check_load -w 15,10,5 -c 30,25,20
command[check_raid]=/bin/cat /tmp/3ware
command[check_disk]=/usr/lib/nagios/plugins/check_disk -w 10M -c 20M -p /dev/sda1
command[check_vz]=/usr/lib/nagios/plugins/check_vz_ubc
root@vz2010:~# grep observer /etc/nagios/nrpe.cfg
allowed_hosts=observer.vmk.zhdk.ch
root@vz2010:~# crontab -l
* * * * * cat /proc/user_beancounters > /tmp/user_beancounters
* * * * * /usr/lib/nagios/plugins/check_3ware.pl -d c2 >/tmp/3ware
root@vz2010:~# cat /usr/lib/nagios/plugins/check_vz_ubc
#!/bin/bash
#
# check_vz_ubc - Nagios/NRPE check for OpenVZ user beancounters.
#
# Reads a copy of the beancounter table from $ubc_file, compares it with the
# copy saved by the previous run in $FILE and reports:
#   WARNING  (exit 1) - a resource is held above its (non-zero) barrier
#   CRITICAL (exit 2) - a failcnt has grown since the previous run
#   UNKNOWN  (exit 3) - the beancounter copy is missing, unreadable or empty
# Every run ends by storing the current data in $FILE for the next comparison.
#
# Servicestate description can have a http-link to the openvz-wiki
# in case that a resource is warning/critical. To use it:
# 1. set "escape_html_tags=0" in nagios/etc/cgi.cfg
# 2. set "my $linked=1;" in the first perl lines in this script
#
export FILE=/tmp/check_ubc   # exported because the embedded perl reads it from %ENV
RET=0
ubc_file='/tmp/user_beancounters'
DATA=''
if [ -r "$ubc_file" ]; then
    DATA=$(cat "$ubc_file")
fi
if [ -z "$DATA" ]; then
    echo "UNKNOWN - $ubc_file is not readable or empty. Maybe it is only readable for root and this script should be called by sudo."
    exit 3
fi
if [ -f "$FILE" ]; then
    # The perl program below is wrapped in single quotes for the shell, so it
    # must not contain a single-quote character anywhere (comments included).
    # It is run WITHOUT -n: the program has its own read loop, and -n would wrap
    # it in a second one that silently swallows the first input line.
    echo "$DATA" | perl -e '
use strict;
use warnings;

my $linked = 1;   # 0: plain text output, 1: resource name is a http-link to the OpenVZ wiki
my $file   = $ENV{"FILE"};
my $ret    = 0;   # Nagios exit code: 0 OK, 1 WARNING, 2 CRITICAL

# Render a resource name, optionally as a link to its OpenVZ wiki page.
sub url {
    my ($name, $with_link) = @_;
    return $name unless $with_link;
    return "<a target=\"_blank\" href=\"http://wiki.openvz.org/".$name."#".$name."\">$name</a>";
}

# Parse beancounter text read from the filehandle $fh.
# Returns a hash ref: { veid => { resource => [held, maxheld, barrier, limit, failcnt] } }.
# When $on_row is given it is called as $on_row->($veid, $resource, @values)
# for every parsed row, in input order.
sub parse_ubc {
    my ($fh, $on_row) = @_;
    my %table;
    my $vid;
    while (my $line = <$fh>) {
        # A line starting with "<digits>:" opens the rows of one container.
        if ($line =~ /\D*(\d+):.*/) {
            $vid = $1;
            $table{$vid} = {};
        }
        # Ignore anything before the first container id.
        next unless defined $vid;
        if ($line =~ /^[\W\d]+([a-z]+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+).*/) {
            my ($resource, @values) = ($1, $2, $3, $4, $5, $6);
            $table{$vid}{$resource} = [@values];
            $on_row->($vid, $resource, @values) if $on_row;
        }
    }
    return \%table;
}

# Open the previous snapshot first, so a failure is reported before any other output.
my $old_fh;
if (!open($old_fh, "<", $file)) {
    print "UNKNOWN - cannot read previous snapshot $file: $!\n";
    exit(3);
}

# Current data arrives on STDIN; barrier violations are reported while parsing
# so the messages keep the order of the beancounter table.
my $current = parse_ubc(\*STDIN, sub {
    my ($vid, $resource, $held, $maxheld, $barrier, $limit, $failcnt) = @_;
    if ($barrier != 0 && $held > $barrier) {
        print "WARNING: Limits on $vid: ".url($resource,$linked)." held->$held , barrier->$barrier ( limit->$limit ) ";
        $ret = 1;
    }
});

# Read and parse the old data.
my $previous = parse_ubc($old_fh);
close($old_fh);

# A failcnt that grew between the two snapshots means an allocation was refused.
# Keys are sorted so the output order is stable from run to run.
foreach my $vmachine_id (sort { $a <=> $b } keys %$current) {
    my $old_rows = $previous->{$vmachine_id};
    next unless $old_rows;
    foreach my $resource (sort keys %{ $current->{$vmachine_id} }) {
        my $old_row = $old_rows->{$resource};
        next unless $old_row;
        my ($held, $maxheld, $barrier, $limit, $failcnt) = @{ $current->{$vmachine_id}{$resource} };
        my $failcnt_old = $old_row->[4];
        if ($failcnt_old < $failcnt) {
            print "CRITICAL: Increased failcnt $vmachine_id: ".url($resource,$linked)." from $failcnt_old to $failcnt (held->$held , maxheld->$maxheld , barrier->$barrier , limit->$limit ) ";
            $ret = 2;
        }
    }
}

if ($ret == 0) { print "OK. \n"; }
exit($ret);
'
    RET=$?
else
    # First run: there is nothing to compare against yet, but Nagios still
    # needs a status line.
    echo "OK. No previous snapshot at $FILE yet; comparison starts with the next run."
fi
echo "$DATA" > "$FILE"
exit $RET
root@vz2010:~# cat /usr/lib/nagios/plugins/check_3ware.pl
#!/usr/bin/perl -w
#
# 2006 (C) Andrei Warkentin, Data Armor.
# Read the GNU Copyright stuff for all the legalese.
#
# Check 3ware array health. Uses the 3ware tw_cli utility.
# This is based on the check_ntp.pl plugin, by Ian Cass.
use strict;
use Getopt::Long;

# Note that we don't use the critical, timeout or warning flags for anything. Or the hostname.
our ($state, $state_message, $PROGNAME, $opt_controller, $opt_version, $opt_verbose,
     $opt_help, $opt_hostname, $opt_warning, $opt_critical, $opt_timeout);
use lib "/usr/lib/nagios/plugins";
use utils qw($TIMEOUT %ERRORS &print_revision &support);

# Execute command for running the 3ware CLI.
my $tw_cli = "/usr/local/sbin/tw_cli";

# Name of this plugin.
$PROGNAME = "check_3ware";

# Forward declarations; the definitions at the end of the file use the same
# empty prototype.
sub print_help();
sub print_usage();

# Print an error line (plus optional captured tool output) on STDOUT, where
# Nagios reads the plugin result, and leave with the UNKNOWN status.
sub _bail_unknown {
    my ($message, $details) = @_;
    print "ERROR $message\n";
    print $details if defined $details && length $details;
    exit $ERRORS{"UNKNOWN"};
}

# Close a tw_cli pipe. close() returns false both on a real close error ($! set)
# and when tw_cli exited non-zero ($? set); either way the result is UNKNOWN.
sub _close_tw_cli {
    my ($fh, $captured) = @_;
    return if close($fh);
    _bail_unknown(
        $! ? "Error close $tw_cli pipe: $!" : "Exit status: $? from $tw_cli",
        $captured,
    );
}

# Clear the environment.
$ENV{'PATH'}     = '';
$ENV{'BASH_ENV'} = '';
$ENV{'ENV'}      = '';

Getopt::Long::Configure('bundling');
GetOptions(
    "V"   => \$opt_version,    "version"      => \$opt_version,
    "h"   => \$opt_help,       "help"         => \$opt_help,
    "v"   => \$opt_verbose,    "verbose"      => \$opt_verbose,
    "w=s" => \$opt_warning,    "warning=s"    => \$opt_warning,
    "c=s" => \$opt_critical,   "critical=s"   => \$opt_critical,
    "t=s" => \$opt_timeout,    "timeout=s"    => \$opt_timeout,
    "H=s" => \$opt_hostname,   "hostname=s"   => \$opt_hostname,
    "d=s" => \$opt_controller, "controller=s" => \$opt_controller,
);

if ($opt_version) {
    print_revision($PROGNAME, 'Version: 1.0');
    exit $ERRORS{'OK'};
}

if ($opt_help) {
    print_help();
    exit $ERRORS{'OK'};
}

if (!$opt_controller) {
    _bail_unknown("Missing controller name - did you pass anything to -d?");
}

# The controller name is pasted into a shell command line below, so refuse
# anything that could carry shell metacharacters.
if ($opt_controller !~ m{\A[A-Za-z0-9_/ ]+\z}) {
    _bail_unknown("Invalid controller name passed to -d.");
}

# Check for tw_cli presence.
if (!-x $tw_cli) {
    _bail_unknown("No executable tw_cli at $tw_cli.");
}

# Keywords looked for in the "info" output, ordered from least to most severe:
# [ pattern, Nagios state, word used in the status message ].
my @array_states = (
    [ qr/OK/,           'OK',       'OK'           ],
    [ qr/VERIFYING/,    'OK',       'Verifying'    ],
    [ qr/INITIALIZING/, 'WARNING',  'Initializing' ],
    [ qr/REBUILDING/,   'WARNING',  'Rebuilding'   ],
    [ qr/MIGRATING/,    'WARNING',  'Migrating'    ],
    [ qr/DEGRADED/,     'CRITICAL', 'Degraded'     ],
    [ qr/INOPERABLE/,   'CRITICAL', 'Inoperable'   ],
);

$state         = 'UNKNOWN';
$state_message = 'No understandable output from tw_cli';
my $out  = '';
my $rank = -1;    # index into @array_states of the most severe keyword seen so far

# First parse just general info.
open(my $info_fh, '-|', "$tw_cli info $opt_controller 2>&1")
    or _bail_unknown("Could not open $tw_cli: $!");

while (my $line = <$info_fh>) {
    $out .= "$line ";
    for my $i (0 .. $#array_states) {
        my ($pattern, $level, $label) = @{ $array_states[$i] };
        # Only ever move to a more severe entry: a later "OK" line must not
        # hide a DEGRADED or INOPERABLE one reported earlier.
        next unless $i > $rank && $line =~ $pattern;
        $rank          = $i;
        $state         = $level;
        $state_message = "Arrays $label on Controller $opt_controller";
    }
}
_close_tw_cli($info_fh, $out);

# Parse the alarm info.
open(my $alarm_fh, '-|', "$tw_cli alarms 2>&1")
    or _bail_unknown("Could not open $tw_cli: $!");

while (my $line = <$alarm_fh>) {
    $out .= "$line ";
    if ($line =~ /ERROR/) {
        $state         = 'CRITICAL';
        $state_message = "Alarms on Controller $opt_controller";
    }
}
_close_tw_cli($alarm_fh, $out);

print "$state_message.\n";
print $out if $opt_verbose;

# Hand the detected state to Nagios as the exit status.
exit $ERRORS{$state};
# Print the one-line command synopsis to STDOUT.
# The empty prototype matches the forward declaration near the top of the file.
sub print_usage()
{
    print "Usage: $PROGNAME [-d array controller] [-v verbose]\n";
}
# Print the full help text to STDOUT: revision line, copyright, usage synopsis,
# a one-sentence description and the support text from the Nagios utils module.
# The empty prototype matches the forward declaration near the top of the file.
sub print_help()
{
    # NOTE(review): the --version handler reports "Version: 1.0" while this
    # reports 1.1 - confirm which one is correct.
    print_revision($PROGNAME, "Version: 1.1");
    print "Copyright (C) 2006 Andrei Warkentin\n";
    print "\n";
    print_usage();
    print "Checks the status of the 3ware controller (c0, c1, etc) passed with -d.\n\n";
    support();
}