#!/usr/bin/perl
# -----------------------------------------------------------------------------
#  check_sm_status : Nagios plugin for Stratus Storage Manager health check
# -----------------------------------------------------------------------------
#  Created by: Andriy Fomenko
#  Authors: Alex Titov, Alex Tsibulnik, Andriy Fomenko
#  QA by:
#  Copyright: videoNEXT Federal, Inc., 2015
# -----------------------------------------------------------------------------

use strict;
use warnings;

use Nagios::Plugin;
use Nagios::Plugin::Getopt;

use SKM::DB;
use SKM::Common "ArrayTable";
use Master::Conf;
use Node::Conf ":all";

# Plugin object; also handles the standard Nagios command-line options
my $np = Nagios::Plugin->new(
    usage => "Usage: %s [ -v|--verbose ]",
);

$np->getopts;

# The check cannot run without the APL environment variable
my $APL = $ENV{APL};
unless ($APL) {
    $np->nagios_exit( UNKNOWN, 'APL environment variable is not defined' );
}

# Only the master node performs this check; any other node reports OK right away
unless ( am_I_master() ) {
    $np->nagios_exit( OK, 'skipped on slave node' );
}

# Try connecting to db.
# RaiseError=>1 turns DBI errors into exceptions, FetchHashKeyName=>'NAME_uc'
# requests upper-case hash keys for fetched rows.
my $dbh;
my $connected = eval {
    $dbh = DBMaster({ PrintError=>0, RaiseError=>1, FetchHashKeyName=>'NAME_uc' });
    1;
};
# Report CRITICAL both when the connect attempt died and when it returned no
# handle; otherwise the first method call on $dbh would abort the plugin
$np->nagios_exit( CRITICAL, 'Database is DOWN' ) if !$connected or !$dbh;

# Read volume id, volume name, node IP and volume status.
# The handle is opened with RaiseError=>1, so a failing query throws; it must
# run inside eval for the error check below to be reachable.
my $ra = eval {
    $dbh->selectall_arrayref(
        "select sw.id, sw.name, oa.val, so.ost
           from sm_wheels sw inner 
           join sm_ost so on sw.id=so.id
		  inner join _objs o on sw.nodeid=o.name
		  inner join _obj_attr oa on o.obj=oa.obj
		  where o.otype='D' and o.subtype='N' and o.deleted=0 and oa.attr='IP'",
        {Slice=>[]}
    );
};

$np->nagios_exit( CRITICAL, "Failed to read volume information: $@") if $@;

# TODO: add an optional parameter that prints this report:
# ArrayTable(['Volume ID', 'Volume name', 'Node IP', 'Volume Status'], $ra) if $ra;

# Re-read the volumes as a hash keyed by volume ID, with both OST and CST
# state columns for each volume.
# The query runs inside eval because the handle uses RaiseError=>1: without it
# a DB error would abort the plugin instead of producing a Nagios status.
# (selectall_hashref has no use for a Slice attribute, so none is passed.)
$ra = eval {
    $dbh->selectall_hashref(
        "select w.id, w.nodeid, w.name, o.ost, c.cst 
       from sm_wheels w, sm_ost o,sm_cst c 
      where o.id=w.id and c.id=o.id 
      order by 2,1",
        'ID'
    );
};

$np->nagios_exit( CRITICAL, "Failed to read volume status: $@" ) if $@;

my $warnings = '';
my $nlist = NodeList;

# Get volume-specific warnings: deviations from configured state.
# Volumes are visited in sorted id order so the plugin output is stable
# between runs (hash key order is random).
foreach my $id (sort keys %{ $ra || {} }) {
    my $vol = $ra->{$id};
    my $uni = $vol->{NODEID};

    # Only touch node entries that really exist: a blind lookup would
    # autovivify an empty entry for an unknown node id
    my $known = ( defined $uni and exists $nlist->{$uni} ) ? 1 : 0;

    if ( $vol->{CST} ne uc($vol->{OST}) and $vol->{OST} ne 'Full' ) {
        my $node = $known ? $nlist->{$uni}->{HOST} : undef;
        # Fall back to the raw node id when no host name is available
        if ( not defined $node ) {
            $node = defined $uni ? $uni : 'unknown';
        }
        # Each warning ends with a newline so multiple warnings stay on separate lines
        $warnings .= "Volume $vol->{NAME}  on node $node has status $vol->{OST} (expected $vol->{CST})\n";
    }

    # Count volumes usable for storage; Degraded also counts as 'Online'
    $nlist->{$uni}->{ONLINE_WHEELS}++ if $known and $vol->{OST} =~ /Online|Degraded/;
}

# Get nodes with no "workable" storage
# TODO: consider checking whether the node has any non-deleted devices configured,
#       so that a "dedicated master" is not reported here
# Nodes are visited in sorted key order so the plugin output is stable between runs.
foreach my $uni (sort keys %$nlist) {
    my $node = $nlist->{$uni};
    next if $node->{DEAD};              # skip dead nodes
    next if $node->{ONLINE_WHEELS};     # node has at least one online volume

    # Fall back to the node key when the entry carries no host name
    my $host = defined $node->{HOST} ? $node->{HOST} : $uni;
    $warnings .= "Node $host does not have any ONLINE storage volume\n";
}

# Any collected warning downgrades the overall result to WARNING
$np->nagios_exit( WARNING, "Storage Manager requires attention:\n$warnings" ) if $warnings;

$np->nagios_exit( OK, 'Storage Manager is running normally' );
