#!/usr/bin/perl

#
# ------------------------------------------------------------------------
#  Patrols ServiceMix daemon
# ------------------------------------------------------------------------
#  Author: Alex Tsibulnik
#  Edited by:
#  QA by:
#  Copyright: videoNEXT LLC
# ------------------------------------------------------------------------

use strict;
use warnings;
use IO::Select;
use NextCAM::OMClient;
use SKM::DB;
use Log::Log4perl "get_logger";

# Installation root; taken from the APL environment variable, falling back
# to /opt/sarch when it is unset or empty.
my $APL = $ENV{APL} || '/opt/sarch';

# NOTE(review): presumably sets up Log4perl for patrol scripts - confirm
# against logger.patrol itself.
require "$APL/common/bin/logger.patrol";
my $SMXCTL = "$APL/smix/bin/smxctl"; # Control script, always run through sudo
my $SLEEP = 60;             # Seconds between patrol iterations
my $READ_TMT = 20;          # Seconds to wait for "smxctl extstatus" output
my $MAX_SEQ_ERR = 6;        # Sequential OM failures answered with restart_om
                            # before a full restart is done instead
my $MAX_SEQ_RESTARTS = 20;  # More sequential restarts than this -> cleanstart
my $RESTART_WAIT = 60; # Wait 1 min for Servicemix to restart
my $CLEANSTART_WAIT = 120; # Seconds to wait after "smxctl cleanstart"
my $log = get_logger('NEXTCAM::CAM::SMX_PATROL');
my $restart = 0;            # Only ever reset to 0 in this file
my $seq_err = 0;            # Sequential failed OM checks
my $seq_restarts = 0;       # Sequential restarts without a healthy check

# Main -------------------------------------------------------------------

# All lexicals above are initialised by the time this call runs.
main();

# Routines ---------------------------------------------------------------

# Query ServiceMix state by running "sudo smxctl extstatus" in a forked
# child and parsing the KEY=VALUE lines it prints on stdout.
#
# Returns a hash reference with the parsed pairs. If the command has not
# finished writing within $READ_TMT seconds the result is
# { STATUS => 'timeout' } and "smxctl kill_siblings" is run.
# Dies through the logger when the child cannot be forked.
sub read_status {
    my %status;
    my $pid = open(my $pipe, "-|");
    $log->logdie("Cannot fork: $!") if not defined $pid;

    if ($pid == 0) { # child: its stdout is the parent's $pipe
        exec("/usr/bin/sudo", $SMXCTL, "extstatus")
            or die "Cannot exec: $!";
    }

    # Parent. The deadline covers the WHOLE read, not just the first byte:
    # a child that prints part of its output and then hangs must still be
    # reported as a timeout. sysread() is used because buffered <$pipe>
    # reads must not be mixed with select().
    my $select    = IO::Select->new($pipe);
    my $deadline  = time + $READ_TMT;
    my $output    = '';
    my $timed_out = 0;
    while (1) {
        my $left = $deadline - time;
        if ($left <= 0 or not $select->can_read($left)) {
            $timed_out = 1;
            last;
        }
        my $got = sysread($pipe, $output, 4096, length $output);
        last if not $got; # 0 = EOF, undef = read error
    }

    if ($timed_out) {
        $status{STATUS} = 'timeout';
        # Output of the helper is captured and deliberately discarded.
        `sudo $SMXCTL kill_siblings`; # Kill hanging smxctl processes
    }
    else {
        # Later duplicates of a key override earlier ones.
        while ($output =~ /^(\w+)=(.*)$/mg) {
            $status{$1} = $2;
        }
    }

    # close() on a forked pipe waits for the child; bound that wait so a
    # stuck child cannot block the patrol loop.
    eval {
        local $SIG{ALRM} = sub { die "timeout\n" };
        alarm 3;
        close($pipe);
        alarm 0;
    };
    my $close_err = $@;
    alarm 0;
    $log->warn("Timed out waiting for 'smxctl extstatus' to exit")
        if $close_err;

    return \%status;
}

# Restart ServiceMix and wait for it to come up.
#
#   $msg - optional text logged as a warning before the restart.
#
# Resets $restart and $seq_err and increments $seq_restarts. Once
# $seq_restarts exceeds $MAX_SEQ_RESTARTS a "cleanstart" is issued instead
# of a plain "restart" and the counter is reset. A failing smxctl call is
# logged; the wait is performed either way.
sub restart {
    my $msg = shift;
    $restart = 0;
    $seq_err = 0;
    $seq_restarts++;
    my $clean = $seq_restarts > $MAX_SEQ_RESTARTS ? 1 : 0;
    $log->warn($msg) if defined $msg;
    $log->warn("Do restart (seq_restarts=$seq_restarts clean=$clean)");

    my ($action, $wait) = $clean ? ('cleanstart', $CLEANSTART_WAIT)
                                 : ('restart',    $RESTART_WAIT);
    $seq_restarts = 0 if $clean;

    # List form: no shell involved, so the smxctl path is passed verbatim.
    my $rc = system('sudo', $SMXCTL, $action);
    if ($rc == -1) {
        $log->error("Cannot run 'smxctl $action': $!");
    }
    elsif ($rc != 0) {
        $log->error("'smxctl $action' failed (wait status $rc)");
    }

    sleep $wait;
}

# One health-check pass over ServiceMix.
#
# - Status other than 'run': log the reason and do a restart.
# - Status 'run' but check_om() fails: reload the OM bundle, or do a
#   restart once $MAX_SEQ_ERR sequential failures have been reached.
# - Everything healthy: reset the error and restart counters.
sub check_smx {
    my $status = read_status();
    # smxctl may produce no STATUS line at all (e.g. sudo failure); treat
    # that as an explicit state instead of comparing against undef.
    my $state = defined $status->{STATUS} ? $status->{STATUS} : 'unknown';

    if ($state ne 'run') {
        if ($state eq 'timeout') {
            $log->warn("ServiceMix instance works slow");
        }
        elsif ($state eq 'stop') {
            $log->warn("ServiceMix instance not running");
        }
        elsif ($state eq 'duplicate') {
            $log->warn("Duplicate ServiceMix instances found");
        }
        else {
            $log->warn("Unexpected ServiceMix status '$state'");
        }
        restart();
    }
    elsif (not check_om()) {
        $log->warn("OM hangup detected");
        if ($seq_err >= $MAX_SEQ_ERR) {
            restart("$seq_err sequential errors: restarting");
        }
        else {
            $log->warn("Reload OM bundle");
            system("sudo $SMXCTL restart_om");
            $seq_err++;
        }
    }
    else {
        $seq_err = 0;
        $seq_restarts = 0;
    }

    $restart = 0 if $state eq 'run';
}

# Probe OM by asking it for one camera object.
#
# Returns:
#   false (empty return) - the database connection could not be obtained
#   1                    - the query yielded no row (nothing to probe
#                          with), or OM returned a defined result
#   0                    - OM returned undef for the request
sub check_om {
    my $dbm = DBMaster({RaiseError => 0, PrintError => 0});
    return if not $dbm;

    # Not a random camera: LIMIT 1 without ORDER BY yields whichever
    # matching row the database returns first. With RaiseError off, a
    # failed query also ends up in the "no row" branch below.
    my $objid = $dbm->selectrow_arrayref("SELECT obj FROM _objs WHERE otype='D' AND subtype='C' AND deleted=0 LIMIT 1");
    if (not $objid) {
        $dbm->disconnect;
        return 1;
    }

    # The row array reference is handed to getObjects as-is.
    # NOTE(review): confirm that OMClient::getObjects expects this shape.
    my $om = OMClient->new;
    my $res = $om->getObjects($objid);
    $dbm->disconnect;

    return defined $res ? 1 : 0;
}

# Run "smxctl keyscan" through sudo.
# Returns 1 when the command exits with status 0; otherwise logs an info
# message and returns 0.
sub check_key {
    system("sudo $SMXCTL keyscan");
    my $failed = $? ? 1 : 0;

    return 1 unless $failed;

    $log->info("smix ssh key is not in known_hosts. Scan and store key");
    return 0;
}

# Patrol loop: never returns.
#
# After an initial 30 second grace period, every $SLEEP seconds the ssh key
# is verified with check_key() and, when that succeeds, a health check is
# done with check_smx(). check_smx() performs the OM probe itself, so no
# separate check_om() call is made here.
sub main {
    $log->info(" starting ..");
    # Wait for bundles to start
    sleep 30;
    # Start loop
    while (1) {
        if (check_key()) {
            check_smx();
        }
        else {
            $log->info("Cannot read smix ssh key. Waiting for smix to start...");
        }
        sleep $SLEEP;
    }
}
