#!/usr/bin/perl
#================================================================ 
#
# Master support file script
# Creates master support files. Manages collector scripts on nodes.
#
# Usage: 
#    master_sf_collect [-T]
#	-T option forces script to start in 'terminate' mode.
#	It interrupts all running collector processes on all nodes
#	and exits
#
use strict;
use warnings;

use POSIX '_exit';
use Data::Dumper;
use File::Basename;
use Log::Log4perl 'get_logger';
use NextCAM::Conf 'GetCfgs';
use SKM::Common ':all';
use NextCAM::Init 'GetAsrv';
use Master::Conf;
use Node::Conf;
use SKM::DB;

# Constants =====================================================
my $APL = $ENV{APL};
my $APL_VAR = $ENV{APL_VAR};
my $MASTER_SF_ROOT = "$APL_VAR/sf/master";
my $NODE_SF_ROOT = "$APL_VAR/sf/node";
my $S_MASTER = "$APL_VAR/conf/master/s_master";
my $TMP_ROOT = "$APL_VAR/sf/tmp";
my $MAX_TRIES = 5;
my $STATUS_CHECK_TIMEOUT = 5;
my $MINSPACE = 300; # Minimum space to start collecting
my $SPACE_PER_NODE = 50; # Minimum free space per node
my $SPACE_PER_DS = 10; # Minimum free space per Display Server
my $NODE_COLLECT_TIMEOUT = 1200;
my $DS_COLLECT_TIMEOUT = 1200;
my $PASSWORD = 'X3ins1de';
my $VCTL = "$APL/vpatch/bin/vctl";
my $SSH = "/usr/bin/ssh";
my $SCP = "/usr/bin/scp";
my $NODE_SF_CTL = "$APL/conf/bin/node_sf_ctl";
my $SF_KEEP_CNT = 3; # Maximum number of support files to keep in MASTER_SF_ROOT
my $TOC_DOMAIN_TMPL = <<TOC;
<table width="600" cols="3" cellspacing="2" cellpadding="4" border="1">
<tr style="font-size:12pt;font-weight:bold;">
<td>Node IP</td><td>Node UNI</td><td>Status</td></tr>
%s
</table>
TOC
my $TOC_DOMAIN_TR_TMPL = <<TR;
<tr style="font-size:12pt;font-weight:bold;">
<td>%s</td><td>%s</td><td>%s</td></tr>
TR

# Logging =======================================================
require "$APL/common/bin/logger.patrol";
my $Log = get_logger('NEXTCAM::SDI::REV');

# Vars ==========================================================
my %Asrv = GetAsrv;
my %Workers; # Pids and exit codes of child processes
my $StartTime = time;
my $UNI = UNI;
my $NodeList = NodeList;
my $MasterConf = NodeConf;
my $MasterSFDir;
my $MSFID = gen_sfid();
my $SF;
my $Error = 0;
my $Debug = $ENV{DEBUG}; # Print debugging info
my $MasterIP;

# Decrease process priority =====================================
my $PRIO_PROCESS = 0;
my $prio = getpriority($PRIO_PROCESS, $$);
setpriority($PRIO_PROCESS, $$, $prio + 10);

# Signals =======================================================
$SIG{TERM} = $SIG{INT} = sub {
    my ($signame) = (@_);
    _log('warn',"Got SIG${signame}. Stopping...");
    foreach my $kid (values %Workers) {
	kill(15 => $kid->{pid});
    }
    sf_status('TERM');
    exit 1;
};

# Proc ==========================================================
sub _log
{
    my ($level,$msg,$node) = @_;
    chomp $msg;
    if ($node) {
        my $label = $node->{IP};
        if ($node->{IP} and $MasterConf->{IP} and $MasterConf->{IP} eq $node->{IP}) {
            $label = 'master';
        }
        $msg = "[$label] $msg";
    }
    warn uc($level).": $msg\n";
    $Log->$level($msg);
}

sub gen_sfid
{
    my %vinfo;
    open(VCTL, "$VCTL info|") or die "Failed to run $VCTL: $!\n";
    %vinfo = %{{map {/(\w+)=(.*)/} grep {/^\w+=.*$/} <VCTL>}};
    close VCTL;
    
    my $sysid = $vinfo{sysid} || $UNI;
    my $short_verid = substr($vinfo{verid}, 0, index($vinfo{verid}, $vinfo{tag}));
    my ($tmin,$thour,$tday,$tmon,$tyear)=(gmtime($StartTime))[1..5];
    my $uid = "${sysid}_${short_verid}" . ($tyear+1900) . sprintf("%02s", $tmon+1) .
              sprintf("%02s", $tday) . '_' . sprintf("%02s", $thour) . sprintf("%02s", $tmin);
    return $uid;
}

sub disk_space_check
{
    my $free_space;
    my %mons = GetCfgs(DEVICETYPE => 'MONITOR');
    my $minspace = $SPACE_PER_NODE * scalar(keys %$NodeList) + $SPACE_PER_DS * scalar(keys %mons);
    $minspace = $MINSPACE if $minspace < $MINSPACE;
    open(DF, "df $APL/var/sf 2>/dev/null|") || _log('error', "Cannot do df");
    my @rows=<DF>;
    close DF;
    foreach(@rows) {
	if(/\s\d+\s+\d+\s+(\d+)\s+\d+%\s+(.+)/) {
	    my ($free_space, $dev) = (int($1/1024), $2);
	    if(int($1/1024) < $minspace) {
		die "Not enough free space to start collecting! ".
		    "${free_space}MB found on $dev but need at least $minspace";
	    }
        }
    }
}

sub sf_status
{
    my $status = shift;
    
    opendir(DH, $MASTER_SF_ROOT) or die "Cannot open dir: $!";
    my ($sf, $curstatus) = map {/^($MSFID\.*(\w*))$/} grep {/^$MSFID/} readdir DH;
    close DH;
    if ($status) {
	unless ($sf) {
	    open (FH, ">$MASTER_SF_ROOT/${MSFID}.${status}") or die "Cannot create support file $MSFID: $!";
	    close FH;
	}
        else {
    	    rename("$MASTER_SF_ROOT/$sf", "$MASTER_SF_ROOT/${MSFID}.${status}") or
		_log('error',"Cannot rename $sf to ${MSFID}.${status}: $!");
	}
    }
    return $curstatus;
}

sub request
{
    my $node   = shift;
    my $action = shift;
    
    my $sfid = $node->{sfid}; # Support file ID
    my $out;
    
    if ($action eq 'download') {
	$out = NodeCopy($node, "$NODE_SF_ROOT/${sfid}.nspt", "$MasterSFDir/$node->{UNI}.zip");
    }
    else {
	my $args = "action=$action";
	$args .= " sfid=$sfid" if defined $sfid;
	$out = NodeCmd($node, "$NODE_SF_CTL $args");
    }    
    chomp $out;
    
    if ($?) {
	unlink "$MasterSFDir/$node->{UNI}.zip" if $action eq 'download';
	_log('error',"$action request failed: $out",$node);
	return (0,undef,$out);
    }
    else {
	if ($action eq 'create') {
    	    $node->{sfid} = $out;
    	    if (open (SFID, ">>$MasterSFDir/sfids")) {
    	        print SFID "$node->{UNI}: $node->{sfid}\n";
    	        close SFID;
    	    }
    	}

    }
    
    return (1,$out);
}

sub wait_for_complete
{
    my $node = shift;
    
    _log('info',"Wait for 'node_sf_collect' to complete",$node);
    my $status;
    my $tStart = time;
    do {
	my ($ok,$resp,$msg) = request $node,'status';
	my $passed = time-$tStart;
	unless ($ok) {
	    _log('error',"Wait interrupted: collection failed",$node);
	    last;
	}
	$status = $resp;
	_log('info',"$passed sec passed. Waiting...",$node) if $status eq 'inprogress';
	
    } while ($status eq 'inprogress' and sleep $STATUS_CHECK_TIMEOUT);
    
    _log('info',"Got status '$status'",$node);
    return $status eq 'OK' ? 1 : 0;
}

sub process_one_node
{	
    my $node = shift;
	
    my $pid;
    my $tries = $MAX_TRIES;
    my $completed = 0;
	
    # Do fork
    $pid = fork;
    die "Cannot fork: $!" unless defined $pid;
    return $pid if $pid > 0;
	
    _log('info',"Child process $$ started",$node);
    
    #local $SIG{INT};
    #local $SIG{TERM};
    $SIG{INT} = $SIG{TERM} = sub { _exit(1) };

    eval {
	local $SIG{ALRM} = sub { die "TIMEOUT" };
	alarm $NODE_COLLECT_TIMEOUT;
	    
	while($tries-- and not $completed) {
	    _log('info',"Tries left: $tries",$node);
	    #request $node,"cancel" or next;
	    request $node,"create" or next;
	    wait_for_complete $node or next;
	    $completed = 1;
	}
	continue {
	    sleep 5 if not $completed and $tries;
	}
	    
	die "Cannot create support file" unless $completed;
	
	# download node SF when ready
	_log('info',"Downloading node SF file...",$node);
	my ($ok,$resp,$err) = request $node,"download";
	die "Failed to download node SF: $err" unless $ok;
	_log('info', "Downloaded successfully",$node);
	
	# Do cleanup on node
	_log('info',"Do cleanup",$node);
	($ok,$resp,$err) = request $node,'cleanup';
	_log('warn',"Unable to do cleanup: $err",$node) unless $ok;
    };
    if ($@) {
	_log('error',"Collection failed: $@",$node);
	_exit(1);
    }
    alarm 0;
    
    _exit(0);
}

sub summary
{
    my %summary = ();
    my $dbm;
    eval {
	$dbm = DBMaster({PrintError=>0,RaiseError=>1});
	$dbm->{FetchHashKeyName} = 'NAME_lc';
	my $arr = $dbm->selectall_arrayref(
	    "SELECT otype, subtype, COUNT(obj) FROM _objs
	    WHERE otype IN ('D','X') AND deleted=0
	    GROUP BY otype, subtype", {Slice=>[]}
	);
	foreach my $rec (@$arr) {
	    $summary{'Nodes'} += $rec->[2] if $rec->[0] eq 'D' and $rec->[1] eq 'N';
	    $summary{'Cameras'} += $rec->[2] if $rec->[0] eq 'D' and $rec->[1] eq 'C';
	    $summary{'Audio'} += $rec->[2] if $rec->[0] eq 'D' and $rec->[1] eq 'A';
	    $summary{'Sensors'} += $rec->[2] if $rec->[0] eq 'D' and $rec->[1] eq 'S';
	    $summary{'Relays'} += $rec->[2] if $rec->[0] eq 'D' and $rec->[1] eq 'R';
	    $summary{'vMX Monitors'} += $rec->[2] if $rec->[0] eq 'D' and $rec->[1] eq 'V';
	    $summary{'vMX Walls'} += $rec->[2] if $rec->[0] eq 'D' and $rec->[1] eq 'W';
	    $summary{'External objects'} += $rec->[2] if $rec->[0] eq 'X';
	}
	$arr = $dbm->selectall_arrayref("SELECT max(eventid) from event");
	$summary{Events} = $arr->[0][0] || 0;
    };
    if ($@) {
	_log('error', "DB summary failed: $@");
    }
    eval { $dbm->disconnect } if $dbm;
    return \%summary;
}

sub gen_toc
{
    my $header ="<html><title>Master Support File</title><body>";
    my $footer = "</body></html>";
    my $toc = "$header\n";

    # Summary
    my $hSum = summary;
    my $summary = "<div style='font-size: 12pt'>\n";
    $summary .= "<b>Date:</b> ".`date`."<br>\n";
    foreach (sort keys %$hSum) {
	$summary .= "<b>$_:</b> $hSum->{$_}<br>\n";
    }
    $summary .= "</div>\n";
    $summary = "<H1>System Summary</H1>\n${summary}";
    
    
    # Domain
    my $domain = "";
    # Read node SFIDs from file
    my %sfids = ();
    if (open(SFIDS, "$MasterSFDir/sfids")) {
	%sfids = map {/^(\w{22}): (.+)$/} grep {/^\w{22}: .+$/} <SFIDS>;
	close SFIDS;
    }
    foreach my $node (values %$NodeList) {
	my $worker = $Workers{$node->{UNI}};
	my $status = $worker->{code}  ? 
	    '<font color="red">FAIL</font>' : 
	    '<font color="green">OK</font>';
	my ($href,$node_toc) = ($node->{IP},$sfids{$node->{UNI}}?"$sfids{$node->{UNI}}/toc.html":"$node->{UNI}.zip");
	
	my $s_master = $node->{UNI} eq UNI() ?' (master)':'';
	# Add record to TOC
	if ($node_toc) {
	    $href = "<a href='$node_toc'>$node->{IP}$s_master</a>";
	}
	$domain .= sprintf($TOC_DOMAIN_TR_TMPL,$href,$node->{UNI},$status);
    }
    $domain = "<H1>Domain</H1>".sprintf($TOC_DOMAIN_TMPL, $domain);
    
    # DS
    my $ds = "";
    if (-d "$MasterSFDir/ds") {
	$ds .= "<H1>Other components</H1>\n";
	$ds .= "<div style='font-size: 12pt'>\n";
	$ds .= "<a href='ds'>Display Server Logs</b></a><br>\n";
	$ds .= "</div>";
    }
    # Backup
    my $bck = "";
    if (-d "$MasterSFDir/backup") {
	$bck .= "<H1>Other components</H1>\n" unless -d "$MasterSFDir/ds";
	$bck .= "<div style='font-size: 12pt'>\n";
	$bck .= "<a href='backup'>System Backup</b></a><br>\n";
	$bck .= "</div>";
    }
    $toc .= $summary.$domain.$ds.$bck.$footer;
    return $toc;
}

sub combine
{
    my $toc = gen_toc;
    open (TOC, ">$MasterSFDir/toc.html");
    print TOC $toc;
    close TOC;
    
    my $bn = basename($MasterSFDir);
    my $cmd = 
	"cd $TMP_ROOT/master && /usr/bin/zip -ry ".
	"-P $PASSWORD $MSFID.tmp $bn 1>/dev/null";
    my $status = system($cmd);
    die "Failed to run zip" if $status == -1;
    if($status >> 8 != 0) {
        _log('error',"Bad status returned from /usr/bin/zip");
    }
    my $sf_status = sf_status;
    rename("$TMP_ROOT/master/$MSFID.tmp", "$MASTER_SF_ROOT/$MSFID.$sf_status")
	or _log('logdie',"Unable to rename resulting SF: $!");
    sf_status 'mspt';
    system("/bin/chgrp apache $MASTER_SF_ROOT/$MSFID.mspt &>/dev/null");
    #rename("$TMP_ROOT/master/$MSFID.tmp", "") or die "Failed to move tmp zip file to $SFDir: $!";
}

sub collect_ds
{
    _log('info',"DS logs collecting starts");
    my $ds_dir = "$MasterSFDir/ds";
    mkdir $ds_dir;
    system("$APL/conf/bin/sf_collect_ds $ds_dir $DS_COLLECT_TIMEOUT >$ds_dir/sf_collect_ds.log 2>&1");
    my $exit_code = $? >> 8;
    if ($exit_code) {
	_log('error',"sf_collect_ds finished with errors: exit code $exit_code");
    }
    else {
	_log('info',"DS logs collecting finished successfully");
    }
}

sub collect_acsg 
{
    _log('info',"ACSG logs collecting starts");
    my $acsg_dir = "$MasterSFDir/acsg";
    mkdir $acsg_dir;
    system("$APL/conf/bin/sf_collect_acsg $acsg_dir 1200 >$acsg_dir/sf_collect_acsg.log 2>&1");
    my $exit_code = $? >> 8;
        if ($exit_code) {
	_log('error',"sf_collect_acsg finished with errors: exit code $exit_code");
    }
    else {
	_log('info',"ACSG logs collecting finished successfully");
    }
}

sub collect_backup
{
    _log('info',"Master backup creating starts");
    my $bck_dir = "$MasterSFDir/backup";
    mkdir $bck_dir;
    my $bck_log = `$APL/conf/bin/master_backup 2>&1`;
    my $bck_res = $1 if $bck_log =~ /FINAL: (.+)$/m;
    my $bck_path = $1 if $bck_log =~ /Backup area: (.+)$/m;
    if ($bck_path && -d $bck_path) {
	`cp -rf $bck_path $MasterSFDir/backup 2>&1`;
	_log('warn',"Error copying backup to support directory") if $?;
    }
    else {
	_log('warn',"Backup was not found on server!");
    }
    
    _log('info',"Master backup final result: $bck_res");
}

sub collect
{
    $Log->info("Master Support File collecting started");
    warn("Concurrent run detected\n"),_exit(1) if CheckPid;
    WritePid;
	
    my $kidpid;
    my $still_wait;

    # Prepare temporary location for backup	
    mkdir $MASTER_SF_ROOT unless -d $MASTER_SF_ROOT;
    unless (-d $TMP_ROOT) {
	mkdir $TMP_ROOT;
	mkdir "$TMP_ROOT/master";
	mkdir "$TMP_ROOT/node";
    }
    if (opendir DH, $MASTER_SF_ROOT) {
	my @files = grep {/$MSFID/} readdir DH;
	if (@files) {
	    RemovePid;
	    warn("Support file with the same ID already exists!\n");
	    _exit(2);
	}
    }
    $MasterSFDir = "$TMP_ROOT/master/$MSFID";
    mkdir $MasterSFDir;
    
    # Check disk space availability
    eval {
	disk_space_check;
    };
    if ($@) {
	_log('error',$@);
	sf_status 'NOSPACE';
	exit 1;
    }
    
    # Create empty master support file
    sf_status 'INPR';    
	
    foreach my $uni (keys %$NodeList) {
	my $node = $NodeList->{$uni};
	my $kidpid = process_one_node($node);
	next unless $kidpid;
	@{$Workers{$uni}}{'code','pid'} = (undef,$kidpid);
	
    }
	
    # Now wait for child processes to exit
    _log('info',"Waiting for childs to exit");
    while($kidpid = wait) {
	$still_wait = 0;
	my ($worker) = grep {$_->{pid}==$kidpid} values %Workers;
	$worker->{code} = $?>>8;
	foreach my $rec (values %Workers) {
		$still_wait = 1 unless defined $rec->{code};
	}
	last unless $still_wait;
    }
    
    # Collect Display Server logs
    collect_ds if $Asrv{INCLUDE_DSLOGS_INTO_SF} eq 'yes';
    
    # Collect ACSG logs
    collect_acsg;
    
    # Collect Master backup
    collect_backup;
    
    # Combine downloaded support files into single one
    combine;
	
    # Summarize
    foreach my $worker (values %Workers) {
	_log('info',"Child process $worker->{pid} exited with code $worker->{code}");
    }
    
    _log('info',"Complete");
    exit 0;
}

sub cancel_nodes 
{
    _log("info","Cancel collecting for all nodes!");
    foreach my $uni (keys %$NodeList) {
	my $node = $NodeList->{$uni};
	next if $node->{DEAD};
	
        next if fork > 0; # Do 'cancel' asynchronously
        $SIG{INT} = $SIG{TERM} = sub { _exit(1) };
	my ($ok,$resp,$msg) = request($node,"cancel");
	unless ($ok) {
	    _log('error',"Unable to cancel collecting: $msg",$node);
	}
	_exit(0);
    }
    sleep 1;
}

sub cleanup_sf_root
{
    # Leave 3 latest successful SF, and maximum 10 files total
    _log("info", "Cleanup SF directory");
    unless (opendir DH, $MASTER_SF_ROOT) {
	_log("error", "Cannot open SF root dir: $!");
	return;
    }
    my %sf = map { ($_, [/(\d{8}_\d{4})\.(\w+)$/]) } 
	grep {/\d{8}_\d{4}\.\w+$/} readdir DH;
    closedir DH;
    return if keys %sf <= $SF_KEEP_CNT;
    my ($full, $total) = (0, 0);
    foreach my $sfid (sort {$sf{$b}[0] cmp $sf{$a}[0]} keys %sf) {
	my $status = $sf{$sfid}[1];
	unlink("$MASTER_SF_ROOT/$sfid"),next if $status eq 'mspt' and $full >= $SF_KEEP_CNT;
	unlink("$MASTER_SF_ROOT/$sfid"),next if $status ne 'mspt' and $total >= 12;
	$total++;
	$full++ if $sf{$sfid}[1] eq 'mspt';
    }
}

# Main ======================================================
if (@ARGV && $ARGV[0] eq '-T') {

    cancel_nodes; # Cancel collecting of node SF
    
    my $pid = ReadPid;
    my $term_sent = 0;
    if ($pid and kill(0,$pid)) {
	my $ps = `ps -p $pid -o command=`;
	chomp $ps;
	if ($ps =~ /master_sf_collect/) {
	    $term_sent = kill(15, $pid);
	}
    }
    unless ($term_sent) {
        `ps -au apl | grep master_sf_collect | grep -v grep | grep -v $$ | xargs kill -TERM 2>/dev/null`;
        sleep 2;
        if(opendir(DH,$MASTER_SF_ROOT)) {
    	    foreach (readdir DH) {
    		rename("$MASTER_SF_ROOT/$1.INPR","$MASTER_SF_ROOT/$1.TERM") if /(.+)\.INPR$/;
    	    }
        }
    }
    _exit(0);
}
collect;

END {
    my $exit_code = $?;
    my $sf_status = sf_status;
    if ($exit_code != 0 && $sf_status !~ /^(FAIL|NOSPACE|TERM)$/) {
	sf_status 'FAIL';
    }
    cleanup_sf_root;
    RemovePid;
    `rm -rf $MasterSFDir` if $MasterSFDir;
    exit $exit_code;
}
