#!/usr/bin/perl
#  $Id:$
# -----------------------------------------------------------------------------
#  Restore master configuration
#  Usage: master_restore [-l] [-t] BACKUP_NAME
#         master_restore -a
#	  master_restore -m BACKUP_PATH
# -----------------------------------------------------------------------------
#  Author: Andrey Fomenko
#  Modified by: Alexey Tsibulnik
#  QA by:
#  Copyright: videoNEXT Network solutions, Inc.
# -----------------------------------------------------------------------------

use strict;
use warnings;
use Socket qw(inet_ntoa);
use File::Basename;
use XML::Simple;
use Getopt::Std;
use SKM::DB qw(DBLocal DBMasterConf);
use LWP::Simple qw($ua get getstore);
use HTTP::Request::Common;
use Log::Log4perl "get_logger";
use Master::Conf;
use Node::Conf qw(UNI DB_Version);
use NextCAM::Conf "GetDefaultCfg";
use SKM::Common "ReadPid";

# -------------------- standard environment -----------------------------------
# Import every KEY=VALUE line of $APL/base/etc/env.conf into %ENV.
# Lines that do not look like "WORD=non-blank" are ignored; the value is the
# first run of non-whitespace characters after the '='.
my $APL = $ENV{APL};
open(my $env_fh, '<', "$APL/base/etc/env.conf") || die("Cannot read base/etc/env.conf");
while (my $env_line = <$env_fh>) {
    $ENV{$1} = $2 if $env_line =~ /^(\w+)=(\S+)/;
}
close $env_fh;
#------------------------------------------------------------------------------

# Script name without directory part; used in messages, log file names and
# for the concurrent-run check in pid_check().
my $procname = (split(/\//,$0))[-1];
# NOTE(review): the header comment of this file also documents
# "-m BACKUP_PATH", which is not listed in the usage text below.
my $USAGE = <<END;
USAGE: $procname [-l] [-t] BACKUP_NAME
       $procname -a
END
# --- Paths, external tools and account names (env.conf/%ENV overrides) --------
my $APL_USR = $ENV{APL_USR} || 'apl';
my $APL_VAR = $ENV{APL_VAR};
my $DBEXEC = "$APL/db/bin/db_exec skm_master";
my $FIND_BACKUP = "$APL/conf/bin/find_backup";
my $DB_BACKUP = "$APL/db/bin/db_backup";
my $PG_RESTORE = $^O =~ /darwin/i ? "$APL/imp/bin/pg_restore" : "/usr/bin/pg_restore";
my $APL_HTTPD_USR = $ENV{APL_HTTPD_USR} || 'apache';
my $APL_HTTPD_GRP = $ENV{APL_HTTPD_GRP} || 'apache';
my $LICENSE_KEY = "$APL/var/license/license.key";
my $EMAILER_SIGHUP = "$APL/base/bin/emailer_sighup";
my $ACC_VAR="$APL/var/advantor_asn";
my $ACC_ACT="$ACC_VAR/ACTIVATED";	# restore_acc_cfg() runs only if this file exists
my $NODE_CTL = "sudo $APL/conf/bin/node_proc_ctl";
# --- Run-time state ------------------------------------------------------------
my $dbl;                   				# DB handler
my $reqTimeout = 90;	# timeout (sec) to wait for a response from nodes/services/etc.
my $nlist = NodeList;	# node list; iterated as a hash ref of per-node hash refs
my $stat; # Parsed data from stat.xml
my $bckRoot = "$APL/var/backup/master";
my $DoMigrate = 0;	# Backup recovery is used for migration from previous version of SKM
my $UNIChanged = 0;     # Special logic if Master UNI was changed
my $bckDir;	# Backup directory name (e.g. 091030_164423)
my $LOGFILE;	# Per-run log file; set in prepare()
my $bckPath;	# Full path to a backup directory, usually "$bckRoot/$bckDir"
my $DomainConfPath = "$APL/var/conf/master/nodes";
my $TmpPath = "$APL_VAR/tmp/backup";
my $SQLPATH = "$APL/db/sql";
my $SessionDir = $^O =~ /Darwin/i ? '/tmp' : '/var/lib/php/session';
my $nErrors = 0;				# total number of non-fatal errors
my $nWarnings = 0;				# total number of warnings logged
my $pid_name = "$APL/var/backup/master_restore.pid";
my $backup_pid_name = "$APL/var/backup/master_backup.pid";	# polled by cond_wait()
my $DEBUG = 0;	# Output log messages to STDERR
my $IDENTITY_OBJID = 53;  # ID of Identity GUI object
# "X.Y" plus its two numeric parts, taken from the start of the vctl output
my ($VER,$MAJOR,$MINOR) = `$APL/vpatch/bin/vctl ver`=~/^((\d+)\.(\d+))\./;
my %DB = %{ DBMasterConf() };	# name/host/user keys are used for pg_restore
my %DBVerBck;	# Version of Backup DB
my %DBVerBld;	# Version of Local build (max version of DB SQL scripts)
my $NeedUpdate = 0;	# Backup version differs from software one. Must run update scripts
my $OutdatedDB = 0; 	# Software DB version is older than in backup

# Command-line switches:
#   -a  recover the most recent valid backup
#   -m  migration mode (backup is fetched with find_backup if not present)
#   -l  license recovery requested (keep SERIAL_NO/LICENSE_KEY from the backup)
#   -t  restore transdb data as well
my %Opts = ();
getopts('amlt', \%Opts);

require "$APL/common/bin/logger.patrol";
my $Logger = get_logger('NEXTCAM::CONF::MASTERRESTORE');

# INT/TERM are turned into die() so that the END block still runs;
# HUP is ignored.
$SIG{INT}  = sub { die "Interrupted!\n" };
$SIG{TERM} = sub { die "Terminated!\n"  };
$SIG{HUP}  = 'IGNORE';

###############################################################################

sub db_master { # -------------------------------------------------------------
    # (Re)connect the global handle $dbl through DBLocal. Errors are raised
    # as exceptions (RaiseError) rather than printed, and every change must
    # be committed explicitly because AutoCommit is switched off.
    $dbl = DBLocal({ PrintError => 0, RaiseError => 1 });
    die "Can not connect to S_MASTER database!\n" unless $dbl;

    $dbl->{FetchHashKeyName}   = 'NAME_uc';
    $dbl->{ShowErrorStatement} = 1;
    $dbl->{AutoCommit}         = 0;
} # sub db_master

# Read the database versions stored in a backup and compare them with the
# versions of the local build.
#
# Argument: path to a backup directory (defaults to $bckPath).
# Side effects:
#   %DBVerBld   - filled from DB_Version()
#   %DBVerBck   - {confdb|transdb}{ver|iter} read from <path>/db/version_*;
#                 left EMPTY when the backup lacks version_confdb,
#                 version_transdb or confdb.db (callers test this)
#   $NeedUpdate - 1 if the local build is newer than the backup
#   $OutdatedDB - 1 if the backup is newer than the local build
# Dies if a version file that is listed in the directory cannot be opened.
sub read_db_ver {
    my $path = shift || $bckPath;

    ($NeedUpdate, $OutdatedDB) = (0, 0);
    %DBVerBck = ();
    %DBVerBld = %{ DB_Version() };

    # An unreadable db directory is handled like an incomplete backup.
    opendir(my $db_dh, "$path/db") or return undef;
    my %present = map { $_ => 1 } readdir($db_dh);
    closedir $db_dh;    # close() would leave the directory handle open
    for my $required (qw(version_confdb version_transdb confdb.db)) {
        return undef unless $present{$required};
    }

    # Load VERSION/ITERATION of both databases from the backup
    for my $db ('confdb', 'transdb') {
        open(my $ver_fh, '<', "$path/db/version_$db")
            or die "Error opening version_$db: $!\n";
        my %ver = map { /^(\w+)=(.+)$/ } <$ver_fh>;
        close $ver_fh;
        $DBVerBck{$db}{ver}  = $ver{VERSION};
        $DBVerBck{$db}{iter} = $ver{ITERATION};
    }

    # TP6481
    # Direct comparison of version strings is wrong because it fails when a
    # component is greater than 9 (3.7.9 sorts after 3.7.10 with "cmp").
    # Compare zero-padded forms instead, e.g. "003.007.010" for "3.7.10".
    my $normalize = sub {
        return join(".", map { sprintf("%03d", $_) } split(/\./, $_[0]));
    };

    # The first database whose version or iteration differs decides.
    for my $db ('confdb', 'transdb') {
        my $vercmp  = $normalize->($DBVerBld{$db}{ver}) cmp $normalize->($DBVerBck{$db}{ver});
        my $itercmp = $DBVerBld{$db}{iter} <=> $DBVerBck{$db}{iter};

        if    ($vercmp > 0)  { $NeedUpdate = 1; last; }
        elsif ($vercmp < 0)  { $OutdatedDB = 1; last; }
        if    ($itercmp > 0) { $NeedUpdate = 1; last; }
        elsif ($itercmp < 0) { $OutdatedDB = 1; last; }
    }
    Log("System database version is higher than backup version") if $NeedUpdate;
}

# Select the backup to restore and set up logging.
#
# Depending on the command-line switches:
#   -a       pick the newest backup under $bckRoot that parses, belongs to
#            this Master (UNI), has a complete DB backup whose version is not
#            newer than the software, and has STATUS 'OK'
#   -m NAME  migration mode; the backup is fetched with find_backup when it
#            is not already under $bckRoot
#   NAME     plain recovery of the named backup
# Sets $bckDir, $bckPath, $LOGFILE, $DEBUG and (for -m) $DoMigrate, configures
# the LWP user agent and truncates the log file. Dies on any fatal problem.
sub prepare { # ------------------------------------------------------------------
    if ($Opts{a}) {	# Recover last successful backup
        $LOGFILE = "$TmpPath/$procname-AutoRecovery.log";
        $DEBUG = 1;
        unless ($dbl) {
            # Check $@ only for the eval that was actually executed
            eval { db_master() };
            die "Database connection failed: $@" if $@;
        }
        opendir(my $root_dh, $bckRoot) or die "Cannot open dir $bckRoot: $!\n";
        my @mbcks = sort { $b cmp $a } grep { /^\d{6}_\d{6}$/ } readdir $root_dh;
        closedir $root_dh;    # close() would leave the directory handle open
        for my $bck (@mbcks) {
            my $bckstat = eval { XMLin("$bckRoot/$bck/stat.xml") };
            next if $@;
            next if $bckstat->{UNI} ne UNI;

            read_db_ver("$bckRoot/$bck");
            next if not %DBVerBck;

            next if $OutdatedDB; # Only backups with version <= current can be restored

            $bckDir = $bck, last if $bckstat->{STATUS} eq 'OK';
        }
        die "No valid backups found!\n" unless $bckDir;
        Log("Found valid backup: $bckDir");
        $bckPath = "$bckRoot/$bckDir";
    }
    elsif ($Opts{m}) {
        $bckDir = $ARGV[0];
        $LOGFILE = "$TmpPath/$procname-$bckDir-Migrate.log";
        $DEBUG = 1;
        $bckPath = "$bckRoot/$bckDir";
        # If backup not in BACKUP_ROOT, copy it using find_backup
        unless (-d $bckPath) {
            # The name comes straight from the command line and is about to
            # be interpolated into a shell command. test() rejects any other
            # naming anyway, so refuse it here before the shell sees it.
            die "Invalid backup directory naming!\n" unless $bckDir =~ /^\d{6}_\d{6}$/;
            `$FIND_BACKUP -v $bckDir 2>/dev/null`;
            die "Cannot find backup: $bckDir\n" unless -d $bckPath;
        }
        $DoMigrate = 1;
    }
    else {
        $bckDir = $ARGV[0];
        $bckPath = "$bckRoot/$bckDir";
        $LOGFILE = "$TmpPath/$procname-$bckDir.log";
    }
    die "ERROR: Backup directory does not exist!\n" unless -d $bckPath;
    $DEBUG = 1 if $ENV{DEBUG};
    $ua->timeout(3);
    $ua->agent('skm agent/2.5');

    # Prepare log file: truncate it and make it readable for the httpd group.
    # List-form system() keeps the (argument-derived) file name away from the
    # shell; chown runs only if chmod succeeded, as before.
    if (open(my $log_fh, '>', $LOGFILE)) {
        close $log_fh;
        system('chmod', '640', $LOGFILE) == 0
            and system('chown', "$APL_USR:$APL_HTTPD_GRP", $LOGFILE);
    }
    else {
        warn "Cannot create log file $LOGFILE: $!\n";
    }
    Log("Master ".($DoMigrate?"migration":"recovery")." process started at ".get_ts(1));
    Log("Backup name: $bckDir");
} # sub prepare

# Test backup integrity.
#
# Verifies the selected backup ($bckDir/$bckPath) before anything is changed:
# directory naming, presence of verinfo, stat.xml and db/, parsability of
# stat.xml, completeness of the DB backup and that its DB version is not newer
# than the software. Dies with a short message on the first failed check.
# Side effects: fills $stat, may set $UNIChanged, refreshes the version
# globals through read_db_ver(), and logs warnings.
sub test { # ----------------------------------------------------------------------
    die "Invalid backup directory naming!\n" unless $bckDir =~ /^\d{6}_\d{6}$/;
    die "Version is missing!\n" if not -f "$bckPath/verinfo";
    die "Backup statistics is missing!\n" unless -f "$bckPath/stat.xml";
    Log("License recovery requested") if $Opts{l};

    # Check whether the backup was created for the current system
    # NOTE(review): with ForceArray => 1 a UNI stored as a child element
    # (rather than an attribute) would arrive as an array ref and never
    # compare equal - confirm the stat.xml layout.
    $stat = eval { XMLin("$bckPath/stat.xml", ForceArray => 1, KeyAttr => ['ID','NAME']) };
    die "Failed to parse backup information\n" if $@;
    if ($stat->{UNI} ne UNI) {
        Log("WARNING: Current Master UNI differs from value stored in backup");
        $UNIChanged = 1;
    }

    # Check master database
    die "Database files are missing!\n" unless -d "$bckPath/db";

    read_db_ver();
    die "Incomplete database backup!\n" unless %DBVerBck;

    # Backup DB version must not be greater than the current one
    die "DB version in backup is higher than software version\n" if $OutdatedDB;

    # Check origin (optional file of key=value lines)
    if (open(my $origin_fh, '<', "$bckPath/origin")) {
        my %origin = map { /^(\w+)=(.+)$/ } grep { /^\w+=/ } <$origin_fh>;
        close $origin_fh;
        # Missing keys count as empty strings instead of raising
        # "uninitialized value" warnings; the outcome is the same.
        my $source    = defined $origin{origin}    ? $origin{origin}    : '';
        my $protected = defined $origin{protected} ? $origin{protected} : '';
        if ($source ne 'native' and $protected ne '1') {
            Log("WARNING: Foreign backup is not password protected\n");
        }
    }

} # sub test

# Wait for backup process to complete if any.
#
# Polls the master_backup pid file every 5 seconds for up to 30 seconds and
# returns as soon as the pid file is gone, unreadable, holds no valid pid, or
# the recorded process cannot be signalled. Returns silently on timeout too.
sub cond_wait { # -------------------------------------------------------------
    my $wait = 30;
    my $sleep = 5;
    while (-f $backup_pid_name && $wait > 0) {
        open(my $pid_fh, '<', $backup_pid_name) or last;
        my $line = <$pid_fh>;
        close $pid_fh;

        # An empty or malformed pid file means there is nothing to wait for.
        # Without this check the value would numify to 0 and kill(0, 0)
        # probes our own process group, which always succeeds.
        my ($pid) = defined $line ? $line =~ /^\s*(\d+)/ : ();
        last unless $pid;

        kill(0, $pid) or last;
        $wait -= $sleep;
        Log("Waiting $sleep sec for backup process to complete...");
        sleep $sleep;
    }
} # sub cond_wait

# Write one message to every log sink.
#
# The message is prefixed with a UTC time stamp and sent to STDERR (only when
# $DEBUG is set), to the Log4perl logger and to the per-run log file.
# Messages starting with "WARNING: " or "ERROR: " (case-insensitive) bump
# $nWarnings / $nErrors, which determine the final summary and exit status.
sub Log { # -------------------------------------------------------------------
    my $msg = shift;
    $msg = '' unless defined $msg;
    chomp $msg;
    my $str = '['.get_ts().'] '.$msg;

    # Count first: the totals must not depend on the log file being writable
    # (it is not even set yet when pid_check() logs before prepare()).
    $nWarnings++ if $msg =~ /^WARNING: /i;
    $nErrors++ if $msg =~ /^ERROR: /i;

    print STDERR ">>>> $str\n" if $DEBUG;
    eval { # Due to rsyslog, exception can be thrown here
        $Logger->info($str);
    };
    if ($@) {
        sleep 5;
        eval { $Logger->info($str) }; # retry
    }

    return unless defined $LOGFILE;
    open(my $log_fh, '>>', $LOGFILE) or return;
    print {$log_fh} ">>>> $str\n";
    close $log_fh;
} # sub Log

sub get_ts {# --------------------------------------------------------
    # Current UTC time as text: "HH:MM:SS" by default, or
    # "YYYY-MM-DD HH:MM:SS" when called with a true argument.
    my ($with_date) = @_;
    my @utc = gmtime(time);
    my @hms = reverse @utc[0 .. 2];    # gmtime gives sec,min,hour
    unless ($with_date) {
        return sprintf("%02s:%02s:%02s", @hms);
    }
    my @ymd = ($utc[5] + 1900, $utc[4] + 1, $utc[3]);
    return sprintf("%02s-%02s-%02s %02s:%02s:%02s", @ymd, @hms);
} # sub get_ts

# Refuse to run concurrently with another instance of this program.
#
# If the pid file names a live process whose command name equals $procname,
# logs a message and exits with status 1. Otherwise records our own pid;
# exits with status 3 when the pid file cannot be written.
sub pid_check { # -------------------------------------------------------------
    my $old_pid = pid_read();
    if ($old_pid > 1) {
        my $proc;
        # List-form pipe open: no shell involved. A failure to start ps is
        # treated like "process not found".
        if (open(my $ps_fh, '-|', 'ps', '-p', $old_pid, '-o', 'comm=')) {
            $proc = <$ps_fh>;
            close $ps_fh;
        }
        chomp $proc if defined $proc;
        if (defined $proc && $proc eq $procname) {
            # The comparison is against our own name, so report that name
            # (the message used to say master_backup).
            Log("Concurrent run of $procname detected. Exiting");
            exit 1;
        }
    }
    pid_write() or exit 3;
} # sub pid_check

# Return the pid stored in $pid_name, or 0 when the file is missing,
# unreadable, empty or does not start with digits.
sub pid_read { # --------------------------------------------------------------
    my $pid = 0;
    if (open(my $pid_fh, '<', $pid_name)) {
        my $line = <$pid_fh>;
        close $pid_fh;
        # Guard against an empty file: matching undef only produces warnings
        ($pid) = $line =~ /^(\d+)/ if defined $line;
        $pid = 0 unless defined $pid;
    }
    return $pid;
} # sub pid_read

sub pid_write { # -------------------------------------------------------------
    # Store our pid in $pid_name. Returns 1 on success and 0 on failure;
    # a file that could not be flushed on close is removed again.
    unless (open PID, ">$pid_name") {
        Log("Cannot create a PID file $pid_name");
        return 0;
    }
    print PID "$$\n";
    return 1 if close PID;

    Log("Handle close failed. Cannot write PID file: $!");
    unlink $pid_name;
    return 0;
} # sub pid_write

sub pid_remove { # ------------------------------------------------------------
    # Delete the pid file, but only if it records this very process.
    if (pid_read() == $$) {
        unlink $pid_name;
    }
} # sub pid_remove

# Remove PHP sessions by running php_session_clean.php as the httpd user.
# A non-zero exit status is logged as a warning.
sub clear_sessions {
    # ">/dev/null 2>&1" instead of the bash-only "&>/dev/null": system() runs
    # the command through /bin/sh, and a POSIX shell reads "cmd &>file" as
    # "run cmd in the background", which would make $? always 0.
    system("sudo -u $APL_HTTPD_USR $APL/api/bin/php_session_clean.php --clean >/dev/null 2>&1");
    Log("WARNING: Clear PHP session failed") if $?;
}

# Issue a 'start' or 'stop' command to every node in $nlist and wait until
# each node reports the matching state ('starting' resp. 'stop') or
# $reqTimeout seconds have passed.
#
# A 'stop' command is sent once per node (again only if sending failed);
# 'start' is re-sent on every pass until the node reports 'starting'.
# Returns 1 when all nodes reached the desired state, 0 otherwise (one
# warning is logged per node that did not).
sub node_service_ctl {
    my $cmd = shift;		# start|stop
    my $desired_state = $cmd eq 'stop' ? 'stop' : 'starting';
    Log("'$cmd' command will be issued to the nodes");

    # Nodes still waited for. A node is removed exactly once, when it first
    # reports the desired state. (A plain counter decremented on every pass
    # counts the same node again and again and can hit zero while other
    # nodes have not reacted yet.)
    my %pending = map { $_ => 1 } keys %$nlist;

    my $start_time = time;
    while (%pending && time - $start_time < $reqTimeout) {
        for my $id (sort keys %pending) {
            my $node = $nlist->{$id};
            $node->{state} = query_app_state($node);
            if ($node->{state} eq $desired_state) {
                delete $pending{$id};
            }
            elsif (not $node->{issued} or $cmd eq 'start') {
                # Repeat 'start' cmd if status wasn't changed to 'starting'
                $node->{issued} = issue_node_cmd($node, $cmd);
            }
        }
        sleep 2 if %pending;
    }

    if (%pending) {
        for my $id (sort keys %pending) {
            Log("WARNING: Failed to execute '$cmd' command against node $nlist->{$id}{UNI}");
        }
        return 0;
    }

    Log("Successfully executed '$cmd' command against nodes");
    return 1;
}

# Ask a node for its application state (content of $APL_VAR/wd/status).
# Returns the state with the trailing newline removed, or the string
# 'unknown' (after logging) when the node gives no answer.
sub query_app_state { # -----------------------------------------------------------
    my ($node) = @_;
    my $state = NodeCmd($node, "cat $APL_VAR/wd/status");
    if (defined $state) {
        chomp $state;
        return $state;
    }
    Log("Unable to get application status for node $node->{FQDN}");
    return 'unknown';
} # sub query_app_state

# Run "node_proc_ctl <cmd>" (start|stop) on a node through NodeCmd.
# Returns 1 if $? is zero afterwards, otherwise logs the failure and returns 0.
sub issue_node_cmd { # ----------------------------------------------------------
    my $node = shift;
    my $cmd  = shift;
    my $reply = NodeCmd($node, "$NODE_CTL $cmd");    # output itself is not used
    return 1 unless $?;
    Log("Unable to execute '$cmd' command against node $node->{IP}");
    return 0;
}

# Return the first line of $APL_VAR/conf/master/s_master without its newline.
# Returns nothing when the file cannot be opened and undef when it is empty.
sub read_master_ip {
    open(my $ip_fh, '<', "$APL_VAR/conf/master/s_master") or return;
    my $ip = <$ip_fh>;
    close $ip_fh;
    chomp $ip if defined $ip;    # empty file: avoid chomp on undef
    return $ip;
}

sub restore_acc_cfg { # -------------------------------------------------------
    # Copy acc/asn.5.cfg from the backup into $ACC_VAR, if the backup has
    # one, and log the outcome.
    my $acc_cfg = "$bckPath/acc/asn.5.cfg";
    -f $acc_cfg or return;

    system("cp -f $acc_cfg $ACC_VAR");
    my $failed = $?;
    Log($failed
        ? "WARNING: Unable to restore ASN configuration file"
        : "ASN configuration restored successfully");
} # sub restore_acc_cfg

# Restore the master database from the selected backup.
#
# Sequence:
#   0. dump the current confdb to a temporary fallback file
#   1. remember the current SERIAL_NO and LICENSE_KEY (not in migration mode)
#   2. run db_restore for confdb/apl (plus transdb data or schema as
#      requested), then update_objects.php
#   3. reconnect, put SERIAL_NO/LICENSE_KEY back unless -l was given,
#      reset rtime of all objects and commit
# On any failure the error is logged, a rollback is attempted and, if the
# fallback dump was created, it is fed to pg_restore. On success db2conf is
# sent a HUP. The fallback dump is removed in both cases.
sub restore_master_db { # ---------------------------------------------------
    Log("Starting master database recovery");

    # Create fallback DB dump before recovery.
    # NOTE: ">/dev/null 2>&1" is used instead of the bash-only "&>/dev/null";
    # a POSIX /bin/sh would background the command and always report success.
    my $bck_fallback = "$TmpPath/master_restore-$bckDir-confdb.tmp";
    my $bck_fallback_ok = system(
        "sudo $APL/db/bin/db_backup confdb $bck_fallback >/dev/null 2>&1"
    ) == 0;
    if ($bck_fallback_ok) {
        Log("Fallback backup created");
    }
    else {
        Log("WARNING: Error creating fallback backup");
    }

    eval {
        my $serialno;
        my $license_key;

        # Step 1: Backup SERIAL_NO and LICENSE_KEY if not in migration mode
        unless ($DoMigrate) {
            my $res = $dbl->selectrow_arrayref(
                "select val from _obj_attr where obj=$IDENTITY_OBJID and attr='SERIAL_NO'"
            );
            $serialno = $res->[0];
            $res = $dbl->selectrow_arrayref(
                "select val from _obj_attr where obj=$IDENTITY_OBJID and attr='LICENSE_KEY'"
            );
            $license_key = $res->[0];
        }

        eval { $dbl->disconnect } if $dbl;

        # Step 2: Restore 'confdb' and 'apl' schema
        my $cmd = "sudo $APL/db/bin/db_restore CONFDB=$bckPath/db/confdb.db APLDB=$bckPath/db/apl.db";
        if (-f "$bckPath/db/transdb.db") {
            $cmd .= " TRANSDB=$bckPath/db/transdb.db" if $Opts{t} or $DoMigrate;
        }
        elsif ($DoMigrate) { # When migrating from 3.7 either full transdb backup or schema-only should be present
            die "Neither transdb schema nor schema+data present in backup!" unless -f "$bckPath/db/transdb.schema";
            $cmd .= " TRANSDB_SCHEMA=$bckPath/db/transdb.schema";
        }
        $cmd .= " UPDATE=1" if $NeedUpdate;
        `$cmd 1>$TmpPath/$procname-$bckDir-dbrestore.log 2>&1`;
        die "ConfDB recovery finished with errors" if $?;

        # Filter object attributes according to templates
        die "Errors while filtering object attributes"
            unless system("$APL/conf/bin/update_objects.php 2>&1") == 0;

        # Step 3: Finalize recovering data. Perform special actions
        eval { $dbl->disconnect } if $dbl;
        db_master();

        unless ($Opts{l}) {
            # Values are bound as parameters so that a quote character in a
            # stored serial number or key cannot break the statement.
            # Recover old SERIAL_NO if non-empty
            $dbl->do(
                "update _obj_attr set val=? where obj=$IDENTITY_OBJID and attr='SERIAL_NO'",
                undef, $serialno
            ) if $serialno;
            # Recover old LICENSE_KEY if non-empty
            $dbl->do(
                "update _obj_attr set val=? where obj=$IDENTITY_OBJID and attr='LICENSE_KEY'",
                undef, $license_key
            ) if $license_key;
        }

        # clear rtime
        $dbl->do("update _objs set rtime=NULL");
        Log("Reset objects rtime");

        $dbl->commit;
    };
    if (my $err = $@) {	# Rollback all changes if error
        chomp $err;
        Log("ERROR: Failed to recover Master DB Objects: $err");
        Log("Rollback all changes");
        # NOTE(review): if the failure happened while the handle was
        # disconnected (during Step 2) this rollback presumably fails as well
        # and is reported as a second error - confirm whether that is wanted.
        eval { $dbl->rollback };
        Log("ERROR: Rollback failed!") if $@;
        if ($bck_fallback_ok) {
            system(
                "$PG_RESTORE -d '$DB{name}' -h '$DB{host}' -U '$DB{user}' ".
                "--disable-triggers -c -F c $bck_fallback >/dev/null 2>&1"
            );
            Log("WARNING: pg_restore finished with code ". ($? >> 8) ) if $?;
        }
    }
    else {
        # Migrate analytics attributes if migrating from 3.3.x release
        # NOTE(review): within this file $DoMigrate is only ever set to 0
        # or 1, so this branch looks unreachable - confirm the intent.
        if ($DoMigrate eq '3.3') {
            Log("Migrating analytics configuration");
            system("$APL/conf/bin/migrate_vae_conf 1>$TmpPath/$procname-$bckDir-migrate-vae.log 2>&1");
            Log("WARNING: Analytics configuration migration completed with errors") if $?;
        }

        # Signal db2conf (signal 1 = HUP)
        my $pid = ReadPid('db2conf');
        Log("db2conf signalled") if $pid > 1 and kill(1 => $pid);
        Log("Master DB recovery finished successfully");
    }

    # Remove fallback db dump
    unlink $bck_fallback;

} # sub restore_master_db

# Send command to SM nodes.
#
# Writes the command into the CMD column of every SM_NODES row and commits.
# Up to three attempts are made, 3 seconds apart; each failed attempt is
# logged as a warning, and a final failure as an error.
sub sm_cmd {
    my $cmd = shift;
    my $cmd_ok = 0;
    for my $try (1 .. 3) {
        eval {
            $dbl->do("update SM_NODES set CMD=?", undef, $cmd);
            $dbl->commit;
        };
        if ($@) {
            Log("WARNING: Storage Manager $cmd failed (try=$try)");
            # AutoCommit is off, so the failed statement leaves a transaction
            # open. Roll it back before retrying; otherwise the retry may run
            # inside the already failed transaction (presumably PostgreSQL,
            # given that this script falls back to pg_restore).
            eval { $dbl->rollback };
            sleep 3;
        }
        else {
            $cmd_ok = 1;
            last;
        }
    }
    Log($cmd_ok ? "$cmd command sent to Storage Manager" : "ERROR: Storage Manager $cmd failed");
} # sub sm_cmd

# Handle any special post-restore situations.
# Currently: run emailer_sighup to wake up the emailer if necessary and log
# whether that succeeded.
sub post_restore {
    # ">/dev/null 2>&1" instead of the bash-only "&>/dev/null": under a POSIX
    # /bin/sh the latter backgrounds the command and $? would always be 0.
    system("$EMAILER_SIGHUP >/dev/null 2>&1");
    Log($??"WARNING: Failed to restore Emailer run state":"Restored Emailer run state successfully");
}

sub finalize {
    # Log the overall result and leave; the number of logged errors becomes
    # the process exit status.
    my $summary = $nErrors ? "ERROR[s]: $nErrors" : 'SUCCESS';
    $summary .= " (WARNINGS: $nWarnings)" if $nWarnings;
    Log("FINAL: ".$summary);
    exit $nErrors;
} # sub finalize

# Run the named stages in the order given.
#
#   PREPARE  - check arguments and concurrent runs, select and verify the
#              backup, wait for a running backup, then stop the nodes.
#              Any failure before the nodes are stopped is logged as FATAL
#              and re-thrown; nothing has been changed at that point.
#   RESTORE  - clear PHP sessions, restore the ASN configuration (only when
#              the ACTIVATED flag file exists) and the master database
#   FINALIZE - notify Storage Manager, start the nodes, run post-restore
#              actions and exit through finalize()
sub stage {
    for my $stage (@_) {
        if ($stage eq 'PREPARE') {
            # -a selects the backup by itself; every other mode needs a name.
            # (Requiring an argument unconditionally made the documented
            # "-a" invocation impossible.)
            die $USAGE unless $Opts{a} or defined $ARGV[0];
            pid_check();
            eval {
                prepare();
                db_master();
                test();
                cond_wait();
            };
            if ($@) {
                my $strErr = $@;
                Log("FATAL: Process exited, no changes to configuration were made. Reason: $strErr");
                die $strErr;
            }
            node_service_ctl('stop');
        }
        if ($stage eq 'RESTORE') {
            clear_sessions();
            restore_acc_cfg() if -f $ACC_ACT;
            restore_master_db();
        }
        if ($stage eq 'FINALIZE') {
            sm_cmd('RESTORE');
            node_service_ctl('start');
            post_restore();
            finalize();
        }
    }
}

###############################################################################
############# MAIN ############################################################
###############################################################################


# Entry point: run all three stages of the recovery in order.
sub main  {
    my @pipeline = qw(PREPARE RESTORE FINALIZE);
    stage(@pipeline);
}

main();

### END ########################################################################

# Cleanup on every exit path: drop our pid file and close the DB connection.
END {
    pid_remove();
    if ($dbl) {
        $dbl->disconnect();
    }
}
