#!/usr/bin/perl
#
#  $Id: sm_keeper 30568 2014-05-13 14:31:05Z atsybulnik $
# -----------------------------------------------------------------------------
#  SM_KEEPER - Keep disk space and load balance
# -----------------------------------------------------------------------------
#  Author: Alex Titov
#  QA by:
#  Copyright: videoNEXT LLC
# -----------------------------------------------------------------------------
#
# for REQ see: Rally S623:Storage Manager TA926: Space Keeper
#                    S971: Media content creation in low resolution format
# notes:
#   1. Should have own log (get_logger etc)
#
#Space Keeper (SK):
#
#   1. SK reads device configurations and volume states
#   2. Validate storage references 
#     *Reference is invalid if volume operation state is not Online | Degraded
#   3. Find missing storage references (new devices, etc)
#   4. Assign invalid and missing storage references to Online volumes
#   5. Pre-create references for next hour (~ 5 minutes before new hour starts)
#   6. Call Decimator for frame reduction at XX:02
#   7. Call WIPE for storage cleaning at XX:YY where YY=abs(RTSP%100-54)/5+15
#   8. Call WIPE in case of space shortage
#   9. SPACE_SHORT_LIMIT=3 should be less than MIN_LIMIT in wipe
#  10. Two dirs (-00, -01) and two corresponding links are created on the same volume
#      for LR and Native streams (S971). In the future the dirs will be on different volumes
#
#Volume assignment is a decision-making process. The following information is considered when making a decision:
#
#   1. Retriever: Writing power [KB/s]. Stat information
#   2. Volume:  LIMIT_WRITING [MB/S]. Configuration
#   3. Volume: FREE_SPACE. Stat information
#
#SK tries to do a load balancing during device assignment but honors volume's space and writing limitations


use warnings;
use strict;
use Data::Dumper;
use File::Basename qw(dirname);
use lib dirname(__FILE__).'/../lib';              # find  SM::Config here
use SM::Config ':all';
use NextCAM::Init qw(GetAsrv);
#use Node::Conf qw(RTSP_PORT);

# CONSTANTS -------------------------------------------------------------------
my $KEEPER = (split m{/}, $0)[-1];                # program name without its path
my $WIPE   = dirname(__FILE__) . '/sm_wipe >/dev/null 2>&1';  # wipe command, output discarded
my $APL    = $ENV{APL};                           # base path taken from the environment
my $SPACE_SHORT_LIMIT = 3;                        # start wipe if less than 3% is free
# STATE -----------------------------------------------------------------------
my ($vols, $devs);                                # configs of volumes / of storage consumers
my $opt = SM_Options;                             # options are loaded once, at start

# SUBS ------------------------------------------------------------------------


#-------------------------------------
# create a dir if absent. die on error
#-------------------------------------
sub mk_dir {
 # Make sure $dir exists as a directory.
 #   arg:     $dir - path of the directory to create (parent must exist)
 #   returns: nothing useful; dies with "ERR: cannot create ..." on failure
 my $dir=shift;
 return if -d $dir;          # do nothing since dir already created
 # Remove whatever non-directory occupies the name: a flat file (DE978) or a
 # symlink that does not lead to a directory (e.g. a dangling link). Without
 # the -l test a dangling link made mkdir fail with EEXIST on every call.
 unlink($dir) if -l $dir or -f $dir;
 return 1 if mkdir($dir,0755);
 my $err=$!;                 # keep mkdir's errno, the -d below may overwrite it
 return 1 if -d $dir;        # somebody else created it in the meantime
 die "ERR: cannot create $dir : $err";
}

#------------------------------------------------------------------------------
# creates directory and link for one device
# dir and link name looks like:
#   link: /vasm/store/va-2.6.0/3/080512/14.0000 -> 
#   dir:  /vasm/ca98684e-6489-4dcd-910e-9f0f0befe6f2/va-2.6.0/3/080512/14.0000
#   link: /vasm/store/va-2.6.0/3/080512/14.3242 -> 
#   dir:  /vasm/ca98684e-6489-4dcd-910e-9f0f0befe6f2/va-2.6.0/3/080512/14.3242
# minutes and seconds part is mandatory and always present
# the alias for the latest fragment of the hour does not have a mins & secs
# if $vol is undefined then old links are removed but new dirs/links are not created
# AT: frd suffix is added to all links and dirs
# 
#------------------------------------------------------------------------------
sub mk_link2dir {
  # Create (or remove) the hourly storage directories and links of one device.
  #   $dev  - device config hash; DEVID and _vol (assigned volume id) are read
  #   $dd   - date part of the path
  #   $hh   - hour part of the path
  #   $mmss - minutes+seconds suffix of the fragment name
  # With an empty/undefined _vol only the existing links are removed.
  # Errors are caught and logged as SMK-0011; nothing is propagated to the caller.
  my ($dev,$dd,$hh,$mmss)=@_;
  my $id=$dev->{DEVID};              # device id
  my $vol=$dev->{_vol};              # volume(spindle) id
  $vol='' if not defined $vol;       # never assigned: same as "no volume", no warnings
  my $store=SM_MNT ."/store/".SM_VER;# path to store
  my $frd='01';                      # 2.6.3 frd is constant, to be calculated in future
  my $lrs='00';                      # 2.7.1 lrs is constant for Low Resolution stream
  my $sdir="$store/$id/$dd";         # where the links live
  my ($frag_frd,$frag_lrs)  =("$hh.$mmss-$frd","$hh.$mmss-$lrs"); # fragment names
  my ($alias_frd,$alias_lrs)=("$hh-$frd","$hh-$lrs");             # "latest fragment" aliases
  eval {
   if(not $vol) {                    # volume is not defined: just drop old links
     foreach my $name ($frag_frd,$frag_lrs,$alias_frd,$alias_lrs) {
       unlink("$sdir/$name") if -l "$sdir/$name";
     }
   }else {                           # volume is present
     my $vpath=SM_MNT ."/$vol/".SM_VER; # path to volume
     my $vdir="$vpath/$id/$dd";         # where the real directories live
     mk_dir("$store/$id");
     mk_dir("$vpath/$id");
     mk_dir($sdir);
     mk_dir($vdir);
     mk_dir("$vdir/$frag_frd");
     mk_dir("$vdir/$frag_lrs");
     foreach my $name ($frag_frd,$frag_lrs) {
       unlink("$sdir/$name") if -l "$sdir/$name";
     }
     symlink("$vdir/$frag_frd","$sdir/$frag_frd") ||
       die("ERR: Cannot create symlink $sdir/$frag_frd->$vdir/$frag_frd: $!");
     symlink("$vdir/$frag_lrs","$sdir/$frag_lrs") ||    # do not die if lrs, but say so
       SM_LOG->warn("dev=$id vol=$vol: cannot create symlink $sdir/$frag_lrs: $!");
     foreach my $name ($alias_frd,$alias_lrs) {
       unlink("$sdir/$name") if -l "$sdir/$name";
     }
     foreach my $name ($alias_frd,$alias_lrs) {
       # this is temporary TBD remove; list form keeps the path away from the shell
       system('rm','-rf',"$sdir/$name") if -d "$sdir/$name";
     }
     symlink("$vdir/$frag_frd","$sdir/$alias_frd") ||
       die("ERR: Cannot create alias $sdir/$alias_frd->$vdir/$frag_frd: $!");
     symlink("$vdir/$frag_lrs","$sdir/$alias_lrs") ||   # do not die if lrs, but say so
       SM_LOG->warn("dev=$id vol=$vol: cannot create alias $sdir/$alias_lrs: $!");
   }
  };
  if ($@) {                          # catch an error
    SM_LOG->error("SMK-0011: dev=$id vol=$vol: $@");
  }
}

#------------------------------------------------------------------------------
#creates all possible links and corresponding directories
#------------------------------------------------------------------------------
sub create_links_all {
   # (Re)assign every known device to a volume and build its links.
   #   $point - 'next' : prepare the coming hour (fragment suffix is 0000)
   #            'now'  : use the current time
   my ($point)=@_;
   SM_LOG->debug("<create_links_all>");
   assign_dev2vol($point,keys %$devs);
   my $stamp=time();
   $stamp+=60*60 if $point eq 'next';          # look one hour ahead
   my ($day,$hour,$min,$sec)=SM_DateSplit($stamp);
   if($point eq 'next') {                      # next hour always starts at 00:00
     $min='00';
     $sec='00';
   }
   foreach my $id (keys %$devs) {
     mk_link2dir($devs->{$id},$day,$hour,$min.$sec);
   }
   SM_LOG->debug("</create_links_all>");
}

#------------------------------------------------------------------------------
#  DECISION !
#  create links for a list of devices
#  consider space/load balancing 
#  report balance to a log
#  warning if balance exceed any limit
#------------------------------------------------------------------------------
sub assign_dev2vol {
 # Decide which volume every device in the list writes to; the result is stored
 # in $devs->{ID}->{_vol} ('' means "no volume could be assigned").
 #   $point - 'next' (sort volumes by LIMIT_WRITE) or
 #            'now'  (sort volumes by LIMIT_WRITE-stat_WRITE)
 #   @dev   - ids of the devices to place
 # NOTE: _vol is reset for ALL devices in %$devs, not only for the listed ones.
 my $point=shift;                   # possible values next/now
 my @dev=@_;                        # list of devices
 SM_LOG->debug("<assign_dev2vol point='$point' list='".join(' ',@dev)."'>");
 if(not @dev) {
     SM_LOG->info("Nothing to assign. the list is empty");
     return;
 }
 #--------- forget previous assignments, so callers never see a stale volume
 foreach (keys %$devs) { $devs->{$_}->{_vol}='' }
 #--------- prepare sorted array for volumes, active on-devices and off-devices
 # NOTE(review): the sort is ascending although older comments called the
 # volume list "biggest" - confirm the intended order.
 my @s_vol=sort {
            if ($point eq 'next') {
              $vols->{$a}->{LIMIT_WRITE} <=> $vols->{$b}->{LIMIT_WRITE}
            }else{
              $vols->{$a}->{LIMIT_WRITE}-$vols->{$a}->{stat_WRITE}
              <=>
              $vols->{$b}->{LIMIT_WRITE}-$vols->{$b}->{stat_WRITE}
            }
          }grep {defined $vols->{$_}->{ost} and
                 $vols->{$_}->{ost}=~/^(Online|Degraded)$/}        keys %$vols;
 my @s_dev=sort {$devs->{$a}->{_writepower} <=> $devs->{$b}->{_writepower}}
           grep {$devs->{$_}->{ARCHSTATE} eq 'on' }                        @dev;
 my @o_dev=sort {$devs->{$a}->{_writepower} <=> $devs->{$b}->{_writepower}}
           grep {$devs->{$_}->{ARCHSTATE} ne 'on' }                        @dev;
 #--------- check available volumes: only Online/Degraded ones can take data
 if(not @s_vol) {
   SM_LOG->error("SMK-0001: no active volumes are present in the system");
   return;
 }
 #--------- clean calculation slots
 foreach (keys %$vols) {     # TBD: already reserved space
    $vols->{$_}->{_space_count}=$vols->{$_}->{stat_FREE};
    $vols->{$_}->{_write_count}=$vols->{$_}->{LIMIT_WRITE}*1024;   # limit in KB
    #TBD use 1/2 of LIMIT_WRITE for degraded volumes
 }

 # --= AF Storage overflow fix =--
 my %asrv = GetAsrv();                           # System Identity attributes from DB
 my $SPACE_LIMIT_ALLOCATION = $asrv{SM_FREE_SPACE_LIMIT} || 15;                            # 15% is default if not set properly
 my $SPACE_STOP_ALLOCATION = $SPACE_LIMIT_ALLOCATION > 5 ? $SPACE_LIMIT_ALLOCATION-5 : 1;  # 1% is minimum

 my $kgb=60*60/1024/1024;           # factor applied to _writepower when charging _space_count

 # One placement round shared by both "active device" phases.
 #   $cands       - device ids still to be placed (order matters)
 #   $min_free    - volumes with less free space [%] than this are skipped
 #   $check_space - true: also require room in the volume's _space_count
 # Every pass offers each volume the first device that fits; passes stop as
 # soon as one of them places nothing, since later ones could not either.
 my $place=sub {
   my ($cands,$min_free,$check_space)=@_;
   foreach my $pass (1..scalar @$cands) {
     my $placed=0;
     foreach my $vp (@s_vol) {
       my $v=$vols->{$vp};
       next if _vol_free_percent($v) < $min_free;   # too full (or size unknown)
       foreach my $dp (@$cands) {
         my $d=$devs->{$dp};
         next if $d->{_vol};                        # skip, already assigned
         my $wp=$d->{_writepower};
         next if $check_space and $wp*$kgb > $v->{_space_count};
         next if $wp > $v->{_write_count};
         $v->{_write_count}-=$wp;                   # reduce count
         $v->{_space_count}-=$wp*$kgb;              # reduce space
         $d->{_vol}=$vp;
         $placed++;
         last;
       }
     }
     last if not $placed;
   }
 };

 #--------- placement cycle for active devices: space and write limits obeyed
 $place->(\@s_dev,$SPACE_LIMIT_ALLOCATION,1);
 SM_LOG->debug("initial assigment: $_-> $devs->{$_}->{_vol}") foreach (@dev);
 #--------- see what devices has not been placed:
 @s_dev=sort {$devs->{$a}->{_writepower} <=> $devs->{$b}->{_writepower}}
        grep {$devs->{$_}->{ARCHSTATE} eq 'on' and !$devs->{$_}->{_vol}}   @dev;

 if(@s_dev) {  #--------- continue active device placement without space limit
  SM_LOG->warn("SMK-0002: Space limit will not be obeayed for devices:".join ' ',@s_dev);
  $place->(\@s_dev,$SPACE_STOP_ALLOCATION,0);
 }

 #--------- see what devices has not been placed:
 @s_dev=grep {$devs->{$_}->{ARCHSTATE} eq 'on' and !$devs->{$_}->{_vol}}     @dev;
 foreach (@s_dev) {
  SM_LOG->error("SMK-0012: dev=$_ canot use disk for archive since write limit is reached");
 }
 #--------- placement for inactive devices without space/write check:
 #          plain round-robin over the active volumes
 my $turn=0;
 foreach my $dp (@o_dev) {
   next if $devs->{$dp}->{_vol};                    # skip, already assigned
   $devs->{$dp}->{_vol}=$s_vol[$turn++ % scalar(@s_vol)];
 }
 SM_LOG->info("assigment: $_ [$devs->{$_}->{ARCHSTATE}]->$devs->{$_}->{_vol}") foreach (@dev);
 SM_LOG->debug("</assign_dev2vol>");
}

# Free space of one volume config in percent of its size.
# Returns 0 when stat_SIZE is missing or zero, so such a volume is treated as
# full instead of killing the daemon with a division by zero.
sub _vol_free_percent {
 my $vol=shift;
 my $size=$vol->{stat_SIZE};
 return 0 if not $size;
 return 100*($vol->{stat_FREE}||0)/$size;
}


#-------------------------------------------------------------------------------
# find invalid links 
# find missing links
# assign corresponding devices to active volumes
# create links
# TBD!! do not do check_links in last 2 seconds before hour switch
#-------------------------------------------------------------------------------
sub check_links {
 # Verify the current-hour link of every device and repair what is wrong:
 #   missing - the link does not exist
 #   invalid - the link does not point into a volume directory, or that volume
 #             is unknown / not Online|Degraded
 # Affected devices are re-assigned with assign_dev2vol('now',...) and their
 # links are re-created with the current minutes+seconds suffix.
 my @invalid;                            # array for devices with invalid link
 my @missing;                            # array for devices with missing link
 my $MOUNTPOINT=SM_MNT;
 my ($date,$hour,$min,$sec)=SM_DateSplit(time);
 my @on_dev=keys %$devs;                 # use all devices since dev may start by schedule
 my $frd='01';                           # constant for 2.6.3 will be calculated in future
 SM_LOG->debug("<check_links>");
 foreach my $id (@on_dev)   {
   my $linkname="$MOUNTPOINT/store/".SM_VER."/$id/$date/$hour-$frd";
   if(not -l $linkname) {                # missing link
      SM_LOG->info("SMK->0005: Link $linkname for device $id is missing");
      push @missing,$id;
      next;
   }
   my $target=readlink($linkname);       # lexical: do not clobber the global $_
   $target='' if not defined $target;    # link vanished between -l and readlink
   SM_LOG->debug("Checking link $target");
   # \Q..\E: the mount point is a path, not a pattern
   if($target=~m|^\Q$MOUNTPOINT\E/(\w{8}-\w{4}-\w{4}-\w{4}-\w{12})/|){
     my $uuid=$1;
     # exists() first: a plain $vols->{$uuid}->{ost} would create a phantom
     # volume entry for an unknown uuid and pollute %$vols
     my $ost=(exists $vols->{$uuid} and defined $vols->{$uuid}->{ost})
             ? $vols->{$uuid}->{ost} : 'unknown';
     if($ost!~/^(Online|Degraded)$/) {   # invalid link!
        SM_LOG->info("SMK->0003: $uuid status $ost: found invalid $linkname->$target");
        push @invalid,$id;
     }
   } else {                              # link does not point to volume, it is invalid
     SM_LOG->info("SMK->0004: Strange link found $linkname->$target: found if invalid");
     push @invalid,$id;
   }
 }
 if(@missing or @invalid) {
    assign_dev2vol('now',(@missing,@invalid));
    mk_link2dir($devs->{$_},$date,$hour,"$min$sec") foreach ((@missing,@invalid));
 }
 SM_LOG->debug("</check_links>");
}

sub check_dirs {
 # Make sure every device has its hourly directories directly under the store
 # (no volumes involved), plus relative alias links pointing at them.
 #   $point - 'next' for the coming hour, anything else for the current one
 my ($point)=@_;
 my $store=SM_MNT.'/store/'.SM_VER;
 my $when=time();
 $when+=60*60 if $point eq 'next';       # look one hour ahead
 my ($dd,$hh)=SM_DateSplit($when);       # dd - date, hh - hour
 my ($frd,$lrs,$mmss)=('01','00','0000');# stream suffixes and constant mins+secs
 SM_LOG->debug("<check_dirs>");
 for my $id (keys %$devs) {
   my $base="$store/$id/$dd";
   my $frd_name="$hh.$mmss-$frd";
   my $lrs_name="$hh.$mmss-$lrs";
   next if -d "$base/$frd_name";         # already in place
   mk_dir("$store/$id");
   mk_dir($base);
   mk_dir("$base/$frd_name");
   mk_dir("$base/$lrs_name");
   for my $tag ($frd,$lrs) {             # drop stale aliases first
     unlink("$base/$hh-$tag") if -l "$base/$hh-$tag";
   }
   symlink($frd_name,"$base/$hh-$frd");  # aliases are relative links
   symlink($lrs_name,"$base/$hh-$lrs");
 }
 SM_LOG->debug("</check_dirs>");
}

#-------------------------------------------------------------------------------
# check total space for wheels
# return 0 if wipe is running
# return 0 if free space is sufficient or no active volume reports a size
# return 1 if total free space < SPACE_SHORT_LIMIT % (only active volumes are counted)
#-------------------------------------------------------------------------------

sub space_shortage {
  # Tell whether the active volumes together are short of free space.
  # Returns 0 while the wipe pid file exists, 1 when the free share is below
  # $SPACE_SHORT_LIMIT percent, 0 otherwise.
  my $pids=SM_PID;
  if(-f $pids.'/sm_wipe.pid') {                 # wipe is working
    return 0;
  }
  my $t_size=0;
  my $t_free=0;
  for my $id (keys %$vols) {
    my $vol=$vols->{$id};
    next unless $vol->{ost}=~/^(Online|Degraded|Full)$/;  # ignore inactive
    $t_size+=$vol->{stat_SIZE};
    $t_free+=$vol->{stat_FREE} if $vol->{stat_FREE}>0;
  }
  my $prc=-1;                                   # -1 when nothing was counted
  $prc=int(100*$t_free/$t_size) if $t_size;
  SM_LOG->debug("Space check: size=$t_size MB,free=$t_free MB [$prc<=>$SPACE_SHORT_LIMIT]");
  return 0 unless $t_size and $prc<$SPACE_SHORT_LIMIT;
  SM_LOG->info("Space shortage identified: size=$t_size MB,free=$t_free MB [$prc<$SPACE_SHORT_LIMIT]");
  return 1;
}

sub get_status {
  # Read the first line of $APL/var/sm/status.
  # Returns STOPPED, NO-ARCHIVE or ARCHIVING when the line is exactly one of
  # them; returns '' when the file cannot be opened, is empty, or holds
  # anything else.
  open(my $fh,'<',"$APL/var/sm/status") or return '';
  my $st=<$fh>;
  close $fh;
  return '' if not defined $st;          # empty file: nothing to chomp or match
  chomp $st;
  return $st if $st=~/^(STOPPED|NO-ARCHIVE|ARCHIVING)$/;
  return '';
}

# MAIN =========================================================================
SM_WritePid($KEEPER);                   # write pid for the first time
my $time_mark=0;                        # last time the configuration was (re)checked
my $devconfupd="$APL/var/conf/conf";    # trace of device conf update
my $smstatupd=SM_STAT.'/.updated';      # trace of ost update
my $next_links=int(time()/3600);        # hour (since epoch) in which next-hour links are due
my $next_wipe=int(time()/3600);         # hour (since epoch) in which the scheduled wipe is due
my $once_in15=0;                        # time of the last "once in 15 seconds" actions
my $port=55; # RTSP_PORT;                     # port is different for each node
# With the hard-coded port 55 the two values below evaluate to 15.2 and 22.2,
# i.e. the scheduled wipe can start when the minute is within 16..22.
my $wipe_start=abs((($port%100)-54))/5+15; # each node has its own start time
my $wipe_end=$wipe_start+7;             # the end of wipe window
SM_LOG->info("$KEEPER starts; wipe window is $wipe_start-$wipe_end ");
for(my $cycle=0;;) { # ----------------------------------- this is main loop
  # One iteration per second (see the sleep at the bottom). Each iteration:
  #   1. re-checks links/dirs when something changed or once per 60 iterations
  #   2. prepares links/dirs for the next hour after minute 50
  #   3. starts the scheduled wipe inside the wipe window
  #   4. every 15 seconds refreshes the pid and wipes on space shortage
  my $need_check=0;                     # presumably the links should not be checked
  # mtime() is not defined in this file - presumably exported by SM::Config
  $need_check=1 if -e $devconfupd and mtime($devconfupd)>$time_mark;
  $need_check=1 if -e $smstatupd  and mtime($smstatupd) >$time_mark;
  $need_check=1 if not ++$cycle%60;     # have to check once per 60 iterations (~1 minute)
  $need_check=1 if not time%3600;       # this is the first second of the hour
  if($need_check){
    if ($opt->{MODE} eq 'NOWHEELS') {   # no volumes: only plain directories are kept
      $devs=SM_Devices();
      check_dirs('curr');
    }else {
      SM_LOG->debug("time to check links");
      if(get_status() eq 'NO-ARCHIVE') {  # do not try checking links if NO-ARCHIVE
        SM_LOG->info("NO-ARCHIVE, cannot check/create links");
      }else {
        $vols=SM_Wheels();                       
        $devs=SM_Devices();
        check_links();
      }
    }
    $time_mark=time;
  }
  #----------------------------- check if need links for next hour
  my $cur_hour=int(time()/3600);
  my $cur_min=int(time()%3600/60);
  if($next_links<=$cur_hour and $cur_min>50) { # links are not created yet
     $vols=SM_Wheels()  if $opt->{MODE} ne 'NOWHEELS';                     
     $devs=SM_Devices();
     $time_mark=time;
     if($opt->{MODE} ne 'NOWHEELS') {
       create_links_all('next');
     }else {
       check_dirs('next');
     }
     $next_links=$cur_hour+1;           # done for this hour
  }
  if($next_wipe<=$cur_hour and $cur_min>$wipe_start) { # wipe is not done yet
     # marked as done even when the window is already over, so a start past
     # the window skips the scheduled wipe for this hour
     $next_wipe=$cur_hour+1;
     if($cur_min<$wipe_end) {
       SM_LOG->debug("starting $WIPE");
       system("$WIPE &");                     # scheduled wipe, started in the background
     }
  }
  if(time-$once_in15 >15) {                   # this works once in 15 sec
    $once_in15=time;
    SM_WritePid($KEEPER);                     # write PID every 15 seconds
    if (space_shortage){
      SM_LOG->debug("starting $WIPE");
      system("$WIPE &");                      # start wipe if not enough free space
    }
  }
  sleep 1;
}

