#!/usr/bin/perl
#
# -----------------------------------------------------------------------------
#  PFMON - Performance monitor daemon
# -----------------------------------------------------------------------------
#  Author: Alex Tsibulnik
#  QA by:
#  Copyright: videoNEXT LLC
# -----------------------------------------------------------------------------
#  
#
# See Rally S914
#
# Pfmon's main goal is to compute overall CPU throttle value 
# and write it to $APL/var/hm/throttle/cpu, so that CPU-demanding
# engines, e.g. MD or Analytics can adjust their performance
# with respect to overall CPU load
#
# Pfmon calculates CPU throttle value
# Algorithm is as follows:
#
# Terms:
#	LAC  - current value of Load Average (1min)
#       LACC - Load Average per CPU core
#	IDLE - Current CPU idle value mapped from [0..100] to (0..1]
#       EWMA - Exponentially Weighted Moving Average
#       TLC  - Current throttle value (not weighted)
#       TLCW - Current throttle value (EWMA-weighted)
#       TLPW - Previous throttle value (EWMA-weighted)
#
# Steps:
#       1. LACC = LAC / NCORES
#       2. IDLE = (%idle / 100) || 0.01
#	3. TLC = LACC * 100 / LA_MAX
#	   Compute TLC(LACC) by simple mapping from [0..LA_MAX] to [0..100]
#	4. W = TLPW - TLC > 0 ? (LACC >= 1 ? W_DECR : W_DECR / 2) : IDLE
#       5. TLCW = EWMA(TLC, TLPW, 60, W)
#
# Recently computed values of Throttle are stored in special array 
# and can be used in analysis and logging
# 

use strict;
use warnings;
use Data::Dumper;
use Log::Log4perl "get_logger";
use Node::Conf "Count_Cores";

# CONS
# Base directories are taken from the environment; nothing here checks that
# they are set.
my $APL = $ENV{APL};
my $APL_VAR = $ENV{APL_VAR};
# File holding the CPU throttle value; read and written by tcpu()
my $TCPU = "$APL_VAR/hm/throttle/cpu";
# Flag file created by cpu_load() while per-core load is at or above
# $CPU_OVERLOAD_THLD and removed once it drops below
my $OVERLOAD = "$APL_VAR/cpu-overload";
# External tools used by cpu_idle() ($TOP on Darwin, $MPSTAT elsewhere)
my $TOP = "/usr/bin/top";
my $MPSTAT = "/usr/bin/mpstat";
# NOTE(review): $UPTIME is not referenced anywhere in the visible code
my $UPTIME = "/usr/bin/uptime";
# Indexed in analyze() in step with its 60/300/900-second look-back windows
my @REPORT_GROW_THLD = (15, 30, 50); # TCPU growth by 1, 5 and 15  minutes to report
# Maximum number of snapshots kept in @LQ
my $QLEN = 60;
# Number of CPU cores as reported by Node::Conf; used as a divisor to get
# load average per core
my $NCORES = Count_Cores;
my $CPU_OVERLOAD_THLD = 3; # Threshold per CPU core
# Per-core load average that maps to a raw throttle value of 100
my $LA_MAX = 6;
# Passed to calc_ewma() as the interval between measurements
# (presumably seconds, matching the commented-out "sleep $DELTA_T" in main)
my $DELTA_T = 60; # Measure in 1-minute intervals
# Argument to sleep() in the main loop
my $TMT_OVERLOAD = 1; # Check for CPU overload every second
my $W_DECR = 2; # Coeff is used in EWMA algorithm to slow down TCPU decreasing rate

# VARS
# Snapshots are unshifted by recalc_tcpu(), so index 0 is the newest one.
# Each entry is a hash with keys: time, idle, la, tl, tl_w, w
my @LQ; # Load Queue
# Time of the last throttle-growth warning issued by analyze()
my $LastWarnTime = 0;

# LOGGING
my $LOG_CFG_FILENAME  = "$APL/common/etc/logger_pfmon.conf"; # logger settings config file
my $LOG_CFG_REFRESH_INTERVAL = 60;                      # logger config refresh interval
# Initialise Log4perl from the config file and have it re-checked at the
# given interval
Log::Log4perl::init_and_watch($LOG_CFG_FILENAME, $LOG_CFG_REFRESH_INTERVAL);
# Logger shared by every sub in this file
my $log = get_logger('PFMON');

# Read the CPU throttle value stored in $TCPU and optionally replace it.
#
# Args:
#	$tcpu_new - (optional) new throttle value; when defined it is written
#	            to $TCPU after the previous value has been read
# Returns:
#	The first line that was in $TCPU before the call (trailing newline
#	removed), or undef if the file was empty
# Dies:
#	If $TCPU cannot be opened for reading or writing, or if the new value
#	cannot be written out
#
sub tcpu
{
	my $tcpu_new = shift;

	# Three-argument open with a lexical handle: the open mode can never be
	# taken from the file name, and the handle is private to this sub
	open(my $in, '<', $TCPU) or die "Unable to read CPU throttle: $!";
	my $tcpu_prev = <$in>;
	close $in;

	# An empty file yields undef; chomp on undef would only raise a warning
	chomp $tcpu_prev if defined $tcpu_prev;

	if (defined $tcpu_new) { # Write new value
		open(my $out, '>', $TCPU) or die "Unable to write CPU throttle: $!";
		print {$out} $tcpu_new or die "Unable to write CPU throttle: $!";
		# Buffered write errors only surface when the handle is closed
		close $out or die "Unable to write CPU throttle: $!";
	}
	return $tcpu_prev;
}

# Read the system load averages from /proc/loadavg.
#
# Returns:
#	A list with the first three whitespace-separated fields of
#	/proc/loadavg (per proc(5): the 1-, 5- and 15-minute load averages).
#	NOTE: an array is returned, so a call in scalar context yields the
#	number of elements, not the first value; use "my ($la) = loadavg();"
#	to get the 1-minute figure.
# Dies:
#	If /proc/loadavg cannot be opened or yields no data
#
sub loadavg
{
	# Lexical handle + three-argument open instead of a global bareword
	open(my $fh, '<', '/proc/loadavg')
		or die "Cannot read load average: $!\n";
	my $line = <$fh>;
	close $fh;

	# Without this check an empty read would be split into an empty list
	# and silently treated as "no load" by the callers
	die "Cannot read load average: no data in /proc/loadavg\n"
		unless defined $line;

	my @avg = (split(/\s/, $line))[0..2];
	return @avg;
}

# Maintain the CPU overload flag file ($OVERLOAD).
#
# Computes the 1-minute load average per core (loadavg()[0] / $NCORES).
# While that value is below $CPU_OVERLOAD_THLD the flag file is removed if
# present; otherwise the file is (re)written with the per-core value
# formatted as "%.2f".
#
# Returns: nothing
# Dies (through $log->logdie):
#	If the flag file cannot be removed, written or closed.  A plain die
#	propagates from loadavg(), or from the division if $NCORES is 0.
#
sub cpu_load
{
	my @avg = loadavg();
	my $lacc = $avg[0] / $NCORES; # LA per core

	if ($lacc < $CPU_OVERLOAD_THLD) {
		if (-f $OVERLOAD) {
			# ENOENT is tolerated: the flag may vanish between the -f test
			# and the unlink, and "already gone" is the desired end state
			unlink($OVERLOAD)
				or $!{ENOENT}
				or $log->logdie("Cannot unlink cpu-overload flag: $!");
		}
	}
	else {
		# Lexical handle + three-argument open instead of bareword FH
		open(my $fh, '>', $OVERLOAD)
			or $log->logdie("Cannot write to $OVERLOAD: $!");
		print {$fh} sprintf("%.2f", $lacc)
			or $log->logdie("Cannot write to $OVERLOAD: $!");
		close($fh) or $log->logdie("close() failed: $!");
	}
	return;
}

# Sample the current CPU idle percentage with an external tool.
#
# On Darwin ($^O matches /darwin/i) runs "$TOP -s 0 -n 0 -l 1" and takes
# the number in front of "% idle" on the "CPU usage:" line.
# Elsewhere runs "$MPSTAT 1 1" and reads the %idle value from the first
# "Average" row.
#
# The %idle column is located through the mpstat header row and addressed
# by its distance from the END of the row.  sysstat releases differ in
# where %idle sits (older ones print intr/s after it, newer ones end with
# %idle), and the header row starts with a timestamp of one or two tokens
# while the Average row starts with "Average:", so offsets from the left
# do not line up.  If no header containing %idle is seen, the historic
# rule (second-to-last numeric column) is used.
#
# Returns:
#	The idle percentage exactly as printed by the tool (e.g. "97.50"),
#	or undef if no matching line was found
# Dies:
#	If the external tool cannot be started
#
sub cpu_idle
{
	my $idle;
	if ($^O=~/darwin/i) {
		# List-form pipe open: the command never passes through a shell
		open(my $top, '-|', $TOP, qw(-s 0 -n 0 -l 1)) or die "top failed: $!";
		while (my $line = <$top>) {
			if ($line =~ /^CPU usage:.+\s+(\d+\.\d+)% idle/) {
				$idle = $1;
				last;
			}
		}
		close $top;
	}
	else {
		open(my $mpstat, '-|', $MPSTAT, 1, 1) or die "mpstat failed: $!";
		my $idle_col; # Negative index of %idle, counted from the row end
		while (my $line = <$mpstat>) {
			my @cols = split ' ', $line;
			next unless @cols;

			if (!defined $idle_col) {
				my ($pos) = grep { $cols[$_] eq '%idle' } 0 .. $#cols;
				$idle_col = $pos - @cols if defined $pos;
			}
			next unless $cols[0] =~ /^Average/;

			if (defined $idle_col
				&& @cols >= -$idle_col
				&& $cols[$idle_col] =~ /^(\d+\.\d+)$/) {
				$idle = $1;
				last;
			}
			# Fallback: original fixed-position rule
			if ($line =~ /^Average.+\s+(\d+\.\d+)\s+\d+\.\d+$/) {
				$idle = $1;
				last;
			}
		}
		close $mpstat;
	}
	return $idle;
}

# One step of an exponentially weighted moving average.
#
# Args:
#	$Yc - current (non-weighted) sample
#	$Sp - previous weighted value
#	$Dt - interval between the two measurements
#	$W  - weighting coefficient; must be non-zero, it scales the time
#	      constant of the decay: exp(-$Dt / ($W * 60))
# Returns:
#	(1 - d) * $Yc + d * $Sp, where d is the decay factor above
#
sub calc_ewma
{
	my ($Yc, $Sp, $Dt, $W) = @_;

	$log->debug("calc_ewma(): $Yc, $Sp, $Dt, $W");

	# Share of the previous weighted value that survives this step
	my $decay = exp(-$Dt/($W * 60));

	return (1 - $decay) * $Yc + $decay * $Sp;
}

# Recompute the EWMA-weighted CPU throttle value and record a snapshot.
#
# Steps:
#	1. Take the 1-minute load average and the current CPU idle value
#	2. Map idle from [0..100] to (0..1]; values whose integer part is 0
#	   (or a missing reading) are treated as 1%
#	3. Map load average per core from [0..$LA_MAX] to [0..100] (raw throttle)
#	4. Choose the EWMA weight: when the throttle is falling use $W_DECR
#	   (halved if load per core is below 1), otherwise use the idle share
#	5. Smooth with calc_ewma(), clamp (above 100 -> 100, below 1 -> 0) and
#	   format with "%.2f"
#
# Side effects:
#	Unshifts a snapshot hash (time, idle, la, tl, tl_w, w) onto @LQ and
#	drops the oldest entry once @LQ exceeds $QLEN entries
# Returns:
#	The new weighted throttle value as a "%.2f" formatted string
#
sub recalc_tcpu
{
	# loadavg() returns a list: the list assignment picks the 1-minute
	# value.  A scalar assignment would store the element count instead.
	my ($lac) = loadavg();
	my $idle = cpu_idle();

	# Map value of CPU idle from [0..100] to (0..1]
	# cpu_idle() returns undef when the tool output could not be parsed
	#
	$idle = 1 unless defined $idle && int $idle;
	$idle /= 100;

	my $lacc = $lac / $NCORES; # LA per core

	# First map Load Average value from 0..LA_MAX to 0..100
	#
	my $tlc = ($lacc * 100) / $LA_MAX;

	# Compute Exponentially Weighted Moving Average for throttling
	# The very first sample has no history, so it is its own predecessor
	#
	my $tlp_w = @LQ ? $LQ[0]{tl_w} : $tlc;
	my $w;
	if ($tlp_w - $tlc > 0) {
		# Throttle is decreasing: slow the descent down
		$w = $lacc >= 1 ? $W_DECR : $W_DECR / 2;
	}
	else {
		$w = $idle;
	}
	my $tlc_w = calc_ewma(
		$tlc,      # Current throttle value (non-weighted)
		$tlp_w,    # Previous throttle value (weighted)
		$DELTA_T,  # Interval between measurements
		$w         # Weighting coefficient (positive rational number)
	);

	# Normalize resulting value
	# Round to 2 digits after decimal point
	#
	$tlc_w = 100 if $tlc_w > 100;
	$tlc_w = 0 if $tlc_w < 1;
	$tlc_w = sprintf("%.2f", $tlc_w);

	# Store values
	#
	unshift @LQ, {
		time => time,
		idle => $idle,
		la   => $lac,
		tl   => $tlc,
		tl_w => $tlc_w,
		w    => $w
	};
	pop @LQ if @LQ > $QLEN;

	return $tlc_w;
}

# Inspect the snapshot queue (@LQ) and log noteworthy throttle changes.
#
# Emits a warning when the weighted throttle has just reached 100, and a
# group of warnings when it has grown by at least $REPORT_GROW_THLD[i]
# compared with a snapshot that is at least 60, 300 or 900 seconds old
# (i = 0, 1, 2).  Snapshots recorded at or before the previous growth
# warning ($LastWarnTime) are not considered again.
#
# Does nothing while fewer than two snapshots are available.
# Returns: nothing
#
sub analyze
{
	return if @LQ <= 1;

	# Search for sudden changes of Throttle values
	#
	my $time = time;
	my @t = ($time-60, $time-300, $time-900);

	my $tlc_w = $LQ[0]{tl_w};
	my $ti = 0;

	if (int($tlc_w) == 100 && int($LQ[1]{tl_w}) < 100) {
		$log->warn("Maximum CPU throttlin value (100) reached!");
	}

	foreach my $snap (@LQ) {
		last if $ti >= @t;
		last if $snap->{time} <= $LastWarnTime;
		next if $snap->{time} > $t[$ti];

		my $tlp_w = $snap->{tl_w};
		if ($tlc_w - $tlp_w >= $REPORT_GROW_THLD[$ti]) {
			# Too big TCPU growth detected
			# Must report it to the log
			#
			# Report at most the six newest snapshots, but never index past
			# the end of the queue: this branch is reachable with as few as
			# two entries, and the missing slots are not snapshots
			my $newest = $#LQ < 5 ? $#LQ : 5;
			my @recent = @LQ[0 .. $newest];
			my $la_str = join('', map { $_->{la}.' ' } @recent);
			my $id_str = join('', map { $_->{idle}.' ' } @recent);

			$log->warn("Intensive Throttle growth detected: ".
				($tlc_w - $tlp_w).
				" (thresh=$REPORT_GROW_THLD[$ti])");
			$log->warn("Recent LA values: ".$la_str);
			$log->warn("Recent CPU Idle values: ".$id_str);
			$LastWarnTime = $time;
			last;
		}
		$ti++;
	}
	return;
}

# Daemon loop: announce start-up, then refresh the CPU overload flag via
# cpu_load() every $TMT_OVERLOAD seconds.  Never returns normally.
#
# The throttle pipeline is switched off in this loop; the disabled steps
# were, in order:
#	my $tlc = recalc_tcpu;
#	tcpu $tlc;
#	analyze;
# paced by "sleep $DELTA_T" instead of "sleep $TMT_OVERLOAD".
#
sub main
{
	$log->info("SKM Performance Monitor started (pid=$$, ncores=$NCORES)");

	for (;;) {
		cpu_load();
		sleep($TMT_OVERLOAD);
	}
}

# MAIN
#
# Run the daemon loop; any exception escaping main() is reported through
# the logger's logdie
#
eval { main() };
if ($@) {
	$log->logdie("Error in main(): $@");
}
