#!/usr/bin/perl -w

#=============================================================================
#
#  Copyright 2006  Etienne URBAH for the EGEE project
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details at
#  http://www.gnu.org/licenses/gpl.html
#
#  This script parses TSV files generated by 'lfc-perf-symlinks-parselog.pl'
#  when parsing the log files generated by 'lfc-perf-symlinks.sh'.
#  It generates CE host / LFC host matrices in TSV format (tab separated
#  values).
#
#  By default, the base folder for subfolders containing TSV files is
#  './lfc-perf-symlinks'   (if it does not exist, it is set to
#  '$HOME/lfc/lfc-perf-symlinks')
#
#  ASSUMPTIONS  -  The subfolders containing TSV files must follow this
#                  naming rule :  yyyymmdd-HHMMSS-<ce_host>
#               -  The name of each TSV file must be 'lfc-perf-symlinks.tsv'
#
#  Optional first     parameter :  Beginning of subfolders (by default yyyymm)
#  Optional following parameters : Numbers of symbolic links to process
#                                  (by default 0 1024 16384)
#
#  For RTT (Round Trip Time) and commands operating on 1 folder or file
#  (lcg-cr, lfc-mkdir, lfc-rm and lcg-del), the name of the output files is
#  <baseFolder>/<beginning>-<command>.tsv
#
#  For commands operating on multiple files (create, list, delete),
#  the name of the output files is
#  <baseFolder>/<beginning>-symlinks-<command>-<number>.tsv
#
#=============================================================================

use strict ;

$| = 1 ;                          # Unbuffered STDOUT :  output shows up at once

#-----------------------------------------------------------------------------
#  Constants for special processing
#
#  For subfolders dated $specialDate :
#  -  a CE host whose name ends with $specialDomain is relabelled as
#     $specialReplacement,
#  -  TSV records whose LFC host ends with $specialDomain are ignored.
#-----------------------------------------------------------------------------
my ($specialDate, $specialDomain, $specialReplacement) =
   ('20070309',   '.datagrid.cea.fr', 'in2p3.fr') ;

#  Number of trailing characters to compare against $specialDomain
my $specialDomainLength = length $specialDomain ;

#-----------------------------------------------------------------------------
#  Constants
#-----------------------------------------------------------------------------
#  True when the script runs under Windows NT or a later Windows version
my $b_Windows = ( defined($ENV{'OS'}) and ($ENV{'OS'} eq 'Windows_NT') ) ;

#  $home       :  Home folder of the current user
#  $pwdPattern :  Current working folder.  It is turned below into a regular
#                 expression which is used to shorten the displayed and
#                 written paths of the matrix files.
my ($home, $pwdPattern) = $b_Windows ?
                          ( $ENV{'HOMEDRIVE'}.$ENV{'HOMEPATH'}, `cd` ) :
                          ( $ENV{'HOME'}, $ENV{'PWD'} ) ;
if  ( $b_Windows and defined($pwdPattern) )
    { chomp $pwdPattern }                  # Output of 'cd' ends with a newline

if  ( not defined($pwdPattern) )
    {
      #  $PWD is not always exported (for example under cron) :  Ask the
      #  operating system instead of silently using an empty pattern, which
      #  would strip the leading '/' of absolute paths.
      require Cwd ;
      $pwdPattern = Cwd::getcwd() ;
    }

#  Quote the folder on ALL platforms, so that characters like '+', '(' or
#  '.' inside the folder name are matched literally.  If the working folder
#  can not be determined at all, use a pattern which never matches, so that
#  paths are left untouched.
$pwdPattern = ( defined($pwdPattern) and length($pwdPattern) ) ?
              quotemeta($pwdPattern) :
              '(?!)' ;

#  Base name shared by the input folder and the input TSV files
my $baseName      = 'lfc-perf-symlinks' ;

#  Base folder :  './lfc-perf-symlinks' if it exists, otherwise
#  '$HOME/lfc/lfc-perf-symlinks'
my $baseFolder    = $baseName ;
if  ( not -d $baseFolder )
    { $baseFolder = $home.'/lfc/'.$baseName }

#  Name of the TSV file expected inside each subfolder
my $tsvFile       = $baseName.'.tsv' ;

#  Commands operating on 1 folder or file (plus RTT), with the title used
#  inside the generated matrix files
my %commandTitles = ( 'rtt'       => 'RTT (Round Trip Time)',
                      'lcg-cr'    => 'lcg-cr (create and copy file)',
                      'lfc-mkdir' => 'lfc-mkdir (create folder)',
                      'lfc-rm'    => 'lfc-rm (remove folder)',
                      'lcg-del'   => 'lcg-del (delete file)' ) ;

#  English month names indexed by 2 digits month number
my %monthNames    = ( '01', 'January', '02', 'February', '03', 'March',
                      '04', 'April',   '05', 'May',      '06', 'June',
                      '07', 'July',    '08', 'August',   '09', 'September',
                      '10', 'October', '11', 'November', '12', 'December' ) ;

#-----------------------------------------------------------------------------
#  If the beginning of the subfolders has not been given as first parameter,
#  set it to current year and month (with 'yyyymm' format).
#-----------------------------------------------------------------------------
#  Beginning of the names of the subfolders to process
my $datePrefix ;

if  ( @ARGV )
    { $datePrefix = shift @ARGV }
else
    {
      #  No parameter :  current year and month with 'yyyymm' format
      my @now = localtime(time) ;
      $datePrefix = sprintf('%04d%02d', $now[5] + 1900, $now[4] + 1) ;
    }

#-----------------------------------------------------------------------------
#  If the numbers of symbolic links to process have not been given as
#  following parameters, set them to (0, 1024, 16384).
#-----------------------------------------------------------------------------
my @numbers = (0, 1024, 16384) ;
if  ( @ARGV )
    { @numbers = @ARGV }

#  Same numbers as hash keys, for quick membership tests
my %numbers ;
foreach  my $wanted  ( @numbers )
    { $numbers{$wanted} = 1 }

#-----------------------------------------------------------------------------
#  Variables
#-----------------------------------------------------------------------------
#  Current subfolder :  date part (yyyymmdd) and CE host part of its name,
#  and path of its TSV file
my ($folderDate, $ceHost, $tsvPath) ;

#  Labels of the current CE host name in reverse order, labels shared by the
#  CE hosts of the current country, and first reversed label (used as
#  country key)
my (@tokens1, @tokens2, $country) ;

#  Country  =>  reference to the reversed labels shared by its CE hosts
my %domains ;

#  Loop index over host name labels
my $num ;

#  Commands seen in the TSV files :
#    %commands     {command}            for RTT and single target commands
#    %b_commands   {number}{command}    for commands on multiple symlinks
#    %units        {command}            time unit found for the command
my (%commands, %b_commands, %units) ;

#  Statistics, indexed  {LFC host}{country}{command}  for RTT and single
#  target commands, and  {number}{LFC host}{country}{command}  for commands
#  on multiple symlinks :
#    %timeNumbers    Sample size
#    %timeSums       Sum of the times
#    %timeSquares    Sum of the squared times
#    %errorNumbers   Number of records whose result differs from the number
my (%timeNumbers, %timeSums, %timeSquares, %errorNumbers) ;

#  True until the domain of the current subfolder has been recorded
my $b_country_todo ;

#  Number of TSV files processed
my $number = 0 ;


#=============================================================================
#
#  In each folder found, parse the TSV file
#
#=============================================================================
foreach  my $folder  ( glob($baseFolder.'/'.$datePrefix.'*') )
{

  #---------------------------------------------------------------------------
  #  The name of the CE HOST is at the end of the folder name
  #  (yyyymmdd-HHMMSS-<ce_host>).  Other folders are ignored.
  #---------------------------------------------------------------------------
  ( $folder =~ m|/([0-9]{8})\-[0-9]{6}\-([^/]+)$| )  or  next ;
  ($folderDate, $ceHost) = ($1, $2) ;

  #---------------------------------------------------------------------------
  #  Verify the TSV file is readable and is not empty
  #---------------------------------------------------------------------------
  $tsvPath = $folder.'/'.$tsvFile ;
  ( (-r $tsvPath) and (-s _) )  or  next ;

  #---------------------------------------------------------------------------
  #  Merge CE hosts by domain :  For each country (last label of the host
  #  name), keep the labels which are common to all its CE hosts.
  #---------------------------------------------------------------------------
  if  ( ($folderDate eq $specialDate) and
        (substr($ceHost, -$specialDomainLength) eq $specialDomain) )
      { $ceHost = $specialReplacement }

  @tokens1 = reverse(split('\.', $ceHost)) ;
  $country = $tokens1[0] ;
  if  ( defined($domains{$country}) )
      {
        @tokens2 = @{$domains{$country}} ;

        #  The common part can not be longer than the current host name.
        #  Without this truncation, a short host name (like 'in2p3.fr')
        #  would keep the longer label list of a previous host.
        if  ( $#tokens2 > $#tokens1 )
            { $#tokens2 = $#tokens1 }

        #  Cut the common part at the first label which differs.
        #  (The range is evaluated once, before @tokens2 is shortened.)
        foreach  $num  ( 1..$#tokens2 )
        {
          if  ( $tokens1[$num] ne $tokens2[$num] )
              {
                $#tokens2 = $num - 1 ;
                last ;
              }
        }
      }
  else
      { @tokens2 = @tokens1 }

  #---------------------------------------------------------------------------
  #  Parse the TSV file
  #  Fields :  Command, 'symlinks'?, Number, Result, Time, Unit, LFC
  #---------------------------------------------------------------------------
  $b_country_todo = 1 ;

  open(my $tsvHandle, '<', $tsvPath)  or
    die "Can not open '", $tsvPath, "' for reading :  ", $!, "\n" ;

  while  ( my $line = <$tsvHandle> )
  {
    #  Copy the captures into named variables at once :  $1..$7 would be
    #  lost by any later pattern match.  $symlinks is undefined for lines
    #  without the optional 'symlinks' word.
    my ($command, $symlinks, $wanted, $result, $time, $timeUnit, $lfcHost) =
       ( $line =~ m/^\s*([^\s:]+)(\s+symlinks\s*)?:?\s+
                    ([0-9]*)\s+([0-9]*)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)/x )
      or  next ;

    #  Skip the header line, and the LFC hosts of the special domain for
    #  the special date
    if  ( ($command eq 'Command') or
          ( ($folderDate eq $specialDate) and
            (substr($lfcHost, -$specialDomainLength) eq $specialDomain) ) )
        { next }

    #-------------------------------------------------------------------------
    #  Keep only RTT, commands operating on 1 folder or file, and commands
    #  operating on one of the requested numbers of symlinks
    #-------------------------------------------------------------------------
    my $b_single   = defined($commandTitles{$command}) ;
    my $b_multiple = ( (not $b_single) and defined($symlinks) and
                       defined($numbers{$wanted}) ) ;
    ( $b_single or $b_multiple )  or  next ;

    #  The domain is recorded only for subfolders containing at least one
    #  relevant record
    if  ( $b_country_todo )
        {
          $domains{$country} = [ @tokens2 ] ;
          $b_country_todo = 0 ;
        }

    #  All records of a given command must use the same time unit
    if  ( defined($units{$command}) )
        { ($timeUnit eq $units{$command}) or
          die "Date='", $folderDate, "'   CE_host='", $ceHost,
              "'   LFC_host='", $lfcHost, "'   Command='", $command,
              "'   Unit not consistent :  '", $timeUnit, "' != '",
              $units{$command}, "'\n" }
    else
        { $units{$command} = $timeUnit }

    #-------------------------------------------------------------------------
    #  RTT and commands operating on 1 folder or file
    #  ('++' and '+=' create the missing entries without any warning)
    #-------------------------------------------------------------------------
    if  ( $b_single )
        {
          $commands{$command} = 1 ;

          $timeNumbers{$lfcHost}{$country}{$command} ++ ;
          $timeSums{$lfcHost}{$country}{$command}    += $time ;
          $timeSquares{$lfcHost}{$country}{$command} += $time * $time ;
        }

    #-------------------------------------------------------------------------
    #  Commands operating on multiple files :  The record is a success only
    #  if the result is equal to the requested number
    #-------------------------------------------------------------------------
    else
        {
          $b_commands{$wanted}{$command} = 1 ;

          if  ( $wanted eq $result )
              {
                $timeNumbers{$wanted}{$lfcHost}{$country}{$command} ++ ;
                $timeSums{$wanted}{$lfcHost}{$country}{$command}    += $time ;
                $timeSquares{$wanted}{$lfcHost}{$country}{$command} +=
                                                               $time * $time ;
              }
          else
              { $errorNumbers{$wanted}{$lfcHost}{$country}{$command} ++ }
        }
  }
  close($tsvHandle) ;

  #  One progress dot every 10 files
  $number ++ ;
  if  ( ($number % 10) == 0 )
      { print '.' }

}

print "\n", $tsvFile, ' :  ', $number, " files processed\n" ;

#  $ceHost stays undefined when no subfolder name matched the naming rule
unless  ( defined($ceHost) )
    { die "No subfolder found for '".$baseFolder.'/'.$datePrefix."*'\n" }

#  $country stays undefined when no subfolder held a readable, non empty
#  TSV file
unless  ( defined($country) )
    { die "No TSV file found in '".$baseFolder.'/'.$datePrefix."*'\n" }

#  %domains stays empty when no TSV file held any relevant record
unless  ( scalar(%domains) )
    {
      my $commandList = join("' or '", sort(keys(%commandTitles))) ;
      my $numberList  = join(' or ', @numbers) ;
      die "No record found for '".$commandList."'\nor ".$numberList.
          " LFC symbolic links in '".$baseFolder.'/'.$datePrefix."*'\n" ;
    }


#-----------------------------------------------------------------------------
#  Sort CE host domains and LFC hosts according to their country
#-----------------------------------------------------------------------------
#  Countries in alphabetical order, and the matching CE host domains
#  (the labels are stored reversed, so they are reversed back here)
my @countries  = sort (keys %domains) ;
my @domains    = map { join('.', reverse(@{$domains{$_}})) } @countries ;

#  LFC hosts sorted by country (last label of the host name).
#  -  %timeNumbers also holds numbers of symlinks as first level keys :
#     Only keys containing a letter are host names.
#  -  Hosts of the same country are sorted by name, so that the order of the
#     matrix lines does not depend on the internal order of the hash keys,
#     which may differ from one run to the next.
my @lfcHosts   = map  { $_->[0] }
                 sort { ($a->[1] cmp $b->[1])  or  ($a->[0] cmp $b->[0]) }
                 map  { [$_, (reverse(split('\.', $_)))[0]] }
                 grep { /[A-Za-z]/ } (keys %timeNumbers) ;

#  Index of the matrix line which receives the 'LFC' label
my $lfcNumDiv2 = int(scalar(@lfcHosts) / 2) ;


#-----------------------------------------------------------------------------
#  Calculate date prefix and month name
#-----------------------------------------------------------------------------
my $monthNumber ;
my $monthName ;

#  Pad a short prefix with 'x' up to 8 characters (for example '200703xx')
if  ( length($datePrefix) < 8 )
    { $datePrefix = substr($datePrefix.'xxxxxxxx', 0, 8) }

#  The month name is empty when the prefix does not hold a valid month
$monthNumber = substr($datePrefix, 4, 2) ;
$monthName   = ( defined($monthNames{$monthNumber}) ?
                 $monthNames{$monthNumber} :
                 '' ) ;


#=============================================================================
#
#  Subroutine createMatrix
#
#=============================================================================
sub createMatrix
{
  #---------------------------------------------------------------------------
  #  Write one matrix file in TSV format, holding one table per requested
  #  type, with one line per LFC host and one column per CE host domain.
  #
  #  Parameters :
  #    $matrixPath     Path of the file to write (the current working folder
  #                    is removed from its beginning)
  #    $blanks         String printed before and after the title of the file
  #    $title          Title of the tables
  #    $command        Command, used as last level key of the statistics
  #    $unit           Time unit of the command
  #    $timeNumbers    \
  #    $errorNumbers    \  References to hashes indexed
  #    $timeSums        /  {LFC host}{country}{command}
  #    $timeSquares    /
  #    @types          Tables to print :  'Number of failures', 'Sample size'
  #                    or 'Average in <unit>'.  Any other type is printed as
  #                    a standard deviation.
  #
  #  File level variables used :  $pwdPattern, $monthName, $datePrefix,
  #  @countries, @domains, @lfcHosts and $lfcNumDiv2.
  #
  #  Dies if the file can not be created or can not be completely written.
  #---------------------------------------------------------------------------
  my ($matrixPath, $blanks, $title, $command, $unit,
      $timeNumbers, $errorNumbers, $timeSums, $timeSquares, @types) = @_ ;

  #---------------------------------------------------------------------------
  #  Create matrix
  #  ($pwdPattern is interpolated as a regular expression, NOT as a literal)
  #---------------------------------------------------------------------------
  $matrixPath =~ s|^$pwdPattern/|| ;
  open(my $matrix, '>', $matrixPath)  or
    die "Can not create '", $matrixPath, "' :  ", $!, "\n" ;

  my $fileTitle = ( index($title, 'symlinks') >= 0 ?
                    $title :
                    $title.' in '.$unit ) ;

  print $matrix 'EGEE LFC performances', $blanks, $fileTitle, $blanks,
                $monthName, ' ', substr($datePrefix, 0, 4), "\n\n" ;

  #---------------------------------------------------------------------------
  #  Print matrix for Average, Stdev, Sample size and Number of failures
  #---------------------------------------------------------------------------
  foreach  my $type  ( @types )
  {
    #-------------------------------------------------------------------------
    #  Print matrix header  (first and second lines)
    #-------------------------------------------------------------------------
    print $matrix $title, " :\t\t",
                  "\t" x int(scalar(@countries) / 2), "CE HOST\n" ;

    print $matrix "$type\t\t", join("\t", @domains), "\n" ;

    #-------------------------------------------------------------------------
    #  Print matrix content.  The first column holds the vertical label
    #  'LFC HOST' on the 2 middle lines.
    #-------------------------------------------------------------------------
    my $lfcNum = 0 ;

    foreach  my $lfcHost  ( @lfcHosts )
    {
      if    ( $lfcNum == $lfcNumDiv2 )
            { print $matrix 'LFC ' }
      elsif ( $lfcNum == $lfcNumDiv2 + 1 )
            { print $matrix 'HOST' }
      else
            { print $matrix '    ' }
      $lfcNum ++ ;
      printf $matrix "\t%-30s", $lfcHost ;

      #  A loop variable of its own avoids any interference with the file
      #  level variable $country
      foreach  my $ceCountry  ( @countries )
      {
        print $matrix "\t" ;
        my $timeNumber = $timeNumbers->{$lfcHost}{$ceCountry}{$command} ;

        if    ( $type eq 'Number of failures' )
        {
          my $errorNumber = $errorNumbers->{$lfcHost}{$ceCountry}{$command} ;
          if  ( defined($errorNumber) )
              { print $matrix $errorNumber }
        }
        elsif ( not defined($timeNumber) )
        {
          #  No sample :  The cell stays empty
        }
        elsif ( $type eq 'Sample size' )
        {
          print $matrix $timeNumber ;
        }
        elsif ( $type eq 'Average in '.$unit )
        {
          printf $matrix '%.2f',
                 ($timeSums->{$lfcHost}{$ceCountry}{$command} / $timeNumber) ;
        }
        elsif ( $timeNumber <= 1 )
        {
          #  Standard deviation is not defined for a single sample
          print $matrix "#DIV/0!" ;
        }
        else
        {
          #  Sample standard deviation from the sums of times and of squared
          #  times.  Rounding errors can make the variance slightly negative
          #  when all samples are (nearly) equal, and 'sqrt' of a negative
          #  number is a fatal error :  Such a variance is set to 0.
          my $timeSum  = $timeSums->{$lfcHost}{$ceCountry}{$command} ;
          my $variance = ( $timeSquares->{$lfcHost}{$ceCountry}{$command} -
                           ($timeSum * $timeSum / $timeNumber) ) /
                         ($timeNumber - 1) ;
          if  ( $variance < 0 )
              { $variance = 0 }
          printf $matrix '%.2f', sqrt($variance) ;
        }
      }
      print $matrix "\n" ;
    }

    print $matrix "\n" x 2 ;
  }

  #  Buffered write errors (disk full, ...) are reported only by 'close'
  close($matrix)  or
    die "Can not write '", $matrixPath, "' :  ", $!, "\n" ;

  printf "%-9s :  %s\n", $command, $matrixPath ;

  return ;
}


#=============================================================================
#
#  For RTT (Round Trip Time) and commands operating on 1 folder or file,
#  create TSV matrix files
#
#=============================================================================
#  Time unit of the command being written (also used further down the file)
my $unit ;

print "\nWritten matrix CE host / LFC host for :\n" ;

foreach  my $command  ( sort(keys %commands) )
{
  $unit = $units{$command} ;

  #  Output file :  <baseFolder>/<datePrefix>-<command>.tsv
  my $matrixPath = $baseFolder.'/'.$datePrefix.'-'.$command.'.tsv' ;

  #  No 'Number of failures' table for these commands
  my @types      = ( 'Average in '.$unit,
                     'Standard deviation in '.$unit,
                     'Sample size' ) ;

  createMatrix($matrixPath, ' ' x 45, $commandTitles{$command},
               $command, $unit,
               \%timeNumbers, \%errorNumbers, \%timeSums, \%timeSquares,
               @types) ;
}


#=============================================================================
#
#  For commands operating on multiple files, loop on the numbers of symbolic
#  links to process
#
#=============================================================================
foreach  my $linkCount  ( @numbers )
{
  #  Nothing to write when no record was found for this number of symlinks
  unless  ( defined($b_commands{$linkCount}) )
      {
        print "\nNo record found for ", $linkCount,
              " LFC symbolic links in '",
              $baseFolder, '/', $datePrefix, "*'\n" ;
        next ;
      }

  #---------------------------------------------------------------------------
  #  TSV matrix files
  #---------------------------------------------------------------------------
  print "\nWritten matrix CE host / LFC host for ", $linkCount,
        " LFC symbolic links :\n" ;

  foreach  my $command  ( sort(keys %{$b_commands{$linkCount}}) )
  {
    $unit = $units{$command} ;

    #  Output file :  <baseFolder>/<datePrefix>-symlinks-<command>-<number>.tsv
    my $matrixPath = $baseFolder.'/'.$datePrefix.'-symlinks-'.$command.'-'.
                     $linkCount.'.tsv' ;

    #  Title :  Command with its first letter in upper case
    my $title      = ucfirst($command).' '.$linkCount.' symlinks' ;

    my @types      = ( 'Average in '.$unit,
                       'Standard deviation in '.$unit,
                       'Sample size',
                       'Number of failures' ) ;

    #  The statistics of these commands are stored one level deeper, under
    #  the number of symlinks
    createMatrix($matrixPath, ' ' x 50, $title, $command, $unit,
                 $timeNumbers{$linkCount}, $errorNumbers{$linkCount},
                 $timeSums{$linkCount},    $timeSquares{$linkCount},
                 @types) ;
  }
}
