# This file is part of ModPipe, Copyright 1997-2020 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

package PLLib::Stat;
require Exporter;
@ISA    = qw(Exporter);
@EXPORT = qw( GetMean GetStdDev GetMeanStdDev GetMedian GetROCValues GetROCStat
              GetTrapezoidArea );


use strict;
use PLLib::Utils;


sub GetTrapezoidArea {

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 4;

   unless ( scalar(@_) == $nargs ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   # --- Reassign input arguments
   my ( $x1, $x2, $y1, $y2 ) = @_;

   # --- Calculate the area
   #     Assume that x1 & x2 represent the base (non-parallel
   #     sides) and y1 & y2 are the heights (parallel sides).
   my $base   = abs($x1 - $x2);
   my $height = ($y1 + $y2)/2;
   my $area   = $base * $height;

   # -- Return stuff
   return $area;
}


sub GetROCStat {

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 4;

   unless ( scalar(@_) == $nargs ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   # --- Reassign input arguments
   my ( $tp, $fp, $fn, $tn ) = @_;

   # -- Count the number of elements
   return unless ( ($fp + $tn) > 0 && ($tp + $fn) > 0 );

   # -- Get the TP-rate
   my $tprate = $tp/($tp+$fn);

   # -- Get the FP-rate
   my $fprate = $fp/($fp+$tn);

   # -- Return values
   return ( $tprate, $fprate );
}


sub GetROCValues {

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 4;

   unless ( scalar(@_) == $nargs ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   # --- Reassign input arguments
   my ( $xvals, $yvals, $cut, $order ) = @_;

   # -- Count the number of elements
   return unless ( @$xvals == @$yvals );

   # -- Get the ROC numbers
   my ($tp, $fp, $fn, $tn) = 0;
   if ( $order eq 'BAD_TO_GOOD' ){

      # -- For scores which increase in magnitude with increasing
      #    quality. Eg: Sequence Identity, GAScore etc.
      for(my $i = 0; $i < @$xvals; $i++){

         # -- Calculate true and false positives
         $tp++ if ( $xvals->[$i] >= $cut && $yvals->[$i] == 1);
         $fp++ if ( $xvals->[$i] >= $cut && $yvals->[$i] == 0);

         $fn++ if ( $xvals->[$i] <  $cut && $yvals->[$i] == 1);
         $tn++ if ( $xvals->[$i] <  $cut && $yvals->[$i] == 0);
      }
   } else {

      # -- For scores which decrease in magnitude with increasing
      #    quality. Eg: Normalized DOPE, GAP Ratio etc.
      for(my $i = 0; $i < @$xvals; $i++){

         # -- Calculate true and false positives
         $tp++ if ( $xvals->[$i] <= $cut && $yvals->[$i] == 1);
         $fp++ if ( $xvals->[$i] <= $cut && $yvals->[$i] == 0);

         $fn++ if ( $xvals->[$i] >  $cut && $yvals->[$i] == 1);
         $tn++ if ( $xvals->[$i] >  $cut && $yvals->[$i] == 0);
      }
   }

   # -- Return values
   return( $tp, $fp, $fn, $tn );
}


sub GetMedian {

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 1;

   unless ( scalar(@_) == $nargs ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   # --- Reassign input arguments
   my $arr = $_[0];

   # -- Count the number of elements
   return unless ( @$arr > 1 );

   # -- Sort the array to get the min & max
   my @vals = sort { $a <=> $b } @$arr;

   # -- Get the median
   my $median = undef;
   if ( @vals % 2 == 0 ){
      my $indx = @vals/2;
      $median = ($vals[$indx] + $vals[$indx+1])/2;
   } else {
      my $indx = (@vals + 1)/2;
      $median = $vals[$indx];
   }

   # -- Get the first quartile
   my $quartile1 = undef;
   if ( @vals % 4 == 0 ){
      my $indx = @vals/4;
      $quartile1 = ($vals[$indx] + $vals[$indx+1])/2;
   } else {
      my $indx = (@vals + 1)/4;
      $quartile1 = $vals[$indx];
   }

   # -- Get the third quartile
   my $quartile3 = undef;
   if ( @vals % 4 == 0 ){
      my $indx = @vals * 3/4;
      $quartile3 = ($vals[$indx] + $vals[$indx+1])/2;
   } else {
      my $indx = (@vals + 1) * 3/4;
      $quartile3 = $vals[$indx];
   }

   # -- Get the min and max values
   my $min = shift @vals;
   my $max = pop   @vals;

   # -- Return
   return $median, $max, $min, $quartile3, $quartile1;
}

sub GetMean {

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 1;

   unless ( scalar(@_) == $nargs ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   # --- Reassign input arguments
   my $arr = $_[0];

   # -- Count the number of elements
   return unless ( @$arr > 1 );

   # -- Get the mean
   my $mean = 0;
   foreach my $elem ( @$arr ){ $mean += $elem }
   $mean = $mean/@$arr;

   # -- Return
   return $mean;
}

sub GetStdDev {

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 2;

   unless ( scalar(@_) == $nargs ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   # --- Reassign input arguments
   my ($arr, $mean) = @_;

   # -- Count the number of elements
   return unless ( @$arr > 1 );

   # -- Get the mean if it is not defined
   if ( ! defined($mean) ){
      $mean = 0;
      foreach my $elem ( @$arr ){ $mean =+ $elem }
      $mean = $mean/@$arr;
   }

   # -- Calculate standard deviation
   my $sumofsq = 0;
   foreach my $elem ( @$arr ){ $sumofsq += ( $mean - $elem )*($mean - $elem) }
   my $stddev = sqrt( $sumofsq/(@$arr - 1));

   # -- Return
   return $stddev;
}

sub GetMeanStdDev {

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 1;

   unless ( scalar(@_) == $nargs ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   # --- Reassign input arguments
   my $arr = $_[0];

   # -- Count the number of elements
   return unless ( @$arr > 1 );

   # -- Get the mean if it is not defined
   my $mean = 0;
   foreach my $elem ( @$arr ){ $mean += $elem }
   $mean = $mean/@$arr;

   # -- Calculate standard deviation
   my $sumofsq = 0;
   foreach my $elem ( @$arr ){ $sumofsq += ( $mean - $elem )*($mean - $elem) }
   my $stddev = sqrt( $sumofsq/(@$arr - 1));

   # -- Return
   return $mean, $stddev;
}
