# This file is part of ModPipe, Copyright 1997-2020 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.


package MPLib::MPModules;
require Exporter;
@ISA    = qw(Exporter);
@EXPORT = qw( GetProfile GetHits100 Hits2Ali GetHits010
              GetHits001 GetModels Ali2Mod GetPsiBlastProfile 
              GetHits020 GetModelRating RateModels GetHits002
              GetHitsPP ReorderHits CleanHitsModes GetHitsHHBlits 
              GetHitsHHSearch GetHHSuiteProfile);



use strict;
use Cwd;
use File::Basename;

use PLLib::Utils;
use PLLib::Sequence;
use PLLib::MD5Utils;
use PLLib::HHSuiteUtils;
use MPLib::MPInit;
use MPLib::MPUtils;
use MPLib::Version;
use MPLib::Binaries;
use MPLib::Serialize;

sub GetHitsPP {

   # Generic driver that runs src/HitsPrfPrf.pl for one sequence and passes
   # the resulting hit lines to Hits2Ali.
   #
   # Arguments (exactly 12, positional):
   #   $seqid      - sequence identifier (also used as --target_code)
   #   $natpdb     - native PDB code, benchmarking only (may be undef)
   #   $natchn     - native chain, only used when $natpdb is defined
   #   $outhits    - handed to Hits2Ali and replaced by what it returns
   #   $evaluehits - value for --e_value
   #   $scrstat    - value for --set_score_statistics
   #   $prftype    - input type: 'BP' (build_profile profile),
   #                 'PB' (PSI-BLAST profile) or 'SS' (single sequence)
   #   $scoretype  - value for --score_type
   #   $hitcode    - code used in output file names and in the custom tag
   #   $ccmatoff   - value for --ccmatrix_offset
   #   $gapo       - value for --gap_open_cost
   #   $gape       - value for --gap_extend_cost
   #
   # Returns nothing on failure, 1 when the script produced no hit file,
   # and otherwise the second value returned by Hits2Ali.

   my $subname = GetSubrName();

   # --- Refuse to run with the wrong number of arguments
   if ( @_ != 12 ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   my ($seqid, $natpdb, $natchn, $outhits, $evaluehits, $scrstat, $prftype,
       $scoretype, $hitcode, $ccmatoff, $gapo, $gape) = @_;

   # --- Remember where we are (only used for diagnostics)
   my $currdir = cwd();

   # --- Pick the input file that matches the requested profile type.
   #     An unrecognised type leaves the name empty, which then fails the
   #     existence test below.
   my $seqdir = SeqDirMP( $seqid );
   my $prfnam = $prftype eq 'BP' ? PrfFileMP( $seqid )
              : $prftype eq 'PB' ? PsiBlastPrfFileMP( $seqid )
              : $prftype eq 'SS' ? SeqFileMP( $seqid )
              :                    '';

   if ( ! -e $prfnam ){
      warn "${subname}__E> Profile file not found: $prfnam\n";
      return;
   }

   # --- Bring the profile into the working directory
   my $prflocal = basename( $prfnam );
   if ( ! CopyFile($prfnam, $prflocal) ){
      warn "${subname}__E> Could not copy profile file into current directory\n";
      warn "${subname}__E>    Source File: $prfnam\n";
      warn "${subname}__E>    Target Dir : $currdir\n";
      return;
   }

   # --- The template profile database is mandatory; give up without it
   my ($xprflist, $xprfpssmdb) = CopyXPRFDB()
      or die "${subname}__E> Failed copying essential database\n";

   # --- All output names share one stem
   my $alibase = "${seqid}-prfprf-${hitcode}";
   my $hitfile = "${alibase}.hits";
   my $alidir  = "${alibase}-ali";
   my $sumfile = "${alibase}.sum";

   # --- Human-readable tag for the hit code (left empty for unknown codes)
   my %mode_for = (
                  '0010' => "MODELLER Prf-Prf",
                  '0020' => "PSI-BLAST Prf-Prf",
                  '0001' => "Seq-Prf",
                  '0002' => "Max PSSM Consensus Seq-Prf",
                  '0004' => "Max Freq Consensus Seq-Prf",
                  );
   my $mode = $mode_for{$hitcode};
   $mode .= " ($hitcode)";

   # --- Options for HitsPrfPrf.pl
   my %hits_pp = (
                 "--profile_list"             => $xprflist,
                 "--profile_pssmdb"           => $xprfpssmdb,
                 "--target_profile"           => $prflocal,
                 "--set_score_statistics"     => $scrstat,
                 "--e_value"                  => $evaluehits,
                 "--target_code"              => $seqid,
                 "--summary_file"             => $sumfile,
                 "--alignment_basename"       => $alibase,
                 "--output_alidir"            => $alidir,
                 "--hitfile_name"             => $hitfile,
                 "--custom_tag"               => $mode,
                 "--score_type"               => $scoretype,
                 "--ccmatrix_offset"          => $ccmatoff,
                 "--gap_open_cost"            => $gapo,
                 "--gap_extend_cost"          => $gape,
                 "--pdb_repository"           => $init::pdbrep,
                 );

   # --- PSI-BLAST profiles are read as PIR, single sequences as FASTA
   if ( $prftype eq 'PB' ){
      $hits_pp{"--profile_format"} = 'PIR';
   }
   elsif ( $prftype eq 'SS' ){
      $hits_pp{"--profile_format"} = 'FASTA';
   }

   # --- Extra options used only for benchmarking against a native structure
   if ( defined($natpdb) ){
      $hits_pp{"--native_pdb"}  = $natpdb;
      $hits_pp{"--native_chn"}  = $natchn;
      $hits_pp{"--native_type"} = 'sequence';
   }

   # --- Assemble and run the command (list form, no shell involved)
   my @command = ( GetModPipeScript("src/HitsPrfPrf.pl"), %hits_pp );
   if ( system(@command) != 0 ){
      warn "${subname}__E> Failed to calculate hits ($hitcode): $seqid\n";
      warn "${subname}__E> " . join(" ", @command) . "\n";
      return;
   }

   # --- A missing hit file is a legitimate outcome: nothing matched
   if ( ! -e $hitfile ){
      warn "${subname}__W> Found no hit file ($hitcode): $hitfile\n";
      warn "${subname}__W>    Possibly because no hits were found\n";
      return 1 ;
   }

   my @hitlines = CountHits( $hitfile );
   my $hitcnt = scalar(@hitlines);
   warn "${subname}__M> Number of hits to process ($hitcode): $hitcnt\n";

   # --- Turn the hits into MP alignments
   my $align_count;
   ($align_count, $outhits) = Hits2Ali($seqid, \@hitlines, $mode, $outhits);
   unless ( defined($align_count) ){
      warn "${subname}__E> Could not convert hits ($hitcode) to alignments\n";
      return;
   }

   return $outhits;
}



sub GetHits002 {

   # Search for hits with the PSI-BLAST profile of a sequence using
   # src/HitsPrfPrf.pl (profile read in PIR format) and pass the hit lines
   # to Hits2Ali.
   #
   # Arguments (exactly 6, positional):
   #   $seqid      - sequence identifier (also used as --target_code)
   #   $natpdb     - native PDB code, benchmarking only (may be undef)
   #   $natchn     - native chain, only used when $natpdb is defined
   #   $outhits    - handed to Hits2Ali and replaced by what it returns
   #   $evaluehits - value for --e_value
   #   $scrstat    - value for --set_score_statistics
   #
   # Returns nothing on failure, 1 when no hit file was produced, and
   # otherwise the second value returned by Hits2Ali.
   #
   # NOTE(review): the custom tag says (0020) while the output file names
   # use "020"; left unchanged because other code may depend on the names.

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 6;
   my $mode = "PSI-BLAST Prf-Prf (0020)";

   unless ( scalar(@_) == $nargs ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   # --- Reassign input arguments
   my ($seqid, $natpdb, $natchn, $outhits, $evaluehits, $scrstat) = @_;

   # --- Get the current directory (used in diagnostics only)
   my $currdir = cwd();

   # --- Check if profile exists
   my $seqdir = SeqDirMP( $seqid );
   my $prfnam = PsiBlastPrfFileMP( $seqid );
   unless ( -e $prfnam ){
      warn "${subname}__E> Profile file not found: $prfnam\n";
      return;
   }

   # --- Copy the profile file.
   #     The message used to say "sequence file", which pointed at the
   #     wrong input when diagnosing a failed copy.
   my $prflocal = basename( $prfnam );
   unless ( CopyFile($prfnam, $prflocal) ){
      warn "${subname}__E> Could not copy profile file into current directory\n";
      warn "${subname}__E>    Source File: $prfnam\n";
      warn "${subname}__E>    Target Dir : $currdir\n";
      return;
   }

   # --- Copy the template profile database over (fatal if unavailable)
   my ($xprflist, $xprfpssmdb) = CopyXPRFDB()
      or die "${subname}__E> Failed copying essential database\n";

   # --- Create the options matrix for HITS_PP
   my $hitfile = "${seqid}-prfprf-020.hits";
   my $alidir  = "${seqid}-prfprf-020-ali";
   my $alibase = "${seqid}-prfprf-020";
   my $sumfile = "${seqid}-prfprf-020.sum";
   my %hits_pp = (
                 "--profile_list"             => $xprflist,
                 "--profile_pssmdb"           => $xprfpssmdb,
                 "--target_profile"           => $prflocal,
                 "--profile_format"           => 'PIR',
                 "--set_score_statistics"     => $scrstat,
                 "--summary_file"             => $sumfile,
                 "--alignment_basename"       => $alibase,
                 "--output_alidir"            => $alidir,
                 "--e_value"                  => $evaluehits,
                 "--target_code"              => $seqid,
                 "--hitfile_name"             => $hitfile,
                 "--custom_tag"               => $mode,
                 );

   # -- Special for benchmarking
   if ( defined($natpdb) ){
      $hits_pp{"--native_pdb"}  = $natpdb;
      $hits_pp{"--native_chn"}  = $natchn;
      $hits_pp{"--native_type"} = 'sequence';
   }

   # --- Call HitsPrfPrf.pl for calculating the profile/profile alignments.
   #     The hash flattens into option/value pairs; list-form system()
   #     bypasses the shell.
   my @command = (GetModPipeScript("src/HitsPrfPrf.pl"));
   push @command, %hits_pp;

   # --- Run command
   my $result = system(@command);

   # --- Check output (any non-zero status counts as failure)
   if ( $result ){
      warn "${subname}__E> Failed to calculate profile/profile hits: $seqid\n";
      warn "${subname}__E> " . join(" ", @command) . "\n";
      return;
   }

   # --- Count hits in hitfile
   my @hitlines = ();
   if ( -e $hitfile ){
      @hitlines = CountHits( $hitfile );
      my $hitcnt = scalar(@hitlines);
      warn "${subname}__M> Number of Prf-Prf hits to process: $hitcnt\n";
   } else {
        # --- Stop (legit) if there is nothing to process
        #     No hits against template database
        warn "${subname}__W> Found no hit file: $hitfile\n";
        warn "${subname}__W>    Possibly because no hits were found\n";
        return 1 ;
     }

   # --- Convert hits to MP alignments
   (my $align_count,$outhits)= Hits2Ali($seqid, \@hitlines, $mode, $outhits);
   if (! defined($align_count)) {
      warn "${subname}__E> Could not convert hits to alignments\n";
      return;
   }

   # --- Return normalcy
   return $outhits;
}


# Take generated models from Ali2Mod, and add additional scores
# (MPQS and rating)
sub RateModels {

   # Re-read the models file of a sequence, attach a rating string and a
   # quality score (MPQS) to every model, and write the result to a new
   # file in the current directory.
   #
   # Arguments (exactly 2, positional):
   #   $seqid       - sequence identifier
   #   $tsvmod_flag - forwarded unchanged to GetModelRating
   #
   # Returns the name of the file written (the local models file name with
   # everything from the first dot removed), or nothing on failure.

   my $subname = GetSubrName();

   # --- Refuse to run with the wrong number of arguments
   if ( @_ != 2 ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   my ($seqid, $tsvmod_flag) = @_;

   # --- Remember where we are (only used for diagnostics)
   my $currdir = cwd();

   # --- The model data file must already exist in the repository
   my $seqdir = SeqDirMP( $seqid );
   my $modnam = ModFileMP( $seqid );
   if ( ! -e $modnam ){
      warn "${subname}__E> Model data file not found: $modnam\n";
      return;
   }

   # --- Work on a local copy of the model data file
   my $modlocal = basename( $modnam );
   if ( ! CopyFile($modnam, $modlocal) ){
      warn "${subname}__E> Could not copy model data file into current directory\n";
      warn "${subname}__E>    Source File: $modnam\n";
      warn "${subname}__E>    Target Dir : $currdir\n";
      return;
   }

   # --- Load the existing model records
   my $fhmod = OpenFile( $modlocal );
   my $models = ReadModelsFile($fhmod);
   close ($fhmod);

   # --- Store the individual rating and the quality score on each model;
   #     the cumulative rating is computed but not kept
   foreach my $model ( @{$models} ){
      my ($indv_rating, undef, $mpqs) = GetModelRating($model,$tsvmod_flag);

      $model->rating($indv_rating);
      $model->score->quality($mpqs);
   }

   # --- Write the updated records to the extension-less file name
   my ($modnew) = fileparse($modlocal, '\..*');
   my $fhmodnew = OpenNewFile( $modnew );
   WriteModelsFile($models, $fhmodnew);
   close ($fhmodnew);

   return $modnew;
}

sub GetModelRating {

   # Rate one model against a fixed set of quality criteria.
   #
   # Arguments:
   #   $model       - model object; read through its sequence, region,
   #                  alignment, score and highest_sequence_identity
   #                  accessors
   #   $tsvmod_flag - when equal to "OFF" the TSVMod criterion is skipped
   #                  and the TSVMod score is never read
   #
   # Returns a three-element list:
   #   $indv_rating - nine-character string of 0/1 flags, one per criterion,
   #                  in this order: coverage >= 80%, chi-squared <= 0.2,
   #                  e-value <= 0.0001, gaps <= 20%, sequence identity
   #                  >= 40, GA341 >= 0.7, compactness <= 1.0,
   #                  normalized DOPE <= -2.0, TSVMod no35 >= 0.4
   #   $cumm_rating - number of criteria that passed
   #   $mpqs        - ModPipe quality score (see formula below)
   #
   # A target length that is undefined or not positive would make the
   # coverage division fatal; in that case coverage is treated as 0 and a
   # warning is emitted, so one bad record cannot abort a whole rating run.

   my ($model, $tsvmod_flag) = @_;

   my $tgt_length  = $model->sequence->length;
   my $region      = $model->region;
   my $tgt_start   = $region->[0];
   my $tgt_stop    = $region->[1];
   my $alignment   = $model->alignment;
   my $chi2low     = $alignment->score_chi_squared;
   my $ksstat      = $alignment->score_ks;
   my $evalue      = $alignment->evalue;
   my $perc_gaps   = $alignment->gap_percentage;
   my $score       = $model->score;
   my $gascore     = $score->ga341->total;
   my $compactness = $score->ga341->compactness;
   my $dopezscore  = $score->normalized_dope;
   my $hseqid      = $model->highest_sequence_identity;

   # -- The TSVMod score is only available when TSVMod was run
   my $use_tsvmod = ( $tsvmod_flag ne "OFF" );
   my $tsvmod_no35;
   if ( $use_tsvmod ){
      $tsvmod_no35 = $score->tsvmod->predicted_no35;
   }

   # -- Coverage of the target by the modelled region, guarded against a
   #    missing or zero target length
   my $aligned_len  = $tgt_stop - $tgt_start + 1;
   my $coverage_pct = 0;
   my $coverage_frc = 0;
   if ( defined($tgt_length) && $tgt_length > 0 ){
      $coverage_pct = 100*$aligned_len/$tgt_length;
      $coverage_frc = $aligned_len/$tgt_length;
   } else {
      warn "GetModelRating__W> Target length is missing or zero; "
         . "coverage treated as 0\n";
   }

   # -- One pass/fail flag per criterion; the position in this list is the
   #    position of the flag in the rating string
   my @passed = (
      ( $coverage_pct >= 80 ),
      ( $chi2low      <= 0.2 ),
      ( $evalue       <= 0.0001 ),
      ( $perc_gaps    <= 20 ),
      ( $hseqid       >= 40 ),
      ( $gascore      >= 0.7 ),
      ( $compactness  <= 1.0 ),
      ( $dopezscore   <= -2.0 ),
      ( $use_tsvmod && $tsvmod_no35 >= 0.4 ),
   );

   my $indv_rating = "000000000";
   my $cumm_rating = 0;
   for my $pos ( 0 .. $#passed ){
      next unless $passed[$pos];
      $cumm_rating++;
      substr($indv_rating, $pos, 1) = 1;
   }

   # -- ModPipe Quality score: rewards coverage, identity and GA341;
   #    penalises gaps, the KS statistic and a poor (high) DOPE z-score
   my $mpqs = $coverage_frc +
              ($hseqid/100) +
              ($gascore/10) -
              ($perc_gaps/100) -
              $ksstat -
              ($dopezscore/10);

   return $indv_rating, $cumm_rating, $mpqs;
}


sub GetHits020 {

   # Search the template sequence database with the PSI-BLAST profile of a
   # sequence using src/HitsPrfSeq.py (profile read in PIR format) and pass
   # the hit lines to Hits2Ali.
   #
   # Arguments (exactly 6, positional):
   #   $seqid         - sequence identifier
   #   $natpdb        - native PDB code, benchmarking only (may be undef)
   #   $natchn        - native chain, only used when $natpdb is defined
   #   $outhits       - handed to Hits2Ali and replaced by what it returns
   #   $evaluehits    - value for --e_value
   #   $template_fast - value for --template_fast
   #
   # Returns nothing on failure, 1 when no hit file was produced, and
   # otherwise the second value returned by Hits2Ali.

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 6;

   unless ( scalar(@_) == $nargs ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   # --- Reassign input arguments
   my ($seqid, $natpdb, $natchn, $outhits, $evaluehits,$template_fast) = @_;

   # --- Get the current directory (used in diagnostics only)
   my $currdir = cwd();
   my $mode = "PSI-BLAST Prf-Seq (0200)";

   # --- Check if psi-blast generated profile exists.
   #     -s also rejects an empty file, so the message says so.
   my $seqdir = SeqDirMP( $seqid );
   my $prfnam = PsiBlastPrfFileMP( $seqid );
   unless ( -s $prfnam ){
      warn "${subname}__E> Profile file not found or empty: $prfnam\n";
      return;
   }

   # --- Copy the profile file.
   #     The message used to say "sequence file", which pointed at the
   #     wrong input when diagnosing a failed copy.
   my $prflocal = basename( $prfnam );
   unless ( CopyFile($prfnam, $prflocal) ){
      warn "${subname}__E> Could not copy profile file into current directory\n";
      warn "${subname}__E>    Source File: $prfnam\n";
      warn "${subname}__E>    Target Dir : $currdir\n";
      return;
   }

   # --- Copy the template sequence database over (fatal if unavailable)
   my $tmpseqdb = CopyTEMPLATESEQDB()
      or die "${subname}__E> Failed copying essential database\n";

   # --- Create the options matrix for HITS_PS
   my $hitfile = "${seqid}-prfseq-0200.hits";
   my $alibase = "${seqid}-prfseq-0200";
   my $alidir  = "${seqid}-prfseq-0200-ali";
   my %hits_ps = (
                 "--e_value"            => $evaluehits,
                 "--hitfile_name"       => $hitfile,
                 "--profile_format"     => "PIR",
                 "--output_alidir"      => $alidir,
                 "--template_fast"      => $template_fast,
                 "--alignment_basename" => $alibase,
                 "--custom_tag"         => $mode,
                 "--pdb_repository"     => $init::pdbrep,
                 );

   # -- Special for benchmarking
   if ( defined($natpdb) ){
      $hits_ps{"--native_pdb"}  = $natpdb;
      $hits_ps{"--native_chn"}  = $natchn;
      $hits_ps{"--native_type"} = 'sequence';
   }

   # --- Call HitsPrfSeq.py: options first, then the three positional
   #     arguments. List-form system() bypasses the shell.
   my @command = (GetModPipeScript("src/HitsPrfSeq.py"));
   push @command, %hits_ps, $seqid, $prflocal, $tmpseqdb;

   # --- Run command
   my $result = system(@command);

   # --- Check output (any non-zero status counts as failure)
   if ( $result ){
      warn "${subname}__E> Failed to calculate profile/sequence hits (mode 0200): $seqid\n";
      warn "${subname}__E> " . join(" ", @command) . "\n";
      return;
   }

   # --- Count hits in hitfile
   my @hitlines = ();
   if ( -e $hitfile ){
      @hitlines = CountHits( $hitfile );
      my $hitcnt = scalar(@hitlines);
      warn "${subname}__M> Number of Prf-Seq hits to process: $hitcnt\n";
   } else {
        # --- Stop (legit) if there is nothing to process
        #     No hits against template database
        warn "${subname}__W> Found no hit file: $hitfile\n";
        warn "${subname}__W>    Possibly because no hits were found\n";
        return 1 ;
     }

   # --- Convert hits to MP alignments
   (my $align_count,$outhits)= Hits2Ali($seqid, \@hitlines, $mode, $outhits);
   if (! defined($align_count)) {
      warn "${subname}__E> Could not convert hits to alignments\n";
      return;
   }

   # --- Return normalcy
   return $outhits;
}


sub GetPsiBlastProfile {

   # Make sure a PSI-BLAST profile exists in the repository for a sequence,
   # computing it with src/RunPsiBlast.pl when needed.
   #
   # Arguments (exactly 3, positional):
   #   $seqid      - sequence identifier
   #   $evaluehits - value for --psiblast_e
   #   $update     - "OFF" keeps an existing profile regardless of the
   #                 global $init::prfupdate setting
   #
   # Returns 1 when a profile is available afterwards (kept or freshly
   # computed and copied), nothing on failure.

   my $subname = GetSubrName();

   # --- Refuse to run with the wrong number of arguments
   if ( @_ != 3 ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   my ($seqid, $evaluehits, $update) = @_;

   # --- Remember where we are (only used for diagnostics)
   my $currdir = cwd();

   # --- The sequence file must exist in the repository
   my $seqdir = SeqDirMP( $seqid );
   my $seqnam = SeqFileMP( $seqid );
   if ( ! -e $seqnam ){
      warn "${subname}__E> Sequence file not found: $seqnam\n";
      return;
   }

   # --- Bring the sequence into the working directory
   my $seqlocal = basename( $seqnam );
   if ( ! CopyFile($seqnam, $seqlocal) ){
      warn "${subname}__E> Could not copy sequence file into current directory\n";
      warn "${subname}__E>    Source File: $seqnam\n";
      warn "${subname}__E>    Target Dir : $currdir\n";
      return;
   }

   # --- Decide what to do with a profile that may already be there
   my $prfnam   = PsiBlastPrfFileMP( $seqid );
   my $prflocal = basename( $prfnam );

   if ( -s $prfnam ){
      # Keeping wins over refreshing; any other PRFUPDATE value silently
      # falls through to a recalculation without deleting the old file.
      my $keep    = ($init::prfupdate =~ /^OFF$/i) || ($update eq "OFF");
      my $refresh = !$keep && $init::prfupdate =~ /^ON$/i;

      if ( $keep || $refresh ){
         warn "${subname}__M> Found profile for sequence\n";
         warn "${subname}__M>    Seqid  : $seqid\n";
         warn "${subname}__M>    Profile: $prfnam\n";
         warn $keep ? "${subname}__M> Will use the above profile\n"
                    : "${subname}__M> Will recalculate the profile\n";
         warn "${subname}__M>    PRFUPDATE: $init::prfupdate\n";
      }

      return 1 if $keep;

      if ( $refresh ){
         # --- Remove the stale profile from the repository; a failure is
         #     reported but does not stop the recalculation
         unlink ( $prfnam ) or
            warn "${subname}__E> Could not delete profile: $prfnam\n";
      }
   }
   else {
      warn "${subname}__M> No profile found for sequence\n";
      warn "${subname}__M>    Seqid  : $seqid\n";
      warn "${subname}__M>    Profile: $prfnam\n";
      warn "${subname}__M> Will calculate a new profile\n";
   }

   # --- From here on a profile has to be computed
   warn "${subname}__M> Calculating profile for sequence: $seqid\n";

   # --- The non-redundant sequence database is mandatory
   my $ncbiseqdblocal = CopyNCBISEQDB()
      or die "${subname}__E> Failed copying essential database\n";

   # --- Options for RunPsiBlast.pl
   my %psiblastopt = (
                "--query_sequence"        => $seqlocal,
                "--blast_database"        => $ncbiseqdblocal,
                "--output_filename"       => $prflocal,
                "--psiblast_e"            => $evaluehits,
                );

   # --- Assemble and run the command (list form, no shell involved)
   my @command = ( GetModPipeScript("src/RunPsiBlast.pl"), %psiblastopt );
   my $status  = system(@command);

   # --- Both a clean exit and an output file are required
   if ( $status || ! -e $prflocal ){
      warn "${subname}__E> Failed to calculate a profile for sequence: $seqid\n";
      warn "${subname}__E> " . join(" ", @command) . "\n";
      return;
   }

   # --- Store the new profile in the repository
   if ( ! CopyFile($prflocal, $prfnam) ){
      warn "${subname}__E> Failed copying profile to repository\n";
      warn "${subname}__E>    Source File: $prflocal\n";
      warn "${subname}__E>    Target Dir : $seqdir\n";
      return;
   }

   return 1;
}


sub Ali2Mod {

   # Store the model files listed in the MakeModels output in the model
   # repository of a sequence and append one Model record per stored file.
   #
   # Arguments (exactly 4, positional):
   #   $seqid    - sequence identifier
   #   $outlines - array ref of output lines, each "<details>#<model file>"
   #               where <details> is a '|'-separated list of fields
   #   $hit      - hit object the models were built from; its sequence,
   #               alignment, region, fold_assignment_method,
   #               highest_sequence_identity and templates are copied
   #   $models   - array ref that receives the new Model objects
   #
   # Returns the number of models stored (possibly 0), or nothing when the
   # argument count is wrong.

   my $subname = GetSubrName();

   # --- Refuse to run with the wrong number of arguments
   if ( @_ != 4 ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   my ($seqid, $outlines, $hit, $models) = @_;

   # --- Make sure the model repository directory exists
   my $moddir = ModDirMP( $seqid );
   MakeDirSys($moddir);

   my $modcnt = 0;
   foreach my $out ( @{$outlines} ){

      # --- Everything after the '#' is the model file name
      my ($details, $modfile) = split(/\#/, $out);

      if ( ! -e $modfile ){
         warn "${subname}__E> File not found: $modfile\n";
         next;
      }

      # --- The MD5 of the model names it in the repository
      my $modmd5   = ModMD5($modfile);
      my $finalmod = "${moddir}/${modmd5}.pdb.gz";

      # --- Compress first; an uncompressed model is never copied
      my $modfilegz = CompressFile($modfile);
      if ( ! $modfilegz ){
         warn "${subname}__E> Failed compressing model file: $modfile\n";
         warn "${subname}__E>      ... will not copy model\n";
         next;
      }

      # --- Copy the compressed model into the repository.
      #     NOTE(review): the message reports the uncompressed name while
      #     the file actually copied is the compressed one.
      if ( ! CopyFile( $modfilegz, $finalmod ) ){
         warn "${subname}__E> Could not copy final model:\n";
         warn "${subname}__E>    Source: $modfile\n";
         warn "${subname}__E>    Target: $finalmod\n";
         next;
      }

      # --- Drop the leading field, then name the remaining ones
      $details =~ s/^.*?\|//;
      my @flds = split(/\|/, $details);
      my ($hetatms, $waters, $objfunc, $dope, $dope_hr,
          $ga_total, $ga_compact, $ga_dist, $ga_surf, $ga_comb,
          $ga_zdist, $ga_zsurf, $ga_zcomb, $norm_dope) = @flds;

      my $ga341 = GA341->new(total => $ga_total, compactness => $ga_compact,
                             distance => $ga_dist, surface_area => $ga_surf,
                             combined => $ga_comb, z_distance => $ga_zdist,
                             z_surface_area => $ga_zsurf,
                             z_combined => $ga_zcomb);
      my $score = Score->new(objfunc => $objfunc, dope => $dope,
                             dope_hr => $dope_hr, ga341 => $ga341,
                             normalized_dope => $norm_dope);

      # --- Model metadata = hit data + scores parsed above
      my $model = Model->new(sequence => $hit->sequence,
                             alignment => $hit->alignment,
                             region => $hit->region,
                             fold_assignment_method =>
                                       $hit->fold_assignment_method,
                             highest_sequence_identity =>
                                       $hit->highest_sequence_identity,
                             id => $modmd5, templates => $hit->templates,
                             hetatms => $hetatms, waters => $waters,
                             score => $score);
      push @{$models}, $model;
      $modcnt++;
   }

   return $modcnt;
}



sub GetModels {

   # Build models for every selected alignment of a sequence by running
   # src/MakeModels.pl, collect them through Ali2Mod, and write all model
   # records to the supplied file handle.
   #
   # Arguments (exactly 2, positional):
   #   $seqid  - sequence identifier
   #   $fh_mod - handle passed to WriteModelsFile for the model records
   #
   # Returns 1 once the records have been written (alignments that fail
   # are skipped with a message), or nothing when the selection file is
   # missing or cannot be copied.

   my $subname = GetSubrName();

   # --- Refuse to run with the wrong number of arguments
   if ( @_ != 2 ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   my ($seqid, $fh_mod) = @_;

   # --- Remember where we are (only used for diagnostics)
   my $currdir = cwd();

   # --- The selection file must exist in the repository
   my $seqdir = SeqDirMP( $seqid );
   my $selnam = SelFileMP( $seqid );
   if ( ! -e $selnam ){
      warn "${subname}__E> Sel file not found: $selnam\n";
      return;
   }

   # --- Work on a local copy of the selection file
   my $sellocal = basename( $selnam );
   if ( ! CopyFile($selnam, $sellocal) ){
      warn "${subname}__E> Could not copy sel file into current directory\n";
      warn "${subname}__E>    Source File: $selnam\n";
      warn "${subname}__E>    Target Dir : $currdir\n";
      return;
   }

   # --- Load the selected hits
   my $fh_sel = OpenFile($sellocal);
   my $hits = ReadHitsFile($fh_sel);
   close ($fh_sel);
   my $hitcnt = scalar(@{$hits});
   warn "${subname}__M> Number of alignments to process: $hitcnt\n";

   # --- One modelling run per selected alignment
   my @models;
   foreach my $hit ( @{$hits} ){

      my $aliid = $hit->alignment->id;

      # --- Fetch the alignment file; skip this hit if that fails
      my $alilocal = CopyAlignmentMP( $seqid, $aliid );
      if ( ! $alilocal ){
         warn "${subname}__E> Failed copying alignment: $aliid\n";
         next;
      }

      # --- Options for MakeModels.pl
      my $outfile = "${aliid}-models.out";
      my $outmdir = "${aliid}-models";
      my %mmodels = (
                 "--alignment_file"   => $alilocal,
                 "--number_of_models" => $init::nummodels,
                 "--target_code"      => $seqid,
                 "--outfile_name"     => $outfile,
                 "--select_model_by"  => $init::selmodby,
                 "--return"           => $init::retmodels,
                 "--include_hetatm"   => $init::hetatoms,
                 "--include_waters"   => $init::waters,
                 "--output_moddir"    => $outmdir,
                 "--model_basename"   => "${aliid}-model",
                 "--pdb_repository"   => $init::pdbrep,
                    );

      # --- Assemble and run the command (list form, no shell involved)
      my @command = ( GetModPipeScript("src/MakeModels.pl"), %mmodels );
      my $status  = system(@command);

      # --- Both a clean exit and an output file are required
      if ( $status || ! -e $outfile ){
         warn "${subname}__E> Failed to calculate models: $aliid\n";
         warn "${subname}__E> " . join(" ", @command) . "\n";
         next;
      }

      # --- Read the lines describing the models that were built
      my @outlines = CountHits( $outfile );
      my $outcnt = scalar(@outlines);
      warn "${subname}__M> Number of models calculated: $outcnt\n";

      # --- Nothing to store for this alignment
      next if ( $outcnt < 1 );

      # --- Store the models and collect their records
      my $stored = Ali2Mod($seqid, \@outlines, $hit, \@models);
      unless ( defined($stored) ){
         warn "${subname}__E> Could not convert alignments to models\n";
         next;
      }
   }

   # --- Write every collected record in one go
   WriteModelsFile(\@models, $fh_mod);

   return 1;
}

sub GetHits001 {

   # Search for hits with the MODELLER-built profile of a sequence using
   # src/HitsPrfPrf.pl and pass the hit lines to Hits2Ali.
   #
   # Arguments (5 or 6, positional):
   #   $seqid      - sequence identifier (also used as --target_code)
   #   $natpdb     - native PDB code, benchmarking only (may be undef)
   #   $natchn     - native chain, only used when $natpdb is defined
   #   $outhits    - handed to Hits2Ali and replaced by what it returns
   #   $evaluehits - value for --e_value
   #   $scrstat    - optional; value for --set_score_statistics
   #
   # The argument check used to demand exactly 5 arguments although six
   # are unpacked, so $scrstat could never be supplied. Both counts are
   # accepted now; with 5 arguments the option value is the empty string,
   # which is what the undefined value turned into on the command line
   # before.
   #
   # Returns nothing on failure, 1 when no hit file was produced, and
   # otherwise the second value returned by Hits2Ali.

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments ($scrstat is optional)
   my $nargs_min = 5;
   my $nargs_max = 6;

   unless ( scalar(@_) >= $nargs_min && scalar(@_) <= $nargs_max ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   # --- Reassign input arguments
   my ($seqid, $natpdb, $natchn, $outhits, $evaluehits, $scrstat) = @_;
   $scrstat = '' unless defined($scrstat);

   # --- Get the current directory (used in diagnostics only)
   my $currdir = cwd();
   my $mode = "MODELLER Prf-Prf (0010)";

   # --- Check if profile exists
   my $seqdir = SeqDirMP( $seqid );
   my $prfnam = PrfFileMP( $seqid );
   unless ( -e $prfnam ){
      warn "${subname}__E> Profile file not found: $prfnam\n";
      return;
   }

   # --- Copy the profile file.
   #     The message used to say "sequence file", which pointed at the
   #     wrong input when diagnosing a failed copy.
   my $prflocal = basename( $prfnam );
   unless ( CopyFile($prfnam, $prflocal) ){
      warn "${subname}__E> Could not copy profile file into current directory\n";
      warn "${subname}__E>    Source File: $prfnam\n";
      warn "${subname}__E>    Target Dir : $currdir\n";
      return;
   }

   # --- Copy the template profile database over (fatal if unavailable)
   my ($xprflist, $xprfpssmdb) = CopyXPRFDB()
      or die "${subname}__E> Failed copying essential database\n";

   # --- Create the options matrix for HITS_PP
   my $hitfile = "${seqid}-prfprf.hits";
   my %hits_pp = (
                 "--profile_list"             => $xprflist,
                 "--profile_pssmdb"           => $xprfpssmdb,
                 "--target_profile"           => $prflocal,
                 "--set_score_statistics"     => $scrstat,
                 "--e_value"                  => $evaluehits,
                 "--target_code"              => $seqid,
                 "--hitfile_name"             => $hitfile,
                 "--custom_tag"               => $mode,
                 );

   # -- Special for benchmarking
   if ( defined($natpdb) ){
      $hits_pp{"--native_pdb"}  = $natpdb;
      $hits_pp{"--native_chn"}  = $natchn;
      $hits_pp{"--native_type"} = 'sequence';
   }

   # --- Call HitsPrfPrf.pl for calculating the profile/profile alignments.
   #     The hash flattens into option/value pairs; list-form system()
   #     bypasses the shell.
   my @command = (GetModPipeScript("src/HitsPrfPrf.pl"));
   push @command, %hits_pp;

   # --- Run command
   my $result = system(@command);

   # --- Check output (any non-zero status counts as failure)
   if ( $result ){
      warn "${subname}__E> Failed to calculate profile/profile hits: $seqid\n";
      warn "${subname}__E> " . join(" ", @command) . "\n";
      return;
   }

   # --- Count hits in hitfile
   my @hitlines = ();
   if ( -e $hitfile ){
      @hitlines = CountHits( $hitfile );
      my $hitcnt = scalar(@hitlines);
      warn "${subname}__M> Number of Prf-Prf hits to process: $hitcnt\n";
   } else {
        # --- Stop (legit) if there is nothing to process
        #     No hits against template database
        warn "${subname}__W> Found no hit file: $hitfile\n";
        warn "${subname}__W>    Possibly because no hits were found\n";
        return 1 ;
     }

   # --- Convert hits to MP alignments
   (my $align_count,$outhits)= Hits2Ali($seqid, \@hitlines, $mode, $outhits);
   if (! defined($align_count)) {
      warn "${subname}__E> Could not convert hits to alignments\n";
      return;
   }

   # --- Return normalcy
   return $outhits;
}



sub GetHits010 {

   # Search the template sequence database with the MODELLER-built profile
   # of a sequence using src/HitsPrfSeq.py and pass the hit lines to
   # Hits2Ali.
   #
   # Arguments (exactly 6, positional):
   #   $seqid         - sequence identifier
   #   $natpdb        - native PDB code, benchmarking only (may be undef)
   #   $natchn        - native chain, only used when $natpdb is defined
   #   $outhits       - handed to Hits2Ali and replaced by what it returns
   #   $evaluehits    - value for --e_value
   #   $template_fast - value for --template_fast
   #
   # Returns nothing on failure, 1 when no hit file was produced, and
   # otherwise the second value returned by Hits2Ali.

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 6;

   unless ( scalar(@_) == $nargs ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   # --- Reassign input arguments
   my ($seqid, $natpdb, $natchn, $outhits, $evaluehits, $template_fast) = @_;

   # --- Get the current directory (used in diagnostics only)
   my $currdir = cwd();
   my $mode = "MODELLER Prf-Seq (0100)";

   # --- Check if profile exists
   my $seqdir = SeqDirMP( $seqid );
   my $prfnam = PrfFileMP( $seqid );
   unless ( -e $prfnam ){
      warn "${subname}__E> Profile file not found: $prfnam\n";
      return;
   }

   # --- Copy the profile file.
   #     The message used to say "sequence file", which pointed at the
   #     wrong input when diagnosing a failed copy.
   my $prflocal = basename( $prfnam );
   unless ( CopyFile($prfnam, $prflocal) ){
      warn "${subname}__E> Could not copy profile file into current directory\n";
      warn "${subname}__E>    Source File: $prfnam\n";
      warn "${subname}__E>    Target Dir : $currdir\n";
      return;
   }

   # --- Copy the template sequence database over (fatal if unavailable)
   my $tmpseqdb = CopyTEMPLATESEQDB()
      or die "${subname}__E> Failed copying essential database\n";

   # --- Create the options matrix for HITS_PS
   my $hitfile = "${seqid}-prfseq.hits";
   my %hits_ps = (
                 "--e_value"          => $evaluehits,
                 "--hitfile_name"     => $hitfile,
                 "--template_fast"    => $template_fast,
                 "--custom_tag"       => $mode,
                 "--pdb_repository"   => $init::pdbrep,
                 );

   # -- Special for benchmarking
   if ( defined($natpdb) ){
      $hits_ps{"--native_pdb"}  = $natpdb;
      $hits_ps{"--native_chn"}  = $natchn;
      $hits_ps{"--native_type"} = 'sequence';
   }

   # --- Call HitsPrfSeq.py: options first, then the three positional
   #     arguments. List-form system() bypasses the shell.
   my @command = (GetModPipeScript("src/HitsPrfSeq.py"));
   push @command, %hits_ps, $seqid, $prflocal, $tmpseqdb;

   # --- Run command
   my $result = system(@command);

   # --- Check output (any non-zero status counts as failure)
   if ( $result ){
      warn "${subname}__E> Failed to calculate profile/sequence hits: $seqid\n";
      warn "${subname}__E> " . join(" ", @command) . "\n";
      return;
   }

   # --- Count hits in hitfile
   my @hitlines = ();
   if ( -e $hitfile ){
      @hitlines = CountHits( $hitfile );
      my $hitcnt = scalar(@hitlines);
      warn "${subname}__M> Number of Prf-Seq hits to process: $hitcnt\n";
   } else {
        # --- Stop (legit) if there is nothing to process
        #     No hits against template database
        warn "${subname}__W> Found no hit file: $hitfile\n";
        warn "${subname}__W>    Possibly because no hits were found\n";
        return 1 ;
     }

   # --- Convert hits to MP alignments
   (my $align_count,$outhits)= Hits2Ali($seqid, \@hitlines, $mode, $outhits);
   if (! defined($align_count)) {
      warn "${subname}__E> Could not convert hits to alignments\n";
      return;
   }

   # --- Return normalcy
   return $outhits;
}



sub Hits2Ali{

   # Turns raw hit lines into repository alignments plus Hit objects.
   #
   # Arguments (exactly 4):
   #   $seqid    - id of the target sequence
   #   $hitlines - reference to an array of hit lines, each of the form
   #               "<details>#<alignment file>"; <details> is a
   #               '|'-separated record
   #   $mode     - stored as fold_assignment_method of every Hit created
   #   $outhits  - array reference the new Hit objects are appended to
   #
   # Returns ($count, $outhits), where $count is the number of hits that
   # were converted. Returns nothing if the argument count is wrong.
   # A hit is skipped (with a warning) when its alignment file is missing,
   # when the aligned region is shorter than $init::minalnlen, or when the
   # alignment cannot be copied into the alignment directory.

   my $subname = GetSubrName();

   if ( scalar(@_) != 4 ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }
   my ($seqid, $hitlines, $mode, $outhits) = @_;

   # --- Alignment repository of this sequence; create it up front
   my $alidir = AliDirMP( $seqid );
   MakeDirSys($alidir);

   my $converted = 0;
   HIT: for my $line ( @{ $hitlines } ){

      # --- A hit line carries the record and the alignment file name
      my ($record, $alnpath) = split(/\#/, $line);
      if ( ! -e $alnpath ){
         warn "${subname}__E> File not found: $alnpath\n";
         next HIT;
      }

      # --- Record columns used below: 1 = sequence length,
      #     2/3 = begin/end of the aligned region, 5 = chi-squared score,
      #     6 = KS score, 7 = e-value, 8 = gap percentage,
      #     10 = highest sequence identity, 11 = template list
      my @col = split(/\|/, $record);

      # --- Enforce the minimum alignment length
      my $span = $col[3] - $col[2] + 1;
      if ( $span < $init::minalnlen ){
         warn "${subname}__W> Alignment in $alnpath is too short\n";
         warn "${subname}__W>   Length Cutoff    = $init::minalnlen\n";
         warn "${subname}__W>   Alignment Length = $span\n";
         next HIT;
      }

      # --- The alignment is stored under its own MD5
      my $digest = AliMD5($alnpath);
      my $stored = "${alidir}/${digest}.ali";
      if ( ! CopyFile( $alnpath, $stored ) ){
         warn "${subname}__E> Could not copy final alignment:\n";
         warn "${subname}__E>    Source: $alnpath\n";
         warn "${subname}__E>    Target: $stored\n";
         next HIT;
      }

      # --- Build the objects describing this hit
      my $seq = Sequence->new(id => $seqid, length => $col[1]);
      my $aln = Alignment->new(score_chi_squared => $col[5],
                               score_ks => $col[6], evalue => $col[7],
                               gap_percentage => $col[8], id => $digest);

      # --- Templates are ';'-separated; whatever follows the last
      #     separator is dropped
      my @entries = split(/;/, $col[11]);
      pop @entries;

      my @tmpl;
      foreach my $entry (@entries) {
         # Whitespace-separated: name, region start, region end, identity
         my @tok = split(' ', $entry);

         # A name longer than 4 characters is a 4-letter code followed
         # by a one-letter chain id
         my ($code, $chain) = ( length($tok[0]) > 4 )
                            ? ( substr($tok[0], 0, 4), substr($tok[0], 4, 1) )
                            : ( $tok[0], '' );

         push @tmpl, Template->new(code => $code, chain => $chain,
                                   region =>[parse_residue_number($tok[1]),
                                             parse_residue_number($tok[2])],
                                   sequence_identity => $tok[3]);
      }

      push @$outhits, Hit->new(sequence => $seq, alignment => $aln,
                               region => [parse_residue_number($col[2]),
                                          parse_residue_number($col[3])],
                               templates => \@tmpl,
                               fold_assignment_method => $mode,
                               highest_sequence_identity => $col[10]);

      $converted++;
   }

   return ($converted, $outhits);
}

sub GetHHSuiteProfile {

   # Calculates (or reuses) the HHSuite profile and .hhr result file of a
   # sequence by running src/RunHHSuite.pl.
   #
   # Arguments (exactly 7):
   #   $hhsuite    - HHSuite program, "hhblits" or "hhsearch"
   #   $seqid      - sequence id
   #   $prflocal   - input file of the search; only used when $profile is
   #                 not "SP" (for "SP" the sequence file is used instead)
   #   $evaluehits - e-value handed to RunHHSuite.pl
   #   $type       - handed to HHSuitePrfFileMP together with $hhsuite,
   #                 $seqid and $profile to derive database and file names
   #   $profile    - "SP": search with the sequence file, which is copied
   #                       into the current directory
   #                 "PP": search with $prflocal, which must be non-empty
   #   $update     - "OFF" reuses existing repository files regardless of
   #                 $init::prfupdate
   #
   # Returns:
   #   ($prfnam, $hhrnam)     - repository paths, when existing files are
   #                            reused
   #   ($prflocal, $hhrlocal) - file names in the current directory, after
   #                            a fresh calculation
   #   nothing                - on any failure

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 7;

   unless ( scalar(@_) == $nargs ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }

   my ($hhsuite, $seqid, $prflocal, $evaluehits, $type, $profile, $update) = @_;

   my $currdir = cwd();

   my $seqdir = SeqDirMP( $seqid );

   # $prflocal arrives as the name of the input file and is reused further
   # down for the name of the output profile, so keep the input name
   # separately.
   my $input_local = $prflocal;

   # Check the mode:
   if ($profile eq "SP") {
      # --- Check if sequence exists
      my $seqnam = SeqFileMP( $seqid );
      unless ( -e $seqnam ){
         warn "${subname}__E> Sequence file not found: $seqnam\n";
         return;
      }
      # --- Copy the sequence file
      $input_local = basename($seqnam);
      unless ( CopyFile($seqnam, $input_local) ){
         warn "${subname}__E> Could not copy sequence file into current directory\n";
         warn "${subname}__E>    Source File: $seqnam\n";
         warn "${subname}__E>    Target Dir : $currdir\n";
         return;
      }
   } elsif ($profile eq "PP") {
      unless ( -s $input_local) {
         warn "${subname}__E> Could not find local profile in current directory\n";
         warn "${subname}__E>    File: $input_local\n";
         warn "${subname}__E>    TODO bug when profile exists from call: $hhsuite, $seqid, $prflocal, $evaluehits, $type, $profile, $update \n";
         warn "${subname}__E>    Dir : $currdir\n";
         return;
      }
   }

   # --- Database plus repository names of the profile and the hhr file.
   #     From here on $prflocal / $hhrlocal name the OUTPUT files in the
   #     current directory.
   my ($database_file, $prfnam, $hhrnam) = HHSuitePrfFileMP( $hhsuite, $seqid, $profile, $type );
   $prflocal = basename( $prfnam );
   my $hhrlocal = basename( $hhrnam );

   if ( -s $prfnam && -s $hhrnam && (($init::prfupdate =~ /^OFF$/i) || ($update eq "OFF")))  {
      warn "${subname}__M> Found profile for sequence\n";
      warn "${subname}__M>    Seqid  : $seqid\n";
      warn "${subname}__M>    Profile: $prfnam\n";
      warn "${subname}__M> Will use the above profile\n";
      warn "${subname}__M>    PRFUPDATE: $init::prfupdate\n";
      return ($prfnam, $hhrnam);
   } elsif (( -s $prfnam || -s $hhrnam)  && $init::prfupdate =~ /^ON$/i ){
      warn "${subname}__M> Found profile for sequence\n";
      warn "${subname}__M>    Seqid  : $seqid\n";
      warn "${subname}__M>    Profile: $prfnam\n";
      warn "${subname}__M> Will recalculate the profile\n";
      warn "${subname}__M>    PRFUPDATE: $init::prfupdate\n";

      # --- Delete the stale files from the source. This branch is also
      #     entered when only one of the two files exists, so skip the
      #     missing one instead of reporting a bogus deletion error.
      foreach my $stale ( $prfnam, $hhrnam ){
         next unless ( -e $stale );
         unlink ( $stale ) or
            warn "${subname}__E> Could not delete profile: $stale\n";
      }
   } elsif ( ! -s $prfnam  || ! -s $hhrnam ){
      warn "${subname}__M> No profile found for sequence\n";
      warn "${subname}__M>    Seqid  : $seqid\n";
      warn "${subname}__M>    Profile: $prfnam\n";
      warn "${subname}__M> Will calculate a new profile\n";
   }

   # --- Once you are here, one way or other, you need to calculate
   #     the profile. Proceed as required.
   warn "${subname}__M> Calculating profile for sequence: $seqid\n";

   # --- Setup HHSuite options
   my %hhsuiteopt = ();
   if (($hhsuite eq "hhblits") || ($hhsuite eq "hhsearch")) {
      %hhsuiteopt = (
           "--hhsuite"    => $hhsuite,
           "--input"      => $input_local,
           "--database"   => $database_file,
           "--outa3m"     => $prflocal,
           "--outfile"    => $hhrlocal,
           "--evalue"     => $evaluehits,          # e-value
           "--iterations" => "2",                  # number of iterations
           "--cpu"        => "1",                  # number of CPUs
     );
   } else {
      warn "${subname}__M> Invalid HH Program: ${hhsuite} \n";
      return;
   }

   # --- Run RunHHSuite.pl; the option hash is flattened into
   #     "--name value" pairs on the command line
   my @command = (GetModPipeScript("src/RunHHSuite.pl"));
   push @command, @{[ %hhsuiteopt ]};

   my $result = system(@command);

   if ( $result || !-e $prflocal ) {
      warn "${subname}__E> Failed to calculate a profile for sequence: $seqid\n";
      warn "${subname}__E> " . join(" ", @command) . "\n";
      return;
   }

   # --- Copy the profile to repository
   unless ( CopyFile($prflocal, $prfnam) ){
      warn "${subname}__E> Failed copying profile to repository\n";
      warn "${subname}__E>    Source File: $prflocal\n";
      warn "${subname}__E>    Target Dir : $seqdir\n";
      return;
   }

   # --- Copy the hhr file to repository
   unless ( CopyFile($hhrlocal, $hhrnam) ){
      warn "${subname}__E> Failed copying hhrfile to repository\n";
      warn "${subname}__E>    Source File: $hhrlocal\n";
      warn "${subname}__E>    Target Dir : $seqdir\n";
      return;
   }
   # --- Return success
   return ($prflocal, $hhrlocal);
}

sub GetHitsHHBlits {

   # Template search with hhblits for one sequence.
   #
   # Arguments (5 expected):
   #   $seqid      - sequence id
   #   $outhits    - accepted for symmetry with the other GetHits*
   #                 routines; it is replaced by the result of
   #                 RunHHMakeModel, not extended
   #   $evaluehits - e-value handed to both hhblits runs
   #   $type       - "PP": first build a profile of the sequence against
   #                       uniprot20, then search pdb70 with that profile
   #                 "SP": search pdb70 directly with "<seqid>.fsa"
   #   $update     - handed to GetHHSuiteProfile ("OFF" reuses stored
   #                 files)
   #
   # Returns the first value returned by RunHHMakeModel, or nothing when
   # $type is unknown or one of the hhblits stages fails.
   #
   # NOTE(review): a wrong argument count is only reported, it does not
   # abort the routine (behaviour kept from the original implementation).

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 5;

   unless ( scalar(@_) == $nargs ){
      print "${subname}__D> Insufficient arguments\n";
   }
   my ($seqid, $outhits, $evaluehits, $type, $update) = @_;
   my ($prflocal, $hhrlocal);

   if ( defined($type) && $type eq "PP" ) {

      # Run sequence against uniprot20
      ($prflocal, $hhrlocal) = GetHHSuiteProfile ('hhblits', $seqid, $seqid, $evaluehits,
                                                  'uniprot20', 'SP', $update);

      # Without a profile the pdb70 search below cannot succeed
      unless ( defined($prflocal) ){
         warn "${subname}__E> Failed to calculate uniprot20 profile for sequence: $seqid\n";
         return;
      }
   } elsif ( defined($type) && $type eq "SP" ) {
      $prflocal = "${seqid}.fsa";
   } else {
      # Any other value left the input undefined, so the search below
      # could never succeed
      warn "${subname}__E> Unknown HHBlits search type: "
         . ( defined($type) ? $type : '' ) . "\n";
      return;
   }

   # run resulting profile against pdb70
   ($prflocal, $hhrlocal) = GetHHSuiteProfile ('hhblits', $seqid, $prflocal, $evaluehits,
                                                  'pdb70', 'PP', $update);

   # Do not hand an undefined hhr file to RunHHMakeModel
   unless ( defined($hhrlocal) ){
      warn "${subname}__E> Failed to calculate pdb70 hits for sequence: $seqid\n";
      return;
   }

   # format alignment files for modeller
   ($outhits) = RunHHMakeModel ($seqid, $evaluehits, $hhrlocal, "HHBlits${type}", $init::pdbrep);

   return ($outhits);

}

sub GetHitsHHSearch {

   # Template search with hhsearch for one sequence.
   #
   # Arguments (5 expected):
   #   $seqid      - sequence id
   #   $outhits    - accepted for symmetry with the other GetHits*
   #                 routines; it is replaced by the result of
   #                 RunHHMakeModel, not extended
   #   $evaluehits - e-value handed to both HHSuite runs
   #   $type       - "PP": first build a profile of the sequence with
   #                       hhblits against uniprot20, then search pdb70
   #                       with that profile using hhsearch
   #                 "SP": search pdb70 directly with "<seqid>.fsa"
   #   $update     - handed to GetHHSuiteProfile ("OFF" reuses stored
   #                 files)
   #
   # Returns the first value returned by RunHHMakeModel, or nothing when
   # $type is unknown or one of the HHSuite stages fails.
   #
   # NOTE(review): a wrong argument count is only reported, it does not
   # abort the routine (behaviour kept from the original implementation).

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 5;

   unless ( scalar(@_) == $nargs ){
      print "${subname}__D> Insufficient arguments\n";
   }
   my ($seqid, $outhits, $evaluehits, $type, $update) = @_;
   my ($prflocal, $hhrlocal);

   if ( defined($type) && $type eq "PP" ) {

      # Run sequence against uniprot20 (the profile is built with hhblits)
      ($prflocal, $hhrlocal) = GetHHSuiteProfile ('hhblits', $seqid, $seqid, $evaluehits,
                                                  'uniprot20', 'SP', $update);

      # Without a profile the pdb70 search below cannot succeed
      unless ( defined($prflocal) ){
         warn "${subname}__E> Failed to calculate uniprot20 profile for sequence: $seqid\n";
         return;
      }
   } elsif ( defined($type) && $type eq "SP" ) {
      $prflocal = "${seqid}.fsa";
   } else {
      # Any other value left the input undefined, so the search below
      # could never succeed
      warn "${subname}__E> Unknown HHSearch search type: "
         . ( defined($type) ? $type : '' ) . "\n";
      return;
   }

   # run resulting profile against pdb70
   ($prflocal, $hhrlocal) = GetHHSuiteProfile ('hhsearch', $seqid, $prflocal, $evaluehits,
                                                  'pdb70', 'PP', $update);

   # Do not hand an undefined hhr file to RunHHMakeModel
   unless ( defined($hhrlocal) ){
      warn "${subname}__E> Failed to calculate pdb70 hits for sequence: $seqid\n";
      return;
   }

   # format alignment files for modeller
   ($outhits) = RunHHMakeModel ($seqid, $evaluehits, $hhrlocal, "HHSearch${type}", $init::pdbrep);

   return ($outhits);

}

sub GetHits100 {

   # Sequence-sequence template search: runs src/HitsSeqSeq.py for one
   # sequence and converts the reported hits into alignments.
   #
   # Arguments (exactly 6):
   #   $seqid         - sequence id
   #   $natpdb        - native PDB for benchmarking; when defined, the
   #                    --native_* options are added to the command
   #   $natchn        - chain of the native PDB
   #   $outhits       - array reference handed to Hits2Ali
   #   $evaluehits    - value of --e_value
   #   $template_fast - value of --template_fast
   #
   # Returns:
   #   the hit list returned by Hits2Ali on success,
   #   1 when the script left no hit file (treated as "no hits"),
   #   nothing on failure.
   # Dies when the template sequence database cannot be copied.

   my $subname = GetSubrName();

   if ( scalar(@_) != 6 ){
      print "${subname}__D> Insufficient arguments\n";
      return;
   }
   my ($seqid, $natpdb, $natchn, $outhits, $evaluehits, $template_fast) = @_;

   my $currdir = cwd();
   my $mode    = "Seq-Seq (1000)";

   # --- The sequence has to be present in the repository
   my $seqdir = SeqDirMP( $seqid );     # value is not used further down
   my $seqnam = SeqFileMP( $seqid );
   if ( ! -e $seqnam ){
      warn "${subname}__E> Sequence file not found: $seqnam\n";
      return;
   }

   # --- Work on a copy in the current directory
   my $seqlocal = basename( $seqnam );
   if ( ! CopyFile($seqnam, $seqlocal) ){
      warn "${subname}__E> Could not copy sequence file into current directory\n";
      warn "${subname}__E>    Source File: $seqnam\n";
      warn "${subname}__E>    Target Dir : $currdir\n";
      return;
   }

   # --- The template sequence database is mandatory
   my $tmpseqdb = CopyTEMPLATESEQDB();
   die "${subname}__E> Failed copying essential database\n"
      unless ( $tmpseqdb );

   # --- Options of HitsSeqSeq.py
   my $hitfile = "${seqid}-seqseq.hits";
   my %option  = (
                 "--custom_tag"           => $mode,
                 "--pdb_repository"       => $init::pdbrep,
                 "--template_fast"        => $template_fast,
                 "--hitfile_name"         => $hitfile,
                 "--e_value"              => $evaluehits,
                 );

   # --- Extra options used for benchmarking only
   @option{ "--native_pdb", "--native_chn", "--native_type" }
      = ( $natpdb, $natchn, 'sequence' ) if ( defined($natpdb) );

   # --- Script, "--name value" pairs, then the positional arguments
   my @command = ( GetModPipeScript("src/HitsSeqSeq.py"),
                   %option, $seqid, $seqlocal, $tmpseqdb );

   # --- A non-zero status from system() means the script failed
   if ( system(@command) ){
      warn "${subname}__E> Failed to calculate sequence/sequence hits: $seqid\n";
      warn "${subname}__E> " . join(" ", @command) . "\n";
      return;
   }

   # --- A missing hit file is legitimate: nothing matched the template
   #     database, so there is nothing left to process
   unless ( -e $hitfile ){
      warn "${subname}__W> Found no hit file: $hitfile\n";
      warn "${subname}__W>    Possibly because no hits were found\n";
      return 1 ;
   }

   my @hitlines = CountHits( $hitfile );
   my $hitcnt   = scalar(@hitlines);
   warn "${subname}__M> Number of Seq-Seq hits to process: $hitcnt\n";

   # --- Convert hits to MP alignments
   my ($align_count, $converted) = Hits2Ali($seqid, \@hitlines, $mode, $outhits);
   unless ( defined($align_count) ){
      warn "${subname}__E> Could not convert hits to alignments\n";
      return;
   }

   return $converted;
}


sub GetProfile {

   # Runs src/BuildProfile.py to calculate the profile of a sequence and
   # stores it in the repository.
   #
   # Argument (exactly one):
   #   Either $seqid    - MD5 hash sequence ID (required)
   #   or hash reference:
   #       'seqid'       => MD5 hash sequence ID (required)
   #       'seqformat'   => sequence format      (Default: 'FASTA')
   #       'niter'       => iterations           (Default: $init::niter,
   #                                                       or 5)
   #       'nrdbtag'     => database tag used for the profile file name
   #                                             (Default: $init::nrdbtag)
   #       'prfupdate'   => whether to update    (Default: 'ON' or 'OFF'
   #                                                       per conf file)
   #       'evalue_hits' => e-value threshold; --evalue_threshold is only
   #                        passed to BuildProfile.py when this is given
   #
   #   NOTE(review): a 'seqdb' key is not read; the sequence database is
   #   always $init::nrseqdb.
   #
   # Returns 1 when a profile is available (reused or newly calculated),
   # nothing on failure. Dies when the sequence database cannot be copied.

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 1;

   unless ( scalar(@_) == $nargs ){
      print "${subname}__D> Incorrect number of arguments: ".scalar(@_)
           ." instead of ${nargs}\n";
      return;
   }

   my ($seqid, $evaluehits);

   # Defaults
   my $seqformat = 'FASTA';
   my $niter     = $init::niter||5;
   my $seqdb     = $init::nrseqdb;
   my $nrdbtag   = $init::nrdbtag;
   my $prfupdate = $init::prfupdate;

   # See if hash
   if ( ref( $_[0] ) eq 'HASH' ) {
      my $argref = $_[0];

      $seqid = $argref->{'seqid'};

      if ( $argref->{'seqformat'} ) {
         $seqformat = $argref->{'seqformat'};
      }

      if ( $argref->{'niter'} ) {
         $niter = $argref->{'niter'};
      }

      if ( $argref->{'nrdbtag'} ) {
         $nrdbtag = $argref->{'nrdbtag'};
      }

      if ( $argref->{'prfupdate'} ) {
         $prfupdate = $argref->{'prfupdate'};
      }
      if ( $argref->{'evalue_hits'} ) {
         $evaluehits = $argref->{'evalue_hits'};
      }
   } elsif ( ! ref( $_[0] ) ) {
      # Plain scalar form: the argument is the sequence id itself and
      # every other setting keeps its default
      $seqid = $_[0];
   }
   if (!$seqid) {
      warn "${subname}__E> seqid missing\n";
      return;
   }


   # --- Get the current directory
   my $currdir = cwd();

   # --- Check if sequence exists
   my $seqdir = SeqDirMP( $seqid );
   my $seqnam = SeqFileMP( $seqid );
   unless ( -e $seqnam ){
      warn "${subname}__E> Sequence file not found: $seqnam\n";
      return;
   }

   # --- Copy the sequence file
   my $seqlocal = basename( $seqnam );
   unless ( CopyFile($seqnam, $seqlocal) ){
      warn "${subname}__E> Could not copy sequence file into current directory\n";
      warn "${subname}__E>    Source File: $seqnam\n";
      warn "${subname}__E>    Target Dir : $currdir\n";
      return;
   }

   # --- Check if profile already exists.  Replace conf NRDBTAG with given tag
   my $prfnam   = PrfFileMP( $seqid, $nrdbtag );
   my $prflocal = basename( $prfnam );

   # --- Input of BuildProfile.py: the sequence, unless an existing
   #     profile is to be refined (see below)
   my $inpfile = $seqlocal;

   if ( -e $prfnam && $prfupdate =~ /^OFF$/i ){
      warn "${subname}__M> Found profile for sequence\n";
      warn "${subname}__M>    Seqid  : $seqid\n";
      warn "${subname}__M>    Profile: $prfnam\n";
      warn "${subname}__M> Will use the above profile\n";
      warn "${subname}__M>    PRFUPDATE: $prfupdate\n";
      return 1;
   } elsif ( -e $prfnam && $prfupdate =~ /^ON$/i ){
      warn "${subname}__M> Found profile for sequence\n";
      warn "${subname}__M>    Seqid  : $seqid\n";
      warn "${subname}__M>    Profile: $prfnam\n";
      warn "${subname}__M> Will recalculate the profile\n";
      warn "${subname}__M>    PRFUPDATE: $prfupdate\n";

      # --- If input format is a profile rather than a sequence, set to use
      #     the profile.  Otherwise, delete the profile from the source.
      if ( $seqformat eq 'PROFILE' ) {
         $inpfile = $prfnam;
      } else {
         unlink ( $prfnam ) or
            warn "${subname}__E> Could not delete profile: $prfnam\n";
      }
   } elsif ( ! -e $prfnam ){
      warn "${subname}__M> No profile found for sequence\n";
      warn "${subname}__M>    Seqid  : $seqid\n";
      warn "${subname}__M>    Profile: $prfnam\n";
      warn "${subname}__M> Will calculate a new profile\n";
   }

   # --- Once you are here, one way or other, you need to calculate
   #     the profile. Proceed as required.
   warn "${subname}__M> Calculating profile for sequence: $seqid\n";

   # --- Copy the specified sequence database over
   my $seqdblocal = CopySEQDB( $seqdb )
      or die "${subname}__E> Failed copying sequence database $seqdb\n";


   # --- Create the options matrix for BuildProfile.py
   my %buildpopt = (
                "-F"                        => $seqformat,
                "-i"                        => $niter,
                "-o"                        => $prflocal,
                );

   # --- Only pass the e-value threshold when one was supplied; an
   #     undefined value would reach the script as an empty argument
   if ( defined($evaluehits) ) {
      $buildpopt{"--evalue_threshold"} = $evaluehits;
   }

   # --- Call BuildProfile.py for calculating the profile
   my @command = (GetModPipeScript("src/BuildProfile.py"));
   push @command, @{[ %buildpopt ]}, $inpfile, $seqdblocal;

   # --- Run command
   my $result = system(@command);

   # --- Check output
   if ( $result || ! -e $prflocal ){
      warn "${subname}__E> Failed to calculate a profile for sequence: $seqid\n";
      warn "${subname}__E> " . join(" ", @command) . "\n";
      warn "${subname}__E> Results message: \n $result \n";
      return;
   }

   # --- Copy the profile to repository
   unless ( CopyFile($prflocal, $prfnam) ){
      warn "${subname}__E> Failed copying profile to repository\n";
      warn "${subname}__E>    Source File: $prflocal\n";
      warn "${subname}__E>    Target Dir : $seqdir\n";
      return;
   }

   # --- Return success
   return 1;
}

sub CleanHitsModes {

   # Normalises a list of hits modes.
   #   - Defaults to ("Seq-Prf") when the first entry is empty or missing
   #   - Expands the legacy numeric codes (1000, 1001, 1221, 1337) into
   #     the equivalent comma-separated mode names
   #   - Strips all whitespace and splits comma-separated entries
   #   - Hands the flat list to ReorderHits, which filters out unknown
   #     modes and orders the rest
   #
   # Arguments: list of hits mode strings
   # Returns  : list of hits modes as returned by ReorderHits

   my @hitsmode = @_;

   @hitsmode   = ("Seq-Prf")  unless ( $hitsmode[0] );

   # Legacy numeric codes and the mode lists they stand for
   my %legacy = (
      "1000" => "Seq-Seq",
      "1001" => "Seq-Seq,Seq-Prf",
      "1221" => "Seq-Seq,Seq-Prf,PSI-Blast-Prf-Seq,PSI-Blast-Prf-Prf",
      "1337" => "Seq-Seq,Seq-Prf,PSI-Blast-Prf-Seq,PSI-Blast-Prf-Prf,Prf-Seq,Prf-Prf,Max-PSSM-Seq-Prf,Max-Freq-Seq-Prf",
   );

   # Start from an empty list (an initial "[]" would put a stray array
   # reference in front of the modes)
   my @expanded = ();

   foreach my $hitsm (@hitsmode) {

      # first: take care of legacy hits modes
      my $modes = ( defined($hitsm) && exists $legacy{$hitsm} )
                ? $legacy{$hitsm}
                : $hitsm;
      next unless ( defined($modes) );

      # now: clean up hits modes and separate into array
      $modes =~ s/\s+//g;
      push @expanded, split(/\,/, $modes);
   }

   my @ordered = ReorderHits( @expanded );

   return @ordered;

}
sub ReorderHits {
   # --Reorders the input hitsmodes to proceed from less complex
   #   (sequence or precalculated profiles) to complex (new profiles)
   # --Also serves as an input filter to ensure that valid hitsmodes
   #   are entered: modes that are not in the list of available modes
   #   are dropped.
   #
   # Arguments: list of hits mode names
   # Returns  : the recognised modes, in the order of the list of
   #            available modes (repeated entries are kept)
   #
   # Side effect: sets $ENV{'HHLIB'} to the library location reported by
   # GetHHSuite('hhblits'), when there is one.

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Reassign input arguments
   my @hitsmode = @_;
   my (@reordered, $hhlib, $hhblits, $hhsearch);

   # Modes that are always available, simplest first
   my @available = ( "Seq-Seq","Seq-Prf","Prf-Seq","PSI-Blast-Prf-Seq","Prf-Prf",
                     "PSI-Blast-Prf-Prf","Max-PSSM-Seq-Prf", "Max-Freq-Seq-Prf");

   # --- HHBlits modes need HHLIB and the hhblits binary.
   #     Only export HHLIB when GetHHSuite reports a location, so that a
   #     value already present in the environment is not overwritten.
   ($hhlib, $hhblits) = GetHHSuite('hhblits');
   $ENV{'HHLIB'} = $hhlib if ( defined($hhlib) );
   if (defined ($ENV{'HHLIB'}) && defined($hhblits) && (-e $hhblits)) {
      push @available, "HHBlitsPP";
      push @available, "HHBlitsSP";
   } else {
     warn "${subname}__W> HHBlits hitsmodes not available.\n";
     warn "${subname}__W> HHLIB variable not set, or hhsuite not in ext directory\n";
   }

   # --- HHSearch modes need HHLIB and the hhsearch binary
   ($hhlib, $hhsearch) = GetHHSuite('hhsearch');
   if (defined ($ENV{'HHLIB'}) && defined($hhsearch) && (-e $hhsearch)) {
      push @available, "HHSearchPP";
      push @available, "HHSearchSP";
   } else {
     warn "${subname}__W> HHSearch hitsmodes not available.\n";
     warn "${subname}__W> HHLIB variable not set, or hhsuite not in ext directory .\n";
   }

   # --- Walk the available modes in order and pick up every requested
   #     mode that matches
   foreach my $available (@available) {
      push @reordered, grep { defined($_) && $_ eq $available } @hitsmode;
   }
   return @reordered;
}
