#!/usr/bin/perl
# This file is part of ModPipe, Copyright 1997-2014 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

use Getopt::Long;
use File::Basename;
use Cwd;
use strict;

# --- Load local modules
use PLLib::Utils;
use MPLib::MPInit;
use MPLib::MPUtils;
use MPLib::Version;
use MPLib::Binaries;

# --- Get command line options
# Option names are matched case-sensitively.
$Getopt::Long::ignorecase = 0;

# -- Options describing the SGE job file itself
my ( $unqfile, $logdir, $jobname, $modpipe, $disks, $nodes,
     $priority, $help, $sgefile );

# -- Options forwarded to WriteSGEMP for the ModPipe task
my ( $conffile, $exitstage, @hitsmode, $evaluehits,
     $cleanup);

# -- Values of --final_models_by (option may be repeated)
my @final_modby = ();

GetOptions (
           "unq_file=s"           =>      \$unqfile,
           "log_directory=s"      =>      \$logdir,
           "job_name=s"           =>      \$jobname,
           "modpipe=s"            =>      \$modpipe,
           "disk_requests=s"      =>      \$disks,
           "node_requests=s"      =>      \$nodes,
           "priority=i"           =>      \$priority,
           "sge_file=s"           =>      \$sgefile,
           "help"                 =>      \$help,
           "version"              => sub { VersionMessage() },
           "final_models_by=s"    =>      \@final_modby,
           "conf_file=s"          =>      \$conffile,
           "exit_stage=s"         =>      \$exitstage,
           "hits_mode=s"          =>      \@hitsmode,
           "evalue_hits=f"        =>      \$evaluehits,
           "clean_up=s"           =>      \$cleanup,
           );


# --- Check command line options
if ( $help ){
   usage();
   exit 0;
}

# --- Get Program name
my $subrname = GetSubrName();

# -- Check for mandatory options
unless ( $unqfile && $disks && $nodes ){
   warn "${subrname}__E> Missing mandatory options\n";
   die  "${subrname}__E>   Try --help for usage help\n";
}

# --- Check for unique file
die "${subrname}__E> Cannot proceed without input file with unique ids\n"
   unless ( $unqfile && -e $unqfile );

# -- Set default values for the SGE job
$modpipe = GetModPipeScript("main/ModPipe.pl") unless ( $modpipe );
$logdir   = 'sge-logs' unless ( $logdir );
$jobname  = 'ModPipe' unless ( $jobname );
# Test definedness, not truth: an explicit "--priority 0" must be honoured
# rather than silently replaced by the default.
$priority = -4 unless ( defined $priority );
$sgefile  = 'sge-modpipe.csh' unless ( $sgefile );

# -- Apply the default model-selection criterion BEFORE building the string,
#    otherwise the default never reaches WriteSGEMP.
push @final_modby, 'LONGEST_DOPE' unless ( @final_modby );

# Comma-joined list with all whitespace stripped
my $final_modby_str = join ",", @final_modby;
$final_modby_str =~ s/\s//g;

# --- Set default values for the ModPipe task
$exitstage  = 'NONE' unless ( $exitstage );
@hitsmode   = CleanHitsMode( @hitsmode );
$cleanup    = 'ON'   unless ( $cleanup );
# As with the priority, an explicit "--evalue_hits 0" is a user choice and
# must not be overwritten by the default.
$evaluehits = 1.0    unless ( defined $evaluehits );

# --- Make log directory
MakeDirSys($logdir);

# --- Read and process the file

# -- Get the sequence ids from the unq file (only the first returned value,
#    the reference to the id list, is needed here)
my ($ids) = ReadUNQ( $unqfile );
# NOTE(review): this is an informational message but carries the "__E>" tag;
# left unchanged in case log parsers depend on it -- confirm and consider "__M>".
printf "%s %8d\n", "${subrname}__E> No. of sequences added: ", scalar(@$ids);

# -- Open the SGE job file
my $sgefh = OpenNewFile($sgefile);

# -- Write SGE job file
#    The hits modes are passed as one comma-separated string built from the
#    cleaned @hitsmode list.
WriteSGEMP($sgefh, $logdir, $jobname, $modpipe, $disks, $nodes, $priority,
           $conffile, $exitstage, join(",", @hitsmode), $evaluehits, $cleanup,
           $final_modby_str, scalar(@$ids), $ids);

# --- Exit finally
exit 0;

# --- Usage
# Print the help text for this script to the currently selected output
# handle (STDOUT by default). Takes no arguments; the script name shown
# at the top of the message is taken from $0.
sub usage {
print <<EOF;

${0}:

This script will take a ModPipe generated *.unq file and create a SGE job
file that is ready for submission to the nodes. There are a few compulsory
options without which this script will not produce output.

Options:

      --version                 Report version number of this program.
      --help                    This help. Pipe it through 'more' if it
                                scrolls off the screen.
      --unq_file                Input file with unique sequence ids typically
                                the output of AddSeqMP. It will not proceed
                                without this option.
      --log_directory           Directory for storing the logs from SGE tasks.
                                Will be created if it does not exist. You can
                                use paths relative to the current directory.
      --job_name                Name for your job. Default: ModPipe
      --modpipe                 The location of the ModPipe.pl binary on the
                                cluster. Will default to the binary in this
                                ModPipe installation.
      --disk_requests           The names of the disk-complexes defined in SGE.
                                Specify as a string enclosed within double quotes.
                                Will not proceed without this option.
      --node_requests           The names of the node-complexes defined in SGE.
                                Specify as a string enclosed within double quotes.
                                Will not proceed without this option.
      --priority                Priority for running the tasks.
                                Default: -4
      --sge_file                Output filename to write the SGE job script.
                                Default: sge-modpipe.csh


MODPIPE Options:
  These are options that you would normally specify on the command-line of
  ModPipe.pl. These will form the basis for the SGE task included in the
  SGE job file.

      --conf_file               ModPipe configuration file. Cannot proceed 
                                without this option.
      --exit_stage              Choose the exit stage for the program. You can
                                quit after one of the following: PROFILE,
                                ALIGNMENTS, MODELS.
                                Default: NONE
      --hits_mode               Mode for calculating template hits.
                                Seq-Seq, Prf-Seq, Prf-Prf, etc. 
                                Can be given as separate options or
                                comma-separated. See ModPipe.pl -h for
                                list of allowed hits modes. 
      --evalue_hits             The E-value threshold to get hits against
                                template databases. This value controls hits
                                from all three searches.
                                Default: 1.0
      --clean_up                Flag to clean up the temporary directory
                                after all operations. Can be OFF or ON.
                                Default: ON
      --final_models_by         For local gathering (MPQS,DOPE,LONGEST_DOPE,etc.)
EOF
}

