#!/usr/bin/python
# This file is part of ModPipe, Copyright 1997-2020 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

from optparse import OptionParser
import modpipe.version
import modpipe.sequence
import modeller

def get_options(argv=None):
    """Parse the command line of the profile-building script.

    argv, if given, is the list of arguments to parse; when it is None
    the arguments are taken from sys.argv[1:] (the optparse default).

    Exactly two positional arguments are required. If any other number
    is given, parser.error() reports the problem and exits the program.

    Returns a (sequence, database, opts) tuple: the two positional file
    names followed by the optparse values object holding every option.
    """
    parser = OptionParser(version=modpipe.version.message())

    parser.set_usage("""
 Build a Modeller profile from a target sequence and a database.

 Usage: %prog [options] sequence database

 sequence is a file containing the input, and database the sequence database.

 Run `%prog -h` for help information
""")

    # Help texts are built from adjacent single-line literals so that the
    # indentation of this source file never leaks into the --help output.
    parser.add_option("-D", "--database_format", dest="dbfmt", metavar="FORMAT",
                      type="choice", choices=('FASTA', 'PIR', 'BINARY'),
                      help="Format of the sequence database "
                           "(FASTA, PIR, BINARY, default BINARY)",
                      default="BINARY")
    parser.add_option("-F", "--sequence_format", dest="seqfmt",
                      metavar="FORMAT",
                      type="choice", choices=('FASTA', 'PIR', 'PROFILE'),
                      help="Format of the input sequence file "
                           "(FASTA, PIR, PROFILE, default PIR)",
                      default="PIR")
    parser.add_option("-i", "--num_iterations", dest="iterations",
                      metavar="INTEGER", type="int",
                      help="Number of iterations (default 5)", default=5)
    parser.add_option("-e", "--evalue_threshold", dest="evalue",
                      metavar="FLOAT", type="float",
                      help="E-Value threshold (default 0.1)", default=0.1)
    parser.add_option("-c", "--check_digression", dest="digression",
                      metavar='BOOL', type="choice", choices=('ON', 'OFF'),
                      help="Flag to check profile digression (default ON)",
                      default='ON')
    parser.add_option("-g", "--include_gaps", dest="gaps",
                      metavar='BOOL', type="choice", choices=('ON', 'OFF'),
                      help="Flag to include gaps in target (default OFF)",
                      default='OFF')
    parser.add_option("-s", "--score_file", dest="score_file",
                      metavar='FILE', type="string",
                      help="File to store the scores",
                      default=None)
    parser.add_option("-o", "--output", dest="outfile",
                      metavar='FILE', type="string",
                      help="Name for output file (default buildp.prf)",
                      default='buildp.prf')
    parser.add_option("-O", "--output_format", dest="outfmt",
                      metavar='FORMAT', type="choice",
                      choices=('FASTA', 'PIR', 'PROFILE'),
                      help="Format of the output profile "
                           "(FASTA, PIR, PROFILE, default PROFILE)",
                      default='PROFILE')
    parser.add_option("-m", "--matrix", dest="matrix",
                      metavar='FILE', type="string",
                      help="Substitution matrix (default "
                           "${LIB}/blosum62.sim.mat)",
                      default="${LIB}/blosum62.sim.mat")
    parser.add_option("-z", "--matrix_offset", dest="matrix_offset",
                      metavar='FLOAT', type="float",
                      help="Matrix offset (default -450.0)",
                      default=-450.0)
    parser.add_option("--gap_open", dest="gap_open",
                      metavar='FLOAT', type="float",
                      help="Gap open penalty (default -500.0)",
                      default=-500.0)
    parser.add_option("--gap_extension", dest="gap_extension",
                      metavar='FLOAT', type="float",
                      help="Gap extension penalty (default -50.0)",
                      default=-50.0)
    # The accepted values are scheme names, not booleans, so the
    # placeholder shown in the help output says SCHEME.
    parser.add_option("-w", "--pssm_weighting", dest="pssm_weighting",
                      metavar='SCHEME', type="choice", choices=('HH0', 'HH1'),
                      help="PSSM weighting scheme (HH0, HH1, default HH1)",
                      default='HH1')
    parser.add_option("-t", "--score_statistics", dest="score_statistics",
                      metavar='BOOL', type="choice", choices=('ON', 'OFF'),
                      help="Flag to calculate score statistics (default ON)",
                      default='ON')
    parser.add_option("-v", "--verbose", dest="verbose",
                      action="count", default=0,
                      help="Be verbose (repeat the option for extra output)")

    # Passing None makes optparse fall back to sys.argv[1:].
    opts, args = parser.parse_args(argv)

    if len(args) != 2:
        parser.error("You must specify the sequence and database file names")
    return args[0], args[1], opts

def set_modeller_verbosity(verbose):
    """Select the Modeller log level from the -v repeat count.

    A count of 0 calls modeller.log.none(), a count of 1 calls
    modeller.log.minimal(), and any other value calls
    modeller.log.verbose().
    """
    log = modeller.log
    if verbose == 0:
        apply_level = log.none
    elif verbose == 1:
        apply_level = log.minimal
    else:
        apply_level = log.verbose
    apply_level()

def build_profile(env, sequence, database, opts):
    """Build a profile for the input sequence and write it to disk.

    env is the Modeller environment handed to every Modeller object
    created here, sequence and database are input file names, and opts
    is the options object produced by get_options().

    The database is read first, then the input (either directly as a
    TEXT-format profile or as an alignment converted to a profile), then
    the profile is built against the database and written to
    opts.outfile in the format named by opts.outfmt.
    """
    # Load the sequence database in the requested format.
    seq_db = modeller.sequence_db(env)
    seq_db.read(seq_database_file=database,
                seq_database_format=opts.dbfmt,
                chains_list='ALL')

    # Obtain the starting profile: anything that is not already a
    # profile file is read as an alignment and converted.
    if opts.seqfmt != 'PROFILE':
        input_aln = modeller.alignment(env, file=sequence,
                                       alignment_format=opts.seqfmt)
        profile = input_aln.to_profile()
    else:
        profile = modeller.profile(env, file=sequence, profile_format='TEXT')

    # Translate the command-line options into profile.build() keywords;
    # the ON/OFF choice options become booleans.
    build_settings = dict(
        n_prof_iterations=opts.iterations,
        matrix_offset=opts.matrix_offset,
        rr_file=opts.matrix,
        gap_penalties_1d=(opts.gap_open, opts.gap_extension),
        max_aln_evalue=opts.evalue,
        output_score_file=opts.score_file,
        check_profile=(opts.digression == 'ON'),
        pssm_weights_type=opts.pssm_weighting,
        gaps_in_target=(opts.gaps == 'ON'),
        score_statistics=(opts.score_statistics == 'ON'),
    )
    profile.build(seq_db, **build_settings)

    # Save the result, converting back to an alignment unless the
    # profile format itself was requested.
    if opts.outfmt != 'PROFILE':
        output_aln = profile.to_alignment()
        output_aln.write(file=opts.outfile, alignment_format=opts.outfmt)
    else:
        profile.write(file=opts.outfile, profile_format='TEXT')

def main():
    """Script entry point: parse options, set logging, build the profile."""
    seq_file, db_file, options = get_options()
    # Configure logging before any other Modeller object is created.
    set_modeller_verbosity(options.verbose)
    build_profile(modeller.environ(), seq_file, db_file, options)


# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
