#!/usr/bin/python
# This file is part of ModPipe, Copyright 1997-2020 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import print_function
from modeller import *
from optparse import OptionParser
from shutil import rmtree, move
import modpipe.pdbutils
from modpipe.alnutils import *
from modpipe.pdbutils import *
from modpipe.modutils import *
import modpipe.version
import sys, os, subprocess, tempfile, re

def _calc_mpqs(model_len, target_len, seqid, pcgap, ga341, norm_dope):
    """Combine alignment- and model-based scores into one MPQS value.

    The score is the sum of the modeled fraction of the target
    (model_len / target_len), the sequence identity and the GA341 score,
    minus the gap percentage and the normalized DOPE score, each scaled
    by the constants below.

    All divisions are forced to floating point: this file does not import
    ``division`` from ``__future__``, so under Python 2 plain ``/`` on two
    integers would truncate (e.g. the coverage term to 0 or 1).
    """
    return (float(model_len) / target_len) + (seqid / 100.0) + \
           (ga341 / 10.0) - (pcgap / 100.0) - (norm_dope / 10.0)


def main():
    """Scan gap penalties for a target/template profile pair.

    For every combination of gap-open penalty (-400 down to -1150 in steps
    of 50) and gap-extension penalty (1%, 5% and 10% of the gap-open value)
    the target profile is aligned against the template profile, a single
    model is built from the resulting alignment, and an MPQS value is
    computed. One ``GAPTIMIZER>`` line is printed per combination and the
    best-scoring gap penalties are printed at the end.

    Alignments (``gaptimizer_NNNN.ali``) and models (``gaptimizer_NNNN.pdb``)
    are stored in the output directory. The process exits with status 1
    after printing the help text if a mandatory option is missing.
    """

    # Parse command line options
    parser = OptionParser(version=modpipe.version.message())

    # Set defaults
    parser.set_usage("""
 This script takes the profiles of a target-template pair and takes it
 through multiple cycles of alignment, model building and evaluation. The
 final alignment resulting in the best scored model is returned.

 Usage: %prog [options]

 Run `%prog -h` for help information
 """)

    parser.set_defaults(tprf='',
                        tseq='',
                        xprf='',
                        outalif='',
                        outdir='',
                        )

    # Populate options list
    parser.add_option("-t", "--target_profile",
                 dest="tprf",
                 type='string',
                 help="""Name of the file containing the target profile.
                      This is a mandatory option.""",
                 metavar="FILE")
    parser.add_option("-c", "--target_code",
                 dest="tseq",
                 type='string',
                 help="""The alignment code for the target sequence in
                      its profile. Also a mandatory argument.""",
                 metavar="CODE")
    parser.add_option("-x", "--template_profile",
                 dest="xprf",
                 type='string',
                 help="""Name of file containing the template profile.
                      The profile can have multiple structures in it and
                      all of them will be used for modeling.
                      This is a mandatory option.""",
                 metavar="FILE")
    pdb = modpipe.pdbutils.get_pdb_repository(include_local=True)
    parser.add_option("-d", "--pdb_repository",
                      dest="pdbrep", type='string',
                      help="""Directory containing PDB files. The default
                              value is """ + pdb, default=pdb,
                      metavar="DIR")
    parser.add_option("-o", "--output_alignment_file",
                 dest="outalif",
                 type='string',
                 help="""File to store the alignment output.""",
                 metavar="FILE")
    parser.add_option("-r", "--output_directory",
                 dest="outdir",
                 type='string',
                 help="""Directory to store program output. If not
                      specified a temporary directory will be
                      created.""",
                 metavar="DIR")

    # Check mandatory options
    opts, args = parser.parse_args()

    if not opts.tprf or not opts.tseq or not opts.xprf:
        parser.print_help()
        sys.exit(1)

    # -- Initialize some modeller stuff
    log.none()
    env = environ()
    env.io.atom_files_directory = opts.pdbrep

    # Pick the output directory: either the one requested (created on
    # demand, including any missing parents) or a fresh temporary directory
    # under the current working directory. The working directory itself is
    # not changed.
    if opts.outdir:
        if not os.path.isdir(opts.outdir):
            os.makedirs(opts.outdir)
        outdir = opts.outdir
    else:
        outdir = tempfile.mkdtemp(dir=os.getcwd())

    # Create a temporary file to be used as the profile list file; it holds
    # the absolute path of the template profile. The context manager makes
    # sure the handle is closed even if the write fails.
    (fd, name) = tempfile.mkstemp(dir=outdir)
    with os.fdopen(fd, "w") as f:
        f.write(os.path.abspath(opts.xprf))

    # Convert the template profile into a database
    env.make_pssmdb(profile_list_file = name,
                    matrix_offset = -450,
                    rr_file = '${LIB}/blosum62.sim.mat',
                    pssmdb_name = name + '.pssm',
                    profile_format = 'text',
                    pssm_weights_type = 'HH1')

    # Read in the database
    psm = pssmdb(env, pssmdb_name = name + '.pssm', pssmdb_format = 'text')

    # Read in the target profile
    prf = profile(env, file=opts.tprf, profile_format='text')

    # Alignment file read back after every scan (written to the current
    # working directory using the target code as base name)
    scan_alifile = opts.tseq + '_0001.ali'

    # Loop through gap values to create different alignments.
    # Start from -infinity so that the best combination is recorded even
    # when every MPQS turns out to be zero or negative.
    ref_gaps = ()
    ref_mpqs = float('-inf')
    idx = 0
    for ogp in range(-400, -1200, -50):
        for frac in [0.01, 0.05, 0.1]:
            egp = int(frac*ogp)
            idx += 1
            prf.scan(profile_list_file = name,
                     matrix_offset = -450,
                     ccmatrix_offset = -100,
                     rr_file = '${LIB}/blosum62.sim.mat',
                     gap_penalties_1d = (ogp, egp),
                     score_statistics = False,
                     output_alignments = True,
                     output_score_file = None,
                     profile_format = 'text',
                     aln_base_filename = opts.tseq,
                     pssm_weights_type = 'HH1',
                     summary_file = None)

            # Read in the alignment as a profile object
            ppaln = profile(env, file=scan_alifile, profile_format='text')

            # Fetch the sequence corresponding to the target
            target = get_alnseq_by_code(ppaln, opts.tseq)

            # Filter the profile object to retain only the
            # target and template sequences
            templates = get_alnseq_by_prottyp(ppaln, 'X')

            # Create an empty alignment object
            aln = alignment(env)

            # Extract the template sequences and add them to alignment
            for t in templates:
                s = get_alignment_positions(ppaln.positions, t)
                aln.append_sequence(''.join(s))

                # Specify some alignment parameters; the first four
                # characters of the code are used as the atom file and
                # the fifth is passed on together with them to look up
                # the PDB range
                p = len(aln) - 1
                aln[p].code = t.code
                aln[p].prottyp = 'structureX'
                aln[p].atom_file = t.code[0:4]

                # Get PDB range
                aln[p].range = get_PDB_range(env, aln, aln[p],
                                 t.code[0:4], t.code[4:5])

            # Extract the sequence from the target and add it to alignment
            s = get_alignment_positions(ppaln.positions, target)
            aln.append_sequence(''.join(s))

            # Specify some alignment parameters
            p = len(aln) - 1
            aln[p].code = target.code
            aln[p].prottyp = 'sequence'

            # Trim modeling alignment
            aln.edit(edit_align_codes='all', base_align_codes='all',
                     min_base_entries=2, overhang=0)

            # Write alignment to file and move it into the output
            # directory. An explicit destination file name is used (as for
            # the model below) so that a file left over from a previous run
            # is replaced instead of making shutil.move() raise.
            alifile = 'gaptimizer_' + str(idx).zfill(4) + '.ali'
            aln.write(file=alifile)
            move(alifile, os.path.join(outdir, alifile))

            # Calculate sequence identity & percentage gaps
            seqid = get_highest_sequence_identity(aln)

            pcgap = get_percent_gaps(aln)

            # Full length of target (assuming that the sequence
            # in the profile is unaltered)
            s = get_alignment_sequence(ppaln.positions, target)
            target_len = len(s)

            # Length of aligned part of the target (model)
            model_len = len(aln[-1].residues)

            # Call automodel for modeling + evaluation
            out = build_models(env, outdir + '/' + alifile,
                               number_of_models=1,
                               return_scores_for='best',
                               best_model_by='DOPE-HR score')
            best = out[0]

            # Copy the model file
            modfile = 'gaptimizer_' + str(idx).zfill(4) + '.pdb'
            move(best['name'], outdir + '/' + modfile)

            # Calculate MPQS
            mpqs = _calc_mpqs(model_len, target_len, seqid, pcgap,
                              best['GA341 score'],
                              best['Normalized DOPE score'])

            # Print gap values, sequence based quality scores,
            # structure based scores & MPQS
            print("GAPTIMIZER> %5d %5d %3d %s %5d %5d %5.0f %5.0f %6.2f %6.2f %6.2f" % \
                  (ogp, egp, best['num'], modfile, model_len, \
                   target_len, seqid, pcgap, best['GA341 score'], \
                   best['Normalized DOPE score'], mpqs))

            # Store the MPQS & gap values
            if mpqs > ref_mpqs:
                ref_mpqs = mpqs
                ref_gaps = (ogp, egp)

            # Clean up the per-iteration scan output so that the next
            # scan starts from a clean slate
            os.unlink(scan_alifile)
            del ppaln
            del aln

    # Report the best parameters found
    print("Best parameters are: %s %6.2f" % (repr(ref_gaps), ref_mpqs))

    # TODO: the following steps are not implemented yet:
    #   - recalculate the alignment once with the best parameters
    #   - use that alignment to build 10x models
    #   - return the best model by DOPE-HR
    #   - clean up the temporary directory


# Run the gap-penalty scan only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
