import unittest
import os
import modeller
import modpipe.test
import modpipe.binaries
import subprocess

# Module-level setup, performed once at import time.
# Export HHLIB (the hhsuite 'lib/hh' location reported by modpipe.binaries)
# through os.environ, so that the subprocesses started in run_hhmakemodel()
# inherit it. NOTE(review): presumably the Perl script needs HHLIB to locate
# its hhsuite modules - confirm.
os.environ['HHLIB'] = modpipe.binaries.get_hhsuite('lib/hh')
# Location of the script under test; used as the command to execute in
# run_hhmakemodel().
hhmakemodel = modpipe.binaries.get_modpipe_script('src/hhmakemodel_modpipe.pl')

def write_hhr_header(fh):
    """Write the fixed header of a fake .hhr file to the file object `fh`.

    The header consists of the Query, Match_columns and No_of_seqs lines,
    followed by a single blank line.
    """
    header_lines = (
        "Query         514866acb765eb8370ffa874ee90340dMAADRGWI",
        "Match_columns 424",
        "No_of_seqs    4675 out of 5032",
        "",
    )
    # Terminate every line (including the trailing blank one) with a newline
    fh.write("\n".join(header_lines) + "\n")

def write_hhr_body(fh, prefix, template_name, query_seq, template_seq):
    """Write one hit (a single alignment block) of a fake .hhr file to `fh`.

    `prefix` is written on the line that introduces the hit (e.g. 'No 6').
    `template_name` is written both on the '>' line and as the label of the
    template row. `query_seq` and `template_seq` are placed in the query and
    template rows respectively. Everything else (scores, residue numbers,
    consensus, secondary structure and confidence rows) is fixed text.
    """
    hit_template = """\n%s
>%s
Probab=100.00  E-value=6.6e-91  Score=696.44  Aligned_cols=410  Identities=29%%  Similarity=0.467  Sum_probs=387.3

Q 514866acb765eb   14 %s   90 (424)
Q Consensus        14 ~~~~~~~~~~~~~~~~~~~~   90 (424)
                      +++|||++++++++++++++
T Consensus        43 ~~~~~~~~~~~~~~~~~~~~  122 (470)
T %s           43 %s  122 (470)
T ss_dssp             CHHHHHHHHHHHHHTHHHHH
T ss_pred             CchHHHHHHHHHHHHHHHhc
Confidence            36799999999999999999
"""
    # The template name appears twice: in the '>' line and in the 'T' row
    fields = (prefix, template_name, query_seq, template_name, template_seq)
    fh.write(hit_template % fields)

def write_hhr(fh, query_seq, template_seq):
    """Write a complete fake .hhr file to `fh`: the header plus one hit.

    The single hit is labeled 'No 6' and uses the template name '1abc_A';
    `query_seq` and `template_seq` are the aligned sequences for that hit.
    """
    write_hhr_header(fh)
    write_hhr_body(fh, prefix='No 6', template_name='1abc_A',
                   query_seq=query_seq, template_seq=template_seq)

class HHMakeModelTests(modpipe.test.TestCase):
    """Tests for the hhmakemodel_modpipe.pl script.

    Each test writes a template PDB file and a minimal fake HHsearch results
    file (test.hhr) into the current directory, runs the script on them, and
    checks the PIR alignment (test.ali) and AliResidues.txt that result.
    All temporary files are removed even if a test fails, so that one
    failure cannot leave stale files behind for the following tests.
    """

    # Modeller environment; created on first use and stored on the class so
    # that it is shared by every test
    _environ = None
    # Default sequence, used for both query and template unless overridden
    seq = 'QMEFLQAFHEILYSLKPLFM'

    def get_environ(self):
        """Return the shared Modeller environment, creating it if needed."""
        if self._environ is None:
            HHMakeModelTests._environ = e = modeller.environ()
            e.libs.topology.read('${LIB}/top_heav.lib')
            e.libs.parameters.read('${LIB}/par.lib')
        return self._environ

    def _remove_files(self, *paths):
        """Delete each of the given files, skipping any that do not exist."""
        for path in paths:
            if os.path.exists(path):
                os.unlink(path)

    def test_simple(self):
        """Check hhmakemodel with simple PDB"""
        e = self.get_environ()
        m = modeller.model(e)
        m.build_sequence(self.seq)
        m.chains[0].name = 'A'
        try:
            m.write(file='pdb1abc.ent')
            self.run_test(e)
        finally:
            self._remove_files('pdb1abc.ent')

    def test_mismatch_unaligned_start(self):
        """Test mismatch between template sequence and PDB"""
        # Due to a mismatch between the template sequence and the PDB,
        # hhmakemodel previously had trouble figuring out the correct number
        # for the first residue
        e = self.get_environ()
        m = modeller.model(e)
        m.build_sequence('EDLVLCVNGKYISQMKCC')
        m.chains[0].name = 'A'
        try:
            m.write(file='pdb1abc.ent')
            self.run_test(e, query_seq='ECDD-DQANCHSGT-GGTTV',
                          template_seq='DSPMDDLVLCVNGKYISQMK',
                          range=('2:A', '+15:'))
        finally:
            self._remove_files('pdb1abc.ent')

    def test_unk(self):
        """Test handling of PDB files containing UNK residues"""
        e = self.get_environ()
        m = modeller.model(e)
        m.build_sequence('EDLXVL')
        m.chains[0].name = 'A'
        try:
            m.write(file='pdb1abc.ent')
            self.run_test(e, query_seq='EDLXVL',
                          template_seq='EDLXVL',
                          range=('1:A', '+6:'))
        finally:
            self._remove_files('pdb1abc.ent')

    def test_mismatch_unaligned_start_dot(self):
        """Test mismatch between template sequence and PDB, with dots"""
        # The second template forces a large insertion in the query sequence
        # before it aligns with the unaligned-N-terminus template.
        # hhmakemodel uses '.' rather than '-' to represent this insertion.
        # Make sure the code to handle dangling N termini handles this case.
        e = self.get_environ()
        try:
            m = modeller.model(e)
            m.build_sequence('EDLVLCVNGKYISQMKCC')
            m.chains[0].name = 'A'
            m.write(file='pdb1abc.ent')
            m = modeller.model(e)
            m.build_sequence('CETFTKRRTMGYQDFDVVYTQFQVNGKYISQMKDGINDGDQSDE'
                             'LKQGKGFHCKSGVCIPSQYQC')
            m.chains[0].name = 'A'
            m.write(file='pdb1xyz.ent')
            # Close the .hhr file before the script is run, so that its
            # contents are guaranteed to be flushed to disk
            with open('test.hhr', 'w') as fh:
                write_hhr_header(fh)
                write_hhr_body(fh, 'No 1', '1abc_A', 'ECDD-DQANCHSGT-GGTTV',
                               'DSPMDDLVLCVNGKYISQMK')
                write_hhr_body(fh, 'No 2', '1xyz_A', 'CDDDQANCHSGT-GGTTV--',
                               'CQGKGFHCKSGVCIPSQY--')
            self.run_hhmakemodel('test.hhr', 'test.ali', ['-m', '1', '2'])
            a = modeller.alignment(e, file='test.ali')
            a[1].reread()
            a[2].reread()
            self.assertEqual(a[1].range[0], '2:A')
            self.assertEqual(a[1].range[1], '+15:')
            self.assertEqual(a[2].range[0], '1:A')
            self.assertEqual(a[2].range[1], '+63:')
        finally:
            # AliResidues.txt is removed too, if present (run_test expects
            # the script to write it), so it cannot leak into other tests
            self._remove_files('pdb1abc.ent', 'pdb1xyz.ent', 'test.hhr',
                               'test.ali', 'AliResidues.txt')

    def test_offset_start(self):
        """Check hhmakemodel with first PDB residue not at first aln pos"""
        e = self.get_environ()
        m = modeller.model(e)
        m.build_sequence(self.seq[10:])
        m.chains[0].name = 'A'
        try:
            m.write(file='pdb1abc.ent')
            self.run_test(e, range=('1:A', '+10:'))
        finally:
            self._remove_files('pdb1abc.ent')

    def test_no_pdb(self):
        """Check hhmakemodel with missing PDB file"""
        e = self.get_environ()
        self.run_test(e, query_only=True, has_pdb=False)

    def test_pdb_wrong_chain(self):
        """Check hhmakemodel with wrong PDB chain"""
        e = self.get_environ()
        m = modeller.model(e)
        m.build_sequence(self.seq)
        m.chains[0].name = 'B'
        try:
            m.write(file='pdb1abc.ent')
            # run_test removes test.hhr (and any other output) itself, even
            # when the script fails
            self.assertRaises(OSError, self.run_test, e, query_only=True)
        finally:
            self._remove_files('pdb1abc.ent')

    def test_pdb_wrong_seq(self):
        """Check hhmakemodel with wrong PDB sequence"""
        e = self.get_environ()
        m = modeller.model(e)
        m.build_sequence('CCG')
        m.chains[0].name = 'A'
        try:
            m.write(file='pdb1abc.ent')
            self.run_test(e, query_only=True)
        finally:
            self._remove_files('pdb1abc.ent')

    def test_first_zero(self):
        """Check hhmakemodel with first residue numbered '0'"""
        e = self.get_environ()
        m = modeller.model(e)
        m.build_sequence(self.seq)
        m.chains[0].name = 'A'
        m.residues[0].num = '0'
        try:
            m.write(file='pdb1abc.ent')
            self.run_test(e, range=('0:A', '+20:'))
        finally:
            self._remove_files('pdb1abc.ent')

    def test_insertion_code(self):
        """Check hhmakemodel with insertion codes"""
        e = self.get_environ()
        m = modeller.model(e)
        m.build_sequence(self.seq)
        m.chains[0].name = 'A'
        m.residues[3].num = '4'
        m.residues[4].num = '4A'
        try:
            m.write(file='pdb1abc.ent')
            self.run_test(e)
        finally:
            self._remove_files('pdb1abc.ent')

    def test_residue_hetatm(self):
        """Check hhmakemodel with HETATM amino acid"""
        e = self.get_environ()
        m = modeller.model(e)
        m.build_sequence(self.seq)
        m.chains[0].name = 'A'
        try:
            m.write(file='pdb.tmp')
            # Copy pdb.tmp to pdb1abc.ent, rewriting two residues on the way
            with open('pdb.tmp') as fin, open('pdb1abc.ent', 'w') as fout:
                for line in fin:
                    # A regular amino acid marked as HETATM is ignored
                    # by Modeller
                    if line[17:26] == 'PHE A   4':
                        line = 'HETATM' + line[6:]
                    # ... but an MSE marked as HETATM is treated like MET
                    elif line[17:26] == 'MET A   2':
                        line = 'HETATM' + line[6:17] + "MSE" + line[20:]
                    fout.write(line)
            self.run_test(e, range=('1:A', '+19:'))
        finally:
            self._remove_files('pdb1abc.ent', 'pdb.tmp')

    def run_test(self, e, query_only=False, has_pdb=True,
                 range=('1:A','+20:'), query_seq=None, template_seq=None):
        """Run hhmakemodel on a single-hit .hhr file and check its outputs.

        e -- Modeller environment used to read the resulting alignment.
        query_only -- if True, the output alignment must contain exactly one
            sequence; otherwise the second sequence is reread and its range
            is compared against `range`.
        has_pdb -- if True, the fourth line of AliResidues.txt must start
            with 'PDB'.
        range -- expected (start, end) range of the second sequence in the
            alignment. (The name shadows the builtin, but is kept because
            callers pass it by keyword.)
        query_seq, template_seq -- aligned sequences written into the .hhr
            file; each defaults to self.seq.

        Raises OSError (from run_hhmakemodel) if the script fails. The files
        test.hhr, test.ali and AliResidues.txt are removed in all cases.
        """
        if query_seq is None:
            query_seq = self.seq
        if template_seq is None:
            template_seq = self.seq
        try:
            # Close the .hhr file before the script is run, so that its
            # contents are guaranteed to be flushed to disk
            with open('test.hhr', 'w') as fh:
                write_hhr(fh, query_seq, template_seq)
            self.run_hhmakemodel('test.hhr', 'test.ali')
            a = modeller.alignment(e, file='test.ali')
            if query_only:
                self.assertEqual(len(a), 1)
            else:
                a[1].reread()
                self.assertEqual(a[1].range[0], range[0])
                self.assertEqual(a[1].range[1], range[1])
            with open('AliResidues.txt') as fh:
                ali_res = fh.readlines()
            self.assertEqual(ali_res[0][:3], 'SEQ')
            self.assertEqual(ali_res[1][:4], 'PROB')
            self.assertEqual(ali_res[2][:6], 'EVALUE')
            if has_pdb:
                self.assertEqual(ali_res[3][:3], 'PDB')
        finally:
            self._remove_files('test.ali', 'test.hhr', 'AliResidues.txt')

    def run_hhmakemodel(self, hhrfile, pirfile, extra_args=None):
        """Run the hhmakemodel script, reading `hhrfile` and writing `pirfile`.

        The script is run with '-d .' in addition to the input and output
        file options. `extra_args`, if given, is a sequence of additional
        command line arguments appended to the command.
        Raises OSError if the script exits with a nonzero status.
        """
        cmd = [hhmakemodel, '-i', hhrfile, '-d', '.', '-pir', pirfile]
        if extra_args:
            cmd.extend(extra_args)
        ret = subprocess.call(cmd)
        if ret != 0:
            raise OSError("subprocess failed")

# Run all tests in this module when the file is executed as a script
if __name__ == '__main__':
    unittest.main()
