#!/usr/bin/python
# This file is part of ModPipe, Copyright 1997-2020 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

from modeller import *
from optparse import OptionParser
import modpipe.version
import sys, os

# Override the reported program name: optparse derives the %prog
# placeholder used in the usage text below from sys.argv[0], so help and
# error output will show 'modpipe make_hdf5' instead of this file's name.
sys.argv[0] = 'modpipe make_hdf5'

def main():
    """Command-line entry point: convert a text sequence file via MODELLER.

    Expects exactly two positional arguments, the input sequence file and
    the output file (described as an HDF5 file in the usage text).  The
    ``-f/--sequence_format`` option selects the input format; it defaults
    to 'FASTA', is compared case-insensitively, and must be FASTA or PIR.

    Invalid usage (wrong number of positional arguments or an unknown
    format) is reported through ``OptionParser.error``, which prints the
    usage plus the message and exits.
    """
    usage_text = """
 This script takes an ascii file with protein sequences in PIR or
 FASTA format and creates a architecture independent HDF5 file.

 Usage: %prog [options] textfile binfile

 textfile is the input sequence file, and binfile is the output HDF5 file.

 Run `%prog -h` for help information
 """
    format_help = """Format of the sequence file. Acceptable formats
                      are PIR or FASTA."""

    # Build the command-line interface; the default format is FASTA.
    cli = OptionParser(usage=usage_text, version=modpipe.version.message())
    cli.add_option("-f", "--sequence_format",
                   dest="sformat",
                   type='string',
                   default='FASTA',
                   metavar="FORMAT",
                   help=format_help)

    options, positional = cli.parse_args()

    # Exactly one input path and one output path are required.
    if len(positional) != 2:
        cli.error("You must specify an input file and an output file")
    source_path, target_path = positional

    # The format name is matched case-insensitively and handed to MODELLER
    # in lower case.
    seq_format = options.sformat.lower()
    if seq_format not in ('fasta', 'pir'):
        cli.error("Sequence format can only be FASTA or PIR.")

    # Switch MODELLER to verbose logging before creating its environment.
    log.verbose()
    modeller_env = environ()

    # Let MODELLER read the text database and write the converted output.
    database = sequence_db(modeller_env)
    database.convert(chains_list='all',
                     seq_database_file=source_path,
                     seq_database_format=seq_format,
                     outfile=target_path,
                     clean_sequences=True,
                     minmax_db_seq_len=[1, 40000])

# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
