# This file is part of ModPipe, Copyright 1997-2020 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.


import modpipe

def find_seq_in_seq(searchseq, refseq):
    """Locate searchseq inside refseq and return the matching bounds.

    Both sequences are assumed to be lists of single characters.
    Gap characters ('-') are stripped from searchseq before the
    search; refseq is joined and searched as given.

    Returns a (beg, end) pair of 0-based indices into the joined
    refseq, where end is the index of the last matching character
    (i.e. the range is inclusive). Only the first occurrence is
    reported.

    Raises modpipe.Error if the ungapped searchseq does not occur
    in refseq."""
    # Flatten both inputs to plain strings so str.index can be used.
    needle = ''.join(remove_gaps(searchseq))
    haystack = ''.join(refseq)
    try:
        first = haystack.index(needle)
    except ValueError:
        raise modpipe.Error("Cannot find sequence %s in %s" \
                            % (needle, haystack))
    else:
        # Inclusive index of the last matched character
        last = first + len(needle) - 1
        return first, last


def remove_gaps(seq):
    """Return a new list holding every element of the aligned
    sequence seq (assumed to be a list) except the gap
    character '-'. The input is not modified."""
    def is_residue(symbol):
        return symbol != '-'

    return list(filter(is_residue, seq))

def get_overlap(ref, model):
    """Given a reference and model sequence range - each a (start,end) pair
       of residue numbers, both ends inclusive - return the overlaps and
       non-overlaps between them.

       Returns a tuple (overlap, pct_overlap, nonoverlap, pct_nonoverlap,
       overlap_region) where:

         - overlap_region is the list [max of the two starts,
           min of the two ends];
         - overlap is the number of residues in overlap_region (this is
           zero or negative when the two ranges do not intersect);
         - nonoverlap is the number of model residues that lie outside
           the reference range;
         - pct_overlap and pct_nonoverlap are the two counts expressed
           as a percentage of the model length.

       Raises ZeroDivisionError if the model range has zero length
       (i.e. model[1] - model[0] + 1 == 0)."""
    model_len = model[1] - model[0] + 1

    # Get the extent of overlap
    overlap_region = [max(ref[0], model[0]), min(ref[1], model[1])]
    overlap = overlap_region[1] - overlap_region[0] + 1

    # Calculate percentage overlap
    pct_overlap = 100.0 * overlap / model_len

    # Get the extent of the non-overlapping region: model residues that
    # fall before the reference start plus those after the reference end.
    # The residues ref[0] and ref[1] themselves belong to the reference, so
    # they are not counted (no "+ 1"), and each overhang is limited to the
    # model's own residues so disjoint ranges cannot exceed the model length.
    before_ref = max(0, min(ref[0], model[1] + 1) - model[0])
    after_ref = max(0, model[1] - max(ref[1], model[0] - 1))
    nonoverlap = before_ref + after_ref

    # Calculate the percentage non-overlap
    pct_nonoverlap = 100.0 * nonoverlap / model_len

    return (overlap, pct_overlap, nonoverlap, pct_nonoverlap, overlap_region)
