# This file is part of ModPipe, Copyright 1997-2020 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

package PLLib::Alignment;
require Exporter;
@ISA    = qw(Exporter);
@EXPORT = qw( ReadAlignment InitAlignment CompareAlignments
              MapSeqToAlignment AreAlignmentsSimilar ClusterAlignments
              WriteAlignment QuickAli GetDPMat GetTraceback 
              AddSequenceToAlignment MergeAlignments BatchMergeAlignments);
             

use strict;
use PLLib::Utils;
use PLLib::Sequence;
use Types::IntAlignment;

# -- This routine takes a reference alignment and recursively merges
# alignments from specified files.
sub BatchMergeAlignments {

  # -- Get subroutine name
  my $subname = GetSubrName();

  # -- Check arguments
  my $nargs = 5;

  unless (scalar(@_) == $nargs){
     print "${subname}__E> Insufficient arguments\n" ;
     return;
  }

  # -- reassign the variables
  my ( $seqfile, $tgtcode, $seqfmt, 
       $alifiles, $alifmt ) = @_;

  # -- Read in the target sequence
  my $ali = undef;
  $ali = ReadAlignment($ali, $seqfile, $seqfmt, 'HORIZONTAL');

  # -- Abort if target is not found in newali
  my $tgtidx = IndexInList($ali->code, $tgtcode);
  unless ( $ali->code->[$tgtidx] eq $tgtcode ){
    warn "${subname}__E> Target code not found in alignment: $tgtcode\n";
    return;
  }

  # -- Update PIR fields for target
  if ( $seqfmt =~ /\bFASTA\b/i ){
     $ali->type->[$tgtidx] = 'sequence';
     $ali->file->[$tgtidx] = $tgtcode;
     $ali->inipos->[$tgtidx] = 1;
     $ali->endpos->[$tgtidx] = length($ali->sequence->[$tgtidx]);
     $ali->name->[$tgtidx] = 'Target Sequence';
  }

  # -- Iteratively add alignments
  for my $alif ( @$alifiles ){

    # -- Read in the next alignment
    my $nextali = undef;
    $nextali = ReadAlignment($nextali, $alif, $alifmt, 'HORIZONTAL');

    $ali = MergeAlignments($ali, $tgtcode, $nextali);
  }

  # -- Arbitrarily tag the align_codes with a number to account
  #    for duplicate template entries
  my $cnt = 0;
  foreach my $idx ( 0 .. $#{ $ali->code } ){
    if ( $ali->code->[$idx] ne $tgtcode ){
      $cnt = sprintf "%03d", $cnt + 1;
      $ali->code->[$idx] =~ s/$/_$cnt/;
    }
  }

  # -- Write out the final alignment
  my $outali = WriteAlignment($ali);
  
  return $outali;
}

# -- This routine takes two alignment objects, one of which that
# contains the reference sequence (target) and adjusts the gaps in the other
# to be able to merge them together. For instance, it will merge two or more
# pairwise target-template alignments and merge them into a multiple sequence
# alignment.
sub MergeAlignments {

  use List::Util qw( max min );

  # -- Get subroutine name
  my $subname = GetSubrName();

  # -- Check arguments
  my $nargs = 3;

  unless (scalar(@_) == $nargs){
     print "${subname}__E> Insufficient arguments\n" ;
     return;
  }

  # -- reassign the variables
  my ( $ali, $tgtcode, $newali ) = @_;

  # -- Abort if target code is not found in reference alignment
  my $refidx = IndexInList($ali->code, $tgtcode);
  unless ( $ali->code->[$refidx] eq $tgtcode ){
    warn "${subname}__E> Target code not found in reference alignment\n" .
    warn "${subname}__E>    Alignment file: " . $ali->filename . "\n";
    return;
  }

  # -- Abort if target is not found in newali
  my $tgtidx = IndexInList($newali->code, $tgtcode);
  unless ( $newali->code->[$tgtidx] eq $tgtcode ){
    warn "${subname}__E> Target code not found in alignment\n" .
    warn "${subname}__E>    Alignment file: " . $newali->filename . "\n";
    return;
  }

  # -- Find the part of target found in the alignment
  my ($s_beg, $s_end) = FindSeqInSeq(RemoveDashes($newali->sequence->[$tgtidx]),
                                     RemoveDashes($ali->sequence->[$refidx]));

  # -- Pad alignment sequences until the first match with target sequence
  my $ngap = 0; my $rcnt = 0;
  foreach my $p ( 0 .. length($ali->sequence->[$refidx]) ){
    my $tres = substr($ali->sequence->[$refidx], $p, 1);
    if ( IsGAP($tres) ){
      $ngap++;
    } else {
        $rcnt++;
      }
    last if ($rcnt == $s_beg);
  }

  # -- Total number of positions to be adjusted
  my $p_beg = $s_beg + $ngap - 1;

  # -- Add leading gaps to all sequences in the alignment
  foreach my $seq ( @{ $newali->sequence } ){
    $seq =~ s/^/chr(45) x $p_beg/e;
  }

  # -- Step through each position of the alignment and add gaps to the target sequence
  my $chk = 1;
  while ( $chk ){
    my $match = 0;
    my $shorter = min(length($ali->sequence->[$refidx]), length($newali->sequence->[$tgtidx]));
    foreach my $p ( $p_beg .. $shorter - 1 ){

      # -- Fetch the residues at this position
      my $tres = substr($ali->sequence->[$refidx], $p, 1);
      my $ares = substr($newali->sequence->[$tgtidx], $p, 1);

      if ( $ares eq $tres ){
        $match++;
        next;
      } elsif ( IsGAP($ares) ){
         foreach my $idx ( 0 .. $#{ $ali->code } ){
           substr($ali->sequence->[$idx], $p, 0) = '-';
         }
         last;
      } elsif ( IsGAP($tres) ){
         foreach my $idx ( 0 .. $#{ $newali->code } ){
           substr($newali->sequence->[$idx], $p, 0) = '-';
         }
         last;
      } else {
         die "${subname}__E> Found differences between the reference sequence\n" .
             "${subname}__E> and the target sequence in one of the alignments:\n" .
             "${subname}__E> $tres (target) vs $ares (ali)\n" . 
             "${subname}__E> This routine cannot handle that\n" ;
       }
    }

    $chk = 0 if ( $match == length($newali->sequence->[$tgtidx]) - $p_beg);
  }

  # -- Pad with trailing gaps to adjust for length
  my $p_end = length($ali->sequence->[$refidx]) - length($newali->sequence->[$tgtidx]);
  foreach my $seq ( @{ $newali->sequence } ){
    $seq =~ s/$/chr(45) x $p_end/e;
  }

  # -- Add all sequences (except target) to the primary alignment
  foreach my $idx ( 0 .. $#{$newali->code} ){
    if ( $idx != $tgtidx ){
      $ali = AddSequenceToAlignment($ali, $newali, $newali->code->[$idx]);
    }
  }
  
  return $ali;
}

# - This routine takes an alignment object and adds a specified
#   sequence from another alignment into it without changing it.
sub AddSequenceToAlignment {

  # -- Get subroutine name
  my $subname = GetSubrName();

  # -- Check arguments
  my $nargs = 3;

  unless (scalar(@_) == $nargs){
     print "${subname}__E> Insufficient arguments\n" ;
     return;
  }

  # -- reassign the variables
  my ( $ali, $newali, $code ) = @_;

  # -- Abort if target is not found in newali
  my $idx = IndexInList($newali->code, $code);
  unless ( $newali->code->[$idx] eq $code ){
    warn "${subname}__E> Target code not found in alignment: $code\n";
    return;
  }

  # -- Append alignment attributes
  push @{ $ali->code }, $newali->code->[$idx];
  push @{ $ali->type }, $newali->type->[$idx];
  push @{ $ali->file }, $newali->file->[$idx];
  push @{ $ali->inipos }, $newali->inipos->[$idx];
  push @{ $ali->chain1 }, $newali->chain1->[$idx];
  push @{ $ali->endpos }, $newali->endpos->[$idx];
  push @{ $ali->chain2 }, $newali->chain2->[$idx];
  push @{ $ali->name }, $newali->name->[$idx];
  push @{ $ali->source }, $newali->source->[$idx];
  push @{ $ali->resoln }, $newali->resoln->[$idx];
  push @{ $ali->rfactr }, $newali->rfactr->[$idx];
  push @{ $ali->sequence }, $newali->sequence->[$idx];

  return $ali;
}


sub GetTraceback {

   # This routine does the traceback

   # -- Get subroutine name
   my $subname = GetSubrName();

   # -- Check arguments
   my $nargs = 5;

   die "${subname}__E> Insufficient arguments\n"
     unless (scalar(@_) == $nargs);

   # -- Get the command line variables
   my ($T, $s1, $s2, $final_i, $final_j) = @_;

   # -- Return the alignment as two lists
   my @a1 = my @a2 = ();

   my $i = $final_i; my $j = $final_j;
   while ( $i > 0 || $j > 0 ){

     # -- See what the next step is
     if ( $T->[$i][$j] eq 'D' ){
        push @a1, $i;
        push @a2, $j;
        $i--;
        $j--;
     }
       elsif ( $T->[$i][$j] eq 'H' ){
          push @a1, 0;
          push @a2, $j;
          $j--;
       }
       elsif ( $T->[$i][$j] eq 'V' ){
          push @a1, $i;
          push @a2, 0;
          $i--;
       }
   }

   # -- Reverse the alignment
   @a1 = reverse @a1;
   @a2 = reverse @a2;

   # -- Transfer matrix indices into residues
   my $ali1 = ''; my $ali2 = '';
   foreach my $i ( 0 .. scalar(@a1) - 1 ){
      my $r1 = $a1[$i];
      my $r2 = $a2[$i];
      $ali1 .= ( $r1 == 0 ) ? '-' : $s1->[$r1-1],
      $ali2 .= ( $r2 == 0 ) ? '-' : $s2->[$r2-1];
   }

   # -- Return sequences
   return($ali1, $ali2);
}

sub GetDPMat {

   # This routine does the actual rudimentary DP.

   # -- Get subroutine name
   my $subname = GetSubrName();

   # -- Check arguments
   my $nargs = 7;

   die "${subname}__E> Insufficient arguments\n"
     unless (scalar(@_) == $nargs);

   # -- Get the command line variables
   my ($seq1, $seq2, $len1, $len2, $match, $mismatch, $gap) = @_;

   # -- Initalize matrices
   my @F = ();
   my @T = ();

   # -- Split the sequence into an array
   my @s1 = split('', $seq1);
   my @s2 = split('', $seq2);

   # -- Initialize the first row
   for(my $j=0; $j<= $len2; $j++){
      $F[0][$j] = $gap * $j;
      $T[0][$j] = 'H';
   }

   # -- Initialize the first column
   for(my $i=0; $i<= $len1; $i++){
      $F[$i][0] = $gap * $i;
      $T[$i][0] = 'V';
   }

   # -- Fill up the matrix
   my $final = -999999;
   my $final_i = my $final_j = 0;

   for(my $i=1; $i<=$len1; $i++){
      for(my $j=1; $j<=$len2; $j++){
         my $diag = ($s1[$i-1] eq $s2[$j-1]) ? $F[$i-1][$j-1] + 1 : $F[$i-1][$j-1] - 1;
         my $hori = $F[$i-1][$j] - 2;
         my $vert = $F[$i][$j-1] - 2;

         $F[$i][$j] = $hori; $T[$i][$j] = 'V';
         if ( $diag >= $F[$i][$j] ){ $F[$i][$j] = $diag; $T[$i][$j] = 'D' }
         if ( $vert >= $F[$i][$j] ){ $F[$i][$j] = $vert; $T[$i][$j] = 'H' }

         if ( $F[$i][$j] > $final ){
            $final = $F[$i][$j];
            $final_i = $i;
            $final_j = $j;
         }
      }
   }

   # -- Get the traceback
   my ( $ali1, $ali2 ) = GetTraceback(\@T, \@s1, \@s2, $len1, $len2);

   # -- Return values
   return($ali1, $ali2);
}



sub QuickAli {

   # This routine will use a very rudimentary dynamic programing logic
   # to get the alignment between two sequences. Use this only to compare
   # almost identical sequences, such as to compare the PDB sequence 
   # derived by CE to that of Modeller. It uses a linear gap penalty 
   # function and fixed scores for matches and mismatches.

   # -- Get subroutine name
   my $subname = GetSubrName();

   # -- Check arguments
   my $nargs = 5;

   die "${subname}__E> Insufficient arguments\n"
     unless (scalar(@_) == $nargs);

   # -- Get the command line variables
   my ($seq1, $seq2, $match, $mismatch, $gap) = @_;

   # -- Remove all gaps from sequences
   $seq1 = RemoveDashes($seq1);
   $seq2 = RemoveDashes($seq2);

   # -- Get the lengths of the two sequences
   my $len1 = length($seq1);
   my $len2 = length($seq2);

   # -- Return unless there is something to do
   return unless ($len1 > 0 && $len2 > 0 );

   # -- Do the DP to get the matrix
   my ($ali1, $ali2) = GetDPMat($seq1, $seq2, $len1, $len2, 
                                $match, $mismatch, $gap);

   # -- Return aligned sequences
   return($ali1, $ali2);
}


sub WriteAlignment {

   # -- Get subroutine name
   my $subname = GetSubrName();

   # -- Check arguments
   my $nargs = 1;

   die "${subname}__E> Insufficient arguments\n"
     unless (scalar(@_) == $nargs);

   # -- Get the command line variables
   my $ali = $_[0];

   # -- Return if there are no sequences
   unless ( @{ $ali->code } > 0 ){
      warn "${subname}__E> No sequences in alignment\n";
      return;
   }

   # -- Get the number of entries in the alignment
   my $numali = $#{ $ali->code };

   # -- Get the number of entries in the alignment
   my $numseq = grep /^sequence/, @{ $ali->type };

   # -- If there is only one entry of type 'sequence' in the
   #    alignment, assume it is a modeling alignment and write
   #    the structure entries first and the sequence entry last
   my $fmtseq = '';
   if ( $numseq == 1 ){

      # -- Get the index of the sequence in the alignment
      my $s0 = IndexInList($ali->type, 'sequence');

      # -- Format the sequence
      $fmtseq = WritePIR($ali->code->[$s0], $ali->type->[$s0], 
                         $ali->file->[$s0], $ali->inipos->[$s0], 
                         $ali->chain1->[$s0], $ali->endpos->[$s0], 
                         $ali->chain2->[$s0], $ali->name->[$s0], 
                         $ali->source->[$s0], $ali->resoln->[$s0], 
                         $ali->rfactr->[$s0], $ali->sequence->[$s0],
                         80); 
      
   }

   # -- Write out the remaining sequences in the alignment
   my $fmtstr = '';
   foreach my $idx ( 0 .. $numali ){

      # -- Skip the 'sequence' line if a modeling alignment
      next if ( $fmtseq ne '' && $ali->type->[$idx] =~ /^sequence/ );

      # -- Format the sequence
      $fmtstr .= WritePIR($ali->code->[$idx], $ali->type->[$idx], 
                          $ali->file->[$idx], $ali->inipos->[$idx], 
                          $ali->chain1->[$idx], $ali->endpos->[$idx], 
                          $ali->chain2->[$idx], $ali->name->[$idx], 
                          $ali->source->[$idx], $ali->resoln->[$idx], 
                          $ali->rfactr->[$idx], $ali->sequence->[$idx],
                          80); 
   }

   # -- Create a single string will all sequences and return
   return $fmtstr . $fmtseq;
}

sub ClusterAlignments {

   # -- Get subroutine name
   my $subname = GetSubrName();

   # -- Check arguments
   my $nargs = 8;

   die "${subname}__E> Insufficient arguments\n"
     unless (scalar(@_) == $nargs);

   # -- Get the command line variables
   my ($aliobjs, $seqid,
       $ovlpcut, $pcovlpcut,
       $nonovlpcut, $pcnonovlpcut, 
       $idcolcut, $pcidcolcut) = @_;

   # -- Return if there are insufficient alignments to cluster
   return if ( @$aliobjs < 1 );

   # -- Cluster Alignments
   my @org_stack = my @rep_stack = my @clusters = ();
   @org_stack = @$aliobjs;

   while ( @org_stack > 0 ){

      # -- Pop the first alignment in the original stack
      #    as a representative
      push(@rep_stack, $org_stack[0]);

      # -- Initialize the auxillary stack
      my @aux_stack = ();

      # -- Compare all elements of the original stack
      #    against the current representative, store
      #    the ones that do not cluster in the auxillary
      #    stack
      foreach my $org ( @org_stack ){

         # -- Compare the elements
         my $similar = AreAlignmentsSimilar($rep_stack[$#rep_stack], $org, 
                             $seqid, $ovlpcut, $pcovlpcut,$nonovlpcut, 
                             $pcnonovlpcut, $idcolcut, $pcidcolcut);

         # -- Append appropriate arrays
         if ( $similar ){
            push @{ $clusters[$#rep_stack] }, $org;
         } else {
              push(@aux_stack, $org);
           }
      }

      # -- Now copy the auxilary stack back to the
      #    original for the next round
      @org_stack = @aux_stack;
   }

   # -- Return the clusters and representatives
   return ( \@rep_stack, \@clusters );
}


sub AreAlignmentsSimilar {

   # -- Get subroutine name
   my $subname = GetSubrName();

   # -- Check arguments
   my $nargs = 9;

   die "${subname}__E> Insufficient arguments\n"
     unless (scalar(@_) == $nargs);

   # -- Get the command line variables
   my ($ali1, $ali2, $seqid, 
       $ovlpcut, $pcovlpcut,
       $nonovlpcut, $pcnonovlpcut, 
       $idcolcut, $pcidcolcut) = @_;

   # -- Compare the two alignments
   my ($ovlp, $pcovlp, $nonovlp, $pcnonovlp, $idcol, $pcidcol)
       = CompareAlignments($ali1, $ali2, $seqid);
   # If we couldn't compare the alignments, they can't be similar
   if (!defined($ovlp)) {
      return 0;
   }

   # -- Flag the similarity
   my $similar = ( $ovlp > $ovlpcut && $pcovlp > $pcovlpcut &&
                   $nonovlp < $nonovlpcut && $pcnonovlp < $pcnonovlpcut &&
                   $idcol > $idcolcut && $pcidcol > $pcidcolcut ) ? 1 : 0;

   # -- Return the similarity flag
   return $similar;
}

sub CompareAlignments {

   # -- Get subroutine name
   my $subname = GetSubrName();

   # -- Check arguments
   my $nargs = 3;

   die "${subname}__E> Insufficient arguments\n"
     unless (scalar(@_) == $nargs);

   # -- Get the command line variables
   my ($ali1, $ali2, $seqid) = @_;


   # -- Verify if seqid exists in both alignments
   unless ( grep { $seqid eq $_ } @{ $ali1->code } ){
      warn "${subname}__E> Reference code (seqid) not found in alignment:\n";
      warn "${subname}__E>      Code: $seqid\n";
      warn "${subname}__E>      File: ", $ali1->filename, "\n";
      return;
   }
                 

   unless ( grep { $seqid eq $_ } @{ $ali2->code } ){
      warn "${subname}__E> Reference code (seqid) not found in alignment:\n";
      warn "${subname}__E>      Code: $seqid\n";
      warn "${subname}__E>      File: ", $ali2->filename, "\n";
      return;
   }

   # -- First test: If the number of sequences are different in
   #    the two alignments, there is nothing to be done here
   unless ( scalar( @{ $ali1->code } ) == scalar( @{ $ali2->code } ) ){
      return;
   }

   # -- Get the index of the seqid in the alignment arrays
   my $idx1 = IndexInList($ali1->code, $seqid);
   my $idx2 = IndexInList($ali2->code, $seqid);
   my @template_codes_ali1;
   my @template_codes_ali2;
   foreach my $ali1_code (@{$ali1->code}) {
       unless ($ali1_code eq $seqid) {
          push @template_codes_ali1,$ali1_code;
       }
   }
   foreach my $ali2_code (@{$ali2->code}) {
       unless ($ali2_code eq $seqid) {
          push @template_codes_ali2,$ali2_code;
       }
   }
   my $template_codes_ali1 = join("",sort @template_codes_ali1);
   my $template_codes_ali2 = join("",sort @template_codes_ali2);
   
# up: test whether this causes the failed runs
#   unless ($template_codes_ali1 eq $template_codes_ali2) {
#      # -- the alignments are not from the same templates
#      # -- Thus they are not considered similar
#      return;
#   }

   

   # -- Now, index the reference sequences from the alignment
   #    to get the seq no. to ali pos mapping
   my $aln2seq1 = MapAlnPosToSeqIdx($ali1->sequence->[$idx1], 
                                    $ali1->inipos->[$idx1],
                                    $ali1->endpos->[$idx1]);

   my $aln2seq2 = MapAlnPosToSeqIdx($ali2->sequence->[$idx2], 
                                    $ali2->inipos->[$idx2],
                                    $ali2->endpos->[$idx2]);

   # -- You have to reverse these hashes to make them useful
   my %seq2aln1 = reverse %{ $aln2seq1 };
   my %seq2aln2 = reverse %{ $aln2seq2 };

   # -- Second test: If the reference sequences in the two alignments
   #    do not overlap, there is nothing to be done
   my ($ovlp, $pcovlp, $nonovlp, $pcnonovlp, $ovlpbeg, $ovlpend) 
      = GetOverlap($ali1->inipos->[$idx1], $ali1->endpos->[$idx1],
         $ali2->inipos->[$idx2], $ali2->endpos->[$idx2]);

   unless ($ovlp > 0){
      warn "${subname}__W> Comparison Failed!\n";
      warn "${subname}__W>   Alignment 1: ", $ali1->filename, "\n";
      warn "${subname}__W>   Alignment 2: ", $ali2->filename, "\n";
      warn "${subname}__W>   The two alignments do not overlap\n";
      return;
   }

   # -- Now, you are ready to count the identically aligned
   #    columns in the overlapping region. We'll worry about
   #    the normalization later below.
   my $idcol = 0;
   foreach my $seqnum ( $ovlpbeg .. $ovlpend ){

      # -- Convert sequence no. to alignment position
      my $alipos1 = $seq2aln1{$seqnum};
      my $alipos2 = $seq2aln2{$seqnum};

      # -- Increment counter if aligned columns are
      #    exactly identical
      $idcol++ if ( $ali1->seqvert->[$alipos1] eq $ali2->seqvert->[$alipos2] );
   }

   # -- Calculate the percentage identity: calculated as
   #    the ratio of identical columns over the overlapping
   #    region
   my $pcidcol = 100*$idcol/$ovlp;

   # -- Return the results
   return ($ovlp, $pcovlp, $nonovlp, $pcnonovlp, $idcol, $pcidcol);
}

sub MapAlnPosToSeqIdx {

   # -- Get subroutine name
   my $subname = GetSubrName();

   # -- Check arguments
   my $nargs = 3;

   die "${subname}__E> Insufficient arguments\n"
     unless (scalar(@_) == $nargs);

   # -- Get the command line variables
   my ($ali, $beg, $end) = @_;

   # -- Create a hash for mapping the sequence numbers
   #    to the alignment positions
   my $count = 0; my %seq2aln = ();
   for(my $i = 0; $i < length($ali); $i++){
      
      # -- Extract the residue
      my $res = substr($ali, $i, 1);

      # -- Increment the actual index
      my $idx = $i + 1;

      # -- If it is a standard residue, store the 
      #    residue number
      if ( IsSTDAA($res) ){
         $count++;
         my $seqnum = $beg + $count - 1;
         $seq2aln{$idx} = $seqnum;
      } else {
           $seq2aln{$idx} = 0;
        }
   }

   # -- Make a quick check to see if the indexing
   #    is okay
   unless ( $beg + $count - 1 == $end ){
      warn "${subname}__E> Problems indexing the sequence within alignment!\n";
      warn "${subname}__E> Maybe due to some non-standard residues?\n";
      warn "${subname}__E> Sequence in alignment: $ali\n";
      return;
   }

   # -- Return the index array
   return \%seq2aln;
}

sub InitAlignment {

   # -- Get subroutine name
   my $subname = GetSubrName();

   # -- Check arguments
   my $nargs = 0;

   die "${subname}__E> Insufficient arguments\n"
     unless (scalar(@_) == $nargs);

   # -- Create a new alignment object
   my $ali = IntAlignment->new();

   # -- Return the object
   return $ali;
}

sub ReadAlignment {

   # -- Get subroutine name
   my $subname = GetSubrName();

   # -- Check arguments
   my $nargs = 4;

   die "${subname}__E> Insufficient arguments\n"
     unless (scalar(@_) == $nargs);

   # -- Get input arguments
   my ( $ali, $alifile, $inpformat, $outformat ) = @_;

   # -- Unless initialized already, init a new ali object
   $ali = InitAlignment() unless ( defined($ali) );

   # -- Make sure the file exists
   my $fhali = OpenFile( $alifile ) or return;

   # -- Update the alignment object with filename
   $ali->filename( $alifile);

   # -- Process the file
   my $idx = 0;
   while ( defined(my $seq = ReadNextSeq($fhali, $inpformat)) ){

      # -- Parse the sequence fields
      ( $ali->code->[$idx], 
        $ali->type->[$idx], 
        $ali->file->[$idx], 
        $ali->inipos->[$idx],
        $ali->chain1->[$idx], 
        $ali->endpos->[$idx], 
        $ali->chain2->[$idx], 
        $ali->name->[$idx],
        $ali->source->[$idx], 
        $ali->resoln->[$idx], 
        $ali->rfactr->[$idx], 
        $ali->sequence->[$idx] ) 
        = ParseSeq($seq, $inpformat, 'OFF');

      # -- Increment index
      $idx++;
   }

   # -- Adjust the index value since it is always incremented
   #    after parsing a sequence. So it is one more than what
   #    there actually is.
   $idx--;

   # -- Make sure that the length of all sequences in the alignment
   #    is the same
   foreach my $i ( 0 .. $idx ){
      unless ( length($ali->sequence->[$i]) == length($ali->sequence->[0]) ){
         warn "${subname}__E> The length of at least one sequence in the\n"; 
         warn "${subname}__E> alignment is inconsistent\n";
         warn "${subname}__E>    File: $alifile\n";
         warn "${subname}__E>    Code: $ali->idcode->[$i]\n";
         return;
      }
   }

   # -- Reorder the sequence if the vertical (columnwise) format
   #    is chosen. The indices of the seqvert array starts at
   #    1 instead of 0, because it doesn't make too much sense 
   #    to have alignment positions start at 0.
   if ( $outformat =~ /\bVERTICAL\b/i ){
      # -- Get columnwise strings
      foreach my $i ( 0 .. length($ali->sequence->[0])-1 ){
         foreach my $j ( 0 .. $idx ){
            $ali->seqvert->[$i+1] .= substr($ali->sequence->[$j], $i, 1);
         }
      }
   }

   # -- Return the alignment object
   return ( $ali );
}
