# This file is part of ModPipe, Copyright 1997-2020 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

package MPLib::Serialize;
require Exporter;
@ISA    = qw(Exporter);
@EXPORT = qw(WriteHitsFile ReadHitsFile WriteModelsFile ReadModelsFile
             CountHitsFile CountModelsFile);

use Class::Struct;
use MPLib::Version;
use Types::Model;
use Types::Alignment;
use Types::Score;
use Types::GA341;
use Types::TSVMod;
use Types::Hit;
use Types::Sequence;
use Types::Template;
use strict;

sub wrap_join {
  my ($sep, $first_indent, $other_indent, $width, @elements) = @_;

  my $str = "";
  my $current_indent = $first_indent;
  my $i = scalar(@elements);
  for my $element (@elements) {
    my $out = ($i == 1 ? $element : "$element$sep");
    $i--;
    if ($current_indent + length($out) > $width
        && $other_indent + length($out) < $width) {
      $current_indent = $other_indent;
      $str .= "\n" . " " x $other_indent;
    }
    $str .= $out;
    $current_indent += length($out);
  }
  return $str;
}

sub dump_yaml {
  my ($fh, $data) = @_;
  dump_yaml_object($fh, $data, 0);
}

sub yaml_repr {
  my ($obj, $first_indent, $indent, $folded) = @_;
  my $repr = sprintf "%s", $obj;
  $repr =~ s/^\s+//;
  $repr =~ s/\s+$//;
  if ($repr =~ /^ARRAY\(0x[\da-f]+\)$/) {
    if (!$folded && scalar(@$obj) == 2
        && sprintf "%s", $obj->[0] =~ /^\s*-?\d+\w?\s*$/) {
      my @repr_objs;
      for my $element (@$obj) {
        push @repr_objs, yaml_repr($element, 0, 0, $folded);
      }
      return "[" . join(", ", @repr_objs) . "]";
    } else {
      my @repr_objs;
      for my $element (@$obj) {
        push @repr_objs, yaml_repr($element, $indent, $indent, $folded);
      }
      my $in = "\n" . " " x ($indent - 3) . "- ";
      return $in . join($in, @repr_objs);
    }
  } elsif ($repr =~ /(\w+)=HASH\(0x[\da-f]+\)$/) {
    my $structname = $1;
    if ($structname ne "Template" and $structname ne "Score") {
      my @hash_elements = get_yaml_hash_elements($obj, 0, $folded);
      $first_indent += 5 + length($structname) * 2;
      return "!<$structname> {" .
             wrap_join(", ", $first_indent, $indent, 80, @hash_elements) . "}";
    } else {
      my @hash_elements = get_yaml_hash_elements($obj, $indent - 3, $folded);
      my $ind = "\n" . " " x ($indent - 1);
      return "!<$structname>$ind" .  join($ind, @hash_elements);
    }
  }

  if ($repr =~ /^-?\d+\.\d+(e-?\d+)?$/) {
    # Float
    return $obj + 0.;
  } elsif ($repr =~ /[: {}<>]/ or $repr eq "" or $repr =~ /^0\d+$/) {
    # Quote empty strings, those containing YAML control characters or those
    # starting with a zero
    return "'" . $obj . "'";
  } elsif ($repr =~ /^-?\d+$/) {
    # Int, as long as the string representation of the int matches that of
    # the original object (this is not true for particularly long strings of
    # digits, which would be represented in standard index notation)
    my $intrepr = sprintf("%s", $obj + 0);
    if ($intrepr eq $repr) {
      return $obj + 0;
    } else {
      return $obj;
    }
  } else {
    return $obj;
  }
}

sub dump_yaml_object {
  my ($fh, $data, $first_indent) = @_;

  my $repr = sprintf "%s", $data;
  if ($repr =~ /^ARRAY\(0x[\da-f]+\)$/) {
    for my $element (@$data) {
      dump_yaml_object($fh, $element, "- ");
    }
  } elsif ($repr =~ /(\w+)=HASH\(0x[\da-f]+\)$/) {
    my $structname = $1;
    my $folded = (scalar(keys %$data) < 3);
    my @hash_elements = get_yaml_hash_elements($data, length($first_indent),
                                               $folded);
    print $fh "$first_indent!<$structname>";
    if ($folded) {
      print $fh " {" . join(", ", @hash_elements) . "}\n";
    } else {
      print $fh "\n";
      my $indent = " " x length($first_indent);
      for my $element (@hash_elements) {
        print $fh $indent . $element . "\n";
      }
    }
  } else {
    die "Unsupported data type $data";
  }
}

sub get_yaml_hash_elements {
  my ($data, $indent, $folded) = @_;
  my @elements;
  for my $key (keys %$data) {
    my $k = $key;
    $k =~ s/^.*:://;
    my $v = $data->{$key};
    if (!defined($v)) {
      next;
    }
    push @elements, "$k: " . yaml_repr($v, $indent + 1 + length($k),
                                       $indent + 3, $folded);
  }
  # Sort the hash elements to give more predictable output
  return sort @elements;
}

sub get_folded_structure {
  my ($structname, $lines, $index, $start) = @_;
  my $struc = $structname->new();
  my $line = $start;
  while (1) {
    while ($line !~ /^\s*$/) {
      if ($line =~ s/^\s*(\w+):\s*\[(-?\d+\w?),\s*(-?\d+\w?)\s*\]\s*([,}])//) {
        my ($key, $val1, $val2, $ending) = ($1, $2, $3, $4);
        $struc->{$structname . "::" . $key} = [ $val1, $val2 ];
        if ($ending eq '}') {
          return $struc, $line;
        }
      } elsif ($line =~ s/^\s*(\w+):\s*!<(\w+)>\s*{//) {
        my ($key, $subname) = ($1, $2);
        my $s;
        ($s, $line) = get_folded_structure($subname, $lines, $index, $line);
        $struc->{$structname . "::" . $key} = $s;
        $line =~ s/^\s*,//;
        if ($line =~ s/^\s*}//) {
          return $struc, $line;
        }
      } elsif ($line =~ s/^\s*(\w+):\s*'([^']+)'\s*([,}])// ||
          $line =~ s/^\s*(\w+):\s*([^,}]+)\s*([,}])//) {
        my ($key, $value, $ending) = ($1, $2, $3);
        $struc->{$structname . "::" . $key} = $value;
        if ($ending eq '}') {
          return $struc, $line;
        }
      } else {
        die "Could not match word in line: $line";
      }
    }
    $$index++;
    $line = $lines->[$$index];
  }
}

sub get_full_structure {
  my ($structname, $lines, $index, $indent) = @_;
  my $struc = $structname->new();
  while ($$index < scalar(@$lines)) {
    my $line = $lines->[$$index];
    if (length($line) < $indent || substr($line, 0, $indent) ne " " x $indent) {
      $$index--; # force the line to be reparsed at lower indentation level
      last;
    }
    if ($line =~ /^\s+(\w+):\s*\[(-?\d+\w?),\s*(-?\d+\w?)\s*\]\s*$/) {
      # List of ints with optional trailing insertion code
      $struc->{$structname . "::" . $1} = [$2, $3];
    } elsif ($line =~ /^\s+(\w+):\s*(-?\d+)\s*$/) {
      if ($2 + 0 eq $2) {
        # Int
        $struc->{$structname . "::" . $1} = $2 + 0;
      } else {
        # Int that would be represented with standard index notation; store as 
        # string so as not to lose precision
        $struc->{$structname . "::" . $1} = $2;
      } 
    } elsif ($line =~ /^\s+(\w+):\s*(-?\d+\.\d+(?:e-?\d+)?)\s*$/) {
      # Float
      $struc->{$structname . "::" . $1} = $2 + 0.0;
    } elsif ($line =~ /^\s+(\w+):\s*$/) {
      my $name = $1;
      $$index++;
      # Substructure
      my $s = parse_yaml($lines, $indent, $index);
      $struc->{$structname . "::" . $name} = $s;
    } elsif ($line =~ /^\s+(\w+):\s*!<(\w+)>\s*$/) {
      my ($name, $substruc) = ($1, $2);
      $$index++;
      # Substructure
      my $s = get_full_structure($substruc, $lines, $index, $indent + 2);
      $struc->{$structname . "::" . $name} = $s;
    } elsif ($line =~ /^\s+(\w+):\s*'([^']+)'\s*$/) {
      # Quoted string
      $struc->{$structname . "::" . $1} = $2;
    } elsif ($line =~ /^\s+(\w+):\s*(\S+)\s*$/) {
      # Unquoted string
      $struc->{$structname . "::" . $1} = $2;
    } elsif ($line =~ /^\s+(\w+):\s*!<(\w+)>\s*{(.*)$/) {
      my ($s, $newline) = get_folded_structure($2, $lines, $index, $3);
      # Substructure
      $struc->{$structname . "::" . $1} = $s;
    }
    $$index++;
  }
  return $struc;
}

sub parse_yaml {
  my ($lines, $indent, $index) = @_;
  my $yaml_list;
  my $indent_prefix = " " x $indent;
  while ($$index < scalar(@$lines)) {
    my $line = $lines->[$$index];
    if ($line =~ /^${indent_prefix}- !<(\w+)>\s+{(.*)$/) {
      my ($s, $newline) = get_folded_structure($1, $lines, $index, $2);
      push @$yaml_list, $s;
    } elsif ($line =~ /^${indent_prefix}- !<(\w+)>\s*$/) {
      $$index++;
      push @$yaml_list, get_full_structure($1, $lines, $index, $indent + 2);
    } elsif ($indent > 0) {
      # Force this line to be reparsed by caller
      $$index--;
      last;
    } else {
      die "Cannot parse YAML line $line\n";
    }
    $$index++;
  }
  return $yaml_list;
}

sub read_yaml_file {
  my ($fh) = @_;
  my @lines;
  while (<$fh>) {
    chomp;
    push @lines, $_;
  }

  my $index = 0;
  return parse_yaml(\@lines, 0, \$index);
}

sub count_yaml_file {
  my ($fh, $count_line) = @_;
  my $count = 0;
  while (<$fh>) {
    chomp;
    if ($_ eq $count_line) {
      $count++;
    }
  }
  return $count;
}

struct ModPipeVersion => { modpipe => '$', file => '$' };

sub WriteHitsFile {
  my ($hits, $fh) = @_;
  my $version = ModPipeVersion->new(modpipe => GetVersion(), file => 1);
  my @yaml_objs;
  push @yaml_objs, $version;
  push @yaml_objs, @$hits;
  dump_yaml($fh, \@yaml_objs);
}

sub ReadHitsFile {
  my ($fh) = @_;
  my $hits = read_yaml_file($fh);
  shift @$hits;
  return $hits;
}

sub WriteModelsFile {
  my ($models, $fh) = @_;
  my $version = ModPipeVersion->new(modpipe => GetVersion(), file => 1);
  my @yaml_objs;
  push @yaml_objs, $version;
  push @yaml_objs, @$models;
  dump_yaml($fh, \@yaml_objs);
}

sub ReadModelsFile {
  my ($fh) = @_;
  my $models = read_yaml_file($fh);
  shift @$models;
  return $models;
}

sub CountModelsFile {
  my ($fh) = @_;
  return count_yaml_file($fh, '- !<Model>')
}

sub CountHitsFile {
  my ($fh) = @_;
  return count_yaml_file($fh, '- !<Hit>')
}

1;
