#!/usr/bin/perl -w
# File: convertdump
# ALINE - Charles.Bond@uwa.edu.au
# 
# Copyright (c) 2008, Charlie Bond and Alex Schuettelkopf
# 
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 
#     * Redistributions of source code must retain the above copyright
#     * notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#     * copyright notice, this list of conditions and the following
#     * disclaimer in the documentation and/or other materials provided
#     * with the distribution.  Neither the name of the University of
#     * Dundee, the University of Western Australia nor the names of its
#     * contributors may be used to endorse or promote products derived
#     * from this software without specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#use strict;
use Data::Dumper;
require 'dumpvar.pl';
use Time::HiRes 'time';

my (%par,@seq,@categories,$t,$i);

sub loadpackaline($$$$);
sub savepackaline($$$);
sub n2a64($);

my $sep=chr(3); # must be 0 < x < 32
my $ext=chr(5); # must be 0 < x < 32

print STDERR "Grabbing file...\n";
$t=time; { local $/=undef; $_=<>; } print STDERR "Time = ".(time()-$t)." s\n";

print STDERR "Interpreting file...\n"; $t=time;
if (/^### Aline 1.0, /) {
  eval $_;
  print STDERR "Time = ".(time()-$t)." s\n";

  print STDERR "Saving packed dump...\n"; $t=time;
  print savepackaline(\%par,\@seq,\@categories);
  print STDERR "Time = ".(time()-$t)." s\n";

} elsif (/^Aline 1.0 packed state R001/) {
  if (($i=loadpackaline(\%par,\@seq,\@categories,$_))) { print STDERR "Error loading file ($i).\n"; exit 3; }
  print STDERR "Time = ".(time()-$t)." s\n";

  print STDERR "Saving ASCII dump...\n"; $t=time;
  print '### Aline 1.0, 2004/10/22 C.S.Bond@dundee.ac.uk http://stein.bioch.dundee.ac.uk/~charlie/scripts/'."\n";
  print Data::Dumper->Dump([\%par,\@seq,\@categories],["*par","*seq","*categories"]);
  print STDERR "Time = ".(time()-$t)." s\n";

} else {
  print STDERR "Unknown input format, exiting.\n";
}

exit 0;


# savepackaline takes a pre-munged data thingy

sub savepackaline($$$)
{ my ($pp,$sp,$cp)=@_; local $_;
  my (@v,%keycache,%txcache,$buf1,$i,$j,$k,$c,$ce,$co,$num,$key,$oldnum,%ocache,$x,$okey,$otx,$ott);

  $x="Aline 1.0 packed state R001\nWritten by the Aline dump converter\n";
  @v=(); foreach (keys %$pp) { push @v,$_.$sep.$pp->{$_}; }
  $x.=join($sep,@v)."\n";

  %txcache=(); %ocache=(); %keycache=(); $buf1=''; $okey=0;
  foreach (@$sp) {
    $c={}; %$c=%$_;
    # save {p}, {n} and length of {o}
    $buf1.=$c->{p}.$sep.(defined($c->{n})?$c->{n}:'u').$sep.scalar(@{$c->{o}}).$sep; @v=();

    # as well as {t} items
    foreach $i (keys %{$c->{t}}) { push @v,$i.$sep.$c->{t}{$i}; }
    $buf1.=join($sep,@v)."\n";

    # then remaining non-{e|p|o|t} keys
    $ce=$c->{e}; $co=$c->{o}; foreach $i (qw(n e o p t)) { delete $c->{$i}; }
    @v=(); foreach $i (keys %$c) { push @v,$i.$sep.$c->{$i}; }
    $buf1.='>'.join($sep,@v)."\n";

    # sequence
    $oldnum=0;
    foreach $i (@$ce) {
      $c={}; %$c=%$i; $num=undef;
      if (!exists($c->{seqnumber}))     { $c->{seqnumber}=-1; }
      elsif (!defined($c->{seqnumber})) { $c->{seqnumber}=0; }
      else {
        if (($c->{seqnumber}-$oldnum)==1) { $c->{seqnumber}=1; $oldnum++; }
        else { $num=$c->{seqnumber}; $oldnum=$num; $c->{seqnumber}=2; }
      }
      @v=(); foreach $j (sort keys %$c) { push @v,$j.$sep.$c->{$j}; }
      $j=join($sep,@v);
      if (exists($txcache{$j})) { $key=$txcache{$j}; } else {
        $key=substr($c->{text}.'~',0,1);
        if (exists($keycache{$key})) {
          $keycache{$key}++;
	  if ($keycache{$key}<256) { $key.=chr($keycache{$key}); }
          else { $key.=$ext.$keycache{$key}.$ext; }
        } else { $keycache{$key}=32; $key.=' '; }
        $txcache{$j}=$key;
      }
      $buf1.=$key.(defined($num)?($num.$sep):'');
    }
    $buf1.="\n";

    # object data
    foreach $i (@$co) {
      $c={}; %$c=%$i; $ce=$c->{e};
      $buf1.=$c->{multi}.$sep.$c->{z}.$sep.
             (defined($c->{rev})?"$c->{rev}[0]$sep$c->{rev}[1]":'u'.$sep.'u').$sep.
             (defined($c->{fwd})?"$c->{fwd}[0]$sep$c->{fwd}[1]":'u'.$sep.'u')."\n";
      foreach $j (qw(rev fwd e z multi)) { delete $c->{$j}; }
      @v=(); foreach $j (keys %$c) { push @v,$j.$sep.$c->{$j}; } $buf1.='>'.join($sep,@v)."\n";

      # object items
      $oldnum=0;
      foreach $j (@$ce) {
	$c={}; %$c=%$j; $num=$c->{xpos}; delete $c->{xpos}; $ott='text';
        if ($c->{type}!~/Graph$/) { $ott='otext'; }
        $otx=exists($c->{$ott})?$c->{$ott}:undef; delete $c->{$ott};
        @v=(); foreach $k (sort keys %$c) { push @v,$k.$sep.$c->{$k}; } $k=join($sep,@v);
        if (!exists($ocache{$k})) {
          $ocache{$k}=n2a64($okey); $okey++;
        }
        $buf1.="$ocache{$k}$sep".((($num-$oldnum)==1)?'+':$num).(defined($otx)?($ext.$otx):'').$sep;
        $oldnum=$num;
      }
      $buf1=~s/$sep$/\n/;
    }
  }
  foreach $i (keys %txcache) { $x.=$txcache{$i}.$sep.$i."\n"; } $x.="$ext\n";
  foreach $i (keys %ocache) { $x.=$ocache{$i}.$sep.$i."\n"; } $x.="$ext\n";
  $buf1.="$ext\n";
  foreach $i (0..($#$cp-1)) {
    @v=($cp->[$i][0],@{$cp->[$i][1]});
    if ($i) { for ($j=1;$j<=@{$cp->[$i][1]};$j++) { if ($v[$j] eq $cp->[$i-1][1][$j-1]) { $v[$j]=$ext; } } }
    if ($v[2] eq $v[1]) { $v[2]='='; } $buf1.=join($sep,@v)."\n";
  }
  return $x.$buf1;
}

sub n2a64($)
{ my $o=''; my $v=shift; my ($i);
  return '0' if !$v;
  while ($v) {
    $i=$v&63; $v>>=6;
    if ($i<10)    { $o=chr(48+$i).$o; }
    elsif ($i<38) { $o=chr(55+$i).$o; }
    else          { $o=chr(59+$i).$o; }
  }
  return $o;
}

sub loadpackaline($$$$)
{ my ($pp,$sp,$cp,$d)=@_; my ($i,$j,$key,%ocache,%txcache,@v,@lines,$lo,$e,$t,$oldnum,$ott,$tv);
  if ($d!~s/^Aline 1\.0 packed state R([0-9]+)[\r\n]+[^\r\n]*[\r\n]+//) { return 1; } $i=$1+0;
  return 2 if $i>1;

  %$pp=(); @$sp=(); @$cp=(); %txcache=(); %ocache=();
  @lines=split(/[\r\n]+/,$d);

  return 3 if !@lines;
  @v=split(/$sep/,shift(@lines)); while (@v>1) { $j=shift @v; $pp->{$j}=0+shift(@v); }

  # read caches
fcache1:
  return 4 if !defined($i=shift(@lines));
  if ($i ne $ext) {
    @v=split(/$sep/,$i); $key=shift @v; $txcache{$key}={};
    while (@v>1) { $j=shift @v; $txcache{$key}{$j}=shift(@v); }
    goto fcache1;
  }
fcache2:
  return 5 if !defined($i=shift(@lines));
  if ($i ne $ext) {
    @v=split(/$sep/,$i); $key=shift @v; $ocache{$key}={};
    while (@v>1) { $j=shift @v; $ocache{$key}{$j}=shift(@v); }
    goto fcache2;
  }

  while (@lines && ($lines[0] ne $ext)) {
    # new sequence
    @v=split(/$sep/,shift(@lines));
    return 6 if @v<3; $lo=$v[2];
    $e=[]; $t={}; push @$sp,{p=>$v[0],n=>(($v[1] eq 'u')?undef:($v[1]+0)),t=>$t,e=>$e,o=>[]};
    splice @v,0,3; while (@v>1) { $j=shift @v; $t->{$j}=shift @v; }
    return 7 if !defined($i=shift(@lines)); return 8 if $i!~s/^>//;
    @v=split(/$sep/,$i); while (@v>1) { $j=shift @v; $sp->[-1]{$j}=shift @v; }
    return 9 if !defined($i=shift(@lines));
    $oldnum=0;
    while (length($i)>1) {
      $key=substr($i,0,2); $i=substr($i,2);
      if (substr($key,1,1) eq $ext) { return 10 if $i!~s/^(.+?$ext)//; $key.=$1; }
      return 11 if !exists($txcache{$key});
      push @$e,{}; %{$e->[-1]}=%{$txcache{$key}};
      if ($e->[-1]{seqnumber}==-1) { delete $e->[-1]{seqnumber}; }
      elsif ($e->[-1]{seqnumber}==0) { $e->[-1]{seqnumber}=undef; }
      elsif ($e->[-1]{seqnumber}==1) { $oldnum++; $e->[-1]{seqnumber}=$oldnum; }
      else { return 12 if $i!~s/^(.+?)$sep//; $oldnum=$1+0.0; $e->[-1]{seqnumber}=$oldnum; }
    }
    return 13 if @lines<(3*$lo);
    while ($lo) {
      @v=split(/$sep/,shift @lines); $t=$sp->[-1]{o}; $e=[]; return 14 if @v!=6;
      push @$t,{}; $t=$t->[-1]; %$t=(multi=>$v[0],z=>$v[1],rev=>[$v[2],$v[3]],fwd=>[$v[4],$v[5]],e=>$e);
      if ($t->{rev}[0] eq 'u') { $t->{rev}=undef; } if ($t->{fwd}[0] eq 'u') { $t->{fwd}=undef; }
      $i=shift @lines; return 15 if $i!~s/^>//; @v=split(/$sep/,$i);
      while (@v>1) { $j=shift @v; $t->{$j}=shift(@v); }
      @v=split(/$sep/,shift @lines); $oldnum=0;
      while (@v>1) {
        $key=shift @v; push @$e,{}; return 16 if !exists($ocache{$key});
        %{$e->[-1]}=%{$ocache{$key}}; $j=shift @v;
	if ($j=~s/$ext(.*)$//) {
          $tv=$1; $ott='text'; if ($e->[-1]{type}!~/Graph$/) { $ott='otext'; } $e->[-1]{$ott}=$tv;
        }
        if ($j eq '+') { $oldnum++; } else { $oldnum=$j; }
	$e->[-1]{xpos}=$oldnum;
      }
      $lo--;
    }
  }
  return 17 if !@lines; shift @lines;
  foreach $i (@lines) {
    @v=split(/$sep/,$i); if ($v[2] eq '=') { $v[2]=$v[1]; } push @$cp,[shift(@v),[]]; @{$cp->[-1][1]}=@v;
    for ($j=0;$j<@{$cp->[-1][1]};$j++) { if ($cp->[-1][1][$j] eq $ext) { $cp->[-1][1][$j]=$cp->[-2][1][$j]; } }
  }
  push @$cp,[100,['','','','','','','','']];
  return 0;
}
