#! /usr/bin/perl -w

use autodie;
use strict;

# Implementations under test, keyed by family.  A family maps to the list of
# its variant names, or to `undef' if it comes in a single unnamed flavour.
our %IMPL = (
  "c++"    => [qw(map uomap)],
  "golang" => undef,
  "libavl" => [qw(avl pavl rtavl tavl rb prb rtrb trb)],
  "lisp"   => [qw(ccl clisp cmucl ecl sbcl)],
  "mLib"   => [qw(sym)],
  "perl"   => undef,
  "python" => undef,
  "qptrie" => [qw(qp-fanf qp-mdw qp-list fn-fanf fn-mdw fn-list)],
  "rust"   => [qw(btree hash)],
  "sgt"    => [qw(tree234)],
  "xyla"   => [qw(avl rb splay treap)],
);

# Flat list of implementation names, families in sorted order: plain `FAMILY'
# for a family without variants, otherwise `FAMILY-VARIANT' for each variant
# in the order listed above.
our @IMPL = map {
  my $family = $_;
  my $variants = $IMPL{$family};
  defined $variants ? (map { "$family-$_" } @$variants) : $family;
} sort keys %IMPL;

# Names of the dictionaries, in the order in which they are reported.
our @DICT = qw(small medium large huge insane);

# Per-dictionary totals gathered from `DICT.<name>': %WC holds the number of
# lines, and %BYTES the sum of the line lengths once each line is chomped.
our (%WC, %BYTES);
for my $name (@DICT) {
  open my $in, "<", "DICT.$name";
  my ($lines, $chars) = (0, 0);
  while (my $line = <$in>) {
    chomp $line;
    $lines += 1;
    $chars += length $line;
  }
  close $in;
  ($WC{$name}, $BYTES{$name}) = ($lines, $chars);
}

# closeish(X, Y)
#
# Return true if X and Y differ by strictly less than 1e-3 in either
# direction, and false otherwise.
sub closeish ($$) {
  my ($x, $y) = @_;

  my $gap = $x - $y;
  $gap = -$gap if $gap < 0;
  return $gap < 1e-3;
}

# Load the timing samples.
#
# Each line of `DATA.<dict>' has the form
#
#     IMPL  SAMPLE ... ;  STAT STAT STAT STAT STAT STAT
#
# i.e. an implementation name, any number of numeric samples, a semicolon,
# and exactly six numeric statistics.  The median and mean of the samples are
# recomputed here and must agree (see `closeish') with the third and fourth
# statistics respectively.  Every name in @IMPL must appear exactly once per
# file, and no other name may appear.
#
# On completion, $DATA{IMPL} holds, in @DICT order, the median sample divided
# by the dictionary's word count; $SLOW{IMPL} is set if, for any dictionary,
# a million times that quotient reaches 1200.
our %DATA;
our %SLOW;
for my $dict (@DICT) {
  open my $f, "<", "DATA.$dict";
  my %seen = map { $_ => 0 } @IMPL;
  my $wc = $WC{$dict};
  while (<$f>) {
    my ($impl, $data, $stat) = m{ ^ (\S+)
                                    ((?: \s+ [0-9.]+)*) \s* \;
                                    ((?: \s+ [0-9.]+){6}) \s* $ }x
      or die "bad line";
    my @data = sort { $a <=> $b } map { $_ + 0.0 } split ' ', $data;
    my @stat = map { $_ + 0.0 } split ' ', $stat;

    # The pattern accepts a line with no samples at all: report that
    # plainly rather than tripping over a division by zero below.
    @data or die "no samples for `$impl'";

    # Median of the sorted samples: the middle element if there is an odd
    # number of them, otherwise the mean of the two middle elements.
    my $n = @data;
    my $half = int($n/2);
    my $mid = $n%2 ? $data[$half] : ($data[$half - 1] + $data[$half])/2;

    my $sum = 0; for my $x (@data) { $sum += $x; }
    my $avg = $sum/$n;
    closeish($mid, $stat[2]) && closeish($avg, $stat[3])
      or die "stat miscalculation";

    # %seen starts with a false entry for every known implementation, so a
    # true entry means a repeat and a missing entry means an unknown name.
    if ($seen{$impl}) { die "duplicate entry for `$impl'"; }
    elsif (exists $seen{$impl}) { $seen{$impl} = 1; }
    else { die "unknown implementation `$impl'" }

    my $unit = 1000000*$mid/$wc;
    if ($unit >= 1200) { $SLOW{$impl} = 1; }
    push $DATA{$impl}->@*, $mid/$wc;
  }
  close $f;
  for my $impl (@IMPL)
    { die "missing implementation `$impl'" unless $seen{$impl}; }
}

# Write `summary.dat': a header line naming the dictionaries, then one line
# per implementation giving a million times each of its %DATA entries.
# Implementations flagged in %SLOW are left out.
{ open my $f, ">", "summary.dat";
  printf $f "%-15s ", "impl";
  for my $dict (@DICT) { printf $f " %7s", $dict }
  print $f "\n";

  IMPL: for my $impl (@IMPL) {
    next IMPL if $SLOW{$impl};
    printf $f "%-15s ", $impl;
    for my $t ($DATA{$impl}->@*) { printf $f " %7.2f", 1e6*$t; }
    print $f "\n";
  }
  close $f;
}

# Rank the implementations for each dictionary: $RANK{IMPL}->[I] is the
# 1-based position of IMPL when all implementations are ordered by their
# %DATA entry for the I'th dictionary, smallest first.
#
# The candidates come from `keys %DATA', whose order is not stable from one
# run to the next, so equal timings are ordered by implementation name to
# keep the ranking reproducible.
our %RANK;
for my $i (0 .. $#DICT) {
  my @order =
    sort { $DATA{$a}->[$i] <=> $DATA{$b}->[$i] or $a cmp $b } keys %DATA;
  my $k = 0;
  for my $impl (@order) { push $RANK{$impl}->@*, ++$k; }
}

# Write `sumtab.tex': a LaTeX `tabular' with one row per implementation and,
# for each dictionary, a pair of columns giving a million times the %DATA
# entry (rounded to an integer) and the %RANK entry in parentheses.  Rows are
# grouped by family, each group introduced by an `\hlx{vhv}' line.
{ open my $f, ">", "sumtab.tex";
  printf $f "\\begin{tabular}[C]{l*%d{r\@{ }r}} \\hlx*{hv}\n",
    scalar @DICT;
  print $f "  \\textbf{Impl}\n";
  for my $dict (@DICT) {
    print $f "    & \\multicolumn{2}{c}{\\textbf{\\textsf{$dict}}}\n";
  }
  print $f "    \\\\\n";

  # row(IMPL)
  #
  # Emit the table row for the implementation named IMPL to the table
  # file opened above.
  sub row ($) {
    my ($impl) = @_;
    print $f "  \\textsf{$impl}\n";
    for my $i (0 .. $#DICT) {
      printf $f "    & %.0f & (%d)\n",
        1e6*$DATA{$impl}->[$i],
        $RANK{$impl}->[$i];
    }
    print $f "    \\\\\n";
  }

  for my $base (sort keys %IMPL) {
    print $f "    \\hlx{vhv}\n";
    if (!defined $IMPL{$base}) { row $base; }
    else { for my $var ($IMPL{$base}->@*) { row "$base-$var"; } }
  }
  print $f "  \\hlx*{vh}\n";
  print $f "\\end{tabular}\n";
  close $f;
}

# Report each dictionary's name, word count and byte count on stdout.
printf "%-7s %d %d\n", $_, $WC{$_}, $BYTES{$_} for @DICT;

# Report peak heap usage from the `massif.<impl>.<dict>' files.
#
# An implementation is abandoned as soon as one of its files is missing.
# Within a file, a `snapshot=' line begins a new snapshot; the
# `mem_heap_B=' and `mem_heap_extra_B=' values of the snapshot whose
# `heap_tree=' value is `peak' are kept.  For each file with such a snapshot,
# print the implementation, the dictionary, the peak heap size less the
# dictionary's byte count, per word, and the peak extra size per word.
IMPL: for my $impl (@IMPL) {
  DICT: for my $dict (@DICT) {
    my $file = "massif.$impl.$dict";
    -e $file or next IMPL;
    my $peak_alloc; my $peak_waste;

    open my $f, "<", $file;
    # State of the snapshot currently being read.  $snap stays undefined
    # until a `heap_tree=' line is seen, so test it for definedness before
    # comparing: a snapshot lacking that line must not provoke a warning.
    my $alloc; my $waste; my $snap;
    while (<$f>) {
      chomp;
      if (/^snapshot=/) {
        # A new snapshot begins: commit the previous one if it was the peak.
        if (defined $snap && $snap eq "peak")
          { $peak_alloc = $alloc; $peak_waste = $waste; }
        $alloc = $waste = $snap = undef;
      } elsif (/^mem_heap_B=(\d+)$/) { $alloc = $1; }
      elsif (/^mem_heap_extra_B=(\d+)$/) { $waste = $1; }
      elsif (/^heap_tree=(\w+)$/) { $snap = $1; }
    }
    # The final snapshot has no following `snapshot=' line to commit it.
    if (defined $snap && $snap eq "peak")
      { $peak_alloc = $alloc; $peak_waste = $waste; }
    close $f;
    defined $peak_alloc or next DICT;

    printf "%-15s %-7s %.3f %.3f\n",
      $impl, $dict,
      ($peak_alloc - $BYTES{$dict})/$WC{$dict},
      $peak_waste/$WC{$dict};
  }
}