chiark - git - mdw - wordchain/blob - summarize

   1 #! /usr/bin/perl -w
   2
   3 use autodie;
   4 use strict;
   5
   6 our %IMPL =
   7   ("c++"        => ["map", "uomap"],
   8    "golang"     => undef,
   9    "libavl"     => ["avl", "pavl", "rtavl", "tavl",
  10                     "rb", "prb", "rtrb", "trb"],
  11    "lisp"       => ["ccl", "clisp", "cmucl", "ecl", "sbcl"],
  12    "mLib"       => ["sym"],
  13    "perl"       => undef,
  14    "python"     => undef,
  15    "qptrie"     => ["qp-fanf", "qp-mdw", "qp-list",
  16                     "fn-fanf", "fn-mdw", "fn-list"],
  17    "rust"       => ["btree", "hash"],
  18    "sgt"        => ["tree234"],
  19    "xyla"       => ["avl", "rb", "splay", "treap"]);
  20 our @IMPL;
  21 for my $base (sort keys %IMPL) {
  22   if (!defined $IMPL{$base}) { push @IMPL, $base; }
  23   else { for my $var ($IMPL{$base}->@*) { push @IMPL, "$base-$var"; } }
  24 }
  25
  26 our @DICT = ("small", "medium", "large", "huge", "insane");
  27
  28 our %WC;
  29 our %BYTES;
  30 for my $dict (@DICT) {
  31   open my $f, "<", "DICT.$dict";
  32   my $wc = 0; my $bytes = 0;
  33   while (<$f>) { chomp; $wc++; $bytes += length; }
  34   $WC{$dict} = $wc; $BYTES{$dict} = $bytes;
  35   close $f;
  36 }
  37
  38 sub closeish ($$) {
  39   my ($x, $y) = @_;
  40
  41   return abs($x - $y) < 1e-3;
  42 }
  43
  44 our %DATA;
  45 our %SLOW;
  46 for my $dict (@DICT) {
  47   open my $f, "<", "DATA.$dict";
  48   my %seen = map { $_ => 0 } @IMPL;
  49   my $wc = $WC{$dict};
  50   while (<$f>) {
  51     my ($impl, $data, $stat) = m{ ^ (\S+)
  52                                     ((?: \s+ [0-9.]+)*) \s* \;
  53                                     ((?: \s+ [0-9.]+){6}) \s* $ }x
  54       or die "bad line";
  55     my @data = sort { $a <=> $b } map { $_ + 0.0 } split ' ', $data;
  56     my @stat = map { $_ + 0.0 } split ' ', $stat;
  57     my $i = @data/2;
  58     my $mid = @data%2 ? ($data[$i] + $data[$i + 1])/2 : $data[$i];
  59     my $sum = 0; for my $x (@data) { $sum += $x; }
  60     my $avg = $sum/@data;
  61     closeish($mid, $stat[2]) && closeish($avg, $stat[3])
  62       or die "stat miscalculation";
  63     if ($seen{$impl}) { die "duplicate entry for `$impl'"; }
  64     elsif (exists $seen{$impl}) { $seen{$impl} = 1; }
  65     else { die "unknown implementation `$impl'" }
  66     my $unit = 1000000*$mid/$wc;
  67     if ($unit >= 1200) { $SLOW{$impl} = 1; }
  68     push $DATA{$impl}->@*, $mid/$wc;
  69   }
  70   close $f;
  71   for my $impl (@IMPL)
  72     { die "missing implementation `$impl'" unless $seen{$impl}; }
  73 }
  74
  75 { open my $f, ">summary.dat";
  76   printf $f "%-15s ", "impl";
  77   for my $dict (@DICT) { printf $f " %7s", $dict }
  78   print $f "\n";
  79
  80   IMPL: for my $impl (@IMPL) {
  81     next IMPL if $SLOW{$impl};
  82     printf $f  "%-15s ", $impl;
  83     for my $t ($DATA{$impl}->@*) { printf $f " %7.2f", 1e6*$t; }
  84     print $f "\n";
  85   }
  86   close $f;
  87 }
  88
  89 our %RANK;
  90 for (my $i = 0; $i < @DICT; $i++) {
  91   my @pairs;
  92   for my $impl (keys %DATA) { push @pairs, [$impl, $DATA{$impl}->[$i]]; }
  93   my $k = 0;
  94   for my $pair (sort { $a->[1] <=> $b->[1] } @pairs)
  95     { push $RANK{$pair->[0]}->@*, ++$k; }
  96 }
  97
  98 { open my $f, ">sumtab.tex";
  99   printf $f "\\begin{tabular}[C]{l*%d{r\@{ }r}} \\hlx*{hv}\n",
 100     scalar @DICT;
 101   print $f "  \\textbf{Impl}\n";
 102   for my $dict (@DICT) {
 103     print $f "    & \\multicolumn{2}{c}{\\textbf{\\textsf{$dict}}}\n";
 104   }
 105   print $f "    \\\\\n";
 106
 107   sub row ($) {
 108     my ($impl) = @_;
 109     print $f "  \\textsf{$impl}\n";
 110     for (my $i = 0; $i < @DICT; $i++) {
 111       printf $f "    & %.0f & (%d)\n",
 112         1e6*$DATA{$impl}->[$i],
 113         $RANK{$impl}->[$i];
 114     }
 115     print $f "    \\\\\n";
 116   }
 117
 118   for my $base (sort keys %IMPL) {
 119     print $f "    \\hlx{vhv}\n";
 120     if (!defined $IMPL{$base}) { row $base; }
 121     else { for my $var ($IMPL{$base}->@*) { row "$base-$var"; } }
 122   }
 123   print $f "  \\hlx*{vh}\n";
 124   print $f "\\end{tabular}\n";
 125   close $f;
 126 }
 127
 128 for my $dict (@DICT) {
 129   printf "%-7s %d %d\n", $dict, $WC{$dict}, $BYTES{$dict};
 130 }
 131
 132 IMPL: for my $impl (@IMPL) {
 133   DICT: for my $dict (@DICT) {
 134     -e "massif.$impl.$dict" or next IMPL;
 135     my $peak_alloc; my $peak_waste;
 136
 137     open my $f, "<", "massif.$impl.$dict";
 138     my $alloc; my $waste; my $snap = "unset";
 139     while (<$f>) {
 140       chomp;
 141       if (/^snapshot=/) {
 142         if ($snap eq "peak") { $peak_alloc = $alloc; $peak_waste = $waste; }
 143         $alloc = $waste = $snap = undef;
 144       } elsif (/^mem_heap_B=(\d+)$/) { $alloc = $1; }
 145       elsif (/^mem_heap_extra_B=(\d+)$/) { $waste = $1; }
 146       elsif (/^heap_tree=(\w+)$/) { $snap = $1; }
 147     }
 148     if ($snap eq "peak") { $peak_alloc = $alloc; $peak_waste = $waste; }
 149     close $f;
 150     defined $peak_alloc or next DICT;
 151
 152     printf "%-15s %-7s %.3f %.3f\n",
 153       $impl, $dict,
 154       ($peak_alloc - $BYTES{$dict})/$WC{$dict},
 155       $peak_waste/$WC{$dict};
 156   }
 157 }