chiark - git - mdw - wordchain/blob - summarize

   1 #! /usr/bin/perl -w
   2
   3 use autodie;
   4 use strict;
   5
   6 our %IMPL =
   7   ("c++"        => ["map", "uomap"],
   8    "golang"     => undef,
   9    "libavl"     => ["avl", "pavl", "rtavl", "tavl",
  10                     "rb", "prb", "rtrb", "trb"],
  11    "lisp"       => ["ccl", "clisp", "cmucl", "ecl", "sbcl"],
  12    "mLib"       => ["sym"],
  13    "perl"       => undef,
  14    "posix"      => ["hsearch1", "hsearchg", "tsearch"],
  15    "python"     => undef,
  16    "qptrie"     => ["qp-fanf", "qp-mdw", "qp-list",
  17                     "fn-fanf", "fn-mdw", "fn-list"],
  18    "rust"       => ["btree", "hash"],
  19    "sgt"        => ["tree234"],
  20    "xyla"       => ["avl", "rb", "splay", "treap"]);
  21 our @IMPL;
  22 for my $base (sort keys %IMPL) {
  23   if (!defined $IMPL{$base}) { push @IMPL, $base; }
  24   else { for my $var ($IMPL{$base}->@*) { push @IMPL, "$base-$var"; } }
  25 }
  26
  27 our @DICT = ("small", "medium", "large", "huge", "insane");
  28
  29 our %WC;
  30 our %BYTES;
  31 for my $dict (@DICT) {
  32   open my $f, "<", "DICT.$dict";
  33   my $wc = 0; my $bytes = 0;
  34   while (<$f>) { chomp; $wc++; $bytes += length; }
  35   $WC{$dict} = $wc; $BYTES{$dict} = $bytes;
  36   close $f;
  37 }
  38
  39 sub closeish ($$) {
  40   my ($x, $y) = @_;
  41
  42   return abs($x - $y) < 1e-3;
  43 }
  44
  45 our %DATA;
  46 our %SLOW;
  47 for my $dict (@DICT) {
  48   open my $f, "<", "DATA.$dict";
  49   my %seen = map { $_ => 0 } @IMPL;
  50   my $wc = $WC{$dict};
  51   while (<$f>) {
  52     my ($impl, $data, $stat) = m{ ^ (\S+)
  53                                     ((?: \s+ [0-9.]+)*) \s* \;
  54                                     ((?: \s+ [0-9.]+){6}) \s* $ }x
  55       or die "bad line";
  56     my @data = sort { $a <=> $b } map { $_ + 0.0 } split ' ', $data;
  57     my @stat = map { $_ + 0.0 } split ' ', $stat;
  58     my $i = @data/2;
  59     my $mid = @data%2 ? ($data[$i] + $data[$i + 1])/2 : $data[$i];
  60     my $sum = 0; for my $x (@data) { $sum += $x; }
  61     my $avg = $sum/@data;
  62     closeish($mid, $stat[2]) && closeish($avg, $stat[3])
  63       or die "stat miscalculation";
  64     if ($seen{$impl}) { die "duplicate entry for `$impl'"; }
  65     elsif (exists $seen{$impl}) { $seen{$impl} = 1; }
  66     else { die "unknown implementation `$impl'" }
  67     my $unit = 1000000*$mid/$wc;
  68     if ($unit >= 1200) { $SLOW{$impl} = 1; }
  69     push $DATA{$impl}->@*, $mid/$wc;
  70   }
  71   close $f;
  72   for my $impl (@IMPL)
  73     { die "missing implementation `$impl'" unless $seen{$impl}; }
  74 }
  75
  76 { open my $f, ">summary.dat";
  77   printf $f "%-15s ", "impl";
  78   for my $dict (@DICT) { printf $f " %7s", $dict }
  79   print $f "\n";
  80
  81   IMPL: for my $impl (@IMPL) {
  82     next IMPL if $SLOW{$impl};
  83     printf $f  "%-15s ", $impl;
  84     for my $t ($DATA{$impl}->@*) { printf $f " %7.2f", 1e6*$t; }
  85     print $f "\n";
  86   }
  87   close $f;
  88 }
  89
  90 our %RANK;
  91 for (my $i = 0; $i < @DICT; $i++) {
  92   my @pairs;
  93   for my $impl (keys %DATA) { push @pairs, [$impl, $DATA{$impl}->[$i]]; }
  94   my $k = 0;
  95   for my $pair (sort { $a->[1] <=> $b->[1] } @pairs)
  96     { push $RANK{$pair->[0]}->@*, ++$k; }
  97 }
  98
  99 { open my $f, ">sumtab.tex";
 100   printf $f "\\begin{tabular}[C]{l*%d{r\@{ }r}} \\hlx*{hv}\n",
 101     scalar @DICT;
 102   print $f "  \\textbf{Impl}\n";
 103   for my $dict (@DICT) {
 104     print $f "    & \\multicolumn{2}{c}{\\textbf{\\textsf{$dict}}}\n";
 105   }
 106   print $f "    \\\\\n";
 107
 108   sub row ($) {
 109     my ($impl) = @_;
 110     print $f "  \\textsf{$impl}\n";
 111     for (my $i = 0; $i < @DICT; $i++) {
 112       printf $f "    & %.0f & (%d)\n",
 113         1e6*$DATA{$impl}->[$i],
 114         $RANK{$impl}->[$i];
 115     }
 116     print $f "    \\\\\n";
 117   }
 118
 119   for my $base (sort keys %IMPL) {
 120     print $f "    \\hlx{vhv}\n";
 121     if (!defined $IMPL{$base}) { row $base; }
 122     else { for my $var ($IMPL{$base}->@*) { row "$base-$var"; } }
 123   }
 124   print $f "  \\hlx*{vh}\n";
 125   print $f "\\end{tabular}\n";
 126   close $f;
 127 }
 128
 129 for my $dict (@DICT) {
 130   printf "%-7s %d %d\n", $dict, $WC{$dict}, $BYTES{$dict};
 131 }
 132
 133 IMPL: for my $impl (@IMPL) {
 134   DICT: for my $dict (@DICT) {
 135     -e "massif.$impl.$dict" or next IMPL;
 136     my $peak_alloc; my $peak_waste;
 137
 138     open my $f, "<", "massif.$impl.$dict";
 139     my $alloc; my $waste; my $snap = "unset";
 140     while (<$f>) {
 141       chomp;
 142       if (/^snapshot=/) {
 143         if ($snap eq "peak") { $peak_alloc = $alloc; $peak_waste = $waste; }
 144         $alloc = $waste = $snap = undef;
 145       } elsif (/^mem_heap_B=(\d+)$/) { $alloc = $1; }
 146       elsif (/^mem_heap_extra_B=(\d+)$/) { $waste = $1; }
 147       elsif (/^heap_tree=(\w+)$/) { $snap = $1; }
 148     }
 149     if ($snap eq "peak") { $peak_alloc = $alloc; $peak_waste = $waste; }
 150     close $f;
 151     defined $peak_alloc or next DICT;
 152
 153     printf "%-15s %-7s %.3f %.3f\n",
 154       $impl, $dict,
 155       ($peak_alloc - $BYTES{$dict})/$WC{$dict},
 156       $peak_waste/$WC{$dict};
 157   }
 158 }