#!/usr/bin/perl -w
+#
+# i18n-diff-auditor
+# Copyright (C)2018 Ian Jackson
+# GPLv3+, NO WARRANTY, see below.
+#
+#
+# Usage:
+# something like this
+# git-log -n1 -p | ./i18n-diff-auditor -D 2>&1 |less -j10 +/'^!.*'
+#
+# -D is for debug. Currently only one level.
+#
+# Output is the relevant diff hunks, with each line prepended with
+# space for ok lines and ! for questionable ones, and with relevant
+# diff lines prepended with lines starting !! (and lines starting #
+# for debug output), so ovrall:
+#
+# !! <message> reasoning for subsequent questionable diff line(s)
+# !+ diff line found to be questionable
+# !- diff line found to be questionable
+# @@@ etc. diff furniture
+# + diff line checked and ok
+# - diff line checked and ok
+# # <stuff> debug output (normally precedes relevant output)
+#
+# Changes are generally marked as ok if they correspond to a known
+# intended code change pattern. (That includes changing error calls
+# to different error calls.) If they don't correspond to any known
+# pattern, they are "questionable" and the first thing that doesn't
+# match the most common pattern is reported.
+#
+# Might be useful for projects other than dgit, provided it uses
+# the same gettext aliases (__ f_ i_) and similar error calls
+# (die, confess, fail).
+#
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
use strict;
use Carp;
use Data::Dumper;
sub is_string ($) { $_[0]{T} =~ m/heredoc|string/; }
sub is_trans ($) { grep { $_[0]{E} eq $_ } qw(__ f_ i_); }
+sub qp ($) {
+ my ($p) = @_;
+ $p =~ s{\\}{\\\\}g;
+ $p =~ s{\'}{\\'}g;
+ $p =~ s{\n}{\\n}g;
+ $p =~ s{\t}{\\t}g;
+ return "'$p'";
+};
+
sub semiparse ($) {
($_) = @_;
my @o;
+ #my $in = $_;
# entries contain
# T type
# E exact input text (does not contain here doc contents)
$q ||= '"';
push @o, { T => 'heredoc', Q => $q, Delim => $d,
E => $&, P => "<<$q$d$q" };
- s{^
- ( .* \n )
- ( (?: (?! $d) .* \n )* )
- $d \n
- }{ $1 }x or die "missing end of here doc $d\n";
- $o[$#o]{V} = $2;
+ if (s{^
+ ( .* \n )
+ ( (?: (?! $d \n ) .* \n )*? )
+ $d \n
+ }{ $1 }xe) {
+ $o[$#o]{V} = $2;
+ } else {
+ m{^.*\n} or confess;
+ $_ = $&;
+ $o[$#o]{V} = $';
+ $o[$#o]{Invented} = 1;
+ }
} elsif (s{^ (["'])( (?: [^\\'"]
| \\ [^"']
| (?! \1 ) [^"]
Q => $q, V => $v};
} elsif (s{^$perlop_re|^\;}{}) {
push @o, { T => 'op', E => $&, P => $& };
- } elsif (s/[[{(]//) {
+ } elsif (s/^[[{(]//) {
push @o, { T => 'bra', E => $&, P => $& };
- } elsif (s/[]})]//) {
+ } elsif (s/^[]})]//) {
push @o, { T => 'ket', E => $&, P => $& };
} elsif (s/^( [\$\@\%] )( \{ )//x) {
push @o, { T => 'deref', E => $1, P => $1 },
push @o, { T => 'specvar', E => $&, P => $& };
} elsif (!length) {
last;
+ } elsif (s{^\#.*\n}{}) {
} else {
m{^.{0,10}};
die "cannot tokenise \`$&'";
next if grep { !is_string($_) } @inputs;
my $q = $inputs[0]{Q};
next if grep { $_->{Q} ne $q } @inputs;
+ next if grep { $_->{Invented} } @inputs;
my $new = { T => 'joinedstrings',
E => (join '.', map { $_->{E} } @inputs),
P => (join '.', map { $_->{P} } @inputs),
$i--; # counteracts $i++
}
debug $ichunkstart, "semiparsed: ".join ' ', map { $_->{P} } @o;
+ # debug $ichunkstart, "semiparsed V: ".join ' ', map { defined $_->{V} ? ">$_->{V}<" : '-' } @o;
return @o;
}
our @analysed_y;
sub analyse_chunk_core () {
+ $before //= '';
die "plain deletion\n" unless defined $after;
- die "plain insertion\n" unless defined $before;
my @xs = semiparse $before;
my @ys = semiparse $after;
@analysed_x = @analysed_y = ();
my $next_x = sub { $next_something->(\@xs, \@analysed_x, \$x, 'before'); };
my $next_y = sub { $next_something->(\@ys, \@analysed_y, \$y, 'after' ); };
our @y_expect_suffix = ();
+ ANALYSE:
for (;;) {
while (my $e = shift @y_expect_suffix) {
$next_y->();
next if $x->{E} eq $y->{E};
next if $x->{E} eq 'sprintf' and $y->{E} eq 'f_';
next if $x->{E} eq 'die' and $y->{E} eq 'confess';
+ next if $x->{E} eq 'die' and $y->{E} eq 'fail';
+ foreach my $with_fh (qw(0 1)) {
+ next unless $x->{E} eq 'printf';
+ next unless $y->{E} eq 'print';
+ next unless @xs >= $with_fh;
+ next unless @ys > $with_fh;
+ if ($with_fh) {
+ next unless $xs[0]{E} eq $ys[0]{E};
+ next unless
+ $xs[0]{E} =~ m{^[A-Z]+$} or
+ $xs[0]{T} eq 'ident' && $xs[0]{E} =~ m{^\$};
+ }
+ next unless $ys[$with_fh]{E} eq 'f_';
+ # yay!
+ $next_x->() if $with_fh;
+ $next_y->() if $with_fh;
+ $next_y->(); # f_
+ next ANALYSE;
+ }
if ($y->{E} eq '+'
and @ys >= 3
and $ys[0]{E} eq '('
and is_trans($ys[1])) {
$next_y->(); # (
- $next_y->(); # __ f_
+ $next_y->(); # __ f_ i_
+ @y_expect_suffix = ')';
+ } elsif ($y->{E} eq '('
+ and @ys > 2
+ and is_trans($ys[0])
+ and @analysed_y
+ and (grep { $_ eq $analysed_y[-1] } (qw( => [ { ? : . ),
+ '(', ',') )) {
+ $next_y->(); # __ f_ i_
@y_expect_suffix = ')';
}
my $string_changed;
if (is_trans($y)) {
$next_y->();
die "__ on non-string $y->{P}\n" unless is_string($y);
- die "__ on was non-string $y->{P}\n" unless is_string($x);
+ die "__ on was non-string $x->{P}\n" unless is_string($x);
if ($y->{Q} ne "'") {
die "var subst in new string\n"
if $y->{V} =~ m{(?<!\\) [\$\@]};
}
if ($ye eq 'f_') {
my $fmt = $y->{V};
- die "no percent in _f string\n" unless $fmt =~ m{\%};
+ die "no percent in f_ string\n" unless $fmt =~ m{\%};
next unless $string_changed;
die "f_ old string '-quoted\n" if $x->{Q} ne '"';
my $xs = $x->{V};
my ($lit, $what) = @_;
my $xl = substr($xs, 0, length($lit));
if ($xl ne $lit) {
- debug $ichunkstart, "not exactly x: \"..$xs\"";
- debug $ichunkstart, "not exactly y: $lit";
+ debug $ichunkstart, "not exactly x: ..".qp($xs);
+ debug $ichunkstart, "not exactly y: ".qp($lit);
my $next = @ys ? $ys[0]{P} : '(end)';
die "string contents mismatch near $what before $next\n";
}
$xs = substr($xs, length($lit));
};
for (;;) {
+ #print STDERR Dumper($fmt, $xs, \@xs, @ys);
if ($fmt !~ m{\%[^\%]}) {
$exactly->($fmt, '(tail)');
$fmt = '';
+ die "text deleted from end of string: ".qp($xs)."\n"
+ if length $xs;
last;
}
$exactly->($`, '(literal)');
pop @analysed_y;
last;
}
- $xs =~ s{^\s+}{};
+ $xs =~ s{^\s+}{} if $bras;
+ if (is_string($y) and $y->{Q} eq '"') {
+ $exactly->($y->{V}, $y->{P});
+ next;
+ }
$exactly->($y->{E}, $y->{P});
if ($y->{T} eq 'bra' or $y->{E} eq '?') {
$bras++;
return unless length $@;
if ($@ =~ m{^missing end of here doc (\S+)\n}) {
# fudge this
+ # (this never happens now, but in the future we might
+ # want this code again eg to try adding to the chunk)
$before .= "\n$1\n";
$after .= "\n$1\n";
next;