X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=dgit.git;a=blobdiff_plain;f=i18n-diff-auditor;h=6868ca7d2c8df16f3b6fee1169a3b7c04903e06c;hp=fbf57757b265e1e73f44d42a21a88af13b76786c;hb=0c244082ddb4b2443ccfbc1c438ead4fda1c33ca;hpb=f64fea4c6110991979cc1bbaaabca7fc425be072 diff --git a/i18n-diff-auditor b/i18n-diff-auditor index fbf57757..6868ca7d 100755 --- a/i18n-diff-auditor +++ b/i18n-diff-auditor @@ -1,4 +1,53 @@ #!/usr/bin/perl -w +# +# i18n-diff-auditor +# Copyright (C)2018 Ian Jackson +# GPLv3+, NO WARRANTY, see below. +# +# +# Usage: +# something like this +# git-log -n1 -p | ./i18n-diff-auditor -D 2>&1 |less -j10 +/'^!.*' +# +# -D is for debug. Currently only one level. +# +# Output is the relevant diff hunks, with each line prepended with +# space for ok lines and ! for questionable ones, and with relevant +# diff lines prepended with lines starting !! (and lines starting # +# for debug output), so ovrall: +# +# !! reasoning for subsequent questionable diff line(s) +# !+ diff line found to be questionable +# !- diff line found to be questionable +# @@@ etc. diff furniture +# + diff line checked and ok +# - diff line checked and ok +# # debug output (normally precedes relevant output) +# +# Changes are generally marked as ok if they correspond to a known +# intended code change pattern. (That includes changing error calls +# to different error calls.) If they don't correspond to any known +# pattern, they are "questionable" and the first thing that doesn't +# match the most common pattern is reported. +# +# Might be useful for projects other than dgit, provided it uses +# the same gettext aliases (__ f_ i_) and similar error calls +# (die, confess, fail). +# +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + use strict; use Carp; use Data::Dumper; @@ -107,11 +156,22 @@ our ($ihunkhead, $ihunkend); our ($ichunkstart, $ichunkend); our ($before, $after); -sub is_string ($) { $_[0]{T} =~ m/heredoc|string/; }; +sub is_string ($) { $_[0]{T} =~ m/heredoc|string/; } +sub is_trans ($) { grep { $_[0]{E} eq $_ } qw(__ f_ i_); } + +sub qp ($) { + my ($p) = @_; + $p =~ s{\\}{\\\\}g; + $p =~ s{\'}{\\'}g; + $p =~ s{\n}{\\n}g; + $p =~ s{\t}{\\t}g; + return "'$p'"; +}; sub semiparse ($) { ($_) = @_; my @o; + #my $in = $_; # entries contain # T type # E exact input text (does not contain here doc contents) @@ -127,12 +187,18 @@ sub semiparse ($) { $q ||= '"'; push @o, { T => 'heredoc', Q => $q, Delim => $d, E => $&, P => "<<$q$d$q" }; - s{^ - ( .* \n ) - ( (?: (?! $d) .* \n )* ) - $d \n - }{ $1 }x or die "missing end of here doc $d\n"; - $o[$#o]{V} = $2; + if (s{^ + ( .* \n ) + ( (?: (?! $d \n ) .* \n )*? ) + $d \n + }{ $1 }xe) { + $o[$#o]{V} = $2; + } else { + m{^.*\n} or confess; + $_ = $&; + $o[$#o]{V} = $'; + $o[$#o]{Invented} = 1; + } } elsif (s{^ (["'])( (?: [^\\'"] | \\ [^"'] | (?! \1 ) [^"] @@ -143,9 +209,9 @@ sub semiparse ($) { Q => $q, V => $v}; } elsif (s{^$perlop_re|^\;}{}) { push @o, { T => 'op', E => $&, P => $& }; - } elsif (s/[[{(]//) { + } elsif (s/^[[{(]//) { push @o, { T => 'bra', E => $&, P => $& }; - } elsif (s/[]})]//) { + } elsif (s/^[]})]//) { push @o, { T => 'ket', E => $&, P => $& }; } elsif (s/^( [\$\@\%] )( \{ )//x) { push @o, { T => 'deref', E => $1, P => $1 }, @@ -154,17 +220,20 @@ sub semiparse ($) { push @o, { T => 'specvar', E => $&, P => $& }; } elsif (!length) { last; + } elsif (s{^\#.*\n}{}) { } else { m{^.{0,10}}; die "cannot tokenise \`$&'"; } } - for (my $i=@o-2; $i>0; --$i) { + for (my $i=0; $i+2 < @o; $i++) { next unless $o[$i+1]{E} eq '.'; my @inputs = @o[$i, $i+2]; + #print STDERR Dumper(\@inputs); next if grep { !is_string($_) } @inputs; my $q = $inputs[0]{Q}; next if grep { $_->{Q} ne $q } @inputs; + next if grep { $_->{Invented} } @inputs; my $new = { T => 'joinedstrings', E => (join '.', map { $_->{E} } @inputs), P => (join '.', map { $_->{P} } @inputs), @@ -172,8 +241,10 @@ sub semiparse ($) { Q => $q, }; @o = (@o[0..$i-1], $new, @o[$i+3..$#o]); + $i--; # counteracts $i++ } debug $ichunkstart, "semiparsed: ".join ' ', map { $_->{P} } @o; + # debug $ichunkstart, "semiparsed V: ".join ' ', map { defined $_->{V} ? ">$_->{V}<" : '-' } @o; return @o; } @@ -181,8 +252,8 @@ our @analysed_x; our @analysed_y; sub analyse_chunk_core () { + $before //= ''; die "plain deletion\n" unless defined $after; - die "plain insertion\n" unless defined $before; my @xs = semiparse $before; my @ys = semiparse $after; @analysed_x = @analysed_y = (); @@ -197,6 +268,7 @@ sub analyse_chunk_core () { my $next_x = sub { $next_something->(\@xs, \@analysed_x, \$x, 'before'); }; my $next_y = sub { $next_something->(\@ys, \@analysed_y, \$y, 'after' ); }; our @y_expect_suffix = (); + ANALYSE: for (;;) { while (my $e = shift @y_expect_suffix) { $next_y->(); @@ -207,20 +279,49 @@ sub analyse_chunk_core () { $next_x->(); $next_y->(); next if $x->{E} eq $y->{E}; + next if $x->{E} eq 'sprintf' and $y->{E} eq 'f_'; + next if $x->{E} eq 'die' and $y->{E} eq 'confess'; + next if $x->{E} eq 'die' and $y->{E} eq 'fail'; + foreach my $with_fh (qw(0 1)) { + next unless $x->{E} eq 'printf'; + next unless $y->{E} eq 'print'; + next unless @xs >= $with_fh; + next unless @ys > $with_fh; + if ($with_fh) { + next unless $xs[0]{E} eq $ys[0]{E}; + next unless + $xs[0]{E} =~ m{^[A-Z]+$} or + $xs[0]{T} eq 'ident' && $xs[0]{E} =~ m{^\$}; + } + next unless $ys[$with_fh]{E} eq 'f_'; + # yay! + $next_x->() if $with_fh; + $next_y->() if $with_fh; + $next_y->(); # f_ + next ANALYSE; + } if ($y->{E} eq '+' and @ys >= 3 and $ys[0]{E} eq '(' - and ($ys[1]{E} eq '__' or $ys[2]{E} eq 'f_')) { + and is_trans($ys[1])) { $next_y->(); # ( - $next_y->(); # __ f_ + $next_y->(); # __ f_ i_ + @y_expect_suffix = ')'; + } elsif ($y->{E} eq '(' + and @ys > 2 + and is_trans($ys[0]) + and @analysed_y + and (grep { $_ eq $analysed_y[-1] } (qw( => [ { ? : . ), + '(', ',') )) { + $next_y->(); # __ f_ i_ @y_expect_suffix = ')'; } my $string_changed; my $ye = $y->{E}; - if ($ye eq '__' or $ye eq 'f_') { + if (is_trans($y)) { $next_y->(); die "__ on non-string $y->{P}\n" unless is_string($y); - die "__ on was non-string $y->{P}\n" unless is_string($x); + die "__ on was non-string $x->{P}\n" unless is_string($x); if ($y->{Q} ne "'") { die "var subst in new string\n" if $y->{V} =~ m{(?{V}; - die "no percent in _f string\n" unless $fmt =~ m{\%}; + die "no percent in f_ string\n" unless $fmt =~ m{\%}; next unless $string_changed; die "f_ old string '-quoted\n" if $x->{Q} ne '"'; my $xs = $x->{V}; @@ -247,17 +352,20 @@ sub analyse_chunk_core () { my ($lit, $what) = @_; my $xl = substr($xs, 0, length($lit)); if ($xl ne $lit) { - debug $ichunkstart, "not exactly x: ..\"$xs\""; - debug $ichunkstart, "not exactly y: $lit"; + debug $ichunkstart, "not exactly x: ..".qp($xs); + debug $ichunkstart, "not exactly y: ".qp($lit); my $next = @ys ? $ys[0]{P} : '(end)'; die "string contents mismatch near $what before $next\n"; } $xs = substr($xs, length($lit)); }; for (;;) { + #print STDERR Dumper($fmt, $xs, \@xs, @ys); if ($fmt !~ m{\%[^\%]}) { $exactly->($fmt, '(tail)'); $fmt = ''; + die "text deleted from end of string: ".qp($xs)."\n" + if length $xs; last; } $exactly->($`, '(literal)'); @@ -266,6 +374,15 @@ sub analyse_chunk_core () { elsif ($& ne '%s') { die "unhandled %-subst $&\n"; } $next_y->(); die "expected comma, got $y->{P}\n" unless $y->{E} eq ','; + if (!length $fmt and + !length $xs and + @xs and + $xs[0]{E} eq '.') { + # X has "" . + # Y has "%s" [other args] , + $next_x->(); # eat the '.' + next; + } if ($xs =~ m{^\@}) { $next_y->(); die "\@... => not string" unless is_string($y); @@ -291,7 +408,11 @@ sub analyse_chunk_core () { pop @analysed_y; last; } - $xs =~ s{^\s+}{}; + $xs =~ s{^\s+}{} if $bras; + if (is_string($y) and $y->{Q} eq '"') { + $exactly->($y->{V}, $y->{P}); + next; + } $exactly->($y->{E}, $y->{P}); if ($y->{T} eq 'bra' or $y->{E} eq '?') { $bras++; @@ -313,6 +434,8 @@ sub analyse_chunk () { return unless length $@; if ($@ =~ m{^missing end of here doc (\S+)\n}) { # fudge this + # (this never happens now, but in the future we might + # want this code again eg to try adding to the chunk) $before .= "\n$1\n"; $after .= "\n$1\n"; next;