4 # Copyright (C)2018 Ian Jackson
5 # GPLv3+, NO WARRANTY, see below.
10 # git-log -n1 -p | ./i18n-diff-auditor -D 2>&1 |less -j10 +/'^!.*'
12 # -D is for debug. Currently only one level.
14 # Output is the relevant diff hunks, with each line prepended with
15 # space for ok lines and ! for questionable ones, and with relevant
16 # diff lines prepended with lines starting !! (and lines starting #
17 # for debug output), so ovrall:
19 # !! <message> reasoning for subsequent questionable diff line(s)
20 # !+ diff line found to be questionable
21 # !- diff line found to be questionable
22 # @@@ etc. diff furniture
23 # + diff line checked and ok
24 # - diff line checked and ok
25 # # <stuff> debug output (normally precedes relevant output)
27 # Changes are generally marked as ok if they correspond to a known
28 # intended code change pattern. (That includes changing error calls
29 # to different error calls.) If they don't correspond to any known
30 # pattern, they are "questionable" and the first thing that doesn't
31 # match the most common pattern is reported.
33 # Might be useful for projects other than dgit, provided it uses
34 # the same gettext aliases (__ f_ i_) and similar error calls
35 # (die, confess, fail).
38 # This program is free software: you can redistribute it and/or modify
39 # it under the terms of the GNU General Public License as published by
40 # the Free Software Foundation, either version 3 of the License, or
41 # (at your option) any later version.
43 # This program is distributed in the hope that it will be useful,
44 # but WITHOUT ANY WARRANTY; without even the implied warranty of
45 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
46 # GNU General Public License for more details.
48 # You should have received a copy of the GNU General Public License
49 # along with this program. If not, see <http://www.gnu.org/licenses/>.
57 GetOptions("debug|D+" => \$debug
63 push @{ $debug[$i] }, $s if $debug;
67 unshift @d, "# dummy line to make line 1 index 1 in \@d\n";
69 our $i_last_l_ok = -1;
70 our $count_i_last_l_ok;
75 if ($i == $i_last_l_ok) {
76 confess $i if $count_i_last_l_ok++ > 50;
78 $count_i_last_l_ok = 0;
82 return unless $i < @d;
84 #print STDERR "L $i\n";
90 confess $i unless l_ok $i;
93 our $perlop_text = <<'END'; # c&p from man perlop
94 left terms and list operators (leftward)
98 right ! ~ \ and unary + and -
103 nonassoc named unary operators
104 nonassoc < > <= >= lt gt le ge
105 nonassoc == != <=> eq ne cmp ~~
112 right = += -= *= etc. goto last next redo dump
114 nonassoc list operators (rightward)
119 **= += *= &= &.= <<= &&=
129 foreach (split /\n/, $perlop_text) {
133 s{^(?: left | right | nonassoc ) \s+}{}x;
134 next if m{^terms and list operators};
135 next if m{^named unary};
136 next if m{^list operators};
140 foreach my $op (split /\s+/) {
141 next unless length $op;
142 next if $op =~ m{^\w+$};
147 $perlop_re = '(?: '.(join ' | ', @ops).' )';
148 $perlop_re = qr{$perlop_re}x;
149 #print STDERR "$perlop_re\n";
154 our ($ifilehead, $ifirsthunkhead);
155 our ($ihunkhead, $ihunkend);
156 our ($ichunkstart, $ichunkend);
157 our ($before, $after);
159 sub is_string ($) { $_[0]{T} =~ m/heredoc|string/; }
160 sub is_trans ($) { grep { $_[0]{E} eq $_ } qw(__ f_ i_); }
177 # E exact input text (does not contain here doc contents)
178 # P something to print in messages
179 # V value, only for: heredoc string
180 # Q quote characcter, only for: heredoc string
183 if (s{^[\$\@\%]?[_0-9a-zA-Z]+}{}) {
184 push @o, { T => 'ident', E => $&, P => $& };
185 } elsif (s{^\<\<(['"]?)([A-Z_]+)\1}{}) {
186 my ($q,$d) = ($1,$2);
188 push @o, { T => 'heredoc', Q => $q, Delim => $d,
189 E => $&, P => "<<$q$d$q" };
192 ( (?: (?! $d \n ) .* \n )*? )
200 $o[$#o]{Invented} = 1;
202 } elsif (s{^ (["'])( (?: [^\\'"]
207 my ($q,$v) = ($1,$2);
208 push @o, { T => 'string', E => $&, P => "$q$q",
210 } elsif (s{^$perlop_re|^\;}{}) {
211 push @o, { T => 'op', E => $&, P => $& };
212 } elsif (s/^[[{(]//) {
213 push @o, { T => 'bra', E => $&, P => $& };
214 } elsif (s/^[]})]//) {
215 push @o, { T => 'ket', E => $&, P => $& };
216 } elsif (s/^( [\$\@\%] )( \{ )//x) {
217 push @o, { T => 'deref', E => $1, P => $1 },
218 { T => 'bra', E => $2, P => $2 };
219 } elsif (s/^ [\$\@\%] [^[^{] //x) {
220 push @o, { T => 'specvar', E => $&, P => $& };
223 } elsif (s{^\#.*\n}{}) {
226 die "cannot tokenise \`$&'";
229 for (my $i=0; $i+2 < @o; $i++) {
230 next unless $o[$i+1]{E} eq '.';
231 my @inputs = @o[$i, $i+2];
232 #print STDERR Dumper(\@inputs);
233 next if grep { !is_string($_) } @inputs;
234 my $q = $inputs[0]{Q};
235 next if grep { $_->{Q} ne $q } @inputs;
236 next if grep { $_->{Invented} } @inputs;
237 my $new = { T => 'joinedstrings',
238 E => (join '.', map { $_->{E} } @inputs),
239 P => (join '.', map { $_->{P} } @inputs),
240 V => (join '', map { $_->{V} } @inputs),
243 @o = (@o[0..$i-1], $new, @o[$i+3..$#o]);
244 $i--; # counteracts $i++
246 debug $ichunkstart, "semiparsed: ".join ' ', map { $_->{P} } @o;
247 # debug $ichunkstart, "semiparsed V: ".join ' ', map { defined $_->{V} ? ">$_->{V}<" : '-' } @o;
254 sub analyse_chunk_core () {
256 die "plain deletion\n" unless defined $after;
257 my @xs = semiparse $before;
258 my @ys = semiparse $after;
259 @analysed_x = @analysed_y = ();
260 my $next_something = sub {
261 my ($ary,$anal,$var,$what) = @_;
262 die "ran out of $what\n" unless @$ary;
264 push @$anal, $r->{P};
268 my $next_x = sub { $next_something->(\@xs, \@analysed_x, \$x, 'before'); };
269 my $next_y = sub { $next_something->(\@ys, \@analysed_y, \$y, 'after' ); };
270 our @y_expect_suffix = ();
272 while (my $e = shift @y_expect_suffix) {
275 or die "suffix mismatch, expected $e got $y->{E}\n";
277 last unless @xs or @ys;
280 next if $x->{E} eq $y->{E};
281 next if $x->{E} eq 'sprintf' and $y->{E} eq 'f_';
282 next if $x->{E} eq 'die' and $y->{E} eq 'confess';
283 next if $x->{E} eq 'die' and $y->{E} eq 'fail';
287 and is_trans($ys[1])) {
289 $next_y->(); # __ f_ i_
290 @y_expect_suffix = ')';
291 } elsif ($y->{E} eq '('
295 and (grep { $_ eq $analysed_y[-1] } (qw( => [ { ? : . ),
297 $next_y->(); # __ f_ i_
298 @y_expect_suffix = ')';
304 die "__ on non-string $y->{P}\n" unless is_string($y);
305 die "__ on was non-string $x->{P}\n" unless is_string($x);
306 if ($y->{Q} ne "'") {
307 die "var subst in new string\n"
308 if $y->{V} =~ m{(?<!\\) [\$\@]};
311 die "__ string changed\n" unless $y->{V} eq $x->{V};
312 die "__ string quote changed\n" unless $y->{Q} eq $x->{Q};
314 $string_changed = $@;
318 die "percent $& in __ ' string\n" if m{\%};
319 die $string_changed if length $string_changed;
323 die $string_changed if length $string_changed;
328 die "no percent in f_ string\n" unless $fmt =~ m{\%};
329 next unless $string_changed;
330 die "f_ old string '-quoted\n" if $x->{Q} ne '"';
333 my ($lit, $what) = @_;
334 my $xl = substr($xs, 0, length($lit));
336 debug $ichunkstart, "not exactly x: ..".qp($xs);
337 debug $ichunkstart, "not exactly y: ".qp($lit);
338 my $next = @ys ? $ys[0]{P} : '(end)';
339 die "string contents mismatch near $what before $next\n";
341 $xs = substr($xs, length($lit));
344 #print STDERR Dumper($fmt, $xs, \@xs, @ys);
345 if ($fmt !~ m{\%[^\%]}) {
346 $exactly->($fmt, '(tail)');
348 die "text deleted from end of string: ".qp($xs)."\n"
352 $exactly->($`, '(literal)');
354 if ($& eq '%%') { $exactly->('%', '%%'); next; }
355 elsif ($& ne '%s') { die "unhandled %-subst $&\n"; }
357 die "expected comma, got $y->{P}\n" unless $y->{E} eq ',';
362 # X has "<earlier>" . <something>
363 # Y has "<earlier>%s" [other args] , <something>
364 $next_x->(); # eat the '.'
369 die "\@... => not string" unless is_string($y);
370 die "\@... => $y->{P}" if $y->{Q} ne '"';
371 $exactly->($y->{V}, $y->{P});
376 if (!$bras and !@ys) {
381 (grep { $y->{E} eq $_ } qw( or xor and not ; :
382 if unless while when )
386 # lookahead shows close of containing scope
387 # or lower precedence operator
392 $xs =~ s{^\s+}{} if $bras;
393 if (is_string($y) and $y->{Q} eq '"') {
394 $exactly->($y->{V}, $y->{P});
397 $exactly->($y->{E}, $y->{P});
398 if ($y->{T} eq 'bra' or $y->{E} eq '?') {
400 } elsif ($y->{T} eq 'ket' or $y->{E} eq ':') {
401 die "too many kets at $y->{E}\n" unless $bras;
408 die "mismatch $x->{P} => $y->{P}\n";
412 sub analyse_chunk () {
414 eval { analyse_chunk_core(); };
415 return unless length $@;
416 if ($@ =~ m{^missing end of here doc (\S+)\n}) {
418 # (this never happens now, but in the future we might
419 # want this code again eg to try adding to the chunk)
430 our $last_filehead = -1;
432 sub report_on_hunk () {
433 return unless @report;
434 if ($last_filehead != $ifilehead) {
435 foreach (my $i=$ifilehead; $i<$ifirsthunkhead; $i++) {
438 $last_filehead = $ifilehead;
440 my $dummy_r = { S => (scalar @d)+1, E => (scalar @d)+1 };
442 for (my $i=$ihunkhead; ; $i++) {
444 $r //= shift @report;
446 last if $i < $r->{E};
447 confess unless $r->{Done} == 03;
451 last unless $i<$ihunkend;
453 foreach my $ds (@{ $debug[$i] }) {
469 confess unless $r = $dummy_r;
472 for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) {
474 $ifirsthunkhead = $ifilehead;
475 while (l_ok $ifirsthunkhead and
476 m{^diff|^index|^---|^\Q+++\E}) {
479 $ihunkhead = $ifirsthunkhead;
480 while (l_ok $ihunkhead) {
481 m{^\@\@} or confess "$ihunkhead $_ ?";
482 my $i = $ihunkhead + 1;
484 if (!l_ok $i or m{^ } or m{^\@\@}) {
485 if (defined $ichunkstart) {
487 eval { analyse_chunk(); 1; };
489 debug $ichunkstart, "done x: @analysed_x";
490 debug $ichunkstart, "done y: @analysed_y";
491 push @report, { M => $@,
495 $ichunkstart = $ichunkend = $before = $after = undef;
500 my $which = $& eq '-' ? \$before : \$after;