X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=blobdiff_plain;f=i18n-diff-auditor;h=116b0f82fd314fe005ef3de278ba39bbeae922c5;hb=1c9a99297143d028936205973ebbeba47f5e8213;hp=f40c8e97e7ec0f94685b76c55815df04f248f46f;hpb=357f88dd77ad0e34975250b3314c64d8ea53bdad;p=dgit.git diff --git a/i18n-diff-auditor b/i18n-diff-auditor index f40c8e97..116b0f82 100755 --- a/i18n-diff-auditor +++ b/i18n-diff-auditor @@ -107,7 +107,17 @@ our ($ihunkhead, $ihunkend); our ($ichunkstart, $ichunkend); our ($before, $after); -sub is_string ($) { $_[0]{T} =~ m/heredoc|string/; }; +sub is_string ($) { $_[0]{T} =~ m/heredoc|string/; } +sub is_trans ($) { grep { $_[0]{E} eq $_ } qw(__ f_ i_); } + +sub qp ($) { + my ($p) = @_; + $p =~ s{\\}{\\\\}g; + $p =~ s{\'}{\\'}g; + $p =~ s{\n}{\\n}g; + $p =~ s{\t}{\\t}g; + return "'$p'"; +}; sub semiparse ($) { ($_) = @_; @@ -128,10 +138,10 @@ sub semiparse ($) { push @o, { T => 'heredoc', Q => $q, Delim => $d, E => $&, P => "<<$q$d$q" }; s{^ - ( .* \n ) - ( (?: (?! $d) .* \n )* ) - $d \n - }{ $1 }x or die "missing end of here doc $d\n"; + ( .* \n ) + ( (?: (?! $d \n ) .* \n )*? ) + $d \n + }{ $1 }xe or die "missing end of here doc $d\n"; $o[$#o]{V} = $2; } elsif (s{^ (["'])( (?: [^\\'"] | \\ [^"'] @@ -139,13 +149,13 @@ sub semiparse ($) { )* ) \1 }{}x) { my ($q,$v) = ($1,$2); - push @o, { T => 'string', E => $&, P => "$q-string", + push @o, { T => 'string', E => $&, P => "$q$q", Q => $q, V => $v}; } elsif (s{^$perlop_re|^\;}{}) { push @o, { T => 'op', E => $&, P => $& }; - } elsif (s/[[{(]//) { + } elsif (s/^[[{(]//) { push @o, { T => 'bra', E => $&, P => $& }; - } elsif (s/[]})]//) { + } elsif (s/^[]})]//) { push @o, { T => 'ket', E => $&, P => $& }; } elsif (s/^( [\$\@\%] )( \{ )//x) { push @o, { T => 'deref', E => $1, P => $1 }, @@ -159,9 +169,10 @@ sub semiparse ($) { die "cannot tokenise \`$&'"; } } - for (my $i=@o-2; $i>0; --$i) { + for (my $i=0; $i+2 < @o; $i++) { next unless $o[$i+1]{E} eq '.'; my @inputs = @o[$i, $i+2]; + #print STDERR Dumper(\@inputs); next if grep { !is_string($_) } @inputs; my $q = $inputs[0]{Q}; next if grep { $_->{Q} ne $q } @inputs; @@ -172,36 +183,67 @@ sub semiparse ($) { Q => $q, }; @o = (@o[0..$i-1], $new, @o[$i+3..$#o]); - print STDERR Dumper(\@o); + $i--; # counteracts $i++ } debug $ichunkstart, "semiparsed: ".join ' ', map { $_->{P} } @o; + # debug $ichunkstart, "semiparsed V: ".join ' ', map { defined $_->{V} ? ">$_->{V}<" : '-' } @o; return @o; } +our @analysed_x; +our @analysed_y; + sub analyse_chunk_core () { die "plain deletion\n" unless defined $after; die "plain insertion\n" unless defined $before; my @xs = semiparse $before; my @ys = semiparse $after; + @analysed_x = @analysed_y = (); my $next_something = sub { - my ($ary,$var,$what) = @_; + my ($ary,$anal,$var,$what) = @_; die "ran out of $what\n" unless @$ary; - $$var = shift @$ary; + my $r = shift @$ary; + push @$anal, $r->{P}; + $$var = $r; }; my ($x,$y); - my $next_x = sub { $next_something->(\@xs, \$x, 'before'); }; - my $next_y = sub { $next_something->(\@ys, \$y, 'after' ); }; + my $next_x = sub { $next_something->(\@xs, \@analysed_x, \$x, 'before'); }; + my $next_y = sub { $next_something->(\@ys, \@analysed_y, \$y, 'after' ); }; + our @y_expect_suffix = (); for (;;) { + while (my $e = shift @y_expect_suffix) { + $next_y->(); + $y->{E} eq $e + or die "suffix mismatch, expected $e got $y->{E}\n"; + } last unless @xs or @ys; $next_x->(); $next_y->(); next if $x->{E} eq $y->{E}; + next if $x->{E} eq 'sprintf' and $y->{E} eq 'f_'; + next if $x->{E} eq 'die' and $y->{E} eq 'confess'; + if ($y->{E} eq '+' + and @ys >= 3 + and $ys[0]{E} eq '(' + and is_trans($ys[1])) { + $next_y->(); # ( + $next_y->(); # __ f_ i_ + @y_expect_suffix = ')'; + } elsif ($y->{E} eq '(' + and @ys > 2 + and is_trans($ys[0]) + and @analysed_y + and (grep { $_ eq $analysed_y[-1] } (qw( => [ { ? : . ), + '(', ',') )) { + $next_y->(); # __ f_ i_ + @y_expect_suffix = ')'; + } my $string_changed; my $ye = $y->{E}; - if ($ye eq '__' or $ye eq 'f_') { + if (is_trans($y)) { $next_y->(); die "__ on non-string $y->{P}\n" unless is_string($y); - die "__ on was non-string $y->{P}\n" unless is_string($x); + die "__ on was non-string $x->{P}\n" unless is_string($x); if ($y->{Q} ne "'") { die "var subst in new string\n" if $y->{V} =~ m{(?{V}; die "no percent in _f string\n" unless $fmt =~ m{\%}; @@ -228,8 +274,8 @@ sub analyse_chunk_core () { my ($lit, $what) = @_; my $xl = substr($xs, 0, length($lit)); if ($xl ne $lit) { - debug $ichunkstart, "not exactly x: $xl"; - debug $ichunkstart, "not exactly y: $lit"; + debug $ichunkstart, "not exactly x: ..".qp($xs); + debug $ichunkstart, "not exactly y: ".qp($lit); my $next = @ys ? $ys[0]{P} : '(end)'; die "string contents mismatch near $what before $next\n"; } @@ -239,6 +285,8 @@ sub analyse_chunk_core () { if ($fmt !~ m{\%[^\%]}) { $exactly->($fmt, '(tail)'); $fmt = ''; + die "text deleted from end of string: ".qp($xs)."\n" + if length $xs; last; } $exactly->($`, '(literal)'); @@ -247,6 +295,15 @@ sub analyse_chunk_core () { elsif ($& ne '%s') { die "unhandled %-subst $&\n"; } $next_y->(); die "expected comma, got $y->{P}\n" unless $y->{E} eq ','; + if (!length $fmt and + !length $xs and + @xs and + $xs[0]{E} eq '.') { + # X has "" . + # Y has "%s" [other args] , + $next_x->(); # eat the '.' + next; + } if ($xs =~ m{^\@}) { $next_y->(); die "\@... => not string" unless is_string($y); @@ -261,14 +318,22 @@ sub analyse_chunk_core () { } $next_y->(); if (!$bras and - (grep { $y->{E} eq $_ } qw( or xor and not ; : ) + (grep { $y->{E} eq $_ } qw( or xor and not ; : + if unless while when ) + or $y->{E} eq ',' or $y->{T} eq 'ket' )) { + # lookahead shows close of containing scope + # or lower precedence operator unshift @ys, $y; + pop @analysed_y; last; } - $xs =~ s{^\s+}{}; - #debug $ichunkstart, "TOKEN $y->{P}\n"; + $xs =~ s{^\s+}{} if $bras; + if (is_string($y) and $y->{Q} eq '"') { + $exactly->($y->{V}, $y->{P}); + next; + } $exactly->($y->{E}, $y->{P}); if ($y->{T} eq 'bra' or $y->{E} eq '?') { $bras++; @@ -359,6 +424,8 @@ for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) { $ichunkend = $i; eval { analyse_chunk(); 1; }; if (length $@) { + debug $ichunkstart, "done x: @analysed_x"; + debug $ichunkstart, "done y: @analysed_y"; push @report, { M => $@, S => $ichunkstart, E => $ichunkend };