X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=blobdiff_plain;f=i18n-diff-auditor;h=b9bd4e50e17e1643c894ad961bfbe2e7771f594a;hb=9f63cef14f7581cabba2ccfd4ac4d23ccabe5042;hp=777bf47ededa166703f847d7290a1c839d394727;hpb=78ab0ab57128eec1ef2447cdd3e6008230ee5c04;p=dgit-junk.git diff --git a/i18n-diff-auditor b/i18n-diff-auditor index 777bf47..b9bd4e5 100755 --- a/i18n-diff-auditor +++ b/i18n-diff-auditor @@ -1,38 +1,301 @@ #!/usr/bin/perl -w use strict; use Carp; +use Data::Dumper; my @d = <>; +unshift @d, "# dummy line to make line 1 index 1 in \@d\n"; + +our $i_last_l_ok = -1; +our $count_i_last_l_ok; sub l_ok ($) { my ($i) = @_; + + if ($i == $i_last_l_ok) { + confess $i if $count_i_last_l_ok++ > 50; + } else { + $count_i_last_l_ok = 0; + $i_last_l_ok = $i; + } + return unless $i < @d; $_ = $d[$i]; + #print STDERR "L $i\n"; 1; } -sub l ($) { confess unless l_ok $_[0]; }; +sub l ($) { + my ($i) = @_; + confess $i unless l_ok $i; +}; + +our $perlop_text = <<'END'; # c&p from man perlop + left terms and list operators (leftward) + left -> + nonassoc ++ -- + right ** + right ! ~ \ and unary + and - + left =~ !~ + left * / % x + left + - . + left << >> + nonassoc named unary operators + nonassoc < > <= >= lt gt le ge + nonassoc == != <=> eq ne cmp ~~ + left & + left | ^ + left && + left || // + nonassoc .. ... + right ?: + right = += -= *= etc. goto last next redo dump + left , => + nonassoc list operators (rightward) + right not + left and + left or xor + + **= += *= &= &.= <<= &&= + -= /= |= |.= >>= ||= + .= %= ^= ^.= //= + x= +END + +our $perlop_re; + +sub prep_perlop () { + my @ops; + foreach (split /\n/, $perlop_text) { + next unless m{\S}; + s{\s+$}{}; + s{^\s+}{}; + s{^(?: left | right | nonassoc ) \s+}{}x; + next if m{^terms and list operators}; + next if m{^named unary}; + next if m{^list operators}; + s{ and unary.*}{}; + s{ etc\. }{ }; + s{\?\:}{ ? : }; + foreach my $op (split /\s+/) { + next unless length $op; + next if $op =~ m{^\w+$}; + $op =~ s/\W/\\$&/g; + push @ops, $op; + } + } + $perlop_re = '(?: '.(join ' | ', @ops).' )'; + $perlop_re = qr{$perlop_re}x; + #print STDERR "$perlop_re\n"; +} -our ($ifilehead, $ihunkhead, $ichunk); +prep_perlop(); + +our ($ifilehead, $ihunkhead, $ichunkstart, $ichunkend); our ($before, $after); +sub semiparse ($) { + ($_) = @_; + my @o; + # entries contain + # T type + # E exact input text (does not contain here doc contents) + # P something to print in messages + # V value, only for: heredoc string + # Q quote characcter, only for: heredoc string + for (;;) { + s{^\s+}{}; + if (s{^[\$\@\%]?[_0-9a-zA-Z]+}{}) { + push @o, { T => 'ident', E => $&, P => $& }; + } elsif (s{^\<\<(['"]?)([A-Z_]+)\1}{}) { + my ($q,$d) = ($1,$2); + $q //= '"'; + push @o, { T => 'heredoc', Q => $q, Delim => $d, + E => $&, P => "<<$q$d..." }; + s{^ + ( .* \n ) + ( (?: (?! $d) .* \n )* ) + $d \n + }{ $1 }x or die "missing end of here doc $d\n"; + $o[$#o]{V} = $2; + } elsif (s{^ (["'])( (?: [^\\'"] + | \\ [^"'] + | (?! \1 ) [^"] + )* + \1 )}{}x) { + my ($q,$v) = ($1,$2); + push @o, { T => 'string', E => $&, P => "$q-string", + Q => $q, V => $v}; + } elsif (s{^$perlop_re|\;}{}) { + push @o, { T => 'op', E => $&, P => $& }; + } elsif (s/[[{(]//) { + push @o, { T => 'bra', E => $&, P => $& }; + } elsif (s/[]})]//) { + push @o, { T => 'ket', E => $&, P => $& }; + } elsif (s/^( [\$\@\%] )( \{ )//x) { + push @o, { T => 'deref', E => $1, P => $1 }, + { T => 'bra', E => $2, P => $2 }; + } elsif (s/^ [\$\@\%] [^[^{] //x) { + push @o, { T => 'specvar', E => $&, P => $& }; + } elsif (!length) { + last; + } else { + m{^.{0,10}}; + die "cannot tokenise \`$&'"; + } + } + # coalesce concatenated strings + return @o; +} + +sub analyse_chunk_core () { + die "plain deletion\n" unless defined $after; + die "plain insertion\n" unless defined $before; + my @xs = semiparse $before; + my @ys = semiparse $after; + my $next_something = sub { + my ($ary,$var,$what) = @_; + die "ran out of $what\n" unless @$ary; + $$var = shift @$ary; + }; + my ($x,$y); + my $next_x = sub { $next_something->(\@xs, \$x, 'before'); }; + my $next_y = sub { $next_something->(\@ys, \$y, 'after' ); }; + my $is_string = sub { $_[0]{T} =~ m/heredoc|string/; }; + for (;;) { + last unless @xs or @ys; + $next_x->(); + $next_y->(); + next if $x->{E} eq $y->{E}; + my $string_changed; + my $ye = $y->{E}; + if ($ye eq '__' or $ye eq 'f_') { + $next_y->(); + die "__ on non-string $y->{P}\n" unless $is_string->($y); + die "__ on was non-string $y->{P}\n" unless $is_string->($x); + if ($y->{Q} ne "'") { + die "var subst in new string\n" + if $y->{V} =~ m{(?{V} eq $x->{V}; + die "__ string quote changed\n" unless $y->{Q} eq $x->{Q}; + }; + $string_changed = $@; + } + if ($ye eq '__') { + $_ = $y->{V}; + die "percent $& in __ ' string\n" if m{\%}; + die $string_changed if length $string_changed; + next; + } + if ($ye eq 'f_') { + my $fmt = $y->{V}; + die "no percent in _f string\n" unless $fmt =~ m{\%}; + next unless $string_changed; + die "f_ old string '-quoted\n" if length $x->{V}; + my $xs = $x->{V}; + my $exactly = sub { + my ($lit) = @_; + my $xl = substr($xs, 0, length($lit)); + die "exactly mismatch in $lit\n" unless $xl eq $lit; + $xs = substr($xs, length($lit)); + }; + for (;;) { + if ($fmt !~ m{\%[^\%]}) { + $exactly->($fmt); + $fmt = ''; + last; + } + $exactly->($`); + $fmt = $'; + if ($& eq '%%') { $exactly->('%'); next; } + elsif ($& ne '%s') { die "unhandled %-subst $&\n"; } + $next_y->(); + die "expected comma, got $y->{P}\n" unless $y->{E} eq ','; + if ($xs =~ m{^\@}) { + $next_y->(); + die "\@... => not string" unless $is_string->($y); + die "\@... => $y->{P}" if $y->{Q} ne '"'; + $exactly->($y->{V}); + next; + } + my $bras = 0; + for (;;) { + if (!$bras and !@ys) { + last; + } + $next_y->(); + if (!$bras and + (grep { $y->{E} eq $_ } qw( or xor and not ; : ) + or $y->{T} eq 'ket' + )) { + unshift @ys, $y; + last; + } + $xs =~ s{^\s+}{}; + $exactly->($y->{E}); + if ($y->{T} eq 'bra' or $y->{L} eq '?') { + $bras++; + } elsif ($y->{T} eq 'ket' or $y->{L} eq ':') { + die "too many kets at $y->{L}\n" unless $bras; + $bras--; + } + } + } + next; + } + die "mismatch $x->{P} => $y->{P}\n"; + } +} + +sub analyse_chunk () { + for (;;) { + eval { analyse_chunk_core(); }; + return unless length $@; + if ($@ =~ m{^missing end of here doc (\S+)\n}) { + # fudge this + $before .= "\n$1\n"; + $after .= "\n$1\n"; + next; + } else { + die $@; + } + } +} + for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) { m{^diff} or next; - while (l_ok $ifilehead and m{^index|^---|^\Q+++\E}) { } + while (l_ok $ifilehead and m{^diff|^index|^---|^\Q+++\E}) { $ifilehead++ } $ihunkhead = $ifilehead; while (l_ok $ihunkhead) { - m{^\@\@} or confess; - $ichunk = $ihunkhead + 1; - for (;;) { - l_ok $ichunk or last; - - while (l_ok $ichunk and + m{^\@\@} or confess "$ihunkhead $_ ?"; + my $i = $ihunkhead + 1; + for (; ; $i++) { + if (!l_ok $i or m{^ } or m{^\@\@}) { + if (defined $ichunkstart) { + $ichunkend = $i; + eval { analyse_chunk(); 1; }; + if (length $@) { + print Dumper('REPORT', + $ichunkstart, $ichunkend, + $before, $after, + $@); + } + $ichunkstart = $ichunkend = $before = $after = undef; + } + l_ok $i or last; + m{^\@\@} and last; + } elsif (m{^[-+]}) { + my $which = $& eq '-' ? \$before : \$after; + $ichunkstart //= $i; + $$which //= ''; + $$which .= $'; + } else { + confess "$i $_ ?"; + } + } + $ichunkend = $i; + $ihunkhead = $i; } - - confess unless m{^\@\@}; - - -while (<>){ - if (m/^diff.*/) { - my $headline = }