X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=dgit-junk.git;a=blobdiff_plain;f=i18n-diff-auditor;h=b9bd4e50e17e1643c894ad961bfbe2e7771f594a;hp=8252c3f1ba4d6f9b5517c2b8c08c8a74a25f98be;hb=9f63cef14f7581cabba2ccfd4ac4d23ccabe5042;hpb=81019d41a3311596539218f80a003840ea9cb4a3 diff --git a/i18n-diff-auditor b/i18n-diff-auditor index 8252c3f..b9bd4e5 100755 --- a/i18n-diff-auditor +++ b/i18n-diff-auditor @@ -76,6 +76,7 @@ sub prep_perlop () { next if m{^list operators}; s{ and unary.*}{}; s{ etc\. }{ }; + s{\?\:}{ ? : }; foreach my $op (split /\s+/) { next unless length $op; next if $op =~ m{^\w+$}; @@ -96,27 +97,46 @@ our ($before, $after); sub semiparse ($) { ($_) = @_; my @o; + # entries contain + # T type + # E exact input text (does not contain here doc contents) + # P something to print in messages + # V value, only for: heredoc string + # Q quote characcter, only for: heredoc string for (;;) { s{^\s+}{}; - if (s{^[\$\@\%][_0-9a-zA-Z]+}{}) { - push @o, { T => 'ident', L => $& }; - } elsif (s{^\<\<('?)([A-Z_]+)\1}{}) { + if (s{^[\$\@\%]?[_0-9a-zA-Z]+}{}) { + push @o, { T => 'ident', E => $&, P => $& }; + } elsif (s{^\<\<(['"]?)([A-Z_]+)\1}{}) { my ($q,$d) = ($1,$2); - push @o, { T => 'heredoc', Q => $q, Delim => $d }; + $q //= '"'; + push @o, { T => 'heredoc', Q => $q, Delim => $d, + E => $&, P => "<<$q$d..." }; s{^ ( .* \n ) ( (?: (?! $d) .* \n )* ) - }{ $1 } or die "missing end of here doc $d\n"; + $d \n + }{ $1 }x or die "missing end of here doc $d\n"; $o[$#o]{V} = $2; - } elsif (s{^ (["'])( (?: [^\\] | \\ \1 )* )}{}x) { + } elsif (s{^ (["'])( (?: [^\\'"] + | \\ [^"'] + | (?! \1 ) [^"] + )* + \1 )}{}x) { my ($q,$v) = ($1,$2); - push @o, { T => 'string', Q => $q, V => $v }; - } elsif (s{^$perlop_re}{}) { - push @o, { T => 'op', L => $& }; + push @o, { T => 'string', E => $&, P => "$q-string", + Q => $q, V => $v}; + } elsif (s{^$perlop_re|\;}{}) { + push @o, { T => 'op', E => $&, P => $& }; } elsif (s/[[{(]//) { - push @o, { T => 'bra', L => $& }; + push @o, { T => 'bra', E => $&, P => $& }; } elsif (s/[]})]//) { - push @o, { T => 'ket', L => $& }; + push @o, { T => 'ket', E => $&, P => $& }; + } elsif (s/^( [\$\@\%] )( \{ )//x) { + push @o, { T => 'deref', E => $1, P => $1 }, + { T => 'bra', E => $2, P => $2 }; + } elsif (s/^ [\$\@\%] [^[^{] //x) { + push @o, { T => 'specvar', E => $&, P => $& }; } elsif (!length) { last; } else { @@ -124,15 +144,124 @@ sub semiparse ($) { die "cannot tokenise \`$&'"; } } + # coalesce concatenated strings + return @o; } -sub analyse_chunk () { +sub analyse_chunk_core () { die "plain deletion\n" unless defined $after; die "plain insertion\n" unless defined $before; - my @before = semiparse $before; - my @after = semiparse $after; - print Dumper($ichunkstart, $ichunkend, \@before, \@after); - flush STDOUT; + my @xs = semiparse $before; + my @ys = semiparse $after; + my $next_something = sub { + my ($ary,$var,$what) = @_; + die "ran out of $what\n" unless @$ary; + $$var = shift @$ary; + }; + my ($x,$y); + my $next_x = sub { $next_something->(\@xs, \$x, 'before'); }; + my $next_y = sub { $next_something->(\@ys, \$y, 'after' ); }; + my $is_string = sub { $_[0]{T} =~ m/heredoc|string/; }; + for (;;) { + last unless @xs or @ys; + $next_x->(); + $next_y->(); + next if $x->{E} eq $y->{E}; + my $string_changed; + my $ye = $y->{E}; + if ($ye eq '__' or $ye eq 'f_') { + $next_y->(); + die "__ on non-string $y->{P}\n" unless $is_string->($y); + die "__ on was non-string $y->{P}\n" unless $is_string->($x); + if ($y->{Q} ne "'") { + die "var subst in new string\n" + if $y->{V} =~ m{(?{V} eq $x->{V}; + die "__ string quote changed\n" unless $y->{Q} eq $x->{Q}; + }; + $string_changed = $@; + } + if ($ye eq '__') { + $_ = $y->{V}; + die "percent $& in __ ' string\n" if m{\%}; + die $string_changed if length $string_changed; + next; + } + if ($ye eq 'f_') { + my $fmt = $y->{V}; + die "no percent in _f string\n" unless $fmt =~ m{\%}; + next unless $string_changed; + die "f_ old string '-quoted\n" if length $x->{V}; + my $xs = $x->{V}; + my $exactly = sub { + my ($lit) = @_; + my $xl = substr($xs, 0, length($lit)); + die "exactly mismatch in $lit\n" unless $xl eq $lit; + $xs = substr($xs, length($lit)); + }; + for (;;) { + if ($fmt !~ m{\%[^\%]}) { + $exactly->($fmt); + $fmt = ''; + last; + } + $exactly->($`); + $fmt = $'; + if ($& eq '%%') { $exactly->('%'); next; } + elsif ($& ne '%s') { die "unhandled %-subst $&\n"; } + $next_y->(); + die "expected comma, got $y->{P}\n" unless $y->{E} eq ','; + if ($xs =~ m{^\@}) { + $next_y->(); + die "\@... => not string" unless $is_string->($y); + die "\@... => $y->{P}" if $y->{Q} ne '"'; + $exactly->($y->{V}); + next; + } + my $bras = 0; + for (;;) { + if (!$bras and !@ys) { + last; + } + $next_y->(); + if (!$bras and + (grep { $y->{E} eq $_ } qw( or xor and not ; : ) + or $y->{T} eq 'ket' + )) { + unshift @ys, $y; + last; + } + $xs =~ s{^\s+}{}; + $exactly->($y->{E}); + if ($y->{T} eq 'bra' or $y->{L} eq '?') { + $bras++; + } elsif ($y->{T} eq 'ket' or $y->{L} eq ':') { + die "too many kets at $y->{L}\n" unless $bras; + $bras--; + } + } + } + next; + } + die "mismatch $x->{P} => $y->{P}\n"; + } +} + +sub analyse_chunk () { + for (;;) { + eval { analyse_chunk_core(); }; + return unless length $@; + if ($@ =~ m{^missing end of here doc (\S+)\n}) { + # fudge this + $before .= "\n$1\n"; + $after .= "\n$1\n"; + next; + } else { + die $@; + } + } } for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) { @@ -148,7 +277,8 @@ for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) { $ichunkend = $i; eval { analyse_chunk(); 1; }; if (length $@) { - print Dumper($ichunkstart, $ichunkend, + print Dumper('REPORT', + $ichunkstart, $ichunkend, $before, $after, $@); }