From e62682ec9e8598c65d192cd1f98c46c3cf57efe1 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Sun, 30 Sep 2018 15:35:52 +0100 Subject: [PATCH] auditor wip halfway reorg @o --- i18n-diff-auditor | 125 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 116 insertions(+), 9 deletions(-) diff --git a/i18n-diff-auditor b/i18n-diff-auditor index 7398a8e..4219041 100755 --- a/i18n-diff-auditor +++ b/i18n-diff-auditor @@ -97,13 +97,20 @@ our ($before, $after); sub semiparse ($) { ($_) = @_; my @o; + # entries contain + # T type + # E exact input text (does not contain here doc contents) + # P something to print in messages + # V value, only for: heredoc string + # Q quote characcter, only for: heredoc string for (;;) { s{^\s+}{}; if (s{^[\$\@\%]?[_0-9a-zA-Z]+}{}) { - push @o, { T => 'ident', L => $& }; + push @o, { T => 'ident', E => $&, P => $& }; } elsif (s{^\<\<('?)([A-Z_]+)\1}{}) { my ($q,$d) = ($1,$2); - push @o, { T => 'heredoc', Q => $q, Delim => $d }; + push @o, { T => 'heredoc', Q => $q, Delim => $d, + E => $&, P => "<<$q$d..." }; s{^ ( .* \n ) ( (?: (?! $d) .* \n )* ) @@ -116,18 +123,19 @@ sub semiparse ($) { )* \1 )}{}x) { my ($q,$v) = ($1,$2); - push @o, { T => 'string', Q => $q, V => $v }; + push @o, { T => 'string', Q => $q, V => $v, + I => $&, P => "$q-string" }; } elsif (s{^$perlop_re|\;}{}) { - push @o, { T => 'op', L => $& }; + push @o, { T => 'op', E => $&, I => $& }; } elsif (s/[[{(]//) { - push @o, { T => 'bra', L => $& }; + push @o, { T => 'bra', E => $&, I => $& }; } elsif (s/[]})]//) { - push @o, { T => 'ket', L => $& }; + push @o, { T => 'ket', E => $&, I => $& }; } elsif (s/^( [\$\@\%] )( \{ )//x) { - push @o, { T => 'deref', L => $1 }, - { T => 'bra', L => $2 }; + push @o, { T => 'deref', E => $1, I => $1 }, + { T => 'bra', E => $2, I => $1 }; } elsif (s/^ [\$\@\%] [^[^{] //x) { - push @o, { T => 'specvar', L => $& }; + push @o, { T => 'specvar', E => $&, I => $& }; } elsif (!length) { last; } else { @@ -135,6 +143,7 @@ sub semiparse ($) { die "cannot tokenise \`$&'"; } } + # coalesce concatenated strings return @o; } @@ -143,6 +152,104 @@ sub analyse_chunk_core () { die "plain insertion\n" unless defined $before; my @before = semiparse $before; my @after = semiparse $after; + my $next_something = sub { + my ($ary,$var,$what) = @_; + die "ran out of $what\n" unless @$ary; + $$var = shift @$ary; + }; + my $next_before = sub { $next_something->(\@before, \$x, 'before'); }; + my $next_after = sub { $next_something->(\@after , \$y, 'after' ); }; + sub $is_string = sub { $_[0]{T} =~ m/heredoc|string/; }; + for (;;) { + last unless @before or @after; + $next_before->(); + $next_after->(); + next if $x->{I} eq $y->{I}; + my $string_changed; + if ($y->{I} eq '__' or $y->{I} eq '_f') { + $next_after->(); + die "__ on non-string\n" unless Sis_string($y); + die "__ on was non-string\n" unless $is_string($x); + if ($y->{Q} ne "'") { + die "var subst in new str\n" if $y->{V} =~ m{(?{V} eq $x->{V}; + die "__ string quote changed\n" unless $y->{Q} eq $x->{Q}; + }; + $string_changed = $@; + } + if ($y->{I} eq '__') { + $_ = $y->{V}; + die "percent in __ ' string\n" if m{\%}; + die $string_changed if length $not_unchanged; + next; + } + if ($y->{I} eq 'f_') { + my $fmt = $y->{V}; + die "no percent in _f string\n" unless $fmt =~ m{\%}; + next unless $string_changed; + die "f_ old string '-quoted\n" if length $x->{V}; + my $xs = $x->{V}; + my $exactly = sub { + my ($lit) = @_; + my $xl = substr($xs, 0, length($lit)); + die "exactly mismatch in $lit\n" unless $xl eq $lit; + $xs = substr($xs, length($lit)); + }; + for (;;) { + if ($fmt !~ m{\%[^\%]}) { + $exactly->($fmt); + $fmt = ''; + last; + } + $exactly->($`); + $fmt = $'; + if ($& eq '%%') { $exactly('%'); next; } + elsif ($& ne '%s') { die "unhandled %-subst $&\n"; } + $next_after->(); + die "expected comma, got $y->{T}\n" unless $y->{I} eq ','; + if ($old =~ m{^\@}) { + $next_after->(); + die "\@... => not string" unless $is_string($y); + $exactly->($y->{V}); + next; + } + my $bras = 0; + for (;;) { + if (!$bra and !@after) { + last; + } + $next_after->(); + if (!$bra and + (grep { $y->{I} eq $_ } qw( or xor and not ; : ) + or $y->{T} eq 'ket' + )) { + unshift @after, $ys; + last; + } + $xs =~ s{^\s+}{}; + $exactly->{$y->{I}); + if ($y->{T} eq 'bra' or $y->{L} eq '?') { + $bras++; + } elsif ($y->{T} eq 'ket' or $y->{L} eq ':') { + die "too many kets at $y->{L}\n" unless $bras; + $bras--; + } + } + } + next; + } + die "mismatch + + $exact-> + + if ($fmt =~ m{ + if + + if ($y->{Q} eq $x->{Q} && $y-> + + $y = shift @after; print Dumper($ichunkstart, $ichunkend, \@before, \@after); flush STDOUT; } -- 2.30.2