From: Ian Jackson Date: Sun, 30 Sep 2018 19:53:36 +0000 (+0100) Subject: joining and debug X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=dgit-junk.git;a=commitdiff_plain;h=67fc89a905d3f922ffb3e33172cd8b8343945b1c joining and debug --- diff --git a/i18n-diff-auditor b/i18n-diff-auditor index 36f3da3..8fd623a 100755 --- a/i18n-diff-auditor +++ b/i18n-diff-auditor @@ -2,6 +2,18 @@ use strict; use Carp; use Data::Dumper; +use Getopt::Long; + +open DEBUG, ">/dev/null" or die $!; + +GetOptions("debug|D" => sub { open DEBUG, ">&2" or die $!; } + ); + +our @debug; +sub debug ($$) { + my ($i,$s) = @_; + push @{ $debug[$i] }, $s; +} my @d = <>; unshift @d, "# dummy line to make line 1 index 1 in \@d\n"; @@ -96,6 +108,8 @@ our ($ihunkhead, $ihunkend); our ($ichunkstart, $ichunkend); our ($before, $after); +sub is_string ($) { $_[0]{T} =~ m/heredoc|string/; }; + sub semiparse ($) { ($_) = @_; my @o; @@ -111,9 +125,9 @@ sub semiparse ($) { push @o, { T => 'ident', E => $&, P => $& }; } elsif (s{^\<\<(['"]?)([A-Z_]+)\1}{}) { my ($q,$d) = ($1,$2); - $q //= '"'; + $q ||= '"'; push @o, { T => 'heredoc', Q => $q, Delim => $d, - E => $&, P => "<<$q$d..." }; + E => $&, P => "<<$q$d$q" }; s{^ ( .* \n ) ( (?: (?! $d) .* \n )* ) @@ -124,11 +138,11 @@ sub semiparse ($) { | \\ [^"'] | (?! \1 ) [^"] )* - \1 )}{}x) { + ) \1 }{}x) { my ($q,$v) = ($1,$2); push @o, { T => 'string', E => $&, P => "$q-string", Q => $q, V => $v}; - } elsif (s{^$perlop_re|\;}{}) { + } elsif (s{^$perlop_re|^\;}{}) { push @o, { T => 'op', E => $&, P => $& }; } elsif (s/[[{(]//) { push @o, { T => 'bra', E => $&, P => $& }; @@ -146,7 +160,22 @@ sub semiparse ($) { die "cannot tokenise \`$&'"; } } - # coalesce concatenated strings + for (my $i=@o-2; $i>0; --$i) { + next unless $o[$i+1]{E} eq '.'; + my @inputs = @o[$i, $i+2]; + next if grep { !is_string($_) } @inputs; + my $q = $inputs[0]{Q}; + next if grep { $_->{Q} ne $q } @inputs; + my $new = { T => 'joinedstrings', + E => (join '.', map { $_->{E} } @inputs), + P => (join '.', map { $_->{P} } @inputs), + V => (join '', map { $_->{V} } @inputs), + Q => $q, + }; + @o = (@o[0..$i-1], $new, @o[$i+3..$#o]); + print STDERR Dumper(\@o); + } + debug $ichunkstart, "semiparsed: ".join ' ', map { $_->{P} } @o; return @o; } @@ -163,7 +192,6 @@ sub analyse_chunk_core () { my ($x,$y); my $next_x = sub { $next_something->(\@xs, \$x, 'before'); }; my $next_y = sub { $next_something->(\@ys, \$y, 'after' ); }; - my $is_string = sub { $_[0]{T} =~ m/heredoc|string/; }; for (;;) { last unless @xs or @ys; $next_x->(); @@ -173,8 +201,8 @@ sub analyse_chunk_core () { my $ye = $y->{E}; if ($ye eq '__' or $ye eq 'f_') { $next_y->(); - die "__ on non-string $y->{P}\n" unless $is_string->($y); - die "__ on was non-string $y->{P}\n" unless $is_string->($x); + die "__ on non-string $y->{P}\n" unless is_string($y); + die "__ on was non-string $y->{P}\n" unless is_string($x); if ($y->{Q} ne "'") { die "var subst in new string\n" if $y->{V} =~ m{(?{V}; die "no percent in _f string\n" unless $fmt =~ m{\%}; next unless $string_changed; - die "f_ old string '-quoted\n" if length $x->{V}; + die "f_ old string '-quoted\n" if $x->{Q} ne '"'; my $xs = $x->{V}; my $exactly = sub { - my ($lit) = @_; + my ($lit, $what) = @_; my $xl = substr($xs, 0, length($lit)); - die "exactly mismatch in $lit\n" unless $xl eq $lit; + if ($xl ne $lit) { + debug $ichunkstart, "not exactly x: $xl"; + debug $ichunkstart, "not exactly y: $lit"; + my $next = @ys ? $ys[0]{P} : '(end)'; + die "string contents mismatch near $what before $next\n"; + } $xs = substr($xs, length($lit)); }; for (;;) { if ($fmt !~ m{\%[^\%]}) { - $exactly->($fmt); + $exactly->($fmt, '(tail)'); $fmt = ''; last; } - $exactly->($`); + $exactly->($`, '(literal)'); $fmt = $'; - if ($& eq '%%') { $exactly->('%'); next; } + if ($& eq '%%') { $exactly->('%', '%%'); next; } elsif ($& ne '%s') { die "unhandled %-subst $&\n"; } $next_y->(); die "expected comma, got $y->{P}\n" unless $y->{E} eq ','; if ($xs =~ m{^\@}) { $next_y->(); - die "\@... => not string" unless $is_string->($y); + die "\@... => not string" unless is_string($y); die "\@... => $y->{P}" if $y->{Q} ne '"'; - $exactly->($y->{V}); + $exactly->($y->{V}, $y->{P}); next; } my $bras = 0; @@ -236,11 +269,12 @@ sub analyse_chunk_core () { last; } $xs =~ s{^\s+}{}; - $exactly->($y->{E}); - if ($y->{T} eq 'bra' or $y->{L} eq '?') { + #debug $ichunkstart, "TOKEN $y->{P}\n"; + $exactly->($y->{E}, $y->{P}); + if ($y->{T} eq 'bra' or $y->{E} eq '?') { $bras++; - } elsif ($y->{T} eq 'ket' or $y->{L} eq ':') { - die "too many kets at $y->{L}\n" unless $bras; + } elsif ($y->{T} eq 'ket' or $y->{E} eq ':') { + die "too many kets at $y->{E}\n" unless $bras; $bras--; } } @@ -290,6 +324,10 @@ sub report_on_hunk () { last unless $i<$ihunkend; + foreach my $ds (@{ $debug[$i] }) { + print "# $ds\n"; + } + if ($i == $r->{S}) { print "!! $r->{M}"; $r->{Done} |= 01;