From: Ian Jackson Date: Sun, 30 Sep 2018 19:56:40 +0000 (+0100) Subject: move i18n-diff-auditor to dgit repo X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=dgit-junk.git;a=commitdiff_plain;h=88d827adecf9a35e6b8574ee73a64f19e9f69a56 move i18n-diff-auditor to dgit repo --- diff --git a/i18n-diff-auditor b/i18n-diff-auditor deleted file mode 100755 index 8fd623a..0000000 --- a/i18n-diff-auditor +++ /dev/null @@ -1,385 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use Carp; -use Data::Dumper; -use Getopt::Long; - -open DEBUG, ">/dev/null" or die $!; - -GetOptions("debug|D" => sub { open DEBUG, ">&2" or die $!; } - ); - -our @debug; -sub debug ($$) { - my ($i,$s) = @_; - push @{ $debug[$i] }, $s; -} - -my @d = <>; -unshift @d, "# dummy line to make line 1 index 1 in \@d\n"; - -our $i_last_l_ok = -1; -our $count_i_last_l_ok; - -sub l_ok ($) { - my ($i) = @_; - - if ($i == $i_last_l_ok) { - confess $i if $count_i_last_l_ok++ > 50; - } else { - $count_i_last_l_ok = 0; - $i_last_l_ok = $i; - } - - return unless $i < @d; - $_ = $d[$i]; - #print STDERR "L $i\n"; - 1; -} - -sub l ($) { - my ($i) = @_; - confess $i unless l_ok $i; -}; - -our $perlop_text = <<'END'; # c&p from man perlop - left terms and list operators (leftward) - left -> - nonassoc ++ -- - right ** - right ! ~ \ and unary + and - - left =~ !~ - left * / % x - left + - . - left << >> - nonassoc named unary operators - nonassoc < > <= >= lt gt le ge - nonassoc == != <=> eq ne cmp ~~ - left & - left | ^ - left && - left || // - nonassoc .. ... - right ?: - right = += -= *= etc. goto last next redo dump - left , => - nonassoc list operators (rightward) - right not - left and - left or xor - - **= += *= &= &.= <<= &&= - -= /= |= |.= >>= ||= - .= %= ^= ^.= //= - x= -END - -our $perlop_re; - -sub prep_perlop () { - my @ops; - foreach (split /\n/, $perlop_text) { - next unless m{\S}; - s{\s+$}{}; - s{^\s+}{}; - s{^(?: left | right | nonassoc ) \s+}{}x; - next if m{^terms and list operators}; - next if m{^named unary}; - next if m{^list operators}; - s{ and unary.*}{}; - s{ etc\. }{ }; - s{\?\:}{ ? : }; - foreach my $op (split /\s+/) { - next unless length $op; - next if $op =~ m{^\w+$}; - $op =~ s/\W/\\$&/g; - push @ops, $op; - } - } - $perlop_re = '(?: '.(join ' | ', @ops).' )'; - $perlop_re = qr{$perlop_re}x; - #print STDERR "$perlop_re\n"; -} - -prep_perlop(); - -our ($ifilehead, $ifirsthunkhead); -our ($ihunkhead, $ihunkend); -our ($ichunkstart, $ichunkend); -our ($before, $after); - -sub is_string ($) { $_[0]{T} =~ m/heredoc|string/; }; - -sub semiparse ($) { - ($_) = @_; - my @o; - # entries contain - # T type - # E exact input text (does not contain here doc contents) - # P something to print in messages - # V value, only for: heredoc string - # Q quote characcter, only for: heredoc string - for (;;) { - s{^\s+}{}; - if (s{^[\$\@\%]?[_0-9a-zA-Z]+}{}) { - push @o, { T => 'ident', E => $&, P => $& }; - } elsif (s{^\<\<(['"]?)([A-Z_]+)\1}{}) { - my ($q,$d) = ($1,$2); - $q ||= '"'; - push @o, { T => 'heredoc', Q => $q, Delim => $d, - E => $&, P => "<<$q$d$q" }; - s{^ - ( .* \n ) - ( (?: (?! $d) .* \n )* ) - $d \n - }{ $1 }x or die "missing end of here doc $d\n"; - $o[$#o]{V} = $2; - } elsif (s{^ (["'])( (?: [^\\'"] - | \\ [^"'] - | (?! \1 ) [^"] - )* - ) \1 }{}x) { - my ($q,$v) = ($1,$2); - push @o, { T => 'string', E => $&, P => "$q-string", - Q => $q, V => $v}; - } elsif (s{^$perlop_re|^\;}{}) { - push @o, { T => 'op', E => $&, P => $& }; - } elsif (s/[[{(]//) { - push @o, { T => 'bra', E => $&, P => $& }; - } elsif (s/[]})]//) { - push @o, { T => 'ket', E => $&, P => $& }; - } elsif (s/^( [\$\@\%] )( \{ )//x) { - push @o, { T => 'deref', E => $1, P => $1 }, - { T => 'bra', E => $2, P => $2 }; - } elsif (s/^ [\$\@\%] [^[^{] //x) { - push @o, { T => 'specvar', E => $&, P => $& }; - } elsif (!length) { - last; - } else { - m{^.{0,10}}; - die "cannot tokenise \`$&'"; - } - } - for (my $i=@o-2; $i>0; --$i) { - next unless $o[$i+1]{E} eq '.'; - my @inputs = @o[$i, $i+2]; - next if grep { !is_string($_) } @inputs; - my $q = $inputs[0]{Q}; - next if grep { $_->{Q} ne $q } @inputs; - my $new = { T => 'joinedstrings', - E => (join '.', map { $_->{E} } @inputs), - P => (join '.', map { $_->{P} } @inputs), - V => (join '', map { $_->{V} } @inputs), - Q => $q, - }; - @o = (@o[0..$i-1], $new, @o[$i+3..$#o]); - print STDERR Dumper(\@o); - } - debug $ichunkstart, "semiparsed: ".join ' ', map { $_->{P} } @o; - return @o; -} - -sub analyse_chunk_core () { - die "plain deletion\n" unless defined $after; - die "plain insertion\n" unless defined $before; - my @xs = semiparse $before; - my @ys = semiparse $after; - my $next_something = sub { - my ($ary,$var,$what) = @_; - die "ran out of $what\n" unless @$ary; - $$var = shift @$ary; - }; - my ($x,$y); - my $next_x = sub { $next_something->(\@xs, \$x, 'before'); }; - my $next_y = sub { $next_something->(\@ys, \$y, 'after' ); }; - for (;;) { - last unless @xs or @ys; - $next_x->(); - $next_y->(); - next if $x->{E} eq $y->{E}; - my $string_changed; - my $ye = $y->{E}; - if ($ye eq '__' or $ye eq 'f_') { - $next_y->(); - die "__ on non-string $y->{P}\n" unless is_string($y); - die "__ on was non-string $y->{P}\n" unless is_string($x); - if ($y->{Q} ne "'") { - die "var subst in new string\n" - if $y->{V} =~ m{(?{V} eq $x->{V}; - die "__ string quote changed\n" unless $y->{Q} eq $x->{Q}; - }; - $string_changed = $@; - } - if ($ye eq '__') { - $_ = $y->{V}; - die "percent $& in __ ' string\n" if m{\%}; - die $string_changed if length $string_changed; - next; - } - if ($ye eq 'f_') { - my $fmt = $y->{V}; - die "no percent in _f string\n" unless $fmt =~ m{\%}; - next unless $string_changed; - die "f_ old string '-quoted\n" if $x->{Q} ne '"'; - my $xs = $x->{V}; - my $exactly = sub { - my ($lit, $what) = @_; - my $xl = substr($xs, 0, length($lit)); - if ($xl ne $lit) { - debug $ichunkstart, "not exactly x: $xl"; - debug $ichunkstart, "not exactly y: $lit"; - my $next = @ys ? $ys[0]{P} : '(end)'; - die "string contents mismatch near $what before $next\n"; - } - $xs = substr($xs, length($lit)); - }; - for (;;) { - if ($fmt !~ m{\%[^\%]}) { - $exactly->($fmt, '(tail)'); - $fmt = ''; - last; - } - $exactly->($`, '(literal)'); - $fmt = $'; - if ($& eq '%%') { $exactly->('%', '%%'); next; } - elsif ($& ne '%s') { die "unhandled %-subst $&\n"; } - $next_y->(); - die "expected comma, got $y->{P}\n" unless $y->{E} eq ','; - if ($xs =~ m{^\@}) { - $next_y->(); - die "\@... => not string" unless is_string($y); - die "\@... => $y->{P}" if $y->{Q} ne '"'; - $exactly->($y->{V}, $y->{P}); - next; - } - my $bras = 0; - for (;;) { - if (!$bras and !@ys) { - last; - } - $next_y->(); - if (!$bras and - (grep { $y->{E} eq $_ } qw( or xor and not ; : ) - or $y->{T} eq 'ket' - )) { - unshift @ys, $y; - last; - } - $xs =~ s{^\s+}{}; - #debug $ichunkstart, "TOKEN $y->{P}\n"; - $exactly->($y->{E}, $y->{P}); - if ($y->{T} eq 'bra' or $y->{E} eq '?') { - $bras++; - } elsif ($y->{T} eq 'ket' or $y->{E} eq ':') { - die "too many kets at $y->{E}\n" unless $bras; - $bras--; - } - } - } - next; - } - die "mismatch $x->{P} => $y->{P}\n"; - } -} - -sub analyse_chunk () { - for (;;) { - eval { analyse_chunk_core(); }; - return unless length $@; - if ($@ =~ m{^missing end of here doc (\S+)\n}) { - # fudge this - $before .= "\n$1\n"; - $after .= "\n$1\n"; - next; - } else { - die $@; - } - } -} - -our @report; -our $last_filehead = -1; - -sub report_on_hunk () { - return unless @report; - if ($last_filehead != $ifilehead) { - foreach (my $i=$ifilehead; $i<$ifirsthunkhead; $i++) { - print $d[$i]; - } - $last_filehead = $ifilehead; - } - my $dummy_r = { S => (scalar @d)+1, E => (scalar @d)+1 }; - my $r; - for (my $i=$ihunkhead; ; $i++) { - for (;;) { - $r //= shift @report; - $r //= $dummy_r; - last if $i < $r->{E}; - confess unless $r->{Done} == 03; - $r = undef; - } - - last unless $i<$ihunkend; - - foreach my $ds (@{ $debug[$i] }) { - print "# $ds\n"; - } - - if ($i == $r->{S}) { - print "!! $r->{M}"; - $r->{Done} |= 01; - } - if ($i >= $r->{S}) { - print "!"; - $r->{Done} |= 02; - } else { - print " "; - } - print $d[$i]; - } - confess unless $r = $dummy_r; -} - -for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) { - m{^diff} or next; - $ifirsthunkhead = $ifilehead; - while (l_ok $ifirsthunkhead and - m{^diff|^index|^---|^\Q+++\E}) { - $ifirsthunkhead++ - } - $ihunkhead = $ifirsthunkhead; - while (l_ok $ihunkhead) { - m{^\@\@} or confess "$ihunkhead $_ ?"; - my $i = $ihunkhead + 1; - for (; ; $i++) { - if (!l_ok $i or m{^ } or m{^\@\@}) { - if (defined $ichunkstart) { - $ichunkend = $i; - eval { analyse_chunk(); 1; }; - if (length $@) { - push @report, { M => $@, - S => $ichunkstart, - E => $ichunkend }; - } - $ichunkstart = $ichunkend = $before = $after = undef; - } - l_ok $i or last; - m{^\@\@} and last; - } elsif (m{^[-+]}) { - my $which = $& eq '-' ? \$before : \$after; - $ichunkstart //= $i; - $$which //= ''; - $$which .= $'; - } else { - confess "$i $_ ?"; - } - } - $ihunkend = $i; - report_on_hunk(); - $ichunkend = $i; - $ihunkhead = $i; - } -}