X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=dgit-junk.git;a=blobdiff_plain;f=i18n-diff-auditor;h=d1ae6f1818826d5b1528776ef83debdf2fb66d18;hp=777bf47ededa166703f847d7290a1c839d394727;hb=36d9d13a20e44076e35d08a103004dd0f5e61401;hpb=78ab0ab57128eec1ef2447cdd3e6008230ee5c04 diff --git a/i18n-diff-auditor b/i18n-diff-auditor index 777bf47..d1ae6f1 100755 --- a/i18n-diff-auditor +++ b/i18n-diff-auditor @@ -1,38 +1,172 @@ #!/usr/bin/perl -w use strict; use Carp; +use Data::Dumper; my @d = <>; +unshift @d, "# dummy line to make line 1 index 1 in \@d\n"; + +our $i_last_l_ok = -1; +our $count_i_last_l_ok; sub l_ok ($) { my ($i) = @_; + + if ($i == $i_last_l_ok) { + confess $i if $count_i_last_l_ok++ > 50; + } else { + $count_i_last_l_ok = 0; + $i_last_l_ok = $i; + } + return unless $i < @d; $_ = $d[$i]; + #print STDERR "L $i\n"; 1; } -sub l ($) { confess unless l_ok $_[0]; }; +sub l ($) { + my ($i) = @_; + confess $i unless l_ok $i; +}; -our ($ifilehead, $ihunkhead, $ichunk); +our $perlop_text = <<'END'; # c&p from man perlop + left terms and list operators (leftward) + left -> + nonassoc ++ -- + right ** + right ! ~ \ and unary + and - + left =~ !~ + left * / % x + left + - . + left << >> + nonassoc named unary operators + nonassoc < > <= >= lt gt le ge + nonassoc == != <=> eq ne cmp ~~ + left & + left | ^ + left && + left || // + nonassoc .. ... + right ?: + right = += -= *= etc. goto last next redo dump + left , => + nonassoc list operators (rightward) + right not + left and + left or xor + + **= += *= &= &.= <<= &&= + -= /= |= |.= >>= ||= + .= %= ^= ^.= //= + x= +END + +our $perlop_re; + +sub prep_perlop () { + my @ops; + foreach (split /\n/, $perlop_text) { + next unless m{\S}; + s{\s+$}{}; + s{^\s+}{}; + s{^(?: left | right | nonassoc ) \s+}{}x; + next if m{^terms and list operators}; + next if m{^named unary}; + next if m{^list operators}; + s{ and unary.*}{}; + s{ etc\. }{ }; + foreach my $op (split /\s+/) { + next unless length $op; + next if $op =~ m{^\w+$}; + $op =~ s/\W/\\$&/g; + push @ops, $op; + } + } + $perlop_re = '(?: '.(join ' | ', @ops).' )'; + $perlop_re = qr{$perlop_re}x; + #print STDERR "$perlop_re\n"; +} + +prep_perlop(); + +our ($ifilehead, $ihunkhead, $ichunkstart, $ichunkend); our ($before, $after); +sub semiparse ($) { + ($_) = @_; + my @o; + for (;;) { + s{^\s+}{}; + if (s{^[\$\@\%]?[_0-9a-zA-Z]+}{}) { + push @o, { T => 'ident', L => $& }; + } elsif (s{^\<\<('?)([A-Z_]+)\1}{}) { + my ($q,$d) = ($1,$2); + push @o, { T => 'heredoc', Q => $q, Delim => $d }; + s{^ + ( .* \n ) + ( (?: (?! $d) .* \n )* ) + }{ $1 } or die "missing end of here doc $d\n"; + $o[$#o]{V} = $2; + } elsif (s{^ (["'])( (?: [^\\] | \\ \1 )* )}{}x) { + my ($q,$v) = ($1,$2); + push @o, { T => 'string', Q => $q, V => $v }; + } elsif (s{^$perlop_re}{}) { + push @o, { T => 'op', L => $& }; + } elsif (s/[[{(]//) { + push @o, { T => 'bra', L => $& }; + } elsif (s/[]})]//) { + push @o, { T => 'ket', L => $& }; + } elsif (!length) { + last; + } else { + m{^.{0,10}}; + die "cannot tokenise \`$&'"; + } + } + return @o; +} + +sub analyse_chunk () { + die "plain deletion\n" unless defined $after; + die "plain insertion\n" unless defined $before; + my @before = semiparse $before; + my @after = semiparse $after; + print Dumper($ichunkstart, $ichunkend, \@before, \@after); + flush STDOUT; +} + for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) { m{^diff} or next; - while (l_ok $ifilehead and m{^index|^---|^\Q+++\E}) { } + while (l_ok $ifilehead and m{^diff|^index|^---|^\Q+++\E}) { $ifilehead++ } $ihunkhead = $ifilehead; while (l_ok $ihunkhead) { - m{^\@\@} or confess; - $ichunk = $ihunkhead + 1; - for (;;) { - l_ok $ichunk or last; - - while (l_ok $ichunk and + m{^\@\@} or confess "$ihunkhead $_ ?"; + my $i = $ihunkhead + 1; + for (; ; $i++) { + if (!l_ok $i or m{^ } or m{^\@\@}) { + if (defined $ichunkstart) { + $ichunkend = $i; + eval { analyse_chunk(); 1; }; + if (length $@) { + print Dumper($ichunkstart, $ichunkend, + $before, $after, + $@); + } + $ichunkstart = $ichunkend = $before = $after = undef; + } + l_ok $i or last; + m{^\@\@} and last; + } elsif (m{^[-+]}) { + my $which = $& eq '-' ? \$before : \$after; + $ichunkstart //= $i; + $$which //= ''; + $$which .= $'; + } else { + confess "$i $_ ?"; + } + } + $ichunkend = $i; + $ihunkhead = $i; } - - confess unless m{^\@\@}; - - -while (<>){ - if (m/^diff.*/) { - my $headline = }