From: Ian Jackson
Date: Sun, 30 Sep 2018 12:49:46 +0000 (+0100)
Subject: auditor wip semiparse
X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=commitdiff_plain;h=81019d41a3311596539218f80a003840ea9cb4a3;p=dgit-junk.git

auditor wip semiparse
---

diff --git a/i18n-diff-auditor b/i18n-diff-auditor
index d947a01..8252c3f 100755
--- a/i18n-diff-auditor
+++ b/i18n-diff-auditor
@@ -99,7 +99,7 @@ sub semiparse ($) {
     for (;;) {
         s{^\s+}{};
         if (s{^[\$\@\%][_0-9a-zA-Z]+}{}) {
-            push @o, { T => 'ident', V => $& };
+            push @o, { T => 'ident', L => $& };
         } elsif (s{^\<\<('?)([A-Z_]+)\1}{}) {
             my ($q,$d) = ($1,$2);
             push @o, { T => 'heredoc', Q => $q, Delim => $d };
@@ -111,8 +111,17 @@ sub semiparse ($) {
         } elsif (s{^ (["'])( (?: [^\\] | \\ \1 )* )}{}x) {
             my ($q,$v) = ($1,$2);
             push @o, { T => 'string', Q => $q, V => $v };
+        } elsif (s{^$perlop_re}{}) {
+            push @o, { T => 'op', L => $& };
+        } elsif (s/[[{(]//) {
+            push @o, { T => 'bra', L => $& };
+        } elsif (s/[]})]//) {
+            push @o, { T => 'ket', L => $& };
+        } elsif (!length) {
+            last;
         } else {
-            die;
+            m{^.{0,10}};
+            die "cannot tokenise \`$&'";
         }
     }
 }
@@ -122,7 +131,7 @@ sub analyse_chunk () {
     die "plain insertion\n" unless defined $before;
     my @before = semiparse $before;
     my @after = semiparse $after;
-    print Dumper($ichunkstart, $ichunkend, $before, $after);
+    print Dumper($ichunkstart, $ichunkend, \@before, \@after);
     flush STDOUT;
 }
 
@@ -138,7 +147,11 @@ for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) {
         if (defined $ichunkstart) {
             $ichunkend = $i;
             eval { analyse_chunk(); 1; };
-            # do something with $@
+            if (length $@) {
+                print Dumper($ichunkstart, $ichunkend,
+                             $before, $after,
+                             $@);
+            }
             $ichunkstart = $ichunkend = $before = $after = undef;
         }
         l_ok $i or last;