#!/usr/bin/perl -w use strict; use Carp; use Data::Dumper; my @d = <>; unshift @d, "# dummy line to make line 1 index 1 in \@d\n"; our $i_last_l_ok = -1; our $count_i_last_l_ok; sub l_ok ($) { my ($i) = @_; if ($i == $i_last_l_ok) { confess $i if $count_i_last_l_ok++ > 50; } else { $count_i_last_l_ok = 0; $i_last_l_ok = $i; } return unless $i < @d; $_ = $d[$i]; #print STDERR "L $i\n"; 1; } sub l ($) { my ($i) = @_; confess $i unless l_ok $i; }; our $perlop_text = <<'END'; # c&p from man perlop left terms and list operators (leftward) left -> nonassoc ++ -- right ** right ! ~ \ and unary + and - left =~ !~ left * / % x left + - . left << >> nonassoc named unary operators nonassoc < > <= >= lt gt le ge nonassoc == != <=> eq ne cmp ~~ left & left | ^ left && left || // nonassoc .. ... right ?: right = += -= *= etc. goto last next redo dump left , => nonassoc list operators (rightward) right not left and left or xor **= += *= &= &.= <<= &&= -= /= |= |.= >>= ||= .= %= ^= ^.= //= x= END our $perlop_re; sub prep_perlop () { my @ops; foreach (split /\n/, $perlop_text) { next unless m{\S}; s{\s+$}{}; s{^\s+}{}; s{^(?: left | right | nonassoc ) \s+}{}x; next if m{^terms and list operators}; next if m{^named unary}; next if m{^list operators}; s{ and unary.*}{}; s{ etc\. }{ }; foreach my $op (split /\s+/) { next unless length $op; next if $op =~ m{^\w+$}; $op =~ s/\W/\\$&/g; push @ops, $op; } } $perlop_re = '(?: '.(join ' | ', @ops).' )'; $perlop_re = qr{$perlop_re}x; #print STDERR "$perlop_re\n"; } prep_perlop(); our ($ifilehead, $ihunkhead, $ichunkstart, $ichunkend); our ($before, $after); sub semiparse ($) { ($_) = @_; my @o; for (;;) { s{^\s+}{}; if (s{^[\$\@\%]?[_0-9a-zA-Z]+}{}) { push @o, { T => 'ident', L => $& }; } elsif (s{^\<\<('?)([A-Z_]+)\1}{}) { my ($q,$d) = ($1,$2); push @o, { T => 'heredoc', Q => $q, Delim => $d }; s{^ ( .* \n ) ( (?: (?! $d) .* \n )* ) }{ $1 } or die "missing end of here doc $d\n"; $o[$#o]{V} = $2; } elsif (s{^ (["'])( (?: [^\\] | \\ \1 )* )}{}x) { my ($q,$v) = ($1,$2); push @o, { T => 'string', Q => $q, V => $v }; } elsif (s{^$perlop_re}{}) { push @o, { T => 'op', L => $& }; } elsif (s/[[{(]//) { push @o, { T => 'bra', L => $& }; } elsif (s/[]})]//) { push @o, { T => 'ket', L => $& }; } elsif (!length) { last; } else { m{^.{0,10}}; die "cannot tokenise \`$&'"; } } return @o; } sub analyse_chunk () { die "plain deletion\n" unless defined $after; die "plain insertion\n" unless defined $before; my @before = semiparse $before; my @after = semiparse $after; print Dumper($ichunkstart, $ichunkend, \@before, \@after); flush STDOUT; } for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) { m{^diff} or next; while (l_ok $ifilehead and m{^diff|^index|^---|^\Q+++\E}) { $ifilehead++ } $ihunkhead = $ifilehead; while (l_ok $ihunkhead) { m{^\@\@} or confess "$ihunkhead $_ ?"; my $i = $ihunkhead + 1; for (; ; $i++) { if (!l_ok $i or m{^ } or m{^\@\@}) { if (defined $ichunkstart) { $ichunkend = $i; eval { analyse_chunk(); 1; }; if (length $@) { print Dumper($ichunkstart, $ichunkend, $before, $after, $@); } $ichunkstart = $ichunkend = $before = $after = undef; } l_ok $i or last; m{^\@\@} and last; } elsif (m{^[-+]}) { my $which = $& eq '-' ? \$before : \$after; $ichunkstart //= $i; $$which //= ''; $$which .= $'; } else { confess "$i $_ ?"; } } $ichunkend = $i; $ihunkhead = $i; } }