#!/usr/bin/perl -w use strict; use Carp; use Data::Dumper; my @d = <>; unshift @d, "# dummy line to make line 1 index 1 in \@d\n"; our $i_last_l_ok = -1; our $count_i_last_l_ok; sub l_ok ($) { my ($i) = @_; if ($i == $i_last_l_ok) { confess $i if $count_i_last_l_ok++ > 50; } else { $count_i_last_l_ok = 0; $i_last_l_ok = $i; } return unless $i < @d; $_ = $d[$i]; #print STDERR "L $i\n"; 1; } sub l ($) { my ($i) = @_; confess $i unless l_ok $i; }; our $perlop_text = <<'END'; # c&p from man perlop left terms and list operators (leftward) left -> nonassoc ++ -- right ** right ! ~ \ and unary + and - left =~ !~ left * / % x left + - . left << >> nonassoc named unary operators nonassoc < > <= >= lt gt le ge nonassoc == != <=> eq ne cmp ~~ left & left | ^ left && left || // nonassoc .. ... right ?: right = += -= *= etc. goto last next redo dump left , => nonassoc list operators (rightward) right not left and left or xor **= += *= &= &.= <<= &&= -= /= |= |.= >>= ||= .= %= ^= ^.= //= x= END our $perlop_re; sub prep_perlop () { my @ops; foreach (split /\n/, $perlop_text) { next unless m{\S}; s{\s+$}{}; s{^\s+}{}; s{^(?: left | right | nonassoc ) \s+}{}x; next if m{^terms and list operators}; next if m{^named unary}; next if m{^list operators}; s{ and unary.*}{}; s{ etc\. }{ }; s{\?\:}{ ? : }; foreach my $op (split /\s+/) { next unless length $op; next if $op =~ m{^\w+$}; $op =~ s/\W/\\$&/g; push @ops, $op; } } $perlop_re = '(?: '.(join ' | ', @ops).' )'; $perlop_re = qr{$perlop_re}x; #print STDERR "$perlop_re\n"; } prep_perlop(); our ($ifilehead, $ihunkhead, $ichunkstart, $ichunkend); our ($before, $after); sub semiparse ($) { ($_) = @_; my @o; # entries contain # T type # E exact input text (does not contain here doc contents) # P something to print in messages # V value, only for: heredoc string # Q quote characcter, only for: heredoc string for (;;) { s{^\s+}{}; if (s{^[\$\@\%]?[_0-9a-zA-Z]+}{}) { push @o, { T => 'ident', E => $&, P => $& }; } elsif (s{^\<\<(['"]?)([A-Z_]+)\1}{}) { my ($q,$d) = ($1,$2); $q //= '"'; push @o, { T => 'heredoc', Q => $q, Delim => $d, E => $&, P => "<<$q$d..." }; s{^ ( .* \n ) ( (?: (?! $d) .* \n )* ) $d \n }{ $1 }x or die "missing end of here doc $d\n"; $o[$#o]{V} = $2; } elsif (s{^ (["'])( (?: [^\\'"] | \\ [^"'] | (?! \1 ) [^"] )* \1 )}{}x) { my ($q,$v) = ($1,$2); push @o, { T => 'string', E => $&, P => "$q-string", Q => $q, V => $v}; } elsif (s{^$perlop_re|\;}{}) { push @o, { T => 'op', E => $&, P => $& }; } elsif (s/[[{(]//) { push @o, { T => 'bra', E => $&, P => $& }; } elsif (s/[]})]//) { push @o, { T => 'ket', E => $&, P => $& }; } elsif (s/^( [\$\@\%] )( \{ )//x) { push @o, { T => 'deref', E => $1, P => $1 }, { T => 'bra', E => $2, P => $2 }; } elsif (s/^ [\$\@\%] [^[^{] //x) { push @o, { T => 'specvar', E => $&, P => $& }; } elsif (!length) { last; } else { m{^.{0,10}}; die "cannot tokenise \`$&'"; } } # coalesce concatenated strings return @o; } sub analyse_chunk_core () { die "plain deletion\n" unless defined $after; die "plain insertion\n" unless defined $before; my @xs = semiparse $before; my @ys = semiparse $after; my $next_something = sub { my ($ary,$var,$what) = @_; die "ran out of $what\n" unless @$ary; $$var = shift @$ary; }; my ($x,$y); my $next_before = sub { $next_something->(\@xs, \$x, 'before'); }; my $next_after = sub { $next_something->(\@ys , \$y, 'after' ); }; my $is_string = sub { $_[0]{T} =~ m/heredoc|string/; }; for (;;) { last unless @xs or @ys; $next_before->(); $next_after->(); next if $x->{E} eq $y->{E}; my $string_changed; if ($y->{E} eq '__' or $y->{E} eq '_f') { $next_after->(); die "__ on non-string $y->{P}\n" unless Sis_string->($y); die "__ on was non-string $y->{P}\n" unless $is_string->($x); if ($y->{Q} ne "'") { die "var subst in new string\n" if $y->{V} =~ m{(?{V} eq $x->{V}; die "__ string quote changed\n" unless $y->{Q} eq $x->{Q}; }; $string_changed = $@; } if ($y->{E} eq '__') { $_ = $y->{V}; die "percent $& in __ ' string\n" if m{\%}; die $string_changed if length $string_changed; next; } if ($y->{E} eq 'f_') { my $fmt = $y->{V}; die "no percent in _f string\n" unless $fmt =~ m{\%}; next unless $string_changed; die "f_ old string '-quoted\n" if length $x->{V}; my $xs = $x->{V}; my $exactly = sub { my ($lit) = @_; my $xl = substr($xs, 0, length($lit)); die "exactly mismatch in $lit\n" unless $xl eq $lit; $xs = substr($xs, length($lit)); }; for (;;) { if ($fmt !~ m{\%[^\%]}) { $exactly->($fmt); $fmt = ''; last; } $exactly->($`); $fmt = $'; if ($& eq '%%') { $exactly->('%'); next; } elsif ($& ne '%s') { die "unhandled %-subst $&\n"; } $next_after->(); die "expected comma, got $y->{P}\n" unless $y->{E} eq ','; if ($xs =~ m{^\@}) { $next_after->(); die "\@... => not string" unless $is_string->($y); die "\@... => $y->{P}" if $y->{Q} ne '"'; $exactly->($y->{V}); next; } my $bras = 0; for (;;) { if (!$bras and !@ys) { last; } $next_after->(); if (!$bras and (grep { $y->{E} eq $_ } qw( or xor and not ; : ) or $y->{T} eq 'ket' )) { unshift @ys, $y; last; } $xs =~ s{^\s+}{}; $exactly->($y->{E}); if ($y->{T} eq 'bra' or $y->{L} eq '?') { $bras++; } elsif ($y->{T} eq 'ket' or $y->{L} eq ':') { die "too many kets at $y->{L}\n" unless $bras; $bras--; } } } next; } die "mismatch $x->{P} => $y->{P}\n"; } } sub analyse_chunk () { for (;;) { eval { analyse_chunk_core(); }; return unless length $@; if ($@ =~ m{^missing end of here doc (\S+)\n}) { # fudge this $before .= "\n$1\n"; $after .= "\n$1\n"; next; } else { die $@; } } } for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) { m{^diff} or next; while (l_ok $ifilehead and m{^diff|^index|^---|^\Q+++\E}) { $ifilehead++ } $ihunkhead = $ifilehead; while (l_ok $ihunkhead) { m{^\@\@} or confess "$ihunkhead $_ ?"; my $i = $ihunkhead + 1; for (; ; $i++) { if (!l_ok $i or m{^ } or m{^\@\@}) { if (defined $ichunkstart) { $ichunkend = $i; eval { analyse_chunk(); 1; }; if (length $@) { print Dumper('REPORT', $ichunkstart, $ichunkend, $before, $after, $@); } $ichunkstart = $ichunkend = $before = $after = undef; } l_ok $i or last; m{^\@\@} and last; } elsif (m{^[-+]}) { my $which = $& eq '-' ? \$before : \$after; $ichunkstart //= $i; $$which //= ''; $$which .= $'; } else { confess "$i $_ ?"; } } $ichunkend = $i; $ihunkhead = $i; } }