sub semiparse ($) {
($_) = @_;
my @o;
+ # entries contain
+ # T type
+ # E exact input text (does not contain here doc contents)
+ # P something to print in messages
+ # V value, only for: heredoc string
+ # Q quote characcter, only for: heredoc string
for (;;) {
s{^\s+}{};
if (s{^[\$\@\%]?[_0-9a-zA-Z]+}{}) {
- push @o, { T => 'ident', L => $& };
+ push @o, { T => 'ident', E => $&, P => $& };
} elsif (s{^\<\<('?)([A-Z_]+)\1}{}) {
my ($q,$d) = ($1,$2);
- push @o, { T => 'heredoc', Q => $q, Delim => $d };
+ push @o, { T => 'heredoc', Q => $q, Delim => $d,
+ E => $&, P => "<<$q$d..." };
s{^
( .* \n )
( (?: (?! $d) .* \n )* )
)*
\1 )}{}x) {
my ($q,$v) = ($1,$2);
- push @o, { T => 'string', Q => $q, V => $v };
+ push @o, { T => 'string', Q => $q, V => $v,
+ I => $&, P => "$q-string" };
} elsif (s{^$perlop_re|\;}{}) {
- push @o, { T => 'op', L => $& };
+ push @o, { T => 'op', E => $&, I => $& };
} elsif (s/[[{(]//) {
- push @o, { T => 'bra', L => $& };
+ push @o, { T => 'bra', E => $&, I => $& };
} elsif (s/[]})]//) {
- push @o, { T => 'ket', L => $& };
+ push @o, { T => 'ket', E => $&, I => $& };
} elsif (s/^( [\$\@\%] )( \{ )//x) {
- push @o, { T => 'deref', L => $1 },
- { T => 'bra', L => $2 };
+ push @o, { T => 'deref', E => $1, I => $1 },
+ { T => 'bra', E => $2, I => $1 };
} elsif (s/^ [\$\@\%] [^[^{] //x) {
- push @o, { T => 'specvar', L => $& };
+ push @o, { T => 'specvar', E => $&, I => $& };
} elsif (!length) {
last;
} else {
die "cannot tokenise \`$&'";
}
}
+ # coalesce concatenated strings
return @o;
}
die "plain insertion\n" unless defined $before;
my @before = semiparse $before;
my @after = semiparse $after;
+ my $next_something = sub {
+ my ($ary,$var,$what) = @_;
+ die "ran out of $what\n" unless @$ary;
+ $$var = shift @$ary;
+ };
+ my $next_before = sub { $next_something->(\@before, \$x, 'before'); };
+ my $next_after = sub { $next_something->(\@after , \$y, 'after' ); };
+ sub $is_string = sub { $_[0]{T} =~ m/heredoc|string/; };
+ for (;;) {
+ last unless @before or @after;
+ $next_before->();
+ $next_after->();
+ next if $x->{I} eq $y->{I};
+ my $string_changed;
+ if ($y->{I} eq '__' or $y->{I} eq '_f') {
+ $next_after->();
+ die "__ on non-string\n" unless Sis_string($y);
+ die "__ on was non-string\n" unless $is_string($x);
+ if ($y->{Q} ne "'") {
+ die "var subst in new str\n" if $y->{V} =~ m{(?<!\\) [\$\@]};
+ }
+ eval {
+ die "__ string changed\n" unless $y->{V} eq $x->{V};
+ die "__ string quote changed\n" unless $y->{Q} eq $x->{Q};
+ };
+ $string_changed = $@;
+ }
+ if ($y->{I} eq '__') {
+ $_ = $y->{V};
+ die "percent in __ ' string\n" if m{\%};
+ die $string_changed if length $not_unchanged;
+ next;
+ }
+ if ($y->{I} eq 'f_') {
+ my $fmt = $y->{V};
+ die "no percent in _f string\n" unless $fmt =~ m{\%};
+ next unless $string_changed;
+ die "f_ old string '-quoted\n" if length $x->{V};
+ my $xs = $x->{V};
+ my $exactly = sub {
+ my ($lit) = @_;
+ my $xl = substr($xs, 0, length($lit));
+ die "exactly mismatch in $lit\n" unless $xl eq $lit;
+ $xs = substr($xs, length($lit));
+ };
+ for (;;) {
+ if ($fmt !~ m{\%[^\%]}) {
+ $exactly->($fmt);
+ $fmt = '';
+ last;
+ }
+ $exactly->($`);
+ $fmt = $';
+ if ($& eq '%%') { $exactly('%'); next; }
+ elsif ($& ne '%s') { die "unhandled %-subst $&\n"; }
+ $next_after->();
+ die "expected comma, got $y->{T}\n" unless $y->{I} eq ',';
+ if ($old =~ m{^\@}) {
+ $next_after->();
+ die "\@... => not string" unless $is_string($y);
+ $exactly->($y->{V});
+ next;
+ }
+ my $bras = 0;
+ for (;;) {
+ if (!$bra and !@after) {
+ last;
+ }
+ $next_after->();
+ if (!$bra and
+ (grep { $y->{I} eq $_ } qw( or xor and not ; : )
+ or $y->{T} eq 'ket'
+ )) {
+ unshift @after, $ys;
+ last;
+ }
+ $xs =~ s{^\s+}{};
+ $exactly->{$y->{I});
+ if ($y->{T} eq 'bra' or $y->{L} eq '?') {
+ $bras++;
+ } elsif ($y->{T} eq 'ket' or $y->{L} eq ':') {
+ die "too many kets at $y->{L}\n" unless $bras;
+ $bras--;
+ }
+ }
+ }
+ next;
+ }
+ die "mismatch
+
+ $exact->
+
+ if ($fmt =~ m{
+ if
+
+ if ($y->{Q} eq $x->{Q} && $y->
+
+ $y = shift @after;
print Dumper($ichunkstart, $ichunkend, \@before, \@after);
flush STDOUT;
}