X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=dgit-junk.git;a=blobdiff_plain;f=i18n-diff-auditor;h=b9bd4e50e17e1643c894ad961bfbe2e7771f594a;hp=8252c3f1ba4d6f9b5517c2b8c08c8a74a25f98be;hb=9f63cef14f7581cabba2ccfd4ac4d23ccabe5042;hpb=81019d41a3311596539218f80a003840ea9cb4a3

diff --git a/i18n-diff-auditor b/i18n-diff-auditor
index 8252c3f..b9bd4e5 100755
--- a/i18n-diff-auditor
+++ b/i18n-diff-auditor
@@ -76,6 +76,7 @@ sub prep_perlop () {
 	next if m{^list operators};
 	s{ and unary.*}{};
 	s{ etc\. }{ };
+	s{\?\:}{ ? : };
 	foreach my $op (split /\s+/) {
 	    next unless length $op;
 	    next if $op =~ m{^\w+$};
@@ -96,27 +97,46 @@ our ($before, $after);
 sub semiparse ($) {
     ($_) = @_;
     my @o;
+    # entries contain
+    #   T     type
+    #   E     exact input text (does not contain here doc contents)
+    #   P     something to print in messages
+    #   V     value, only for: heredoc string
+    #   Q     quote characcter, only for: heredoc string
     for (;;) {
 	s{^\s+}{};
-	if (s{^[\$\@\%][_0-9a-zA-Z]+}{}) {
-	    push @o, { T => 'ident', L => $& };
-	} elsif (s{^\<\<('?)([A-Z_]+)\1}{}) {
+	if (s{^[\$\@\%]?[_0-9a-zA-Z]+}{}) {
+	    push @o, { T => 'ident', E => $&, P => $& };
+	} elsif (s{^\<\<(['"]?)([A-Z_]+)\1}{}) {
 	    my ($q,$d) = ($1,$2);
-	    push @o, { T => 'heredoc', Q => $q, Delim => $d };
+	    $q //= '"';
+	    push @o, { T => 'heredoc', Q => $q, Delim => $d,
+		       E => $&, P => "<<$q$d..." };
 	    s{^
                 (             .* \n    )
                 ( (?: (?! $d) .* \n )* )
-              }{ $1 } or die "missing end of here doc $d\n";
+                          $d     \n
+              }{ $1 }x or die "missing end of here doc $d\n";
 	    $o[$#o]{V} = $2;
-	} elsif (s{^ (["'])( (?:  [^\\] | \\ \1  )* )}{}x) {
+	} elsif (s{^ (["'])( (?: [^\\'"]
+                               | \\ [^"']
+                               | (?! \1 ) [^"]
+                              )*
+                       \1 )}{}x) {
 	    my ($q,$v) = ($1,$2);
-	    push @o, { T => 'string', Q => $q, V => $v };
-	} elsif (s{^$perlop_re}{}) {
-	    push @o, { T => 'op', L => $& };
+	    push @o, { T => 'string', E => $&, P => "$q-string",
+		       Q => $q, V => $v};
+	} elsif (s{^$perlop_re|\;}{}) {
+	    push @o, { T => 'op', E => $&, P => $& };
 	} elsif (s/[[{(]//) {
-	    push @o, { T => 'bra', L => $& };
+	    push @o, { T => 'bra', E => $&, P => $& };
 	} elsif (s/[]})]//) {
-	    push @o, { T => 'ket', L => $& };
+	    push @o, { T => 'ket', E => $&, P => $& };
+	} elsif (s/^( [\$\@\%] )( \{ )//x) {
+	    push @o, { T => 'deref', E => $1, P => $1 },
+		     { T => 'bra',   E => $2, P => $2 };
+	} elsif (s/^ [\$\@\%] [^[^{] //x) {
+	    push @o, { T => 'specvar', E => $&, P => $& };
 	} elsif (!length) {
 	    last;
 	} else {
@@ -124,15 +144,124 @@ sub semiparse ($) {
 	    die "cannot tokenise \`$&'";
 	}
     }
+    # coalesce concatenated strings
+    return @o;
 }	    
 
-sub analyse_chunk () {
+sub analyse_chunk_core () {
     die "plain deletion\n" unless defined $after;
     die "plain insertion\n" unless defined $before;
-    my @before = semiparse $before;
-    my @after = semiparse $after;
-    print Dumper($ichunkstart, $ichunkend, \@before, \@after);
-    flush STDOUT;
+    my @xs = semiparse $before;
+    my @ys = semiparse $after;
+    my $next_something = sub {
+	my ($ary,$var,$what) = @_;
+	die "ran out of $what\n" unless @$ary;
+	$$var = shift @$ary;
+    };
+    my ($x,$y);
+    my $next_x = sub { $next_something->(\@xs, \$x, 'before'); };
+    my $next_y = sub { $next_something->(\@ys, \$y, 'after' ); };
+    my $is_string = sub { $_[0]{T} =~ m/heredoc|string/; };
+    for (;;) {
+	last unless @xs or @ys;
+	$next_x->();
+	$next_y->();
+	next if $x->{E} eq $y->{E};
+	my $string_changed;
+	my $ye = $y->{E};
+	if ($ye eq '__' or $ye eq 'f_') {
+	    $next_y->();
+	    die "__ on non-string $y->{P}\n"     unless $is_string->($y);
+	    die "__ on was non-string $y->{P}\n" unless $is_string->($x);
+	    if ($y->{Q} ne "'") {
+		die "var subst in new string\n"
+		    if $y->{V} =~ m{(?<!\\) [\$\@]};
+	    }
+	    eval {
+		die "__ string changed\n"       unless $y->{V} eq $x->{V};
+		die "__ string quote changed\n" unless $y->{Q} eq $x->{Q};
+	    };
+	    $string_changed = $@;
+	}
+	if ($ye eq '__') {
+	    $_ = $y->{V};
+	    die "percent $& in __ ' string\n" if m{\%};
+	    die $string_changed if length $string_changed;
+	    next;
+	}
+	if ($ye eq 'f_') {
+	    my $fmt = $y->{V};
+	    die "no percent in _f string\n" unless $fmt =~ m{\%};
+	    next unless $string_changed;
+	    die "f_ old string '-quoted\n" if length $x->{V};
+	    my $xs = $x->{V};
+	    my $exactly = sub {
+		my ($lit) = @_;
+		my $xl = substr($xs, 0, length($lit));
+		die "exactly mismatch in $lit\n" unless $xl eq $lit;
+		$xs = substr($xs, length($lit));
+	    };
+	    for (;;) {
+		if ($fmt !~ m{\%[^\%]}) {
+		    $exactly->($fmt);
+		    $fmt = '';
+		    last;
+		}
+		$exactly->($`);
+		$fmt = $';
+		if ($& eq '%%') { $exactly->('%'); next; }
+		elsif ($& ne '%s') { die "unhandled %-subst $&\n"; }
+		$next_y->();
+		die "expected comma, got $y->{P}\n" unless $y->{E} eq ',';
+		if ($xs =~ m{^\@}) {
+		    $next_y->();
+		    die "\@... => not string" unless $is_string->($y);
+		    die "\@... => $y->{P}" if $y->{Q} ne '"';
+		    $exactly->($y->{V});
+		    next;
+		}
+		my $bras = 0;
+		for (;;) {
+		    if (!$bras and !@ys) {
+			last;
+		    }
+		    $next_y->();
+		    if (!$bras and
+			(grep { $y->{E} eq $_ } qw( or xor and not ; : )
+			 or $y->{T} eq 'ket'
+			)) {
+			unshift @ys, $y;
+			last;
+		    }
+		    $xs =~ s{^\s+}{};
+		    $exactly->($y->{E});
+		    if ($y->{T} eq 'bra' or $y->{L} eq '?') {
+			$bras++;
+		    } elsif ($y->{T} eq 'ket' or $y->{L} eq ':') {
+			die "too many kets at $y->{L}\n" unless $bras;
+			$bras--;
+		    }
+		}
+	    }
+	    next;
+	}
+	die "mismatch $x->{P} => $y->{P}\n";
+    }
+}
+
+sub analyse_chunk () {
+    for (;;) {
+	eval { analyse_chunk_core(); };
+	return unless length $@;
+	if ($@ =~ m{^missing end of here doc (\S+)\n}) {
+	    # fudge this
+	    $before .= "\n$1\n";
+	    $after .= "\n$1\n";
+	    next;
+	} else {
+	    die $@;
+	}
+    }
 }
 
 for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) {
@@ -148,7 +277,8 @@ for ($ifilehead = 0; l_ok $ifilehead; $ifilehead++) {
 		    $ichunkend = $i;
 		    eval { analyse_chunk(); 1; };
 		    if (length $@) {
-			print Dumper($ichunkstart, $ichunkend,
+			print Dumper('REPORT',
+				     $ichunkstart, $ichunkend,
 				     $before, $after,
 				     $@);
 		    }