i18n: i18n-diff-auditor: handle invented here doc end differently

[dgit.git] / i18n-diff-auditor
diff --git a/i18n-diff-auditor b/i18n-diff-auditor

index 853dccea8b4ab009d7584a9f33e5b573a4430d44..13454197c7a4be61f62dddce2f2a57bbe3f29435 100755 (executable)
--- a/i18n-diff-auditor
+++ b/i18n-diff-auditor
@@ -110,9 +110,19 @@ our ($before, $after);
  sub is_string ($) { $_[0]{T} =~ m/heredoc|string/; }
  sub is_trans ($) { grep { $_[0]{E} eq $_ } qw(__ f_ i_); }
  
  sub is_string ($) { $_[0]{T} =~ m/heredoc|string/; }
  sub is_trans ($) { grep { $_[0]{E} eq $_ } qw(__ f_ i_); }
  
+sub qp ($) {
+    my ($p) = @_;
+    $p =~ s{\\}{\\\\}g;
+    $p =~ s{\'}{\\'}g;
+    $p =~ s{\n}{\\n}g;
+    $p =~ s{\t}{\\t}g;
+    return "'$p'";
+};
+
  sub semiparse ($) {
      ($_) = @_;
      my @o;
  sub semiparse ($) {
      ($_) = @_;
      my @o;
+    #my $in = $_;
      # entries contain
      #   T     type
      #   E     exact input text (does not contain here doc contents)
      # entries contain
      #   T     type
      #   E     exact input text (does not contain here doc contents)
@@ -128,12 +138,17 @@ sub semiparse ($) {
             $q ||= '"';
             push @o, { T => 'heredoc', Q => $q, Delim => $d,
                        E => $&, P => "<<$q$d$q" };
             $q ||= '"';
             push @o, { T => 'heredoc', Q => $q, Delim => $d,
                        E => $&, P => "<<$q$d$q" };
-           s{^
-                (             .* \n    )
-                ( (?: (?! $d) .* \n )* )
-                          $d     \n
-              }{ $1 }x or die "missing end of here doc $d\n";
-           $o[$#o]{V} = $2;
+           if (s{^
+                   (                 .* \n     )
+                   ( (?: (?! $d \n ) .* \n )*? )
+                             $d         \n
+                }{ $1 }xe) {
+               $o[$#o]{V} = $2;
+            } else {
+               s{ \n (.*) }{ \n }s;
+               $o[$#o]{V} = $1;
+               $o[$#o]{Invented} = 1;
+           }
         } elsif (s{^ (["'])( (?: [^\\'"]
                                 | \\ [^"']
                                 | (?! \1 ) [^"]
         } elsif (s{^ (["'])( (?: [^\\'"]
                                 | \\ [^"']
                                 | (?! \1 ) [^"]
@@ -144,9 +159,9 @@ sub semiparse ($) {
                        Q => $q, V => $v};
         } elsif (s{^$perlop_re|^\;}{}) {
             push @o, { T => 'op', E => $&, P => $& };
                        Q => $q, V => $v};
         } elsif (s{^$perlop_re|^\;}{}) {
             push @o, { T => 'op', E => $&, P => $& };
-       } elsif (s/[[{(]//) {
+       } elsif (s/^[[{(]//) {
             push @o, { T => 'bra', E => $&, P => $& };
             push @o, { T => 'bra', E => $&, P => $& };
-       } elsif (s/[]})]//) {
+       } elsif (s/^[]})]//) {
             push @o, { T => 'ket', E => $&, P => $& };
         } elsif (s/^( [\$\@\%] )( \{ )//x) {
             push @o, { T => 'deref', E => $1, P => $1 },
             push @o, { T => 'ket', E => $&, P => $& };
         } elsif (s/^( [\$\@\%] )( \{ )//x) {
             push @o, { T => 'deref', E => $1, P => $1 },
@@ -160,12 +175,14 @@ sub semiparse ($) {
             die "cannot tokenise \`$&'";
         }
      }
             die "cannot tokenise \`$&'";
         }
      }
-    for (my $i=@o-2; $i>0; --$i) {
+    for (my $i=0; $i+2 < @o; $i++) {
         next unless $o[$i+1]{E} eq '.';
         my @inputs = @o[$i, $i+2];
         next unless $o[$i+1]{E} eq '.';
         my @inputs = @o[$i, $i+2];
+       #print STDERR Dumper(\@inputs);
         next if grep { !is_string($_) } @inputs;
         my $q = $inputs[0]{Q};
         next if grep { $_->{Q} ne $q } @inputs;
         next if grep { !is_string($_) } @inputs;
         my $q = $inputs[0]{Q};
         next if grep { $_->{Q} ne $q } @inputs;
+       next if grep { $_->{Invented} } @inputs;
         my $new = { T => 'joinedstrings',
                     E => (join '.', map { $_->{E} } @inputs),
                     P => (join '.', map { $_->{P} } @inputs),
         my $new = { T => 'joinedstrings',
                     E => (join '.', map { $_->{E} } @inputs),
                     P => (join '.', map { $_->{P} } @inputs),
@@ -173,8 +190,10 @@ sub semiparse ($) {
                     Q => $q,
                   };
         @o = (@o[0..$i-1], $new, @o[$i+3..$#o]);
                     Q => $q,
                   };
         @o = (@o[0..$i-1], $new, @o[$i+3..$#o]);
+       $i--; # counteracts $i++
      }
      debug $ichunkstart, "semiparsed: ".join ' ', map { $_->{P} } @o;
      }
      debug $ichunkstart, "semiparsed: ".join ' ', map { $_->{P} } @o;
+    # debug $ichunkstart, "semiparsed V: ".join ' ', map { defined $_->{V} ? ">$_->{V}<" : '-' } @o;
      return @o;
  }          
  
      return @o;
  }          
  
@@ -208,12 +227,22 @@ sub analyse_chunk_core () {
         $next_x->();
         $next_y->();
         next if $x->{E} eq $y->{E};
         $next_x->();
         $next_y->();
         next if $x->{E} eq $y->{E};
+       next if $x->{E} eq 'sprintf' and $y->{E} eq 'f_';
+       next if $x->{E} eq 'die'     and $y->{E} eq 'confess';
         if ($y->{E} eq '+'
             and @ys >= 3
             and $ys[0]{E} eq '('
             and is_trans($ys[1])) {
             $next_y->(); # (
         if ($y->{E} eq '+'
             and @ys >= 3
             and $ys[0]{E} eq '('
             and is_trans($ys[1])) {
             $next_y->(); # (
-           $next_y->(); # __ f_
+           $next_y->(); # __ f_ i_
+           @y_expect_suffix = ')';
+       } elsif ($y->{E} eq '('
+           and @ys > 2
+           and is_trans($ys[0])
+           and @analysed_y
+           and (grep { $_ eq $analysed_y[-1] } (qw( => [ { ? : . ),
+                                                '(', ',') )) {
+           $next_y->(); # __ f_ i_
             @y_expect_suffix = ')';
         }
         my $string_changed;
             @y_expect_suffix = ')';
         }
         my $string_changed;
@@ -221,7 +250,7 @@ sub analyse_chunk_core () {
         if (is_trans($y)) {
             $next_y->();
             die "__ on non-string $y->{P}\n"     unless is_string($y);
         if (is_trans($y)) {
             $next_y->();
             die "__ on non-string $y->{P}\n"     unless is_string($y);
-           die "__ on was non-string $y->{P}\n" unless is_string($x);
+           die "__ on was non-string $x->{P}\n" unless is_string($x);
             if ($y->{Q} ne "'") {
                 die "var subst in new string\n"
                     if $y->{V} =~ m{(?<!\\) [\$\@]};
             if ($y->{Q} ne "'") {
                 die "var subst in new string\n"
                     if $y->{V} =~ m{(?<!\\) [\$\@]};
@@ -252,17 +281,20 @@ sub analyse_chunk_core () {
                 my ($lit, $what) = @_;
                 my $xl = substr($xs, 0, length($lit));
                 if ($xl ne $lit) {
                 my ($lit, $what) = @_;
                 my $xl = substr($xs, 0, length($lit));
                 if ($xl ne $lit) {
-                   debug $ichunkstart, "not exactly x: ..\"$xs\"";
-                   debug $ichunkstart, "not exactly y:    $lit";
+                   debug $ichunkstart, "not exactly x: ..".qp($xs);
+                   debug $ichunkstart, "not exactly y:   ".qp($lit);
                     my $next = @ys ? $ys[0]{P} : '(end)';
                     die "string contents mismatch near $what before $next\n";
                 }
                 $xs = substr($xs, length($lit));
             };
             for (;;) {
                     my $next = @ys ? $ys[0]{P} : '(end)';
                     die "string contents mismatch near $what before $next\n";
                 }
                 $xs = substr($xs, length($lit));
             };
             for (;;) {
+               #print STDERR Dumper($fmt, $xs, \@xs, @ys);
                 if ($fmt !~ m{\%[^\%]}) {
                     $exactly->($fmt, '(tail)');
                     $fmt = '';
                 if ($fmt !~ m{\%[^\%]}) {
                     $exactly->($fmt, '(tail)');
                     $fmt = '';
+                   die "text deleted from end of string: ".qp($xs)."\n"
+                       if length $xs;
                     last;
                 }
                 $exactly->($`, '(literal)');
                     last;
                 }
                 $exactly->($`, '(literal)');
@@ -271,6 +303,15 @@ sub analyse_chunk_core () {
                 elsif ($& ne '%s') { die "unhandled %-subst $&\n"; }
                 $next_y->();
                 die "expected comma, got $y->{P}\n" unless $y->{E} eq ',';
                 elsif ($& ne '%s') { die "unhandled %-subst $&\n"; }
                 $next_y->();
                 die "expected comma, got $y->{P}\n" unless $y->{E} eq ',';
+               if (!length $fmt and
+                   !length $xs and
+                   @xs and
+                   $xs[0]{E} eq '.') {
+                   # X has   "<earlier>" .                <something>
+                   # Y has   "<earlier>%s" [other args] , <something>
+                   $next_x->(); # eat the '.'
+                   next;
+               }
                 if ($xs =~ m{^\@}) {
                     $next_y->();
                     die "\@... => not string" unless is_string($y);
                 if ($xs =~ m{^\@}) {
                     $next_y->();
                     die "\@... => not string" unless is_string($y);
@@ -296,7 +337,11 @@ sub analyse_chunk_core () {
                         pop @analysed_y;
                         last;
                     }
                         pop @analysed_y;
                         last;
                     }
-                   $xs =~ s{^\s+}{};
+                   $xs =~ s{^\s+}{} if $bras;
+                   if (is_string($y) and $y->{Q} eq '"') {
+                       $exactly->($y->{V}, $y->{P});
+                       next;
+                   }
                     $exactly->($y->{E}, $y->{P});
                     if ($y->{T} eq 'bra' or $y->{E} eq '?') {
                         $bras++;
                     $exactly->($y->{E}, $y->{P});
                     if ($y->{T} eq 'bra' or $y->{E} eq '?') {
                         $bras++;
@@ -318,6 +363,8 @@ sub analyse_chunk () {
         return unless length $@;
         if ($@ =~ m{^missing end of here doc (\S+)\n}) {
             # fudge this
         return unless length $@;
         if ($@ =~ m{^missing end of here doc (\S+)\n}) {
             # fudge this
+           # (this never happens now, but in the future we might
+           # want this code again eg to try adding to the chunk)
             $before .= "\n$1\n";
             $after .= "\n$1\n";
             next;
             $before .= "\n$1\n";
             $after .= "\n$1\n";
             next;