Disallow invalid escapes, parse error on invalid unicode escapes

author Jeff Ober <jober@ziprecruiter.com>

Fri, 10 Jan 2020 18:07:05 +0000 (13:07 -0500)

committer Jeff Ober <jober@ziprecruiter.com>

Fri, 10 Jan 2020 18:07:05 +0000 (13:07 -0500)
author Jeff Ober <jober@ziprecruiter.com>
Fri, 10 Jan 2020 18:07:05 +0000 (13:07 -0500)
committer Jeff Ober <jober@ziprecruiter.com>
Fri, 10 Jan 2020 18:07:05 +0000 (13:07 -0500)
diff --git a/lib/TOML/Tiny/Grammar.pm b/lib/TOML/Tiny/Grammar.pm

index 7dff53f62ae3ac169b572068848f0da35656e089..03fc43c0ed64f7e61f11ccd5972342fc41185d35 100644 (file)
--- a/lib/TOML/Tiny/Grammar.pm
+++ b/lib/TOML/Tiny/Grammar.pm
@@ -173,7 +173,7 @@ our $TOML = qr{
    (?<EscapeChar>
      \x5C                        # leading \
      (?:
-        [\x5C/"btnfr]           # escapes: \\ \/ \b \t \n \f \r
+        [\x5C"btnfr]            # escapes: \\ \b \t \n \f \r
        | (?: u [_0-9a-fA-F]{4} ) # unicode (4 bytes)
        | (?: U [_0-9a-fA-F]{8} ) # unicode (8 bytes)
      )
diff --git a/lib/TOML/Tiny/Tokenizer.pm b/lib/TOML/Tiny/Tokenizer.pm

index bc52df537cbd55639fd310ef6c6787f9feb9310e..46c9a0695af7d24724f7721abb979a3155044912 100644 (file)
--- a/lib/TOML/Tiny/Tokenizer.pm
+++ b/lib/TOML/Tiny/Tokenizer.pm
@@ -3,9 +3,9 @@ package TOML::Tiny::Tokenizer;
  use strict;
  use warnings;
  no warnings qw(experimental);
+use charnames qw(:full);
  use v5.18;
  
-use Carp;
  use TOML::Tiny::Grammar;
  
  use Class::Struct 'TOML::Tiny::Token' => {
@@ -182,7 +182,7 @@ sub error {
    my $token = shift;
    my $msg   = shift // 'unknown';
    my $line  = $token ? $token->line : $self->{line};
-  croak "toml: parse error at line $line: $msg\n";
+  die "toml: parse error at line $line: $msg\n";
  }
  
  sub tokenize_key {
@@ -241,12 +241,12 @@ sub tokenize_string {
  
        $str =~ s/^(?&WS) (?&NL) $TOML//x;
        $str =~ s/\\(?&NL)\s* $TOML//xgs;
-      $str = unescape_str($str);
+      $str = $self->unescape_str($str);
      }
  
      when (/^ ((?&BasicString)) $TOML/x) {
        $str = substr($1, 1, length($1) - 2);
-      $str = unescape_str($str);
+      $str = $self->unescape_str($str);
      }
  
      when (/^ ((?&MultiLineStringLiteral)) $TOML/x) {
@@ -269,6 +269,7 @@ sub tokenize_string {
  
  # Adapted from TOML::Parser::Util
  sub unescape_str {
+  my $self = shift;
    my $str = shift;
  
    $str =~ s/((?&EscapeChar)) $TOML/
@@ -281,11 +282,13 @@ sub unescape_str {
        $ch = "\x22" when '\"';
        $ch = "\x2F" when '\/';
        $ch = "\x5C" when '\\\\';
+
        default{
-        my $c = substr $1, 2;
-        $c = chr(hex($c));
-        if ($c ne "\0") {
-          $ch = $c;
+        my $hex = hex substr($ch, 2);
+        if (charnames::viacode($hex)) {
+          $ch = chr $hex;
+        } else {
+          $self->error(undef, "invalid unicode escape: $ch");
          }
        }
      }
author	Jeff Ober <jober@ziprecruiter.com>
	Fri, 10 Jan 2020 18:07:05 +0000 (13:07 -0500)
committer	Jeff Ober <jober@ziprecruiter.com>
	Fri, 10 Jan 2020 18:07:05 +0000 (13:07 -0500)
lib/TOML/Tiny/Grammar.pm		patch \| blob \| history
lib/TOML/Tiny/Tokenizer.pm		patch \| blob \| history