From: Jeff Ober Date: Fri, 10 Jan 2020 18:07:05 +0000 (-0500) Subject: Disallow invalid escapes, parse error on invalid unicode escapes X-Git-Tag: nailing-cargo/1.0.0~234^2~56 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=commitdiff_plain;h=9435b01f0244d01b24c3c7ad9ad8d0ec199c8320;p=nailing-cargo.git Disallow invalid escapes, parse error on invalid unicode escapes --- diff --git a/lib/TOML/Tiny/Grammar.pm b/lib/TOML/Tiny/Grammar.pm index 7dff53f..03fc43c 100644 --- a/lib/TOML/Tiny/Grammar.pm +++ b/lib/TOML/Tiny/Grammar.pm @@ -173,7 +173,7 @@ our $TOML = qr{ (? \x5C # leading \ (?: - [\x5C/"btnfr] # escapes: \\ \/ \b \t \n \f \r + [\x5C"btnfr] # escapes: \\ \b \t \n \f \r | (?: u [_0-9a-fA-F]{4} ) # unicode (4 bytes) | (?: U [_0-9a-fA-F]{8} ) # unicode (8 bytes) ) diff --git a/lib/TOML/Tiny/Tokenizer.pm b/lib/TOML/Tiny/Tokenizer.pm index bc52df5..46c9a06 100644 --- a/lib/TOML/Tiny/Tokenizer.pm +++ b/lib/TOML/Tiny/Tokenizer.pm @@ -3,9 +3,9 @@ package TOML::Tiny::Tokenizer; use strict; use warnings; no warnings qw(experimental); +use charnames qw(:full); use v5.18; -use Carp; use TOML::Tiny::Grammar; use Class::Struct 'TOML::Tiny::Token' => { @@ -182,7 +182,7 @@ sub error { my $token = shift; my $msg = shift // 'unknown'; my $line = $token ? $token->line : $self->{line}; - croak "toml: parse error at line $line: $msg\n"; + die "toml: parse error at line $line: $msg\n"; } sub tokenize_key { @@ -241,12 +241,12 @@ sub tokenize_string { $str =~ s/^(?&WS) (?&NL) $TOML//x; $str =~ s/\\(?&NL)\s* $TOML//xgs; - $str = unescape_str($str); + $str = $self->unescape_str($str); } when (/^ ((?&BasicString)) $TOML/x) { $str = substr($1, 1, length($1) - 2); - $str = unescape_str($str); + $str = $self->unescape_str($str); } when (/^ ((?&MultiLineStringLiteral)) $TOML/x) { @@ -269,6 +269,7 @@ sub tokenize_string { # Adapted from TOML::Parser::Util sub unescape_str { + my $self = shift; my $str = shift; $str =~ s/((?&EscapeChar)) $TOML/ @@ -281,11 +282,13 @@ sub unescape_str { $ch = "\x22" when '\"'; $ch = "\x2F" when '\/'; $ch = "\x5C" when '\\\\'; + default{ - my $c = substr $1, 2; - $c = chr(hex($c)); - if ($c ne "\0") { - $ch = $c; + my $hex = hex substr($ch, 2); + if (charnames::viacode($hex)) { + $ch = chr $hex; + } else { + $self->error(undef, "invalid unicode escape: $ch"); } } }