From 52e71d62f94d08b9d6642bdd9226191d4b3c1173 Mon Sep 17 00:00:00 2001 From: Jeff Ober Date: Thu, 16 Jan 2020 14:04:22 -0500 Subject: [PATCH] Float optimizations --- lib/TOML/Tiny/Grammar.pm | 2 +- lib/TOML/Tiny/Parser.pm | 4 ++-- lib/TOML/Tiny/Tokenizer.pm | 38 +++++++++++++++++++++++++------------- t/tokens/float.t | 1 + 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/lib/TOML/Tiny/Grammar.pm b/lib/TOML/Tiny/Grammar.pm index fa2ba29..73c58ec 100644 --- a/lib/TOML/Tiny/Grammar.pm +++ b/lib/TOML/Tiny/Grammar.pm @@ -158,7 +158,7 @@ our $TOML = qr{ #----------------------------------------------------------------------------- (? [eE] (?&Dec)) (? [-+]? (?> (?:inf) | (?:nan))) - (? [.] (?&Dec) ) + (? [.] (?&DecChar) (?> _? (?&DecChar) )* ) (? (?> diff --git a/lib/TOML/Tiny/Parser.pm b/lib/TOML/Tiny/Parser.pm index 4d92eea..ab0733e 100644 --- a/lib/TOML/Tiny/Parser.pm +++ b/lib/TOML/Tiny/Parser.pm @@ -59,10 +59,10 @@ sub parse { sub parse_error { my ($self, $token, $msg) = @_; my $line = $token ? $token->{line} : 'EOF'; - if ($self->{annotated}) { + if ($self->{annotated} || $ENV{TOML_TINY_DEBUG}) { my $root = Dumper($self->{root}); my $tok = Dumper($token); - my $src = substr $self->{tokenizer}{source}, $self->{tokenizer}{position} - 20, 40; + my $src = substr $self->{tokenizer}{source}, $self->{tokenizer}{position}, 30; confess qq{ toml parse error at line $line: diff --git a/lib/TOML/Tiny/Tokenizer.pm b/lib/TOML/Tiny/Tokenizer.pm index ea71113..51f78bd 100644 --- a/lib/TOML/Tiny/Tokenizer.pm +++ b/lib/TOML/Tiny/Tokenizer.pm @@ -16,13 +16,23 @@ sub new { source => $param{source}, last_position => length $param{source}, position => 0, - line => 0, + line => 1, tokens => [], }, $class; return $self; } +sub prev_token_type { + my $self = shift; + + if (@{$self->{tokens}}) { + return $self->{tokens}[-1]{type} // 'EOL'; + } + + return 'EOL'; +} + sub next_token { my $self = shift; @@ -44,26 +54,28 @@ sub next_token { state $key = qr/(?&Key) $TOML/x; while ($self->{position} < $self->{last_position} && !$token) { + my $prev = $self->prev_token_type; + my $newline = !!($prev eq 'EOL' || $prev eq 'table' || $prev eq 'array_table'); + for ($self->{source}) { - when (/\G [\x20 \x09]+/xgc) { - ; - } + /\G[\x20\x09]+/gc; # ignore whitespace + /\G\x23.*/gc && next; # ignore comments - when (/\G \x23 .*/xgc) { - ; - } + last when /\G $/xgc; when (/\G \x0D? \x0A/xgc) { ++$self->{line}; $token = $self->_make_token('EOL'); } - when (/\G \[ [\x20 \x09]* ($key) [\x20 \x09]* \] [\x20 \x09]* (?= (:? \x23 .* )? (?: \x0D? \x0A) | $ )/xgc) { - $token = $self->_make_token('table', $self->tokenize_key($1)); - } + if ($newline) { + when (/\G \[ [\x20 \x09]* ($key) [\x20 \x09]* \] [\x20 \x09]* (?= (:? \x23 .* )? (?: \x0D? \x0A) | $ )/xgc) { + $token = $self->_make_token('table', $self->tokenize_key($1)); + } - when (/\G \[\[ [\x20 \x09]* ($key) [\x20 \x09]* \]\] [\x20 \x09]* (?= (:? \x23 .* )? (?: \x0D? \x0A) | $ )/xgc) { - $token = $self->_make_token('array_table', $self->tokenize_key($1)); + when (/\G \[\[ [\x20 \x09]* ($key) [\x20 \x09]* \]\] [\x20 \x09]* (?= (:? \x23 .* )? (?: \x0D? \x0A) | $ )/xgc) { + $token = $self->_make_token('array_table', $self->tokenize_key($1)); + } } when (/\G \[ /xgc) { @@ -91,7 +103,7 @@ sub next_token { } when (/\G ($key) [\x20 \x09]* (?= =)/xgc) { - $token = $self->_make_token('key', $1); + $token = $self->_make_token('key', $1); } when (/\G ((?&Boolean)) $TOML/xgc) { diff --git a/t/tokens/float.t b/t/tokens/float.t index c807688..79e4ed0 100644 --- a/t/tokens/float.t +++ b/t/tokens/float.t @@ -4,6 +4,7 @@ use TOML::Tiny::Grammar; my $re = qr{ ((?&Float)) $TOML }x; my @valid = qw( + 0.01 +1.0 3.1415 -0.01 -- 2.30.2