From: Jeff Ober Date: Tue, 14 Jan 2020 17:54:44 +0000 (-0500) Subject: First pass on low hanging fruit identified with nytprof X-Git-Tag: nailing-cargo/1.0.0~234^2~39 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=commitdiff_plain;h=0e203eaf995c66d234176790b76097a31ff84ca5;p=nailing-cargo.git First pass on low hanging fruit identified with nytprof --- diff --git a/lib/TOML/Tiny/Grammar.pm b/lib/TOML/Tiny/Grammar.pm index 3ac4168..739f677 100644 --- a/lib/TOML/Tiny/Grammar.pm +++ b/lib/TOML/Tiny/Grammar.pm @@ -115,27 +115,25 @@ our $TOML = qr{ #----------------------------------------------------------------------------- # Key #----------------------------------------------------------------------------- - (? [-_a-zA-Z0-9]+) + (? [-_a-zA-Z0-9]+) (? (?&BasicString) | (?&StringLiteral)) - (? - (?: (?&BareKey) | (?&QuotedKey) ) - (?: (?&WS) [.] (?&WS) (?: (?&BareKey) | (?&QuotedKey) ) )+ - ) - (? (?&DottedKey) | (?&BareKey) | (?&QuotedKey) ) + (? (?&BareKey) | (?&QuotedKey)) + (? (?&SimpleKey) (?: \x2E (?&SimpleKey) )+) + (? (?&BareKey) | (?&QuotedKey) | (?&DottedKey)) #----------------------------------------------------------------------------- # Boolean #----------------------------------------------------------------------------- - (? \b(?:true)|(?:false)\b) + (? (?: \b (?:true) | (?:false) \b )) #----------------------------------------------------------------------------- # Integer #----------------------------------------------------------------------------- - (? [1-9]) - (? [0-9]) - (? [0-9 a-f A-F]) - (? [0-7]) - (? [01]) + (? [1-9]) + (? [0-9]) + (? [0-9 a-f A-F]) + (? [0-7]) + (? [01]) (? [-+]? 0) (? (?&Zero) | (?: [-+]? (?&DecFirstChar) (?: (?&DecChar) | (?: _ (?&DecChar) ))*)) @@ -148,18 +146,20 @@ our $TOML = qr{ #----------------------------------------------------------------------------- # Float #----------------------------------------------------------------------------- - (? [eE] (?&Dec)) - (? [-+]? (?:inf) | (?:nan)) - (? [.] (?&Dec) ) + (? [eE] (?&Dec)) + (? [-+]? (?:inf) | (?:nan)) + (? [.] (?&Dec) ) (? - (?: - (?: (?&Dec) (?&Fraction) (?&Exponent) ) - | (?: (?&Dec) (?&Exponent) ) - | (?: (?&Dec) (?&Fraction) ) - ) - | - (?&SpecialFloat) + (?: + (?&Dec) + + (?: + (?: (?&Fraction) (?&Exponent)? ) + | (?&Exponent) + ) + ) + | (?&SpecialFloat) ) #----------------------------------------------------------------------------- @@ -168,7 +168,7 @@ our $TOML = qr{ (? \x5C # leading \ (?: - [\x5C"btnfr] # escapes: \\ \b \t \n \f \r + [\x5C"btnfr] # escapes: \\ \" \b \t \n \f \r | (?: u [_0-9a-fA-F]{4} ) # unicode (4 bytes) | (?: U [_0-9a-fA-F]{8} ) # unicode (8 bytes) ) @@ -192,22 +192,26 @@ our $TOML = qr{ (?: " # opening quote (?: # escape sequences or any char except " or \ - (?: (?&EscapeChar) ) - | [^"\\] + [^"\\] + | (?&EscapeChar) )* " # closing quote ) ) (? + (?m) (?s) """ # opening triple-quote (?: - (?: (?&EscapeChar) ) # escaped char - | . + [^"\\] + | "{1,2} # 1-2 quotation marks + | (?&EscapeChar) # escape + | (?: \\ $) )*? """ # closing triple-quote (?-s) + (?-m) ) (? diff --git a/lib/TOML/Tiny/Parser.pm b/lib/TOML/Tiny/Parser.pm index 8da34eb..4d92eea 100644 --- a/lib/TOML/Tiny/Parser.pm +++ b/lib/TOML/Tiny/Parser.pm @@ -9,7 +9,6 @@ use v5.18; use Carp; use Data::Dumper; use List::Util qw(all); -use Scalar::Util qw(looks_like_number); use TOML::Tiny::Grammar; use TOML::Tiny::Tokenizer; use TOML::Tiny::Util qw(is_strict_array); @@ -34,10 +33,7 @@ sub new { } sub next_token { - my $self = shift; - return unless $self->{tokenizer}; - my $token = $self->{tokenizer}->next_token; - return $token; + $_[0]->{tokenizer} && $_[0]->{tokenizer}->next_token; } sub parse { @@ -62,7 +58,7 @@ sub parse { sub parse_error { my ($self, $token, $msg) = @_; - my $line = $token ? $token->line : 'EOF'; + my $line = $token ? $token->{line} : 'EOF'; if ($self->{annotated}) { my $root = Dumper($self->{root}); my $tok = Dumper($token); @@ -91,7 +87,7 @@ $src sub expect_type { my ($self, $token, $expected) = @_; - my $actual = $token->type; + my $actual = $token->{type}; $self->parse_error($token, "expected $expected, but found $actual") unless $actual eq $expected; } @@ -99,7 +95,7 @@ sub expect_type { sub push_keys { my ($self, $token) = @_; - push @{ $self->{keys} }, $token->value; + push @{ $self->{keys} }, $token->{value}; } sub pop_keys { @@ -124,14 +120,15 @@ sub set_keys { } sub scan_to_key { - my ($self, $keys) = @_; + my $self = shift; + my $keys = shift // [ $self->get_keys ]; my $node = $self->{root}; for my $key (@$keys) { if (exists $node->{$key}) { for (ref $node->{$key}) { - $node = $node->{$key} when /HASH/; - $node = $node->{$key}[-1] when /ARRAY/; + $node = $node->{$key} when 'HASH'; + $node = $node->{$key}[-1] when 'ARRAY'; default{ my $full_key = join '.', @$keys; die "$full_key is already defined\n"; @@ -149,10 +146,10 @@ sub scan_to_key { sub parse_table { my $self = shift; - my $token = shift // $self->next_token; + my $token = shift // $self->next_token // return; # may be undef on first token in empty document $self->expect_type($token, 'table'); $self->push_keys($token); - $self->scan_to_key([$self->get_keys]); + $self->scan_to_key; my @keys = $self->get_keys; my $key = join '.', @keys; @@ -177,10 +174,10 @@ sub parse_table { } TOKEN: while (my $token = $self->next_token) { - for ($token->type) { - next TOKEN when /EOL/; + for ($token->{type}) { + next TOKEN when 'EOL'; - when (/key/) { + when ('key') { $self->expect_type($self->next_token, 'assign'); $self->push_keys($token); $self->set_keys; @@ -193,13 +190,13 @@ sub parse_table { } } - when (/array_table/) { + when ('array_table') { $self->pop_keys; @_ = ($self, $token); goto \&parse_array_table; } - when (/table/) { + when ('table') { $self->pop_keys; @_ = ($self, $token); goto \&parse_table; @@ -225,7 +222,7 @@ sub parse_array_table { push @{ $node->{$key} }, {}; TOKEN: while (my $token = $self->next_token) { - for ($token->type) { + for ($token->{type}) { next TOKEN when /EOL/; when (/key/) { @@ -258,28 +255,28 @@ sub parse_key { my $self = shift; my $token = shift // $self->next_token; $self->expect_type($token, 'key'); - return $token->value; + return $token->{value}; } sub parse_value { my $self = shift; my $token = shift // $self->next_token; - for ($token->type) { + for ($token->{type}) { when (/float/) { if ($self->{annotated}) { - return $token->value; + return $token->{value}; } else { use bignum; - return $token->value + 0; + return $token->{value} + 0; } } when (/integer/) { if ($self->{annotated}) { - return $token->value; + return $token->{value}; } else { - for (my $n = $token->value) { + for (my $n = $token->{value}) { use bigint; when (/(?&Oct) $TOML/x) { @@ -302,9 +299,9 @@ sub parse_value { } } - return $token->value when /string/; - return $self->{inflate_boolean}->($token->value) when /bool/; - return $self->{inflate_datetime}->($token->value) when /datetime/; + return $token->{value} when /string/; + return $self->{inflate_boolean}->($token->{value}) when /bool/; + return $self->{inflate_datetime}->($token->{value}) when /datetime/; return $self->parse_inline_table when /inline_table/; return $self->parse_inline_array when /inline_array/; @@ -319,7 +316,7 @@ sub parse_inline_array { my @array; TOKEN: while (my $token = $self->next_token) { - for ($token->type) { + for ($token->{type}) { next TOKEN when /comma/; next TOKEN when /EOL/; last TOKEN when /inline_array_close/; @@ -344,13 +341,13 @@ sub parse_inline_table { my $table = {}; TOKEN: while (my $token = $self->next_token) { - for ($token->type) { + for ($token->{type}) { next TOKEN when /comma/; last TOKEN when /inline_table_close/; when (/key/) { $self->expect_type($self->next_token, 'assign'); - my $key = $token->value->[0]; + my $key = $token->{value}[0]; $table->{ $key } = $self->parse_value; } diff --git a/lib/TOML/Tiny/Tokenizer.pm b/lib/TOML/Tiny/Tokenizer.pm index e3150bb..286bcce 100644 --- a/lib/TOML/Tiny/Tokenizer.pm +++ b/lib/TOML/Tiny/Tokenizer.pm @@ -9,22 +9,15 @@ use v5.18; use TOML::Tiny::Grammar; -use Class::Struct 'TOML::Tiny::Token' => { - type => '$', - line => '$', - pos => '$', - value => '$', -}; - sub new { my ($class, %param) = @_; my $self = bless{ - source => $param{source}, - is_exhausted => 0, - position => 0, - line => 0, - tokens => [], + source => $param{source}, + last_position => length $param{source}, + position => 0, + line => 0, + tokens => [], }, $class; return $self; @@ -37,7 +30,7 @@ sub next_token { return; } - if ($self->{is_exhausted}) { + if ($self->is_exhausted) { return; } @@ -53,7 +46,7 @@ sub next_token { my $token; - while (!defined($token) && !$self->{is_exhausted}) { + while (!defined($token) && !$self->is_exhausted) { for ($self->{source}) { when (/\G (?&NL) $TOML/xgc) { ++$self->{line}; @@ -64,10 +57,36 @@ sub next_token { ; } - when (/\G ((?&Key)) (?= (?&WS) =) $TOML/xgc) { + when (/\G ((?&Key)) (?&WS) (?= =) $TOML/xgc) { $token = $self->_make_token('key', $1); } + when (/\G \[ (?&WS) ((?&Key)) (?&WS) \] (?&WS) (?=(?&NL) | $)$TOML/xgc) { + my $key = $self->tokenize_key($1); + $token = $self->_make_token('table', $key); + } + + when (/\G \[\[ (?&WS) ((?&Key)) (?&WS) \]\] (?&WS) (?=(?&NL) | $) $TOML/xgc) { + my $key = $self->tokenize_key($1); + $token = $self->_make_token('array_table', $key); + } + + when (/\G \[ /xgc) { + $token = $self->_make_token('inline_array', $1); + } + + when (/\G \] /xgc) { + $token = $self->_make_token('inline_array_close', $1); + } + + when (/\G \{ /xgc) { + $token = $self->_make_token('inline_table', $1); + } + + when (/\G \} /xgc) { + $token = $self->_make_token('inline_table_close', $1); + } + when (/\G ((?&Boolean)) $TOML/xgc) { $token = $self->_make_token('bool', $1); } @@ -96,32 +115,6 @@ sub next_token { $token = $self->_make_token('comma', $1); } - when (/\G \[ (?&WS) ((?&Key)) (?&WS) \] (?&WS) (?=(?&NL) | $)$TOML/xgc) { - my $key = $self->tokenize_key($1); - $token = $self->_make_token('table', $key); - } - - when (/\G \[\[ (?&WS) ((?&Key)) (?&WS) \]\] (?&WS) (?=(?&NL) | $) $TOML/xgc) { - my $key = $self->tokenize_key($1); - $token = $self->_make_token('array_table', $key); - } - - when (/\G \[ /xgc) { - $token = $self->_make_token('inline_array', $1); - } - - when (/\G \] /xgc) { - $token = $self->_make_token('inline_array_close', $1); - } - - when (/\G \{ /xgc) { - $token = $self->_make_token('inline_table', $1); - } - - when (/\G \} /xgc) { - $token = $self->_make_token('inline_table_close', $1); - } - default{ my $substr = substr($self->{source}, $self->{position} - 20, 40) // 'undef'; die "toml syntax error on line $self->{line}\n\t--> $substr\n"; @@ -149,19 +142,12 @@ sub pop_token { sub _make_token { my ($self, $type, $value) = @_; - my $token = TOML::Tiny::Token->new( + my $token = { type => $type, line => $self->{line}, pos => $self->{position}, - ); - - $self->update_position; - - if (my $tokenize = $self->can("tokenize_$type")) { - $value = $tokenize->($self, $value); - } - - $token->value($value); + value => $self->can("tokenize_$type") ? $self->can("tokenize_$type")->($self, $value) : $value, + }; return $token; } @@ -173,45 +159,38 @@ sub current_line { substr $rest, 0, $stop; } +sub is_exhausted { + return $_[0]->{position} >= $_[0]->{last_position}; +} + sub update_position { my $self = shift; $self->{position} = pos($self->{source}) // 0; - $self->{is_exhausted} = $self->{position} >= length($self->{source}); } sub error { my $self = shift; my $token = shift; my $msg = shift // 'unknown'; - my $line = $token ? $token->line : $self->{line}; + my $line = $token ? $token->{line} : $self->{line}; die "toml: parse error at line $line: $msg\n"; } sub tokenize_key { my $self = shift; my $toml = shift; + my @keys; - for ($toml) { - my @parts; - - $toml =~ qr{ - ( - (?: - ( (?&QuotedKey) | (?&BareKey) ) - [.]? - (?{push @parts, $^N}) - )+ - ) - $TOML - }x; - - for (@parts) { - s/^["']//; - s/["']$//; - } + while ($toml =~ s/^ ((?&SimpleKey)) [.]? $TOML//x) { + push @keys, $1; + } - return \@parts; + for (@keys) { + s/^["']//; + s/["']$//; } + + return \@keys; } sub tokenize_float { @@ -232,73 +211,56 @@ sub tokenize_integer { sub tokenize_string { my $self = shift; my $toml = shift; - my $str = ''; - - for ($toml) { - when (/^ ((?&MultiLineString)) $TOML/x) { - $str = substr $1, 3, length($1) - 6; - - my @newlines = $str =~ /((?&NL)) $TOML/xg; - $self->{line} += scalar( grep{ defined $_ } @newlines ); + my $ml = $toml =~ /^(?:''')|(?:""")/; + my $lit = $toml =~ /^'/; + my $str = ''; + + if ($ml) { + $str = substr $toml, 3, length($toml) - 6; + my @newlines = $str =~ /(\x0D?\x0A)/g; + $self->{line} += scalar @newlines; + $str =~ s/^(?&WS) (?&NL) $TOML//x; # trim leading whitespace + $str =~ s/\\(?&NL)\s* $TOML//xgs; # trim newlines from lines ending in backslash + } else { + $str = substr($toml, 1, length($toml) - 2); + } - $str =~ s/^(?&WS) (?&NL) $TOML//x; - $str =~ s/\\(?&NL)\s* $TOML//xgs; - $str = $self->unescape_str($str); - } + if (!$lit) { + $str = $self->unescape_str($str); + } - when (/^ ((?&BasicString)) $TOML/x) { - $str = substr($1, 1, length($1) - 2); - $str = $self->unescape_str($str); - } + return ''.$str; +} - when (/^ ((?&MultiLineStringLiteral)) $TOML/x) { - $str = substr $1, 3, length($1) - 6; +sub unescape_chars { + state %esc = ( + '\b' => "\x08", + '\t' => "\x09", + '\n' => "\x0A", + '\f' => "\x0C", + '\r' => "\x0D", + '\"' => "\x22", + '\/' => "\x2F", + '\\\\' => "\x5C", + ); - my @newlines = $str =~ /(?&NL) $TOML/xg; - $self->{line} += scalar( grep{ defined $_ } @newlines ); + if (exists $esc{$_[0]}) { + return $esc{$_[0]}; + } - $str =~ s/^(?&WS) (?&NL) $TOML//x; - $str =~ s/\\(?&NL)\s* $TOML//xgs; - } + my $hex = hex substr($_[0], 2); - when (/^ ((?&StringLiteral)) $TOML/x) { - $str = substr($1, 1, length($1) - 2); - } + if (charnames::viacode($hex)) { + return chr $hex; } - return ''.$str; + return; } -# Adapted from TOML::Parser::Util sub unescape_str { - my $self = shift; - my $str = shift; - - $str =~ s/((?&EscapeChar)) $TOML/ - my $ch = $1; for ($1) { - $ch = "\x08" when '\b'; - $ch = "\x09" when '\t'; - $ch = "\x0A" when '\n'; - $ch = "\x0C" when '\f'; - $ch = "\x0D" when '\r'; - $ch = "\x22" when '\"'; - $ch = "\x2F" when '\/'; - $ch = "\x5C" when '\\\\'; - - default{ - my $hex = hex substr($ch, 2); - if (charnames::viacode($hex)) { - $ch = chr $hex; - } else { - $self->error(undef, "invalid unicode escape: $ch"); - } - } - } - - $ch; - /xge; - - return $str; + state $re = qr/((?&EscapeChar)) $TOML/x; + $_[1] =~ s|$re|unescape_chars($1) // $_[0]->error(undef, "invalid unicode escape: $1")|xge; + $_[1]; } 1;