First pass on low hanging fruit identified with nytprof

author Jeff Ober <jober@ziprecruiter.com>

Tue, 14 Jan 2020 17:54:44 +0000 (12:54 -0500)

committer Jeff Ober <jober@ziprecruiter.com>

Tue, 14 Jan 2020 17:54:44 +0000 (12:54 -0500)
author Jeff Ober <jober@ziprecruiter.com>
Tue, 14 Jan 2020 17:54:44 +0000 (12:54 -0500)
committer Jeff Ober <jober@ziprecruiter.com>
Tue, 14 Jan 2020 17:54:44 +0000 (12:54 -0500)
diff --git a/lib/TOML/Tiny/Grammar.pm b/lib/TOML/Tiny/Grammar.pm

index 3ac41681b9efd40ecc53e793e1fd0f7006ae7bf3..739f677fcb44a669a7c7e371d3125a2b574ce1aa 100644 (file)
--- a/lib/TOML/Tiny/Grammar.pm
+++ b/lib/TOML/Tiny/Grammar.pm
@@ -115,27 +115,25 @@ our $TOML = qr{
    #-----------------------------------------------------------------------------
    # Key
    #-----------------------------------------------------------------------------
-  (?<BareKey> [-_a-zA-Z0-9]+)
+  (?<BareKey>   [-_a-zA-Z0-9]+)
    (?<QuotedKey> (?&BasicString) | (?&StringLiteral))
-  (?<DottedKey>
-    (?: (?&BareKey) | (?&QuotedKey) )
-    (?: (?&WS) [.] (?&WS) (?: (?&BareKey) | (?&QuotedKey) ) )+
-  )
-  (?<Key> (?&DottedKey) | (?&BareKey) | (?&QuotedKey) )
+  (?<SimpleKey> (?&BareKey) | (?&QuotedKey))
+  (?<DottedKey> (?&SimpleKey) (?: \x2E (?&SimpleKey) )+)
+  (?<Key>       (?&BareKey) | (?&QuotedKey) | (?&DottedKey))
  
    #-----------------------------------------------------------------------------
    # Boolean
    #-----------------------------------------------------------------------------
-  (?<Boolean> \b(?:true)|(?:false)\b)
+  (?<Boolean> (?: \b (?:true) | (?:false) \b ))
  
    #-----------------------------------------------------------------------------
    # Integer
    #-----------------------------------------------------------------------------
-  (?<DecFirstChar> [1-9])
-  (?<DecChar> [0-9])
-  (?<HexChar> [0-9 a-f A-F])
-  (?<OctChar> [0-7])
-  (?<BinChar> [01])
+  (?<DecFirstChar>  [1-9])
+  (?<DecChar>       [0-9])
+  (?<HexChar>       [0-9 a-f A-F])
+  (?<OctChar>       [0-7])
+  (?<BinChar>       [01])
  
    (?<Zero> [-+]? 0)
    (?<Dec> (?&Zero) | (?: [-+]? (?&DecFirstChar) (?: (?&DecChar) | (?: _ (?&DecChar) ))*))
@@ -148,18 +146,20 @@ our $TOML = qr{
    #-----------------------------------------------------------------------------
    # Float
    #-----------------------------------------------------------------------------
-  (?<Exponent> [eE] (?&Dec))
-  (?<SpecialFloat> [-+]?  (?:inf) | (?:nan))
-  (?<Fraction> [.] (?&Dec) )
+  (?<Exponent>      [eE] (?&Dec))
+  (?<SpecialFloat>  [-+]? (?:inf) | (?:nan))
+  (?<Fraction>      [.] (?&Dec) )
  
    (?<Float>
-    (?:
-        (?: (?&Dec) (?&Fraction) (?&Exponent) )
-      | (?: (?&Dec) (?&Exponent) )
-      | (?: (?&Dec) (?&Fraction) )
-    )
-    |
-    (?&SpecialFloat)
+      (?:
+        (?&Dec)
+
+        (?:
+            (?: (?&Fraction) (?&Exponent)? )
+          | (?&Exponent)
+        )
+      )
+    | (?&SpecialFloat)
    )
  
    #-----------------------------------------------------------------------------
@@ -168,7 +168,7 @@ our $TOML = qr{
    (?<EscapeChar>
      \x5C                        # leading \
      (?:
-        [\x5C"btnfr]            # escapes: \\ \b \t \n \f \r
+        [\x5C"btnfr]            # escapes: \\ \" \b \t \n \f \r
        | (?: u [_0-9a-fA-F]{4} ) # unicode (4 bytes)
        | (?: U [_0-9a-fA-F]{8} ) # unicode (8 bytes)
      )
@@ -192,22 +192,26 @@ our $TOML = qr{
      (?:
        "                       # opening quote
        (?:                     # escape sequences or any char except " or \
-          (?: (?&EscapeChar) )
-        | [^"\\]
+          [^"\\]
+        | (?&EscapeChar)
        )*
        "                       # closing quote
      )
    )
  
    (?<MultiLineString>
+    (?m)
      (?s)
      """                       # opening triple-quote
      (?:
-      (?: (?&EscapeChar) )    # escaped char
-      | .
+        [^"\\]
+      | "{1,2}                # 1-2 quotation marks
+      | (?&EscapeChar)        # escape
+      | (?: \\ $)
      )*?
      """                       # closing triple-quote
      (?-s)
+    (?-m)
    )
  
    (?<String>
diff --git a/lib/TOML/Tiny/Parser.pm b/lib/TOML/Tiny/Parser.pm

index 8da34eb7f03effc2de87e7f8a0c19605fceb3e02..4d92eeaedf3dd3031caec3b76a139ad9949e6272 100644 (file)
--- a/lib/TOML/Tiny/Parser.pm
+++ b/lib/TOML/Tiny/Parser.pm
@@ -9,7 +9,6 @@ use v5.18;
  use Carp;
  use Data::Dumper;
  use List::Util qw(all);
-use Scalar::Util qw(looks_like_number);
  use TOML::Tiny::Grammar;
  use TOML::Tiny::Tokenizer;
  use TOML::Tiny::Util qw(is_strict_array);
@@ -34,10 +33,7 @@ sub new {
  }
  
  sub next_token {
-  my $self = shift;
-  return unless $self->{tokenizer};
-  my $token = $self->{tokenizer}->next_token;
-  return $token;
+  $_[0]->{tokenizer} && $_[0]->{tokenizer}->next_token;
  }
  
  sub parse {
@@ -62,7 +58,7 @@ sub parse {
  
  sub parse_error {
    my ($self, $token, $msg) = @_;
-  my $line = $token ? $token->line : 'EOF';
+  my $line = $token ? $token->{line} : 'EOF';
    if ($self->{annotated}) {
      my $root = Dumper($self->{root});
      my $tok  = Dumper($token);
@@ -91,7 +87,7 @@ $src
  
  sub expect_type {
    my ($self, $token, $expected) = @_;
-  my $actual = $token->type;
+  my $actual = $token->{type};
    $self->parse_error($token, "expected $expected, but found $actual")
      unless $actual eq $expected;
  }
@@ -99,7 +95,7 @@ sub expect_type {
  
  sub push_keys {
    my ($self, $token) = @_;
-  push @{ $self->{keys} }, $token->value;
+  push @{ $self->{keys} }, $token->{value};
  }
  
  sub pop_keys {
@@ -124,14 +120,15 @@ sub set_keys {
  }
  
  sub scan_to_key {
-  my ($self, $keys) = @_;
+  my $self = shift;
+  my $keys = shift // [ $self->get_keys ];
    my $node = $self->{root};
  
    for my $key (@$keys) {
      if (exists $node->{$key}) {
        for (ref $node->{$key}) {
-        $node = $node->{$key}     when /HASH/;
-        $node = $node->{$key}[-1] when /ARRAY/;
+        $node = $node->{$key}     when 'HASH';
+        $node = $node->{$key}[-1] when 'ARRAY';
          default{
            my $full_key = join '.', @$keys;
            die "$full_key is already defined\n";
@@ -149,10 +146,10 @@ sub scan_to_key {
  
  sub parse_table {
    my $self  = shift;
-  my $token = shift // $self->next_token;
+  my $token = shift // $self->next_token // return; # may be undef on first token in empty document
    $self->expect_type($token, 'table');
    $self->push_keys($token);
-  $self->scan_to_key([$self->get_keys]);
+  $self->scan_to_key;
  
    my @keys = $self->get_keys;
    my $key = join '.', @keys;
@@ -177,10 +174,10 @@ sub parse_table {
    }
  
    TOKEN: while (my $token = $self->next_token) {
-    for ($token->type) {
-      next TOKEN when /EOL/;
+    for ($token->{type}) {
+      next TOKEN when 'EOL';
  
-      when (/key/) {
+      when ('key') {
          $self->expect_type($self->next_token, 'assign');
          $self->push_keys($token);
          $self->set_keys;
@@ -193,13 +190,13 @@ sub parse_table {
          }
        }
  
-      when (/array_table/) {
+      when ('array_table') {
          $self->pop_keys;
          @_ = ($self, $token);
          goto \&parse_array_table;
        }
  
-      when (/table/) {
+      when ('table') {
          $self->pop_keys;
          @_ = ($self, $token);
          goto \&parse_table;
@@ -225,7 +222,7 @@ sub parse_array_table {
    push @{ $node->{$key} }, {};
  
    TOKEN: while (my $token = $self->next_token) {
-    for ($token->type) {
+    for ($token->{type}) {
        next TOKEN when /EOL/;
  
        when (/key/) {
@@ -258,28 +255,28 @@ sub parse_key {
    my $self  = shift;
    my $token = shift // $self->next_token;
    $self->expect_type($token, 'key');
-  return $token->value;
+  return $token->{value};
  }
  
  sub parse_value {
    my $self = shift;
    my $token = shift // $self->next_token;
  
-  for ($token->type) {
+  for ($token->{type}) {
      when (/float/) {
        if ($self->{annotated}) {
-        return $token->value;
+        return $token->{value};
        } else {
          use bignum;
-        return $token->value + 0;
+        return $token->{value} + 0;
        }
      }
  
      when (/integer/) {
        if ($self->{annotated}) {
-        return $token->value;
+        return $token->{value};
        } else {
-        for (my $n = $token->value) {
+        for (my $n = $token->{value}) {
            use bigint;
  
            when (/(?&Oct) $TOML/x) {
@@ -302,9 +299,9 @@ sub parse_value {
        }
      }
  
-    return $token->value when /string/;
-    return $self->{inflate_boolean}->($token->value) when /bool/;
-    return $self->{inflate_datetime}->($token->value) when /datetime/;
+    return $token->{value} when /string/;
+    return $self->{inflate_boolean}->($token->{value}) when /bool/;
+    return $self->{inflate_datetime}->($token->{value}) when /datetime/;
      return $self->parse_inline_table when /inline_table/;
      return $self->parse_inline_array when /inline_array/;
  
@@ -319,7 +316,7 @@ sub parse_inline_array {
    my @array;
  
    TOKEN: while (my $token = $self->next_token) {
-    for ($token->type) {
+    for ($token->{type}) {
        next TOKEN when /comma/;
        next TOKEN when /EOL/;
        last TOKEN when /inline_array_close/;
@@ -344,13 +341,13 @@ sub parse_inline_table {
    my $table = {};
  
    TOKEN: while (my $token = $self->next_token) {
-    for ($token->type) {
+    for ($token->{type}) {
        next TOKEN when /comma/;
        last TOKEN when /inline_table_close/;
  
        when (/key/) {
          $self->expect_type($self->next_token, 'assign');
-        my $key = $token->value->[0];
+        my $key = $token->{value}[0];
          $table->{ $key } = $self->parse_value;
        }
  
diff --git a/lib/TOML/Tiny/Tokenizer.pm b/lib/TOML/Tiny/Tokenizer.pm

index e3150bb23e00e7dbee15ee2aa69c3f5a701f19dc..286bccef1a55ec738b594371d390dd0a51bce17e 100644 (file)
--- a/lib/TOML/Tiny/Tokenizer.pm
+++ b/lib/TOML/Tiny/Tokenizer.pm
@@ -9,22 +9,15 @@ use v5.18;
  
  use TOML::Tiny::Grammar;
  
-use Class::Struct 'TOML::Tiny::Token' => {
-  type  => '$',
-  line  => '$',
-  pos   => '$',
-  value => '$',
-};
-
  sub new {
    my ($class, %param) = @_;
  
    my $self = bless{
-    source       => $param{source},
-    is_exhausted => 0,
-    position     => 0,
-    line         => 0,
-    tokens       => [],
+    source        => $param{source},
+    last_position => length $param{source},
+    position      => 0,
+    line          => 0,
+    tokens        => [],
    }, $class;
  
    return $self;
@@ -37,7 +30,7 @@ sub next_token {
      return;
    }
  
-  if ($self->{is_exhausted}) {
+  if ($self->is_exhausted) {
      return;
    }
  
@@ -53,7 +46,7 @@ sub next_token {
  
    my $token;
  
-  while (!defined($token) && !$self->{is_exhausted}) {
+  while (!defined($token) && !$self->is_exhausted) {
      for ($self->{source}) {
        when (/\G (?&NL) $TOML/xgc) {
          ++$self->{line};
@@ -64,10 +57,36 @@ sub next_token {
          ;
        }
  
-      when (/\G ((?&Key)) (?= (?&WS) =) $TOML/xgc) {
+      when (/\G ((?&Key)) (?&WS) (?= =) $TOML/xgc) {
           $token = $self->_make_token('key', $1);
        }
  
+      when (/\G \[ (?&WS) ((?&Key)) (?&WS) \] (?&WS) (?=(?&NL) | $)$TOML/xgc) {
+        my $key = $self->tokenize_key($1);
+        $token = $self->_make_token('table', $key);
+      }
+
+      when (/\G \[\[ (?&WS) ((?&Key)) (?&WS) \]\] (?&WS) (?=(?&NL) | $) $TOML/xgc) {
+        my $key = $self->tokenize_key($1);
+        $token = $self->_make_token('array_table', $key);
+      }
+
+      when (/\G \[ /xgc) {
+        $token = $self->_make_token('inline_array', $1);
+      }
+
+      when (/\G \] /xgc) {
+        $token = $self->_make_token('inline_array_close', $1);
+      }
+
+      when (/\G \{ /xgc) {
+        $token = $self->_make_token('inline_table', $1);
+      }
+
+      when (/\G \} /xgc) {
+        $token = $self->_make_token('inline_table_close', $1);
+      }
+
        when (/\G ((?&Boolean)) $TOML/xgc) {
          $token = $self->_make_token('bool', $1);
        }
@@ -96,32 +115,6 @@ sub next_token {
          $token = $self->_make_token('comma', $1);
        }
  
-      when (/\G \[ (?&WS) ((?&Key)) (?&WS) \] (?&WS) (?=(?&NL) | $)$TOML/xgc) {
-        my $key = $self->tokenize_key($1);
-        $token = $self->_make_token('table', $key);
-      }
-
-      when (/\G \[\[ (?&WS) ((?&Key)) (?&WS) \]\] (?&WS) (?=(?&NL) | $) $TOML/xgc) {
-        my $key = $self->tokenize_key($1);
-        $token = $self->_make_token('array_table', $key);
-      }
-
-      when (/\G \[ /xgc) {
-        $token = $self->_make_token('inline_array', $1);
-      }
-
-      when (/\G \] /xgc) {
-        $token = $self->_make_token('inline_array_close', $1);
-      }
-
-      when (/\G \{ /xgc) {
-        $token = $self->_make_token('inline_table', $1);
-      }
-
-      when (/\G \} /xgc) {
-        $token = $self->_make_token('inline_table_close', $1);
-      }
-
        default{
          my $substr = substr($self->{source}, $self->{position} - 20, 40) // 'undef';
          die "toml syntax error on line $self->{line}\n\t--> $substr\n";
@@ -149,19 +142,12 @@ sub pop_token {
  sub _make_token {
    my ($self, $type, $value) = @_;
  
-  my $token = TOML::Tiny::Token->new(
+  my $token = {
      type  => $type,
      line  => $self->{line},
      pos   => $self->{position},
-  );
-
-  $self->update_position;
-
-  if (my $tokenize = $self->can("tokenize_$type")) {
-    $value = $tokenize->($self, $value);
-  }
-
-  $token->value($value);
+    value => $self->can("tokenize_$type") ?  $self->can("tokenize_$type")->($self, $value) : $value,
+  };
  
    return $token;
  }
@@ -173,45 +159,38 @@ sub current_line {
    substr $rest, 0, $stop;
  }
  
+sub is_exhausted {
+  return $_[0]->{position} >= $_[0]->{last_position};
+}
+
  sub update_position {
    my $self = shift;
    $self->{position} = pos($self->{source}) // 0;
-  $self->{is_exhausted} = $self->{position} >= length($self->{source});
  }
  
  sub error {
    my $self  = shift;
    my $token = shift;
    my $msg   = shift // 'unknown';
-  my $line  = $token ? $token->line : $self->{line};
+  my $line  = $token ? $token->{line} : $self->{line};
    die "toml: parse error at line $line: $msg\n";
  }
  
  sub tokenize_key {
    my $self = shift;
    my $toml = shift;
+  my @keys;
  
-  for ($toml) {
-    my @parts;
-
-    $toml =~ qr{
-      (
-        (?:
-          ( (?&QuotedKey) | (?&BareKey) )
-          [.]?
-          (?{push @parts, $^N})
-        )+
-      )
-      $TOML
-    }x;
-
-    for (@parts) {
-      s/^["']//;
-      s/["']$//;
-    }
+  while ($toml =~ s/^ ((?&SimpleKey)) [.]? $TOML//x) {
+    push @keys, $1;
+  }
  
-    return \@parts;
+  for (@keys) {
+    s/^["']//;
+    s/["']$//;
    }
+
+  return \@keys;
  }
  
  sub tokenize_float {
@@ -232,73 +211,56 @@ sub tokenize_integer {
  sub tokenize_string {
    my $self = shift;
    my $toml = shift;
-  my $str = '';
-
-  for ($toml) {
-    when (/^ ((?&MultiLineString)) $TOML/x) {
-      $str = substr $1, 3, length($1) - 6;
-
-      my @newlines = $str =~ /((?&NL)) $TOML/xg;
-      $self->{line} += scalar( grep{ defined $_ } @newlines );
+  my $ml   = $toml =~ /^(?:''')|(?:""")/;
+  my $lit  = $toml =~ /^'/;
+  my $str  = '';
+
+  if ($ml) {
+    $str = substr $toml, 3, length($toml) - 6;
+    my @newlines = $str =~ /(\x0D?\x0A)/g;
+    $self->{line} += scalar @newlines;
+    $str =~ s/^(?&WS) (?&NL) $TOML//x; # trim leading whitespace
+    $str =~ s/\\(?&NL)\s* $TOML//xgs;  # trim newlines from lines ending in backslash
+  } else {
+    $str = substr($toml, 1, length($toml) - 2);
+  }
  
-      $str =~ s/^(?&WS) (?&NL) $TOML//x;
-      $str =~ s/\\(?&NL)\s* $TOML//xgs;
-      $str = $self->unescape_str($str);
-    }
+  if (!$lit) {
+    $str = $self->unescape_str($str);
+  }
  
-    when (/^ ((?&BasicString)) $TOML/x) {
-      $str = substr($1, 1, length($1) - 2);
-      $str = $self->unescape_str($str);
-    }
+  return ''.$str;
+}
  
-    when (/^ ((?&MultiLineStringLiteral)) $TOML/x) {
-      $str = substr $1, 3, length($1) - 6;
+sub unescape_chars {
+  state %esc = (
+    '\b'   => "\x08",
+    '\t'   => "\x09",
+    '\n'   => "\x0A",
+    '\f'   => "\x0C",
+    '\r'   => "\x0D",
+    '\"'   => "\x22",
+    '\/'   => "\x2F",
+    '\\\\' => "\x5C",
+  );
  
-      my @newlines = $str =~ /(?&NL) $TOML/xg;
-      $self->{line} += scalar( grep{ defined $_ } @newlines );
+  if (exists $esc{$_[0]}) {
+    return $esc{$_[0]};
+  }
  
-      $str =~ s/^(?&WS) (?&NL) $TOML//x;
-      $str =~ s/\\(?&NL)\s* $TOML//xgs;
-    }
+  my $hex = hex substr($_[0], 2);
  
-    when (/^ ((?&StringLiteral)) $TOML/x) {
-      $str = substr($1, 1, length($1) - 2);
-    }
+  if (charnames::viacode($hex)) {
+    return chr $hex;
    }
  
-  return ''.$str;
+  return;
  }
  
-# Adapted from TOML::Parser::Util
  sub unescape_str {
-  my $self = shift;
-  my $str = shift;
-
-  $str =~ s/((?&EscapeChar)) $TOML/
-    my $ch = $1; for ($1) {
-      $ch = "\x08" when '\b';
-      $ch = "\x09" when '\t';
-      $ch = "\x0A" when '\n';
-      $ch = "\x0C" when '\f';
-      $ch = "\x0D" when '\r';
-      $ch = "\x22" when '\"';
-      $ch = "\x2F" when '\/';
-      $ch = "\x5C" when '\\\\';
-
-      default{
-        my $hex = hex substr($ch, 2);
-        if (charnames::viacode($hex)) {
-          $ch = chr $hex;
-        } else {
-          $self->error(undef, "invalid unicode escape: $ch");
-        }
-      }
-    }
-
-    $ch;
-  /xge;
-
-  return $str;
+  state $re = qr/((?&EscapeChar)) $TOML/x;
+  $_[1] =~ s|$re|unescape_chars($1) // $_[0]->error(undef, "invalid unicode escape: $1")|xge;
+  $_[1];
  }
  
  1;
author	Jeff Ober <jober@ziprecruiter.com>
	Tue, 14 Jan 2020 17:54:44 +0000 (12:54 -0500)
committer	Jeff Ober <jober@ziprecruiter.com>
	Tue, 14 Jan 2020 17:54:44 +0000 (12:54 -0500)
lib/TOML/Tiny/Grammar.pm		patch \| blob \| history
lib/TOML/Tiny/Parser.pm		patch \| blob \| history
lib/TOML/Tiny/Tokenizer.pm		patch \| blob \| history