Rewrite tokenizer/parser, test for equivalence with TOML::Parser

author Jeff Ober <jober@ziprecruiter.com>

Mon, 6 Jan 2020 12:57:20 +0000 (07:57 -0500)

committer Jeff Ober <jober@ziprecruiter.com>

Wed, 8 Jan 2020 16:16:57 +0000 (11:16 -0500)
author Jeff Ober <jober@ziprecruiter.com>
Mon, 6 Jan 2020 12:57:20 +0000 (07:57 -0500)
committer Jeff Ober <jober@ziprecruiter.com>
Wed, 8 Jan 2020 16:16:57 +0000 (11:16 -0500)
diff --git a/README.pod b/README.pod

index d09e5ea0d53509e3afda1dc1d8e4715b0f4e7e03..dd2287f51aa38ac9a6836b0b7b5c2637ab870946 100644 (file)
--- a/README.pod
+++ b/README.pod
@@ -4,7 +4,7 @@
  
  =head1 NAME
  
-TOML::Tiny - a minimal TOML parser and serializer
+TOML::Tiny - a minimal, pure perl TOML parser and serializer
  
  =head1 VERSION
  
diff --git a/cpanfile b/cpanfile

index 7ec73c82c88ffc9397cbb0ecfbac74b794122eff..f3e9b048138b46167412ae468aa6d63bde4af618 100644 (file)
--- a/cpanfile
+++ b/cpanfile
@@ -1,9 +1,11 @@
-requires 'perl'     => '>= 5.014';
-requires 'JSON::PP' => '0';
+requires 'perl' => '>= 5.014';
  
  recommends 'Types::Serialiser' => 0;
  
  on test => sub{
-  requires 'Test::Pod' => '0';
-  requires 'Test2::V0' => '0';
+  requires 'Data::Dumper'              => '0';
+  requires 'DateTime::Format::ISO8601' => '0';
+  requires 'TOML::Parser'              => '0';
+  requires 'Test2::V0'                 => '0';
+  requires 'Test::Pod'                 => '0';
  };
diff --git a/lib/TOML/Tiny.pm b/lib/TOML/Tiny.pm

index 32627fcd48fbf23cea165b0f5e1d0e55109c1d60..ef1b5624117faaf00f1c7d48647b1c90ad77baf1 100644 (file)
--- a/lib/TOML/Tiny.pm
+++ b/lib/TOML/Tiny.pm
@@ -1,11 +1,26 @@
  package TOML::Tiny;
-# ABSTRACT: a minimal TOML parser and serializer
+# ABSTRACT: a minimal, pure perl TOML parser and serializer
  
  use strict;
  use warnings;
+use TOML::Tiny::Parser;
  
-use TOML::Tiny::Grammar;
+use parent 'Exporter';
  
-our $GRAMMAR_V5 = $TOML::Tiny::Grammar::GRAMMAR_V5;
+our @EXPORT = qw(
+  from_toml
+  to_toml
+);
+
+sub from_toml {
+  my $source = shift;
+  my $parser = TOML::Tiny::Parser->new(@_);
+  my $toml = eval{ $parser->parse($source) };
+  return ($toml, $@);
+}
+
+sub to_toml {
+  my $data = shift;
+}
  
  1;
diff --git a/lib/TOML/Tiny/Grammar.pm b/lib/TOML/Tiny/Grammar.pm

index fc9c1cb5ad5c717dba876ee98a5275d90e37414c..b95627ad536c9fcc9c3ce79e7d6f82848bb0dbde 100644 (file)
--- a/lib/TOML/Tiny/Grammar.pm
+++ b/lib/TOML/Tiny/Grammar.pm
@@ -3,7 +3,13 @@ package TOML::Tiny::Grammar;
  use strict;
  use warnings;
  
-our $GRAMMAR_V5 = qr{
+use parent 'Exporter';
+
+our @EXPORT = qw(
+  $TOML
+);
+
+our $TOML = qr{
  
  (?(DEFINE)
    #-----------------------------------------------------------------------------
@@ -19,7 +25,7 @@ our $GRAMMAR_V5 = qr{
      | (?&InlineTable)
    )
  
-  (?<NLSeq> (?: \x0A) | (?: \x0D \x0A))
+  (?<NLSeq> \x0A | (?: \x0D \x0A))
    (?<NL> (?&NLSeq) | (?&Comment))
  
    (?<WSChar> [ \x20 \x09 ])       # (space, tab)
@@ -173,14 +179,14 @@ our $GRAMMAR_V5 = qr{
    )
  
    (?<StringLiteral>
-    (?: ' ([^']*) ')          # single quoted string (no escaped chars allowed)
+    (?: ' [^']* ')            # single quoted string (no escaped chars allowed)
    )
  
    (?<MultiLineStringLiteral>
      (?m)
      (?s)
      '''                       # opening triple-quote
-    (.)*?                     # capture
+    .*?
      '''                       # closing triple-quote
      (?-s)
      (?-m)
@@ -189,7 +195,7 @@ our $GRAMMAR_V5 = qr{
    (?<BasicString>
      (?:
        "                       # opening quote
-      (?:                     # capture escape sequences or any char except " or \
+      (?:                     # escape sequences or any char except " or \
            (?: (?&EscapeChar) )
          | [^"\\]
        )*
@@ -200,7 +206,7 @@ our $GRAMMAR_V5 = qr{
    (?<MultiLineString>
      (?s)
      """                       # opening triple-quote
-    (                         # capture:
+    (?:
        (?: (?&EscapeChar) )    # escaped char
        | .
      )*?
diff --git a/lib/TOML/Tiny/Parser.pm b/lib/TOML/Tiny/Parser.pm

index bec0543af6b7f0555f6d040ea47b74fa972dcf89..04fc83560c4671d7207b455049d570e1541bb429 100644 (file)
--- a/lib/TOML/Tiny/Parser.pm
+++ b/lib/TOML/Tiny/Parser.pm
@@ -2,202 +2,253 @@ package TOML::Tiny::Parser;
  
  use strict;
  use warnings;
-use feature qw(switch);
+use feature qw(say switch);
  no warnings qw(experimental);
  
-use Carp;
-use DDP;
-use Data::Dumper;
  use TOML::Tiny::Tokenizer;
  
-our $TOML  = $TOML::Tiny::Grammar::GRAMMAR_V5;
  our $TRUE  = 1;
  our $FALSE = 0;
  
-BEGIN{
-  eval{
-    require Types::Serialiser;
-    $TRUE = Types::Serialiser::true();
-    $FALSE = Types::Serialiser::false();
-  };
-}
+eval{
+  require Types::Serialiser;
+  $TRUE = Types::Serialiser::true();
+  $FALSE = Types::Serialiser::false();
+};
  
  sub new {
    my ($class, %param) = @_;
-  return bless{
-    inflate_datetime => $param{inflate_datetime},
-    inflate_boolean  => $param{inflate_boolean},
+  bless{
+    inflate_datetime => $param{inflate_datetime} || sub{ shift },
+    inflate_boolean  => $param{inflate_boolean}  || sub{ shift eq 'true' ? $TRUE : $FALSE },
    }, $class;
  }
  
+sub next_token {
+  my $self = shift;
+  return unless $self->{tokenizer};
+  $self->{tokenizer}->next_token;
+}
+
  sub parse {
-  my $self   = shift;
-  my $tokens = TOML::Tiny::Tokenizer::tokenize(@_);
-  my $root   = {};
-  my $acc    = { root => $root, node => $root };
-  $self->parse_token($_, $acc) for @$tokens;
-  return $root;
-}
-
-sub parse_token {
-  my $self   = shift;
-  my $token  = shift;
-  my $acc    = shift;
-  my $type   = shift @$token;
-
-  for ($type) {
-    # Table
-    when ('table') {
-      my $keys = $self->parse_key(shift @$token);
-      $acc->{node} = $self->mkpath($keys, $acc->{root});
-    }
+  my ($self, $toml) = @_;
  
-    # Array of tables
-    when ('array-of-tables') {
-      my $keys = $self->parse_key(shift @$token);
-      my $last = pop @$keys;
-      $acc->{node} = $self->mkpath($keys, $acc->{root});
-      $acc->{node}{$last} ||= [];
-      push @{ $acc->{node}{$last} } => $acc->{node} = {};
-    }
+  $self->{tokenizer} = TOML::Tiny::Tokenizer->new(source => $toml);
+  $self->{keys} = [];
+  $self->{root} = {};
+
+  $self->parse_table;
+  my $result = $self->{root};
+
+  delete $self->{tokenizer};
+  delete $self->{keys};
+  delete $self->{root};
+
+  return $result;
+}
+
+sub parse_error {
+  my ($self, $token, $msg) = @_;
+  my $line = $token ? $token->line : 'EOF';
+  die "toml parse error at line $line: $msg\n";
+}
+
+sub expect_type {
+  my ($self, $token, $expected) = @_;
+  my $actual = $token->type;
+  $self->parse_error($token, "expected $expected, but found $actual")
+    unless $actual eq $expected;
+}
  
-    # Key-value pair
-    when ('assignment') {
-      my $keys = $self->parse_key(shift @$token);
-      my $last = pop @$keys;
-      my $value = $self->parse_value(shift @$token);
-      my $node = $self->mkpath($keys, $acc->{node});
-      $node->{$last} = $value;
-    }
-  }
  
-  return $acc;
+sub push_keys {
+  my ($self, $token) = @_;
+  push @{ $self->{keys} }, $token->value;
+}
+
+sub pop_keys {
+  my $self = shift;
+  pop @{ $self->{keys} };
  }
  
-sub mkpath {
+sub get_keys {
    my $self = shift;
-  my $keys = shift;
-  my $node = shift;
+  return map{ @$_ } @{ $self->{keys} };
+}
+
+sub set_keys {
+  my $self  = shift;
+  my $value = $self->parse_value;
+  my @keys  = $self->get_keys;
+  my $key   = pop @keys;
+  my $node  = $self->scan_to_key(\@keys);
+  $node->{$key} = $value;
+}
+
+sub scan_to_key {
+  my ($self, $keys) = @_;
+  my $node = $self->{root};
  
    for my $key (@$keys) {
      if (exists $node->{$key}) {
        for (ref $node->{$key}) {
-        $node = $node->{$key}[-1] when 'ARRAY';
-        $node = $node->{$key}     when 'HASH';
+        $node = $node->{$key}     when /HASH/;
+        $node = $node->{$key}[-1] when /ARRAY/;
+        default{
+          my $full_key = join '.', @$keys;
+          die "$full_key is already defined\n";
+        }
        }
      }
      else {
-      $node = $node->{$key} ||= {};
+      $node = $node->{$key} = {};
      }
    }
  
    return $node;
  }
  
-sub parse_key {
-  my ($self, $key) = @_;
-  my $type = shift @$key;
-  for ($type) {
-    return $self->dotted_key(shift @$key) when 'dotted-key';
-    return $self->quoted_key(shift @$key) when 'quoted-key';
-    return $self->bare_key(shift @$key)   when 'bare-key';
-  }
-}
  
-sub parse_value {
-  my ($self, $token) = @_;
-  my $type = shift @$token;
+sub parse_table {
+  my $self  = shift;
+  my $token = shift // $self->next_token;
+  $self->expect_type($token, 'table');
+  $self->push_keys($token);
  
-  for ($type) {
-    return $self->datetime(@$token) when 'datetime';
-    return $self->boolean(@$token)  when 'boolean';
+  TOKEN: while (my $token = $self->next_token) {
+    for ($token->type) {
+      when (/key/) {
+        $self->expect_type($self->next_token, 'assign');
+        $self->push_keys($token);
+        $self->set_keys;
+        $self->pop_keys;
+      }
  
-    when ('array') {
-      my $contents = shift @$token;
-      return [ map{ $self->parse_value($_) } @$contents ];
-    }
+      when (/array_table/) {
+        $self->pop_keys;
+        $self->parse_array_table($token);
+      }
  
-    when ('inline-table') {
-      my $tokens = shift @$token;
-      my $root   = {};
-      my $acc    = {root => $root, node => $root};
-      $self->parse_token($_, $acc) for @$tokens;
-      return $root;
-    }
+      when (/table/) {
+        $self->pop_keys;
+        $self->parse_table($token);
+      }
  
-    default{
-      return shift @$token;
+      default{
+        $self->parse_error($token, "expected key-value pair, table, or array of tables but got $_");
+      }
      }
    }
  }
  
-sub bare_key {
-  my ($self, $key) = @_;
-  return [$key];
-}
+sub parse_array_table {
+  my $self = shift;
+  my $token = shift // $self->next_token;
+  $self->expect_type($token, 'array_table');
+  $self->push_keys($token);
+
+  my @keys = $self->get_keys;
+  my $key  = pop @keys;
+  my $node = $self->scan_to_key(\@keys);
+  $node->{$key} //= [];
+  push @{ $node->{$key} }, {};
+
+  TOKEN: while (my $token = $self->next_token) {
+    for ($token->type) {
+      when (/key/) {
+        $self->expect_type($self->next_token, 'assign');
+        $self->push_keys($token);
+        $self->set_keys;
+        $self->pop_keys;
+      }
  
-sub quoted_key {
-  my ($self, $key) = @_;
-  $key =~ s/^"//;
-  $key =~ s/"$//;
-  return [$key];
-}
+      when (/array_table/) {
+        $self->pop_keys;
+        $self->parse_array_table($token);
+      }
+
+      when (/table/) {
+        $self->pop_keys;
+        $self->parse_table($token);
+      }
  
-sub dotted_key {
-  my ($self, $key) = @_;
-  my @parts = split /\./, $key;
-  return \@parts;
+      default{
+        $self->parse_error($token, "expected key-value pair, table, or array of tables but got $_");
+      }
+    }
+  }
  }
  
-sub number {
-  my ($self, $n) = @_;
-  defined $n ? 0 + $n : $n;
+sub parse_key {
+  my $self  = shift;
+  my $token = shift // $self->next_token;
+  $self->expect_type($token, 'key');
+  return $token->value;
  }
  
-sub datetime {
-  my ($self, $dt) = @_;
+sub parse_value {
+  my $self = shift;
+  my $token = shift // $self->next_token;
  
-  if ($self->{inflate_datetime}) {
-    my ($year, $month, $day, $hour, $minute, $second, $fractional, $offset) = $dt =~ qr{
-        (?:
-          (\d\d\d\d) - (\d\d) - (\d\d)  # yyyy-mm-dd
-        )?
+  for ($token->type) {
+    return $token->value when /number/;
+    return $token->value when /string/;
+    return $self->{inflate_boolean}->($token->value) when /boolean/;
+    return $self->{inflate_datetime}->($token->value) when /datetime/;
+    return $self->parse_inline_table when /inline_table/;
+    return $self->parse_inline_array when /inline_array/;
  
-        (?:
-          [T ]
-          (\d\d) : (\d\d) : (\d\d)      # hh:mm:ss.fractional
-          (?:[.] (\d+) )?
+    default{
+      $self->parse_error($token, "value expected (boolean, number, string, datetime, inline array, inline table), but found $_");
+    }
+  }
+}
  
-          ((?&Offset)?)
-        )?
+sub parse_inline_array {
+  my $self = shift;
+  my @array;
  
-        $TOML::Tiny::Tokenizer::TOML
-    }x;
+  TOKEN: while (my $token = $self->next_token) {
+    for ($token->type) {
+      next TOKEN when /comma/;
+      last TOKEN when /inline_array_close/;
  
-    return {
-      original   => $dt,
-      year       => $self->number($year),
-      month      => $self->number($month),
-      day        => $self->number($day),
-      hour       => $self->number($hour),
-      minute     => $self->number($minute),
-      second     => $self->number($second),
-      fractional => $self->number($fractional),
-      offset     => $offset,
-    };
-  } else {
-    return $dt;
+      default{
+        push @array, $self->parse_value($token);
+      }
+    }
    }
+
+  return \@array;
  }
  
-sub boolean {
-  my ($self, $bool) = @_;
-  if ($self->{inflate_boolean}) {
-    return $TRUE if $bool eq 'true';
-    return $FALSE;
-  } else {
-    return $bool;
+sub parse_inline_table {
+  my $self = shift;
+
+  my @keys = $self->get_keys;
+  my $key  = pop @keys;
+  my $node = $self->scan_to_key(\@keys);
+  $node->{$key} //= {};
+
+  TOKEN: while (my $token = $self->next_token) {
+    for ($token->type) {
+      next TOKEN when /comma/;
+      last TOKEN when /inline_table_close/;
+
+      when (/key/) {
+        $self->expect_type($self->next_token, 'assign');
+        $self->push_keys($token);
+        $self->set_keys;
+        $self->pop_keys;
+      }
+
+      default{
+        $self->parse_error($token, "inline table expected key-value pair, but found $_");
+      }
+    }
    }
+
+  return $node->{$key};
  }
  
  1;
diff --git a/lib/TOML/Tiny/Tokenizer.pm b/lib/TOML/Tiny/Tokenizer.pm

index a991d12d5ee4632cc1fed20aacc388f1a532a4f8..cc4c150caa4ccb8ab9dde5924c6f9b8a6d327ee5 100644 (file)
--- a/lib/TOML/Tiny/Tokenizer.pm
+++ b/lib/TOML/Tiny/Tokenizer.pm
@@ -2,166 +2,225 @@ package TOML::Tiny::Tokenizer;
  
  use strict;
  use warnings;
-use feature qw(switch);
+use feature qw(say switch);
  no warnings qw(experimental);
  
-use JSON::PP;
+use DDP;
+use Carp;
  use TOML::Tiny::Grammar;
  
-our $TOML = $TOML::Tiny::Grammar::GRAMMAR_V5;
+use Class::Struct 'TOML::Tiny::Token' => {
+  type  => '$',
+  line  => '$',
+  pos   => '$',
+  value => '$',
+};
+
+sub new {
+  my ($class, %param) = @_;
+
+  my $self = bless{
+    source       => $param{source},
+    is_exhausted => 0,
+    position     => 0,
+    line         => 0,
+    tokens       => [],
+  }, $class;
+
+  return $self;
+}
  
-sub tokenize {
-  my $toml = shift;
-  my @tokens;
+sub next_token {
+  my $self = shift;
  
-  TOKEN: while ((pos($toml) // 0) < length($toml)) {
-    for ($toml) {
-      when (/\G ((?&Boolean)) $TOML/xgc) {
-        push @tokens, ['boolean', $1];
-      }
+  if (!defined($self->{source})) {
+    return;
+  }
  
-      when (/\G ((?&DateTime)) $TOML/xgc) {
-        push @tokens, ['datetime', $1];
-      }
+  if ($self->{is_exhausted}) {
+    return;
+  }
  
-      when (/\G ((?&Float)) $TOML/xgc) {
-        push @tokens, ['float', tokenize_float($1)];
+  if (!@{ $self->{tokens} }) {
+    my $root = $self->_make_token('table', []);
+    $self->push_token($root);
+    return $root;
+  }
+
+  # Update the regex engine's position marker in case some other regex
+  # attempted to match against the source string and reset it.
+  pos($self->{source}) = $self->{position};
+
+  my $token;
+
+  while (!defined($token) && !$self->{is_exhausted}) {
+    for ($self->{source}) {
+      when (/\G (?&NL) $TOML/xgc) {
+        ++$self->{line};
        }
  
-      when (/\G ((?&Integer)) $TOML/xgc) {
-        push @tokens, ['integer', tokenize_integer($1)];
+      when (/\G (?&WSChar)+ $TOML/xgc) {
+        ;
        }
  
-      when (/\G ((?&String)) $TOML/xgc) {
-        push @tokens, ['string', tokenize_string($1)];
+      when (/\G ((?&Key)) (?= (?&WS) =) $TOML/xgc) {
+         $token = $self->_make_token('key', $1);
        }
  
-      when (/\G ((?&KeyValuePairDecl)) $TOML/xgc) {
-        push @tokens, tokenize_assignment($1);
+      when (/\G ((?&Boolean)) $TOML/xgc) {
+        $token = $self->_make_token('boolean', $1);
        }
  
-      when (/\G ((?&Array)) $TOML/xgc) {
-        push @tokens, tokenize_array($1);
+      when (/\G ((?&DateTime)) $TOML/xgc) {
+        $token = $self->_make_token('datetime', $1);
        }
  
-      when (/\G ((?&InlineTable)) $TOML/xgc) {
-        push @tokens, tokenize_inline_table($1);
+      when (/\G ((?&Float) | (?&Integer)) $TOML/xgc) {
+        $token = $self->_make_token('number', $1);
        }
  
-      when (/\G \[ (?&WS) ((?&Key)) (?&WS) \] (?&WS) (?&NL) $TOML/xgc) {
-        push @tokens, ['table', tokenize_key($1)];
+      when (/\G ((?&String)) $TOML/xgc) {
+        $token = $self->_make_token('string', $1);
        }
  
-      when (/\G \[\[ (?&WS) ((?&Key)) (?&WS) \]\] (?&WS) (?&NL) $TOML/xgc) {
-        push @tokens, ['array-of-tables', tokenize_key($1)];
+      when (/\G = /xgc) {
+        $token = $self->_make_token('assign', $1);
        }
  
-      when (/\G (?: (?&WSChar) | (?&NLSeq) | (?&Comment) )+ $TOML/xgc) {
-        next TOKEN;
+      when (/\G , /xgc) {
+        $token = $self->_make_token('comma', $1);
        }
  
-      default{
-        my $pos    = pos $toml;
-        my $line   = line_number($toml, $pos);
-        my $substr = substr($toml, $pos, 30) // 'undef';
-        die "toml syntax error on line $line:\n\t--> $substr\n";
+      when (/\G \[ (?&WS) ((?&Key)) (?&WS) \] $TOML/xgc) {
+        my $key = $self->tokenize_key($1);
+        $token = $self->_make_token('table', $key);
        }
-    }
-  }
  
-  return \@tokens;
-}
+      when (/\G \[\[ (?&WS) ((?&Key)) (?&WS) \]\] (?&WS) (?&NL) $TOML/xgc) {
+        my $key = $self->tokenize_key($1);
+        $token = $self->_make_token('array_table', $key);
+      }
  
-sub tokenize_inline_table {
-  my $toml = shift;
-  my @items;
+      when (/\G \[ /xgc) {
+        $token = $self->_make_token('inline_array', $1);
+      }
  
-  $toml =~ s/^\s*\{\s*//;
-  $toml =~ s/\s*\}\s*$//;
+      when (/\G \] /xgc) {
+        $token = $self->_make_token('inline_array_close', $1);
+      }
  
-  ITEM: while ((pos($toml) // 0) < length($toml)) {
-    for ($toml) {
-      next ITEM when /\G\s*/gc;
-      next ITEM when /\G\{/gc;
-      next ITEM when /\G\}/gc;
+      when (/\G \{ /xgc) {
+        $token = $self->_make_token('inline_table', $1);
+      }
  
-      when (/\G ((?&KeyValuePair)) (?&WS) ,? $TOML/xgc) {
-        push @items, tokenize_assignment($1);
+      when (/\G \} /xgc) {
+        $token = $self->_make_token('inline_table_close', $1);
        }
  
        default{
-        die "invalid inline table syntax: $toml";
+        my $substr = substr($self->{source}, $self->{position}, 30) // 'undef';
+        die "toml syntax error on line $self->{line}\n\t--> $substr\n";
        }
      }
+
+    $self->push_token($token);
+    $self->update_position;
    }
  
-  return ['inline-table', \@items];
+  return $token;
  }
  
-sub tokenize_array {
-  my $toml = shift;
-  my @items;
+sub push_token {
+  my $self = shift;
+  my $token = shift // return;
+  push @{$self->{tokens}}, $token;
+}
+
+sub pop_token {
+  my $self = shift;
+  pop @{$self->{tokens}};
+}
  
-  $toml =~ s/^\s*\[\s*//;
-  $toml =~ s/\s*\]\s*$//;
+sub _make_token {
+  my ($self, $type, $value) = @_;
  
-  ITEM: while ((pos($toml) // 0) < length($toml)) {
-    my $pos = pos($toml) // 0;
-    for ($toml) {
-      when (/\G ((?&Value)) (?&WS) [,]? $TOML/xgc) {
-        push @items, @{ tokenize($1) };
-      }
+  my $token = TOML::Tiny::Token->new(
+    type  => $type,
+    line  => $self->{line},
+    pos   => $self->{position},
+  );
  
-      next ITEM when /\G\s*/gc;
-      next ITEM when /\G\[/gc;
-      next ITEM when /\G\]/gc;
+  $self->update_position;
  
-      default{
-        die "invalid array syntax: $toml";
-      }
-    }
+  if (my $tokenize = $self->can("tokenize_$type")) {
+    $value = $tokenize->($self, $value);
    }
  
-  return ['array', \@items];
+  $token->value($value);
+
+  return $token;
  }
  
-sub tokenize_assignment {
-  my $toml = shift;
+sub current_line {
+  my $self = shift;
+  my $rest = substr $self->{source}, $self->{position};
+  my $stop = index $rest, "\n";
+  substr $rest, 0, $stop;
+}
  
-  for ($toml) {
-    when (/\G(?&WS) ((?&Key)) (?&WS) = (?&WS) ((?&Value)) (?&NL)? $TOML/xgc) {
-      my $key = tokenize_key($1);
-      my $val = tokenize($2);
-      return ['assignment', $key, @$val];
-    }
+sub update_position {
+  my $self = shift;
+  $self->{position} = pos($self->{source}) // 0;
+  $self->{is_exhausted} = $self->{position} >= length($self->{source});
+}
  
-    default{
-      die "invalid assignment syntax: $toml";
-    }
-  }
+sub error {
+  my $self  = shift;
+  my $token = shift;
+  my $msg   = shift // 'unknown';
+  my $line  = $token ? $token->line : $self->{line};
+  croak "toml: parse error at line $line: $msg\n";
  }
  
  sub tokenize_key {
+  my $self = shift;
    my $toml = shift;
  
    for ($toml) {
-    return ['dotted-key', $1] when /^ ((?&DottedKey)) $TOML/x;
-    return ['quoted-key', $1] when /^ ((?&QuotedKey)) $TOML/x;
-    return ['bare-key', $1]   when /^ ((?&BareKey)) $TOML/x;
-
-    default{
-      die "invalid key: syntax $toml";
+    my @parts;
+
+    $toml =~ qr{
+      (
+        (?:
+          ( (?&QuotedKey) | (?&BareKey) )
+          [.]?
+          (?{push @parts, $^N})
+        )+
+      )
+      $TOML
+    }x;
+
+    for (@parts) {
+      s/^["']//;
+      s/["']$//;
      }
+
+    return \@parts;
    }
  }
  
  sub tokenize_string {
+  my $self = shift;
    my $toml = shift;
    my $str = '';
  
    for ($toml) {
      when (/^ ((?&MultiLineString)) $TOML/x) {
        $str = substr $1, 3, length($1) - 6;
+
+      my @newlines = $str =~ /((?&NL)) $TOML/xg;
+      $self->{line} += scalar( grep{ defined $_ } @newlines );
+
        $str =~ s/^(?&WS) (?&NL) $TOML//x;
      }
  
@@ -171,22 +230,23 @@ sub tokenize_string {
  
      when (/^ ((?&MultiLineStringLiteral)) $TOML/x) {
        $str = substr $1, 3, length($1) - 6;
+
+      my @newlines = $str =~ /(?&NL) $TOML/xg;
+      $self->{line} += scalar( grep{ defined $_ } @newlines );
+
        $str =~ s/^(?&WS) (?&NL) $TOML//x;
      }
  
      when (/^ ((?&StringLiteral)) $TOML/x) {
        $str = substr($1, 1, length($1) - 2);
      }
-
-    default{
-      die "invalid string syntax: $toml";
-    }
    }
  
    return ''.$str;
  }
  
-sub tokenize_integer {
+sub tokenize_number {
+  my $self = shift;
    my $toml = shift;
  
    for ($toml) {
@@ -202,28 +262,9 @@ sub tokenize_integer {
      when (/(?&Hex) $TOML/x) {
        return hex $toml;
      }
-
-    when (/(?&Dec) $TOML/x) {
-    }
-
-    default{
-      die "invalid datetime syntax: $toml";
-    }
    }
  
    return 0 + $toml;
  }
  
-sub tokenize_float {
-  my $toml = shift;
-  return 0 + $toml;
-}
-
-sub line_number {
-  my ($toml, $pos) = @_;
-  my $substr = substr $toml, 0, $pos + 1;
-  my @lines = $substr =~ /((?&NL)) $TOML/g;
-  return scalar @lines;
-}
-
  1;
diff --git a/t/parity.t b/t/parity.t

index 2dc52a66c3821fa1bcea099c3361ae92b24dcca1..29e00c82dc03fcb1f6335a15c5f22ea7b4a90af3 100644 (file)
--- a/t/parity.t
+++ b/t/parity.t
@@ -5,20 +5,39 @@
  # represented in the synopsis example
  #-------------------------------------------------------------------------------
  use Test2::V0;
+use Data::Dumper;
  use TOML::Tiny::Parser;
-use TOML;
  
  my $toml = do{ local $/; <DATA> };
  
-my $parser = TOML::Tiny::Parser->new(
-  inflate_datetime => 0,
-  inflate_boolean  => 0,
-);
+subtest 'TOML::Parser' => sub{
+  use TOML::Parser;
+
+  subtest 'defaults' => sub{
+    my $exp = TOML::Parser->new->parse($toml);
+    my $got = TOML::Tiny::Parser->new->parse($toml);
+    is $got, $exp, 'equivalence'
+      or diag Dumper({got => $got, expected => $exp});
+  };
+
+  subtest 'inflate_boolean' => sub{
+    my $inflate = sub{ shift eq 'true' ? 'yes' : 'no' };
+    my $exp = TOML::Parser->new(inflate_boolean => $inflate)->parse($toml);
+    my $got = TOML::Tiny::Parser->new(inflate_boolean => $inflate)->parse($toml);
+    is $got, $exp, 'equivalence'
+      or diag Dumper({got => $got, expected => $exp});
+  };
+
+  subtest 'inflate_datetime' => sub{
+    require DateTime::Format::ISO8601;
+    my $inflate = sub{ DateTime::Format::ISO8601->parse_datetime(shift) };
+    my $exp = TOML::Parser->new(inflate_datetime => $inflate)->parse($toml);
+    my $got = TOML::Tiny::Parser->new(inflate_datetime => $inflate)->parse($toml);
+    is $got, $exp, 'equivalence'
+      or diag Dumper({got => $got, expected => $exp});
+  };
+};
  
-my $got = $parser->parse($toml);
-my $exp = from_toml($toml);
-
-is $got, $exp, 'parity with TOML module';
  done_testing;
  
  __DATA__
diff --git a/t/tokens/array-of-tables.t b/t/tokens/array-of-tables.t

index 42cffe0a341633f0da7e812a3ad8b0742fbf3d0e..101eced003a36747f455c254ffe828d9d779030f 100644 (file)
--- a/t/tokens/array-of-tables.t
+++ b/t/tokens/array-of-tables.t
@@ -1,7 +1,7 @@
  use Test2::V0;
-use TOML::Tiny;
+use TOML::Tiny::Grammar;
  
-my $re = qr{ ((?&ArrayOfTables)) $TOML::Tiny::GRAMMAR_V5 }x;
+my $re = qr{ ((?&ArrayOfTables)) $TOML }x;
  
  my @valid = (
    qq{[[foo]]\n},
diff --git a/t/tokens/array.t b/t/tokens/array.t

index 30d6dd3f0dd3b0d5f1ae9784d50f8e6a44e088c5..b79c007151375ac775bc5110a4f5c6e784234f7d 100644 (file)
--- a/t/tokens/array.t
+++ b/t/tokens/array.t
@@ -1,7 +1,7 @@
  use Test2::V0;
-use TOML::Tiny;
+use TOML::Tiny::Grammar;
  
-my $re = qr{ ((?&Array)) $TOML::Tiny::GRAMMAR_V5 }x;
+my $re = qr{ ((?&Array)) $TOML }x;
  
  my @valid = (
    q{[ 1, 2, 3 ]},
diff --git a/t/tokens/boolean.t b/t/tokens/boolean.t

index 817851380b9d5a185310785f3630c7704a2f11e3..4a8fd43bd32e28b56c40d7f8128c3ec433010183 100644 (file)
--- a/t/tokens/boolean.t
+++ b/t/tokens/boolean.t
@@ -1,7 +1,7 @@
  use Test2::V0;
-use TOML::Tiny;
+use TOML::Tiny::Grammar;
  
-my $re = qr{ ((?&Boolean)) $TOML::Tiny::GRAMMAR_V5 }x;
+my $re = qr{ ((?&Boolean)) $TOML }x;
  
  like 'true', $re, 'true';
  like 'false', $re, 'false';
diff --git a/t/tokens/datetime.t b/t/tokens/datetime.t

index 25ee4261a4b9186a219bdab1fea94e5b2a1ae9fd..413b4ac3f244496acca93b8172523535951e4aff 100644 (file)
--- a/t/tokens/datetime.t
+++ b/t/tokens/datetime.t
@@ -1,7 +1,7 @@
  use Test2::V0;
-use TOML::Tiny;
+use TOML::Tiny::Grammar;
  
-my $re = qr{ ((?&DateTime)) $TOML::Tiny::GRAMMAR_V5 }x;
+my $re = qr{ ((?&DateTime)) $TOML }x;
  
  my @valid = (
    '1979-05-27T07:32:00Z',
diff --git a/t/tokens/float.t b/t/tokens/float.t

index 6c978e7fb57ce252f9871b47667443435cac81f7..c80768819a28c9210835fa6d23905bf7af557b4f 100644 (file)
--- a/t/tokens/float.t
+++ b/t/tokens/float.t
@@ -1,7 +1,7 @@
  use Test2::V0;
-use TOML::Tiny;
+use TOML::Tiny::Grammar;
  
-my $re = qr{ ((?&Float)) $TOML::Tiny::GRAMMAR_V5 }x;
+my $re = qr{ ((?&Float)) $TOML }x;
  
  my @valid = qw(
    +1.0
diff --git a/t/tokens/inline-table.t b/t/tokens/inline-table.t

index ff90e5a6248a4db73012b61ec1bd8784a509f0f5..289865393d031d10c18129448350ccfc27d61e7a 100644 (file)
--- a/t/tokens/inline-table.t
+++ b/t/tokens/inline-table.t
@@ -1,7 +1,7 @@
  use Test2::V0;
-use TOML::Tiny;
+use TOML::Tiny::Grammar;
  
-my $re = qr{ ((?&InlineTable)) $TOML::Tiny::GRAMMAR_V5 }x;
+my $re = qr{ ((?&InlineTable)) $TOML }x;
  
  my @valid = (
    q|{ first = "Tom", last = "Preston-Werner" }|,
diff --git a/t/tokens/integer.t b/t/tokens/integer.t

index cbf548fd6e47979fc8c127a430fa8d88a41dc1c7..efde783c25e2d3085fd35331d02578978a4220a9 100644 (file)
--- a/t/tokens/integer.t
+++ b/t/tokens/integer.t
@@ -1,7 +1,7 @@
  use Test2::V0;
-use TOML::Tiny;
+use TOML::Tiny::Grammar;
  
-my $re = qr{ ((?&Integer)) $TOML::Tiny::GRAMMAR_V5 }x;
+my $re = qr{ ((?&Integer)) $TOML }x;
  
  my @valid = qw(
    +99
diff --git a/t/tokens/key-value-pair.t b/t/tokens/key-value-pair.t

index ab692622124194571e55b34e8eaceb91d0c06c79..9ea5a2b97384803f424ee0d4bff445da648cdf57 100644 (file)
--- a/t/tokens/key-value-pair.t
+++ b/t/tokens/key-value-pair.t
@@ -1,7 +1,7 @@
  use Test2::V0;
-use TOML::Tiny;
+use TOML::Tiny::Grammar;
  
-my $re = qr{ ((?&KeyValuePair)) $TOML::Tiny::GRAMMAR_V5 }x;
+my $re = qr{ ((?&KeyValuePair)) $TOML }x;
  
  my @valid = (
    q{foo= "bar"},
diff --git a/t/tokens/key.t b/t/tokens/key.t

index f2d5975a94a5431def01136b78ae8f263073633d..f0ec3a9955366c61898966b78a5c6ec5c619b15d 100644 (file)
--- a/t/tokens/key.t
+++ b/t/tokens/key.t
@@ -1,7 +1,7 @@
  use Test2::V0;
-use TOML::Tiny;
+use TOML::Tiny::Grammar;
  
-my $re = qr{ ((?&Key)) $TOML::Tiny::GRAMMAR_V5 }x;
+my $re = qr{ ((?&Key)) $TOML }x;
  
  my @valid = qw(
    key
diff --git a/t/tokens/string.t b/t/tokens/string.t

index f328a5988fc5e6d64a4b516a3f43738c9ae4fc5b..460656200146b1a684166ec352bd0147a0d4cddd 100644 (file)
--- a/t/tokens/string.t
+++ b/t/tokens/string.t
@@ -1,5 +1,5 @@
  use Test2::V0;
-use TOML::Tiny;
+use TOML::Tiny::Grammar;
  
  sub test_simple_matches {
    my ($re, @tests) = @_;
@@ -13,7 +13,7 @@ sub test_simple_matches {
  subtest 'escaped characters' => sub{
    my $re = qr{
      ((?&EscapeChar))
-    $TOML::Tiny::GRAMMAR_V5
+    $TOML
    }x;
  
    test_simple_matches($re,
@@ -34,7 +34,7 @@ subtest 'escaped characters' => sub{
  subtest 'string literals' => sub{
    my $re = qr{
      ((?&StringLiteral))
-    $TOML::Tiny::GRAMMAR_V5
+    $TOML
    }x;
  
    test_simple_matches($re,
@@ -46,7 +46,7 @@ subtest 'string literals' => sub{
  subtest 'basic strings' => sub{
    my $re = qr{
      ((?&BasicString))
-    $TOML::Tiny::GRAMMAR_V5
+    $TOML
    }x;
  
    test_simple_matches($re,
@@ -60,7 +60,7 @@ subtest 'basic strings' => sub{
  subtest 'multi-line strings' => sub{
    my $re = qr{
      ((?&MultiLineString))
-    $TOML::Tiny::GRAMMAR_V5
+    $TOML
    }x;
  
    test_simple_matches($re,
@@ -87,7 +87,7 @@ subtest 'multi-line strings' => sub{
  subtest 'multi-line string literals' => sub{
    my $re = qr{
      ((?&MultiLineStringLiteral))
-    $TOML::Tiny::GRAMMAR_V5
+    $TOML
    }x;
  
    test_simple_matches($re,
diff --git a/t/tokens/table.t b/t/tokens/table.t

index 41cb8fcc9f8759c28f4daf83cf5cd20ac3f8884e..32d4ba4ee42c3e1475255263ecc55fe3afb74482 100644 (file)
--- a/t/tokens/table.t
+++ b/t/tokens/table.t
@@ -1,7 +1,7 @@
  use Test2::V0;
-use TOML::Tiny;
+use TOML::Tiny::Grammar;
  
-my $re = qr{ (?&Table) $TOML::Tiny::GRAMMAR_V5 }x;
+my $re = qr{ (?&Table) $TOML }x;
  
  my @valid = (
    qq{[foo]\n},
author	Jeff Ober <jober@ziprecruiter.com>
	Mon, 6 Jan 2020 12:57:20 +0000 (07:57 -0500)
committer	Jeff Ober <jober@ziprecruiter.com>
	Wed, 8 Jan 2020 16:16:57 +0000 (11:16 -0500)
README.pod		patch \| blob \| history
cpanfile		patch \| blob \| history
lib/TOML/Tiny.pm		patch \| blob \| history
lib/TOML/Tiny/Grammar.pm		patch \| blob \| history
lib/TOML/Tiny/Parser.pm		patch \| blob \| history
lib/TOML/Tiny/Tokenizer.pm		patch \| blob \| history
t/parity.t		patch \| blob \| history
t/tokens/array-of-tables.t		patch \| blob \| history
t/tokens/array.t		patch \| blob \| history
t/tokens/boolean.t		patch \| blob \| history
t/tokens/datetime.t		patch \| blob \| history
t/tokens/float.t		patch \| blob \| history
t/tokens/inline-table.t		patch \| blob \| history
t/tokens/integer.t		patch \| blob \| history
t/tokens/key-value-pair.t		patch \| blob \| history
t/tokens/key.t		patch \| blob \| history
t/tokens/string.t		patch \| blob \| history
t/tokens/table.t		patch \| blob \| history