Start testing with BurntSushi toml tests

author Jeff Ober <jober@ziprecruiter.com>

Thu, 9 Jan 2020 17:02:42 +0000 (12:02 -0500)

committer Jeff Ober <jober@ziprecruiter.com>

Thu, 9 Jan 2020 17:02:42 +0000 (12:02 -0500)
author Jeff Ober <jober@ziprecruiter.com>
Thu, 9 Jan 2020 17:02:42 +0000 (12:02 -0500)
committer Jeff Ober <jober@ziprecruiter.com>
Thu, 9 Jan 2020 17:02:42 +0000 (12:02 -0500)
diff --git a/bin/from-toml b/bin/from-toml

new file mode 100755 (executable)

index 0000000..dc92515
--- /dev/null
+++ b/bin/from-toml
@@ -0,0 +1,23 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use v5.14;
+use lib './lib';
+
+use JSON::PP   qw();
+use TOML::Tiny qw(from_toml);
+
+binmode STDIN,  ':encoding(UTF-8)';
+binmode STDOUT, ':encoding(UTF-8)';
+
+my $toml = do{ local $/; <STDIN> };
+my ($parsed, $error) = from_toml $toml, annotated => 1;
+
+if ($error) {
+  warn $error;
+  exit 1;
+}
+
+say JSON::PP->new->encode($parsed);
+exit 0;
diff --git a/bin/to-toml b/bin/to-toml

new file mode 100755 (executable)

index 0000000..dd9b8a0
--- /dev/null
+++ b/bin/to-toml
@@ -0,0 +1,21 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use v5.14;
+use lib './lib';
+
+use TOML::Tiny qw(to_toml);
+use JSON::PP qw(decode_json);
+
+my $json = do{ local $/; <STDIN> };
+my $data = decode_json $json;
+my $toml = eval{ to_toml $data };
+
+if ($@) {
+  warn $@;
+  exit 1;
+}
+
+say $toml;
+exit 0;
diff --git a/lib/TOML/Tiny/Grammar.pm b/lib/TOML/Tiny/Grammar.pm

index b95627ad536c9fcc9c3ce79e7d6f82848bb0dbde..150a4cd941e6f4524b05f06ad9e21da00365f4b9 100644 (file)
--- a/lib/TOML/Tiny/Grammar.pm
+++ b/lib/TOML/Tiny/Grammar.pm
@@ -170,11 +170,11 @@ our $TOML = qr{
    # String
    #-----------------------------------------------------------------------------
    (?<EscapeChar>
-    \\                        # leading \
+    \x5C                        # leading \
      (?:
-        [\\/"btnfr]           # escapes: \\ \/ \b \t \n \f \r
-      | (?: u \d{4} )         # unicode (4 bytes)
-      | (?: U \d{8} )         # unicode (8 bytes)
+        [\x5C/"btnfr]           # escapes: \\ \/ \b \t \n \f \r
+      | (?: u [_0-9a-fA-F]{4} ) # unicode (4 bytes)
+      | (?: U [_0-9a-fA-F]{8} ) # unicode (8 bytes)
      )
    )
  
diff --git a/lib/TOML/Tiny/Parser.pm b/lib/TOML/Tiny/Parser.pm

index d99c90d410c9ef3389631ac7f42ad6dea096b0ff..b19c2dba6fc7d4ce371430913745f9a8a63f2800 100644 (file)
--- a/lib/TOML/Tiny/Parser.pm
+++ b/lib/TOML/Tiny/Parser.pm
@@ -2,8 +2,8 @@ package TOML::Tiny::Parser;
  
  use strict;
  use warnings;
-use feature qw(say switch);
  no warnings qw(experimental);
+use v5.14;
  
  use TOML::Tiny::Tokenizer;
  
@@ -21,6 +21,7 @@ sub new {
    bless{
      inflate_datetime => $param{inflate_datetime} || sub{ shift },
      inflate_boolean  => $param{inflate_boolean}  || sub{ shift eq 'true' ? $TRUE : $FALSE },
+    annotated        => $param{annotated},
    }, $class;
  }
  
@@ -115,6 +116,8 @@ sub parse_table {
    $self->expect_type($token, 'table');
    $self->push_keys($token);
  
+  my $node = $self->scan_to_key([$self->get_keys]);
+
    TOKEN: while (my $token = $self->next_token) {
      for ($token->type) {
        when (/key/) {
@@ -194,16 +197,21 @@ sub parse_value {
    my $self = shift;
    my $token = shift // $self->next_token;
  
-  for ($token->type) {
-    return $token->value when /number/;
-    return $token->value when /string/;
-    return $self->{inflate_boolean}->($token->value) when /boolean/;
-    return $self->{inflate_datetime}->($token->value) when /datetime/;
-    return $self->parse_inline_table when /inline_table/;
-    return $self->parse_inline_array when /inline_array/;
+  if ($self->{annotated}) {
+    return {type => $token->type, value => ''.$token->value};
+  }
+  else {
+    for ($token->type) {
+      return $token->value when /float|integer/;
+      return $token->value when /string/;
+      return $self->{inflate_boolean}->($token->value) when /bool/;
+      return $self->{inflate_datetime}->($token->value) when /datetime/;
+      return $self->parse_inline_table when /inline_table/;
+      return $self->parse_inline_array when /inline_array/;
  
-    default{
-      $self->parse_error($token, "value expected (boolean, number, string, datetime, inline array, inline table), but found $_");
+      default{
+        $self->parse_error($token, "value expected (bool, number, string, datetime, inline array, inline table), but found $_");
+      }
      }
    }
  }
diff --git a/lib/TOML/Tiny/Tokenizer.pm b/lib/TOML/Tiny/Tokenizer.pm

index 08d1eff799701fa28ea3d9d473ca94c0dd3f382a..8f4370d7e4217eaa32aadc7fe4ed8fb7e7fef0fa 100644 (file)
--- a/lib/TOML/Tiny/Tokenizer.pm
+++ b/lib/TOML/Tiny/Tokenizer.pm
@@ -2,8 +2,8 @@ package TOML::Tiny::Tokenizer;
  
  use strict;
  use warnings;
-use feature qw(say switch);
  no warnings qw(experimental);
+use v5.14;
  
  use Carp;
  use TOML::Tiny::Grammar;
@@ -67,15 +67,19 @@ sub next_token {
        }
  
        when (/\G ((?&Boolean)) $TOML/xgc) {
-        $token = $self->_make_token('boolean', $1);
+        $token = $self->_make_token('bool', $1);
        }
  
        when (/\G ((?&DateTime)) $TOML/xgc) {
          $token = $self->_make_token('datetime', $1);
        }
  
-      when (/\G ((?&Float) | (?&Integer)) $TOML/xgc) {
-        $token = $self->_make_token('number', $1);
+      when (/\G ((?&Float)) $TOML/xgc) {
+        $token = $self->_make_token('float', $1);
+      }
+
+      when (/\G ((?&Integer)) $TOML/xgc) {
+        $token = $self->_make_token('integer', $1);
        }
  
        when (/\G ((?&String)) $TOML/xgc) {
@@ -221,10 +225,12 @@ sub tokenize_string {
        $self->{line} += scalar( grep{ defined $_ } @newlines );
  
        $str =~ s/^(?&WS) (?&NL) $TOML//x;
+      $str = unescape_str($str);
      }
  
      when (/^ ((?&BasicString)) $TOML/x) {
        $str = substr($1, 1, length($1) - 2);
+      $str = unescape_str($str);
      }
  
      when (/^ ((?&MultiLineStringLiteral)) $TOML/x) {
@@ -244,10 +250,15 @@ sub tokenize_string {
    return ''.$str;
  }
  
+sub tokenize_float   { goto \&tokenize_number }
+sub tokenize_integer { goto \&tokenize_number }
+
  sub tokenize_number {
    my $self = shift;
    my $toml = shift;
  
+  $toml =~ s/_//g;
+
    for ($toml) {
      when (/(?&Oct) $TOML/x) {
        $toml =~ s/^0o/0/; # convert to perl's octal format
@@ -266,4 +277,33 @@ sub tokenize_number {
    return 0 + $toml;
  }
  
+# Adapted from TOML::Parser::Util
+sub unescape_str {
+  my $str = shift;
+
+  $str =~ s/((?&EscapeChar)) $TOML/
+    my $ch = $1; for ($1) {
+      $ch = "\x08" when '\b';
+      $ch = "\x09" when '\t';
+      $ch = "\x0A" when '\n';
+      $ch = "\x0C" when '\f';
+      $ch = "\x0D" when '\r';
+      $ch = "\x22" when '\"';
+      $ch = "\x2F" when '\/';
+      $ch = "\x5C" when '\\\\';
+      default{
+        my $c = substr $1, 2;
+        $c = chr(hex($c));
+        if ($c ne "\0") {
+          $ch = $c;
+        }
+      }
+    }
+
+    $ch;
+  /xge;
+
+  return $str;
+}
+
  1;
author	Jeff Ober <jober@ziprecruiter.com>
	Thu, 9 Jan 2020 17:02:42 +0000 (12:02 -0500)
committer	Jeff Ober <jober@ziprecruiter.com>
	Thu, 9 Jan 2020 17:02:42 +0000 (12:02 -0500)
bin/from-toml	[new file with mode: 0755]	patch \| blob
bin/to-toml	[new file with mode: 0755]	patch \| blob
lib/TOML/Tiny/Grammar.pm		patch \| blob \| history
lib/TOML/Tiny/Parser.pm		patch \| blob \| history
lib/TOML/Tiny/Tokenizer.pm		patch \| blob \| history