From: Jeff Ober Date: Fri, 10 Jan 2020 20:48:16 +0000 (-0500) Subject: All parsing tests pass with new option, strict_arrays, for a pinch of BS to taste X-Git-Tag: nailing-cargo/1.0.0~234^2~51 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=commitdiff_plain;h=b04280b03116e9c12a61591f81867219f376cf3c;p=nailing-cargo.git All parsing tests pass with new option, strict_arrays, for a pinch of BS to taste --- diff --git a/lib/TOML/Tiny/Grammar.pm b/lib/TOML/Tiny/Grammar.pm index 0471185..ff3a781 100644 --- a/lib/TOML/Tiny/Grammar.pm +++ b/lib/TOML/Tiny/Grammar.pm @@ -130,12 +130,14 @@ our $TOML = qr{ #----------------------------------------------------------------------------- # Integer #----------------------------------------------------------------------------- + (? [1-9]) (? [0-9]) - (? (?&DecChar) | [a-f A-F]) + (? [0-9 a-f A-F]) (? [0-7]) (? [01]) - (? [-+]? (?&DecChar) (?: (?&DecChar) | (?: _ (?&DecChar) ))*) + (? [-+]? 0) + (? (?&Zero) | (?: [-+]? (?&DecFirstChar) (?: (?&DecChar) | (?: _ (?&DecChar) ))*)) (? 0x (?&HexChar) (?: (?&HexChar) | (?: [_] (?&HexChar) ))*) (? 0o (?&OctChar) (?: (?&OctChar) | (?: [_] (?&OctChar) ))*) (? 0b (?&BinChar) (?: (?&BinChar) | (?: [_] (?&BinChar) ))*) @@ -147,7 +149,7 @@ our $TOML = qr{ #----------------------------------------------------------------------------- (? [eE] (?&Dec)) (? [-+]? (?:inf) | (?:nan)) - (? [.] [_0-9]+) + (? [.] (?&Dec) ) (? (?: diff --git a/lib/TOML/Tiny/Parser.pm b/lib/TOML/Tiny/Parser.pm index b1a1faf..e69624f 100644 --- a/lib/TOML/Tiny/Parser.pm +++ b/lib/TOML/Tiny/Parser.pm @@ -26,6 +26,7 @@ sub new { bless{ inflate_datetime => $param{inflate_datetime} || sub{ shift }, inflate_boolean => $param{inflate_boolean} || sub{ shift eq 'true' ? $TRUE : $FALSE }, + strict_arrays => $param{strict_arrays}, annotated => $param{annotated}, }, $class; } @@ -41,8 +42,9 @@ sub parse { my ($self, $toml) = @_; $self->{tokenizer} = TOML::Tiny::Tokenizer->new(source => $toml); - $self->{keys} = []; - $self->{root} = {}; + $self->{keys} = []; + $self->{root} = {}; + $self->{tables} = {}; # "seen" hash of explicitly defined table names $self->parse_table; my $result = $self->{root}; @@ -50,6 +52,7 @@ sub parse { delete $self->{tokenizer}; delete $self->{keys}; delete $self->{root}; + delete $self->{tables}; return annotate($result) if $self->{annotated}; return $result; @@ -109,10 +112,12 @@ sub get_keys { sub set_keys { my $self = shift; - my $value = $self->parse_value; + my $value = shift // $self->parse_value; my @keys = $self->get_keys; my $key = pop @keys; my $node = $self->scan_to_key(\@keys); + $self->parse_error(undef, 'duplicate key: '.join('.', @keys, $key)) + if exists $node->{$key}; $node->{$key} = $value; } @@ -147,6 +152,28 @@ sub parse_table { $self->push_keys($token); $self->scan_to_key([$self->get_keys]); + my @keys = $self->get_keys; + my $key = join '.', @keys; + if (exists $self->{tables}{$key}) { + # Tables cannot be redefined, *except* when doing so within a goddamn table + # array. Gawd I hate TOML. + my $in_a_stupid_table_array = 0; + my $node = $self->{root}; + for my $key (@keys) { + if (exists $node->{$key} && ref($node->{$key}) eq 'ARRAY') { + $in_a_stupid_table_array = 1; + last; + } else { + $node = $node->{$key}; + } + } + + $self->parse_error($token, "table $key is already defined") + unless $in_a_stupid_table_array; + } else { + $self->{tables}{$key} = 1; + } + TOKEN: while (my $token = $self->next_token) { for ($token->type) { next TOKEN when /EOL/; @@ -301,6 +328,40 @@ sub parse_inline_array { } } + if (@array > 1 && $self->{strict_arrays}) { + my @types = map{ + my $type; + + if (my $ref = ref $_) { + $type = $ref eq 'ARRAY' ? 'array' : 'table'; + } + else { + if (/^(true|false)$/) { + $type = 'bool'; + } + elsif (looks_like_number($_)) { + if ("$_" =~ /[.]/) { + $type = 'float'; + } else { + $type = 'integer'; + } + } + elsif (/(?&DateTime) $TOML/x) { + $type = 'datetime'; + } + else { + $type = 'string'; + } + } + } @array; + + my $t = shift @types; + for (@types) { + $self->parse_error(undef, "expected value of type $t, but found $_") + if $_ ne $t; + } + } + return \@array; } diff --git a/lib/TOML/Tiny/Tokenizer.pm b/lib/TOML/Tiny/Tokenizer.pm index 60b1b5c..3ea9b3c 100644 --- a/lib/TOML/Tiny/Tokenizer.pm +++ b/lib/TOML/Tiny/Tokenizer.pm @@ -122,7 +122,7 @@ sub next_token { } default{ - my $substr = substr($self->{source}, $self->{position}, 30) // 'undef'; + my $substr = substr($self->{source}, $self->{position} - 20, 40) // 'undef'; die "toml syntax error on line $self->{line}\n\t--> $substr\n"; } } diff --git a/test-bin/from-toml b/test-bin/from-toml index dc92515..85ea20d 100755 --- a/test-bin/from-toml +++ b/test-bin/from-toml @@ -12,7 +12,9 @@ binmode STDIN, ':encoding(UTF-8)'; binmode STDOUT, ':encoding(UTF-8)'; my $toml = do{ local $/; }; -my ($parsed, $error) = from_toml $toml, annotated => 1; +my ($parsed, $error) = from_toml $toml, + annotated => 1, + strict_arrays => 1; if ($error) { warn $error;