#-----------------------------------------------------------------------------
# Key
#-----------------------------------------------------------------------------
- (?<BareKey> [-_a-zA-Z0-9]+)
+ (?<BareKey> [-_a-zA-Z0-9]+)
(?<QuotedKey> (?&BasicString) | (?&StringLiteral))
- (?<DottedKey>
- (?: (?&BareKey) | (?&QuotedKey) )
- (?: (?&WS) [.] (?&WS) (?: (?&BareKey) | (?&QuotedKey) ) )+
- )
- (?<Key> (?&DottedKey) | (?&BareKey) | (?&QuotedKey) )
+ (?<SimpleKey> (?&BareKey) | (?&QuotedKey))
+ (?<DottedKey> (?&SimpleKey) (?: \x2E (?&SimpleKey) )+)
+ (?<Key> (?&BareKey) | (?&QuotedKey) | (?&DottedKey))
#-----------------------------------------------------------------------------
# Boolean
#-----------------------------------------------------------------------------
- (?<Boolean> \b(?:true)|(?:false)\b)
+ (?<Boolean> (?: \b (?:true) | (?:false) \b ))
#-----------------------------------------------------------------------------
# Integer
#-----------------------------------------------------------------------------
- (?<DecFirstChar> [1-9])
- (?<DecChar> [0-9])
- (?<HexChar> [0-9 a-f A-F])
- (?<OctChar> [0-7])
- (?<BinChar> [01])
+ (?<DecFirstChar> [1-9])
+ (?<DecChar> [0-9])
+ (?<HexChar> [0-9 a-f A-F])
+ (?<OctChar> [0-7])
+ (?<BinChar> [01])
(?<Zero> [-+]? 0)
(?<Dec> (?&Zero) | (?: [-+]? (?&DecFirstChar) (?: (?&DecChar) | (?: _ (?&DecChar) ))*))
#-----------------------------------------------------------------------------
# Float
#-----------------------------------------------------------------------------
- (?<Exponent> [eE] (?&Dec))
- (?<SpecialFloat> [-+]? (?:inf) | (?:nan))
- (?<Fraction> [.] (?&Dec) )
+ (?<Exponent> [eE] (?&Dec))
+ (?<SpecialFloat> [-+]? (?:inf) | (?:nan))
+ (?<Fraction> [.] (?&Dec) )
(?<Float>
- (?:
- (?: (?&Dec) (?&Fraction) (?&Exponent) )
- | (?: (?&Dec) (?&Exponent) )
- | (?: (?&Dec) (?&Fraction) )
- )
- |
- (?&SpecialFloat)
+ (?:
+ (?&Dec)
+
+ (?:
+ (?: (?&Fraction) (?&Exponent)? )
+ | (?&Exponent)
+ )
+ )
+ | (?&SpecialFloat)
)
#-----------------------------------------------------------------------------
(?<EscapeChar>
\x5C # leading \
(?:
- [\x5C"btnfr] # escapes: \\ \b \t \n \f \r
+ [\x5C"btnfr] # escapes: \\ \" \b \t \n \f \r
| (?: u [_0-9a-fA-F]{4} ) # unicode (4 bytes)
| (?: U [_0-9a-fA-F]{8} ) # unicode (8 bytes)
)
(?:
" # opening quote
(?: # escape sequences or any char except " or \
- (?: (?&EscapeChar) )
- | [^"\\]
+ [^"\\]
+ | (?&EscapeChar)
)*
" # closing quote
)
)
(?<MultiLineString>
+ (?m)
(?s)
""" # opening triple-quote
(?:
- (?: (?&EscapeChar) ) # escaped char
- | .
+ [^"\\]
+ | "{1,2} # 1-2 quotation marks
+ | (?&EscapeChar) # escape
+ | (?: \\ $)
)*?
""" # closing triple-quote
(?-s)
+ (?-m)
)
(?<String>
use Carp;
use Data::Dumper;
use List::Util qw(all);
-use Scalar::Util qw(looks_like_number);
use TOML::Tiny::Grammar;
use TOML::Tiny::Tokenizer;
use TOML::Tiny::Util qw(is_strict_array);
}
sub next_token {
- my $self = shift;
- return unless $self->{tokenizer};
- my $token = $self->{tokenizer}->next_token;
- return $token;
+ $_[0]->{tokenizer} && $_[0]->{tokenizer}->next_token;
}
sub parse {
sub parse_error {
my ($self, $token, $msg) = @_;
- my $line = $token ? $token->line : 'EOF';
+ my $line = $token ? $token->{line} : 'EOF';
if ($self->{annotated}) {
my $root = Dumper($self->{root});
my $tok = Dumper($token);
sub expect_type {
my ($self, $token, $expected) = @_;
- my $actual = $token->type;
+ my $actual = $token->{type};
$self->parse_error($token, "expected $expected, but found $actual")
unless $actual eq $expected;
}
sub push_keys {
my ($self, $token) = @_;
- push @{ $self->{keys} }, $token->value;
+ push @{ $self->{keys} }, $token->{value};
}
sub pop_keys {
}
sub scan_to_key {
- my ($self, $keys) = @_;
+ my $self = shift;
+ my $keys = shift // [ $self->get_keys ];
my $node = $self->{root};
for my $key (@$keys) {
if (exists $node->{$key}) {
for (ref $node->{$key}) {
- $node = $node->{$key} when /HASH/;
- $node = $node->{$key}[-1] when /ARRAY/;
+ $node = $node->{$key} when 'HASH';
+ $node = $node->{$key}[-1] when 'ARRAY';
default{
my $full_key = join '.', @$keys;
die "$full_key is already defined\n";
sub parse_table {
my $self = shift;
- my $token = shift // $self->next_token;
+ my $token = shift // $self->next_token // return; # may be undef on first token in empty document
$self->expect_type($token, 'table');
$self->push_keys($token);
- $self->scan_to_key([$self->get_keys]);
+ $self->scan_to_key;
my @keys = $self->get_keys;
my $key = join '.', @keys;
}
TOKEN: while (my $token = $self->next_token) {
- for ($token->type) {
- next TOKEN when /EOL/;
+ for ($token->{type}) {
+ next TOKEN when 'EOL';
- when (/key/) {
+ when ('key') {
$self->expect_type($self->next_token, 'assign');
$self->push_keys($token);
$self->set_keys;
}
}
- when (/array_table/) {
+ when ('array_table') {
$self->pop_keys;
@_ = ($self, $token);
goto \&parse_array_table;
}
- when (/table/) {
+ when ('table') {
$self->pop_keys;
@_ = ($self, $token);
goto \&parse_table;
push @{ $node->{$key} }, {};
TOKEN: while (my $token = $self->next_token) {
- for ($token->type) {
+ for ($token->{type}) {
next TOKEN when /EOL/;
when (/key/) {
my $self = shift;
my $token = shift // $self->next_token;
$self->expect_type($token, 'key');
- return $token->value;
+ return $token->{value};
}
sub parse_value {
my $self = shift;
my $token = shift // $self->next_token;
- for ($token->type) {
+ for ($token->{type}) {
when (/float/) {
if ($self->{annotated}) {
- return $token->value;
+ return $token->{value};
} else {
use bignum;
- return $token->value + 0;
+ return $token->{value} + 0;
}
}
when (/integer/) {
if ($self->{annotated}) {
- return $token->value;
+ return $token->{value};
} else {
- for (my $n = $token->value) {
+ for (my $n = $token->{value}) {
use bigint;
when (/(?&Oct) $TOML/x) {
}
}
- return $token->value when /string/;
- return $self->{inflate_boolean}->($token->value) when /bool/;
- return $self->{inflate_datetime}->($token->value) when /datetime/;
+ return $token->{value} when /string/;
+ return $self->{inflate_boolean}->($token->{value}) when /bool/;
+ return $self->{inflate_datetime}->($token->{value}) when /datetime/;
return $self->parse_inline_table when /inline_table/;
return $self->parse_inline_array when /inline_array/;
my @array;
TOKEN: while (my $token = $self->next_token) {
- for ($token->type) {
+ for ($token->{type}) {
next TOKEN when /comma/;
next TOKEN when /EOL/;
last TOKEN when /inline_array_close/;
my $table = {};
TOKEN: while (my $token = $self->next_token) {
- for ($token->type) {
+ for ($token->{type}) {
next TOKEN when /comma/;
last TOKEN when /inline_table_close/;
when (/key/) {
$self->expect_type($self->next_token, 'assign');
- my $key = $token->value->[0];
+ my $key = $token->{value}[0];
$table->{ $key } = $self->parse_value;
}
use TOML::Tiny::Grammar;
-use Class::Struct 'TOML::Tiny::Token' => {
- type => '$',
- line => '$',
- pos => '$',
- value => '$',
-};
-
sub new {
my ($class, %param) = @_;
my $self = bless{
- source => $param{source},
- is_exhausted => 0,
- position => 0,
- line => 0,
- tokens => [],
+ source => $param{source},
+ last_position => length $param{source},
+ position => 0,
+ line => 0,
+ tokens => [],
}, $class;
return $self;
return;
}
- if ($self->{is_exhausted}) {
+ if ($self->is_exhausted) {
return;
}
my $token;
- while (!defined($token) && !$self->{is_exhausted}) {
+ while (!defined($token) && !$self->is_exhausted) {
for ($self->{source}) {
when (/\G (?&NL) $TOML/xgc) {
++$self->{line};
;
}
- when (/\G ((?&Key)) (?= (?&WS) =) $TOML/xgc) {
+ when (/\G ((?&Key)) (?&WS) (?= =) $TOML/xgc) {
$token = $self->_make_token('key', $1);
}
+ when (/\G \[ (?&WS) ((?&Key)) (?&WS) \] (?&WS) (?=(?&NL) | $)$TOML/xgc) {
+ my $key = $self->tokenize_key($1);
+ $token = $self->_make_token('table', $key);
+ }
+
+ when (/\G \[\[ (?&WS) ((?&Key)) (?&WS) \]\] (?&WS) (?=(?&NL) | $) $TOML/xgc) {
+ my $key = $self->tokenize_key($1);
+ $token = $self->_make_token('array_table', $key);
+ }
+
+ when (/\G \[ /xgc) {
+ $token = $self->_make_token('inline_array', $1);
+ }
+
+ when (/\G \] /xgc) {
+ $token = $self->_make_token('inline_array_close', $1);
+ }
+
+ when (/\G \{ /xgc) {
+ $token = $self->_make_token('inline_table', $1);
+ }
+
+ when (/\G \} /xgc) {
+ $token = $self->_make_token('inline_table_close', $1);
+ }
+
when (/\G ((?&Boolean)) $TOML/xgc) {
$token = $self->_make_token('bool', $1);
}
$token = $self->_make_token('comma', $1);
}
- when (/\G \[ (?&WS) ((?&Key)) (?&WS) \] (?&WS) (?=(?&NL) | $)$TOML/xgc) {
- my $key = $self->tokenize_key($1);
- $token = $self->_make_token('table', $key);
- }
-
- when (/\G \[\[ (?&WS) ((?&Key)) (?&WS) \]\] (?&WS) (?=(?&NL) | $) $TOML/xgc) {
- my $key = $self->tokenize_key($1);
- $token = $self->_make_token('array_table', $key);
- }
-
- when (/\G \[ /xgc) {
- $token = $self->_make_token('inline_array', $1);
- }
-
- when (/\G \] /xgc) {
- $token = $self->_make_token('inline_array_close', $1);
- }
-
- when (/\G \{ /xgc) {
- $token = $self->_make_token('inline_table', $1);
- }
-
- when (/\G \} /xgc) {
- $token = $self->_make_token('inline_table_close', $1);
- }
-
default{
my $substr = substr($self->{source}, $self->{position} - 20, 40) // 'undef';
die "toml syntax error on line $self->{line}\n\t--> $substr\n";
sub _make_token {
my ($self, $type, $value) = @_;
- my $token = TOML::Tiny::Token->new(
+ my $token = {
type => $type,
line => $self->{line},
pos => $self->{position},
- );
-
- $self->update_position;
-
- if (my $tokenize = $self->can("tokenize_$type")) {
- $value = $tokenize->($self, $value);
- }
-
- $token->value($value);
+ value => $self->can("tokenize_$type") ? $self->can("tokenize_$type")->($self, $value) : $value,
+ };
return $token;
}
substr $rest, 0, $stop;
}
+sub is_exhausted {
+ return $_[0]->{position} >= $_[0]->{last_position};
+}
+
sub update_position {
my $self = shift;
$self->{position} = pos($self->{source}) // 0;
- $self->{is_exhausted} = $self->{position} >= length($self->{source});
}
sub error {
my $self = shift;
my $token = shift;
my $msg = shift // 'unknown';
- my $line = $token ? $token->line : $self->{line};
+ my $line = $token ? $token->{line} : $self->{line};
die "toml: parse error at line $line: $msg\n";
}
sub tokenize_key {
my $self = shift;
my $toml = shift;
+ my @keys;
- for ($toml) {
- my @parts;
-
- $toml =~ qr{
- (
- (?:
- ( (?&QuotedKey) | (?&BareKey) )
- [.]?
- (?{push @parts, $^N})
- )+
- )
- $TOML
- }x;
-
- for (@parts) {
- s/^["']//;
- s/["']$//;
- }
+ while ($toml =~ s/^ ((?&SimpleKey)) [.]? $TOML//x) {
+ push @keys, $1;
+ }
- return \@parts;
+ for (@keys) {
+ s/^["']//;
+ s/["']$//;
}
+
+ return \@keys;
}
sub tokenize_float {
sub tokenize_string {
my $self = shift;
my $toml = shift;
- my $str = '';
-
- for ($toml) {
- when (/^ ((?&MultiLineString)) $TOML/x) {
- $str = substr $1, 3, length($1) - 6;
-
- my @newlines = $str =~ /((?&NL)) $TOML/xg;
- $self->{line} += scalar( grep{ defined $_ } @newlines );
+ my $ml = $toml =~ /^(?:''')|(?:""")/;
+ my $lit = $toml =~ /^'/;
+ my $str = '';
+
+ if ($ml) {
+ $str = substr $toml, 3, length($toml) - 6;
+ my @newlines = $str =~ /(\x0D?\x0A)/g;
+ $self->{line} += scalar @newlines;
+ $str =~ s/^(?&WS) (?&NL) $TOML//x; # trim leading whitespace
+ $str =~ s/\\(?&NL)\s* $TOML//xgs; # trim newlines from lines ending in backslash
+ } else {
+ $str = substr($toml, 1, length($toml) - 2);
+ }
- $str =~ s/^(?&WS) (?&NL) $TOML//x;
- $str =~ s/\\(?&NL)\s* $TOML//xgs;
- $str = $self->unescape_str($str);
- }
+ if (!$lit) {
+ $str = $self->unescape_str($str);
+ }
- when (/^ ((?&BasicString)) $TOML/x) {
- $str = substr($1, 1, length($1) - 2);
- $str = $self->unescape_str($str);
- }
+ return ''.$str;
+}
- when (/^ ((?&MultiLineStringLiteral)) $TOML/x) {
- $str = substr $1, 3, length($1) - 6;
+sub unescape_chars {
+ state %esc = (
+ '\b' => "\x08",
+ '\t' => "\x09",
+ '\n' => "\x0A",
+ '\f' => "\x0C",
+ '\r' => "\x0D",
+ '\"' => "\x22",
+ '\/' => "\x2F",
+ '\\\\' => "\x5C",
+ );
- my @newlines = $str =~ /(?&NL) $TOML/xg;
- $self->{line} += scalar( grep{ defined $_ } @newlines );
+ if (exists $esc{$_[0]}) {
+ return $esc{$_[0]};
+ }
- $str =~ s/^(?&WS) (?&NL) $TOML//x;
- $str =~ s/\\(?&NL)\s* $TOML//xgs;
- }
+ my $hex = hex substr($_[0], 2);
- when (/^ ((?&StringLiteral)) $TOML/x) {
- $str = substr($1, 1, length($1) - 2);
- }
+ if (charnames::viacode($hex)) {
+ return chr $hex;
}
- return ''.$str;
+ return;
}
-# Adapted from TOML::Parser::Util
sub unescape_str {
- my $self = shift;
- my $str = shift;
-
- $str =~ s/((?&EscapeChar)) $TOML/
- my $ch = $1; for ($1) {
- $ch = "\x08" when '\b';
- $ch = "\x09" when '\t';
- $ch = "\x0A" when '\n';
- $ch = "\x0C" when '\f';
- $ch = "\x0D" when '\r';
- $ch = "\x22" when '\"';
- $ch = "\x2F" when '\/';
- $ch = "\x5C" when '\\\\';
-
- default{
- my $hex = hex substr($ch, 2);
- if (charnames::viacode($hex)) {
- $ch = chr $hex;
- } else {
- $self->error(undef, "invalid unicode escape: $ch");
- }
- }
- }
-
- $ch;
- /xge;
-
- return $str;
+ state $re = qr/((?&EscapeChar)) $TOML/x;
+ $_[1] =~ s|$re|unescape_chars($1) // $_[0]->error(undef, "invalid unicode escape: $1")|xge;
+ $_[1];
}
1;