| (?&InlineTable)
)
- (?<NLSeq> (?> \x0D? \x0A))
+ (?<NLSeq> \x0D? \x0A)
(?<NL> (?&NLSeq) | (?&Comment))
(?<WSChar> \x20 | \x09) # (space, tab)
#-----------------------------------------------------------------------------
# Key
#-----------------------------------------------------------------------------
- (?<BareKey> [-_a-zA-Z0-9]+)
- (?<QuotedKey> (?&BasicString) | (?&StringLiteral))
- (?<SimpleKey> (?&BareKey) | (?&QuotedKey))
- (?<DottedKey> (?&SimpleKey) (?: \x2E (?&SimpleKey) )+)
+ (?<BareKey> (?> [-_a-zA-Z0-9]+ ))
+ (?<QuotedKey> (?> (?&BasicString) | (?&StringLiteral)))
+ (?<SimpleKey> (?> (?&BareKey) | (?&QuotedKey)))
+ (?<DottedKey> (?> (?&SimpleKey) (?: \x2E (?&SimpleKey) )+))
(?<Key> (?&BareKey) | (?&QuotedKey) | (?&DottedKey))
#-----------------------------------------------------------------------------
(?<BinChar> [01])
(?<Zero> [-+]? 0)
- (?<Dec> (?&Zero) | (?: [-+]? (?&DecFirstChar) (?: (?&DecChar) | (?: _ (?&DecChar) ))*))
- (?<Hex> 0x (?&HexChar) (?: (?&HexChar) | (?: [_] (?&HexChar) ))*)
- (?<Oct> 0o (?&OctChar) (?: (?&OctChar) | (?: [_] (?&OctChar) ))*)
- (?<Bin> 0b (?&BinChar) (?: (?&BinChar) | (?: [_] (?&BinChar) ))*)
+ (?<Hex> 0x (?&HexChar) (?> _? (?&HexChar) )*)
+ (?<Oct> 0o (?&OctChar) (?> _? (?&OctChar) )*)
+ (?<Bin> 0b (?&BinChar) (?> _? (?&BinChar) )*)
+ (?<Dec>
+ (?&Zero)
+ | (?> [-+]? (?&DecFirstChar) (?> _? (?&DecChar) )* )
+ )
- (?<Integer> (?&Hex) | (?&Oct) | (?&Bin) | (?&Dec))
+ (?<Integer>
+ (?>
+ (?&Hex)
+ | (?&Oct)
+ | (?&Bin)
+ | (?&Dec)
+ )
+ )
#-----------------------------------------------------------------------------
# Float
#-----------------------------------------------------------------------------
(?<Exponent> [eE] (?&Dec))
- (?<SpecialFloat> [-+]? (?:inf) | (?:nan))
+ (?<SpecialFloat> [-+]? (?> (?:inf) | (?:nan)))
(?<Fraction> [.] (?&Dec) )
(?<Float>
- (?:
- (?&Dec)
+ (?>
+ (?&Dec)
- (?:
- (?: (?&Fraction) (?&Exponent)? )
- | (?&Exponent)
- )
+ (?>
+ (?> (?&Fraction) (?&Exponent)? )
+ | (?&Exponent)
)
+ )
| (?&SpecialFloat)
)
#-----------------------------------------------------------------------------
(?<EscapeChar>
\x5C # leading \
- (?:
+ (?>
[\x5C"btnfr] # escapes: \\ \" \b \t \n \f \r
- | (?: u [_0-9a-fA-F]{4} ) # unicode (4 bytes)
- | (?: U [_0-9a-fA-F]{8} ) # unicode (8 bytes)
+ | (?> u [_0-9a-fA-F]{4} ) # unicode (4 bytes)
+ | (?> U [_0-9a-fA-F]{8} ) # unicode (8 bytes)
)
)
(?<StringLiteral>
- (?: ' [^']* ') # single quoted string (no escaped chars allowed)
+ (?> ' [^']* ') # single quoted string (no escaped chars allowed)
)
(?<MultiLineStringLiteral>
- (?m)
- (?s)
- ''' # opening triple-quote
- .*?
- ''' # closing triple-quote
- (?-s)
- (?-m)
+ (?>
+ ''' # opening triple-quote
+ (?>
+ [^']
+ | '{1,2}
+ )*?
+ ''' # closing triple-quote
+ )
)
(?<BasicString>
- (?:
+ (?>
" # opening quote
- (?: # escape sequences or any char except " or \
+ (?> # escape sequences or any char except " or \
[^"\\]
| (?&EscapeChar)
)*
)
(?<MultiLineString>
- (?m)
- (?s)
""" # opening triple-quote
- (?:
+ (?>
[^"\\]
| "{1,2} # 1-2 quotation marks
| (?&EscapeChar) # escape
- | (?: \\ $)
+ | (?: \\ (?&NLSeq)) # backslash-terminated line
)*?
""" # closing triple-quote
- (?-s)
- (?-m)
)
(?<String>
- (?&MultiLineString)
+ (?&MultiLineString) # multi-line first or first two chars match empty basic string
| (?&BasicString)
- | (?&MultiLineStringLiteral)
+ | (?&MultiLineStringLiteral)
| (?&StringLiteral)
)
# Dates (RFC 3339)
# 1985-04-12T23:20:50.52Z
#-----------------------------------------------------------------------------
- (?<Date> \d{4}-\d{2}-\d{2})
-
- (?<Offset>
- (?: [-+] \d{2}:\d{2} )
- | [Z]
- )
-
- (?<SimpleTime>
- \d{2}:\d{2}:\d{2}
- (?: [.] \d+ )?
- )
-
- (?<Time>
- (?&SimpleTime)
- (?&Offset)?
- )
-
- (?<DateTime>
- (?: (?&Date) [T ] (?&Time) )
- | (?&Date)
- | (?&Time)
- )
+ (?<Date> \d{4}-\d{2}-\d{2} )
+ (?<Offset> (?: [-+] \d{2}:\d{2} ) | Z )
+ (?<SimpleTime> \d{2}:\d{2}:\d{2} (?: \. \d+ )? )
+ (?<Time> (?&SimpleTime) (?&Offset)? )
+ (?<DateTime> (?> (?&Date) (?> [T ] (?&Time) )? ) | (?&Time) )
)
}x;
+ # Tokenizer step: matches the source at the current \G position against the
+ # token patterns below, in order. Returns nothing when the source is undefined
+ # or the read position has reached last_position. Dies with a
+ # "toml syntax error" message when no pattern matches.
sub next_token {
my $self = shift;
- if (!defined($self->{source})) {
- return;
- }
-
- if ($self->is_exhausted) {
- return;
- }
+ return unless defined $self->{source}
+ && $self->{position} < $self->{last_position};
if (!@{ $self->{tokens} }) {
my $root = $self->_make_token('table', []);
my $token;
- while (!defined($token) && !$self->is_exhausted) {
+ state $key = qr/(?&Key) $TOML/x;
+
+ while ($self->{position} < $self->{last_position} && !$token) {
for ($self->{source}) {
- when (/\G (?&NL) $TOML/xgc) {
- ++$self->{line};
- $token = $self->_make_token('EOL');
+ when (/\G [\x20 \x09]+/xgc) {
+ ;
}
- when (/\G (?&WSChar)+ $TOML/xgc) {
+ when (/\G \x23 .*/xgc) {
;
}
- when (/\G ((?&Key)) (?&WS) (?= =) $TOML/xgc) {
- $token = $self->_make_token('key', $1);
+ when (/\G \x0D? \x0A/xgc) {
+ ++$self->{line};
+ $token = $self->_make_token('EOL');
}
- when (/\G \[ (?&WS) ((?&Key)) (?&WS) \] (?&WS) (?=(?&NL) | $)$TOML/xgc) {
- my $key = $self->tokenize_key($1);
- $token = $self->_make_token('table', $key);
+ # [key] header; the lookahead allows an optional trailing comment (non-capturing)
+ # before the line ending, or end of input.
+ when (/\G \[ [\x20 \x09]* ($key) [\x20 \x09]* \] [\x20 \x09]* (?= (?: \x23 .* )? (?: \x0D? \x0A) | $ )/xgc) {
+ $token = $self->_make_token('table', $self->tokenize_key($1));
}
- when (/\G \[\[ (?&WS) ((?&Key)) (?&WS) \]\] (?&WS) (?=(?&NL) | $) $TOML/xgc) {
- my $key = $self->tokenize_key($1);
- $token = $self->_make_token('array_table', $key);
+ # [[key]] header; same trailing-comment lookahead as the [key] case.
+ when (/\G \[\[ [\x20 \x09]* ($key) [\x20 \x09]* \]\] [\x20 \x09]* (?= (?: \x23 .* )? (?: \x0D? \x0A) | $ )/xgc) {
+ $token = $self->_make_token('array_table', $self->tokenize_key($1));
}
when (/\G \[ /xgc) {
$token = $self->_make_token('inline_table_close', $1);
}
- when (/\G ((?&Boolean)) $TOML/xgc) {
- $token = $self->_make_token('bool', $1);
+ when (/\G = /xgc) {
+ $token = $self->_make_token('assign', $1);
}
- when (/\G ((?&DateTime)) $TOML/xgc) {
- $token = $self->_make_token('datetime', $1);
+ when (/\G , /xgc) {
+ $token = $self->_make_token('comma', $1);
}
- when (/\G ((?&Float)) $TOML/xgc) {
- $token = $self->_make_token('float', $1);
+ when (/\G ($key) [\x20 \x09]* (?= =)/xgc) {
+ $token = $self->_make_token('key', $1);
}
- when (/\G ((?&Integer)) $TOML/xgc) {
- $token = $self->_make_token('integer', $1);
+ when (/\G ((?&Boolean)) $TOML/xgc) {
+ $token = $self->_make_token('bool', $1);
}
when (/\G ((?&String)) $TOML/xgc) {
$token = $self->_make_token('string', $1);
}
- when (/\G = /xgc) {
- $token = $self->_make_token('assign', $1);
+ when (/\G ((?&DateTime)) $TOML/xgc) {
+ $token = $self->_make_token('datetime', $1);
}
- when (/\G , /xgc) {
- $token = $self->_make_token('comma', $1);
+ when (/\G ((?&Float)) $TOML/xgc) {
+ $token = $self->_make_token('float', $1);
+ }
+
+ when (/\G ((?&Integer)) $TOML/xgc) {
+ $token = $self->_make_token('integer', $1);
}
default{
- my $substr = substr($self->{source}, $self->{position} - 20, 40) // 'undef';
+ my $substr = substr($self->{source}, $self->{position}, 30) // 'undef';
die "toml syntax error on line $self->{line}\n\t--> $substr\n";
}
}
+ # Appends the given tokens to the token buffer ($self->{tokens}).
+ # Undefined arguments are skipped, matching the removed single-token
+ # version, which returned early on an undefined token.
sub push_token {
my $self = shift;
- my $token = shift // return;
- push @{$self->{tokens}}, $token;
+ push @{ $self->{tokens} }, grep { defined } @_;
}
sub pop_token {
# Builds a token hash holding the given type, the current line and position
# of the tokenizer, and a value. When the object has a method named
# "tokenize_<type>", the value is passed through that method; otherwise the
# raw value is stored unchanged.
sub _make_token {
my ($self, $type, $value) = @_;
-
- my $token = {
+ return {
type => $type,
line => $self->{line},
pos => $self->{position},
value => $self->can("tokenize_$type") ? $self->can("tokenize_$type")->($self, $value) : $value,
};
-
- return $token;
}
sub current_line {
substr $rest, 0, $stop;
}
-sub is_exhausted {
- return $_[0]->{position} >= $_[0]->{last_position};
-}
-
# Copies the regex match position of the source string into
# $self->{position}; pos() yields undef when no match position is set, in
# which case 0 is stored.
sub update_position {
my $self = shift;
$self->{position} = pos($self->{source}) // 0;
}
+ # Returns the float literal with underscores removed.
+ # Works on a copy: elements of @_ alias the caller's variables, so editing
+ # $_[1] in place would change the caller's string and would die on a
+ # read-only argument such as a string constant.
sub tokenize_float {
- my $self = shift;
- my $toml = shift;
- $toml =~ s/_//g;
- $toml;
+ my (undef, $toml) = @_;
+ $toml =~ tr/_//d;
+ return $toml;
}
+ # Returns the integer literal with underscores and '+' characters removed.
+ # Works on a copy: elements of @_ alias the caller's variables, so editing
+ # $_[1] in place would change the caller's string and would die on a
+ # read-only argument such as a string constant.
sub tokenize_integer {
- my $self = shift;
- my $toml = shift;
- $toml =~ s/_//g;
- $toml =~ s/^[+]//;
- return $toml;
+ my (undef, $toml) = @_;
+ $toml =~ tr/_+//d;
+ return $toml;
}
sub tokenize_string {
$str = substr $toml, 3, length($toml) - 6;
my @newlines = $str =~ /(\x0D?\x0A)/g;
$self->{line} += scalar @newlines;
- $str =~ s/^(?&WS) (?&NL) $TOML//x; # trim leading whitespace
+ $str =~ s/^[\x20 \x09]* (?&NL) $TOML//x; # trim leading whitespace
$str =~ s/\\(?&NL)\s* $TOML//xgs; # trim newlines from lines ending in backslash
} else {
$str = substr($toml, 1, length($toml) - 2);
}
}
+# Checks that the String rule matches each of the four string forms listed
+# below (basic, literal, multi-line, multi-line literal).
+subtest 'string group' => sub{
+ my $re = qr{ ((?&String)) $TOML }x;
+
+ test_simple_matches($re,
+ [q{"A"}, q{"A"}, 'basic string'],
+ [q{'A'}, q{'A'}, 'string literal'],
+ [q{"""A"""}, q{"""A"""}, 'multi-line string'],
+ [q{'''A'''}, q{'''A'''}, 'multi-line string literal'],
+ );
+};
+
subtest 'escaped characters' => sub{
my $re = qr{
((?&EscapeChar))
}x;
test_simple_matches($re,
- [
- qq{"""\nabc"""},
- qq{"""\nabc"""},
- 'simple',
- ],
-
- [
- qq{"""a\n"b"\nc"""},
- qq{"""a\n"b"\nc"""},
- 'individual quotes within ml string',
- ],
-
- [
- qq{"""foo"""bar"""},
- qq{"""foo"""},
- 'invalid: triple-quotes appear within ml string',
- ],
+ [ qq{"""\nabc"""}, qq{"""\nabc"""}, 'simple' ],
+ [ qq{""" " """}, q{""" " """}, 'containing 1 quote' ],
+ [ qq{""" "" """}, q{""" "" """}, 'containing 2 quotes' ],
+ [ qq{"""a\n"b"\nc"""}, qq{"""a\n"b"\nc"""}, 'individual quotes within ml string' ],
+ [ qq{"""foo"""bar"""}, qq{"""foo"""}, 'invalid: triple-quotes appear within ml string' ],
);
};
# Exercises the MultiLineStringLiteral rule. Each row appears to be
# [input, expected match, description] -- confirm against test_simple_matches.
subtest 'multi-line string literals' => sub{
- my $re = qr{
- ((?&MultiLineStringLiteral))
- $TOML
- }x;
+ my $re = qr{ ((?&MultiLineStringLiteral)) $TOML }x;
test_simple_matches($re,
- [
- qq{'''\nabc'''},
- qq{'''\nabc'''},
- 'simple',
- ],
-
- [
- qq{'''foo'''bar'''},
- qq{'''foo'''},
- 'invalid: triple-quotes appear within ml string',
- ],
+ [ qq{'''\nabc'''}, qq{'''\nabc'''}, 'simple' ],
+ [ qq{''' ' '''}, q{''' ' '''}, 'containing 1 single tick' ],
+ [ qq{''' '' '''}, q{''' '' '''}, 'containing 2 single ticks' ],
+ [ qq{'''foo'''bar'''}, qq{'''foo'''}, 'invalid: triple-quotes appear within ml string' ],
);
};