use parent 'Exporter';
our @EXPORT = qw(
- $TOML
+ $WS
+ $CRLF
+ $EOL
+ $Comment
+
+ $BareKey
+ $QuotedKey
+ $SimpleKey
+ $DottedKey
+ $Key
+
+ $Boolean
+
+ $Escape
+ $StringLiteral
+ $MultiLineStringLiteral
+ $BasicString
+ $MultiLineString
+ $String
+
+ $Date
+ $Time
+ $DateTime
+
+ $Hex
+ $Oct
+ $Bin
+ $Dec
+ $Integer
+
+ $Float
);
-our $TOML = qr{
-
-(?(DEFINE)
- #-----------------------------------------------------------------------------
- # Misc
- #-----------------------------------------------------------------------------
- (?<WS> [ \x20 \x09 ]*) # space, tab
- (?<CRLF> \x0D? \x0A) # cr? lf
- (?<EOL> (?: \x23 .*)? (?&CRLF)) # crlf or comment -> crlf
-
- #-----------------------------------------------------------------------------
- # Key
- #-----------------------------------------------------------------------------
- (?<BareKey> (?> [-_a-zA-Z0-9]+ ))
- (?<QuotedKey> (?> (?&BasicString) | (?&StringLiteral)))
- (?<SimpleKey> (?> (?&BareKey) | (?&QuotedKey)))
- (?<DottedKey> (?> (?&SimpleKey) (?: \x2E (?&SimpleKey) )+))
- (?<Key> (?&BareKey) | (?&QuotedKey) | (?&DottedKey))
-
- #-----------------------------------------------------------------------------
- # Boolean
- #-----------------------------------------------------------------------------
- (?<Boolean> (?: \b (?:true) | (?:false) \b ))
-
- #-----------------------------------------------------------------------------
- # Integer
- #-----------------------------------------------------------------------------
- (?<DecFirstChar> [1-9])
- (?<DecChar> [0-9])
- (?<HexChar> [0-9 a-f A-F])
- (?<OctChar> [0-7])
- (?<BinChar> [01])
-
- (?<Zero> [-+]? 0)
- (?<Hex> 0x (?&HexChar) (?> _? (?&HexChar) )*)
- (?<Oct> 0o (?&OctChar) (?> _? (?&OctChar) )*)
- (?<Bin> 0b (?&BinChar) (?> _? (?&BinChar) )*)
- (?<Dec>
- (?&Zero)
- | (?> [-+]? (?&DecFirstChar) (?> _? (?&DecChar) )* )
- )
-
- (?<Integer>
- (?>
- (?&Hex)
- | (?&Oct)
- | (?&Bin)
- | (?&Dec)
- )
- )
-
- #-----------------------------------------------------------------------------
- # Float
- #-----------------------------------------------------------------------------
- (?<Exponent> [eE] (?&Dec))
- (?<SpecialFloat> [-+]? (?> (?:inf) | (?:nan)))
- (?<Fraction> [.] (?&DecChar) (?> _? (?&DecChar) )* )
-
- (?<Float>
- (?>
- (?&Dec)
-
- (?>
- (?> (?&Fraction) (?&Exponent)? )
- | (?&Exponent)
- )
- )
- | (?&SpecialFloat)
- )
-
- #-----------------------------------------------------------------------------
- # String
- #-----------------------------------------------------------------------------
- (?<EscapeChar>
- \x5C # leading \
- (?>
- [\x5C"btnfr] # escapes: \\ \" \b \t \n \f \r
- | (?> u [_0-9a-fA-F]{4} ) # unicode (4 bytes)
- | (?> U [_0-9a-fA-F]{8} ) # unicode (8 bytes)
- )
- )
-
- (?<StringLiteral>
- (?> ' [^']* ') # single quoted string (no escaped chars allowed)
+our $WS = qr/[\x20\x09]/; # space, tab
+our $CRLF = qr/\x0D?\x0A/; # cr? lf
+our $Comment = qr/\x23.*/; # #comment
+our $EOL = qr/$Comment?$CRLF/; # crlf or comment + crlf
+
+our $Escape = qr{
+ \x5C # leading \
+ (?>
+ [\x5C"btnfr] # escapes: \\ \" \b \t \n \f \r
+ | (?> u [_0-9a-fA-F]{4}) # unicode (4 bytes)
+ | (?> U [_0-9a-fA-F]{8}) # unicode (8 bytes)
)
+}x;
- (?<MultiLineStringLiteral>
- (?>
- ''' # opening triple-quote
- (?>
- [^']
- | '{1,2}
- )*?
- ''' # closing triple-quote
- )
- )
+our $StringLiteral = qr/'[^']*'/; # single quoted string (no escaped chars allowed)
- (?<BasicString>
- (?>
- " # opening quote
- (?> # escape sequences or any char except " or \
- [^"\\]
- | (?&EscapeChar)
- )*
- " # closing quote
- )
- )
+our $MultiLineStringLiteral = qr{
+ ''' # opening triple-quote
+ (?> [^'] | '{1,2} )*?
+ ''' # closing triple-quote
+}x;
- (?<MultiLineString>
- """ # opening triple-quote
- (?>
+our $BasicString = qr{
+ " # opening quote
+ (?> # escape sequences or any char except " or \
[^"\\]
- | "{1,2} # 1-2 quotation marks
- | (?&EscapeChar) # escape
- | (?: \\ (?&CRLF)) # backslash-terminated line
- )*?
- """ # closing triple-quote
- )
-
- (?<String>
- (?&MultiLineString) # multi-line first or first two chars match empty basic string
- | (?&BasicString)
- | (?&MultiLineStringLiteral)
- | (?&StringLiteral)
- )
+ | $Escape
+ )*
+ " # closing quote
+}x;
- #-----------------------------------------------------------------------------
- # Dates (RFC 3339)
- # 1985-04-12T23:20:50.52Z
- #-----------------------------------------------------------------------------
- (?<Date> \d{4}-\d{2}-\d{2} )
- (?<Offset> (?: [-+] \d{2}:\d{2} ) | Z )
- (?<SimpleTime> \d{2}:\d{2}:\d{2} (?: \. \d+ )? )
- (?<Time> (?&SimpleTime) (?&Offset)? )
- (?<DateTime> (?> (?&Date) (?> [T ] (?&Time) )? ) | (?&Time) )
-)
+our $MultiLineString = qr{
+ """ # opening triple-quote
+ (?>
+ [^"\\]
+ | "{1,2} # 1-2 quotation marks
+ | $Escape # escape
+ | (?: \\ $CRLF) # backslash-terminated line
+ )*?
+ """ # closing triple-quote
+}x;
+our $String = qr/$MultiLineString | $BasicString | $MultiLineStringLiteral | $StringLiteral/x;
+
+our $BareKey = qr/[-_a-zA-Z0-9]+/;
+our $QuotedKey = qr/$BasicString|$StringLiteral/;
+our $SimpleKey = qr/$BareKey|$QuotedKey/;
+our $DottedKey = qr/$SimpleKey(?:\.$SimpleKey)+/;
+our $Key = qr/$BareKey|$QuotedKey|$DottedKey/;
+
+our $Boolean = qr/\b(?:true|false)\b/; # true or false as a whole word; both alternatives share both word boundaries
+
+#-----------------------------------------------------------------------------
+# Dates (RFC 3339)
+# 1985-04-12T23:20:50.52Z
+#-----------------------------------------------------------------------------
+our $Date = qr/\d{4}-\d{2}-\d{2}/;
+our $Offset = qr/(?: [-+] \d{2}:\d{2} ) | Z/x;
+our $Time = qr/\d{2}:\d{2}:\d{2} (?: \. \d+)? $Offset?/x;
+our $DateTime = qr/(?> $Date (?> [T ] $Time )?) | $Time/x;
+
+#-----------------------------------------------------------------------------
+# Integer
+#-----------------------------------------------------------------------------
+our $DecFirstChar = qr/[1-9]/;
+our $DecChar = qr/[0-9]/;
+our $HexChar = qr/[0-9a-fA-F]/; # one hex digit; no spaces inside the class (they are literal there, even under /x)
+our $OctChar = qr/[0-7]/;
+our $BinChar = qr/[01]/;
+
+our $Zero = qr/[-+]? 0/x;
+our $Hex = qr/0x $HexChar (?> _? $HexChar )*/x;
+our $Oct = qr/0o $OctChar (?> _? $OctChar )*/x;
+our $Bin = qr/0b $BinChar (?> _? $BinChar )*/x;
+our $Dec = qr/$Zero | (?> [-+]? $DecFirstChar (?> _? $DecChar )* )/x;
+our $Integer = qr/$Hex | $Oct | $Bin | $Dec/x;
+
+#-----------------------------------------------------------------------------
+# Float
+#-----------------------------------------------------------------------------
+our $Exponent = qr/[eE] $Dec/x;
+our $SpecialFloat = qr/[-+]? (?: inf | nan )/x; # optional sign applies to both inf and nan
+our $Fraction = qr/\. $DecChar (?> _? $DecChar)*/x;
+
+our $Float = qr{
+ (?> $Dec (?> (?> $Fraction $Exponent?) | $Exponent ) )
+ | $SpecialFloat
}x;
+
1;
=head1 SYNOPSIS
use TOML::Tiny::Grammar;
- if ($src =~ /(?&MultiLineString) $TOML/x) {
+ if ($src =~ /($MultiLineString)/) {
...
}
=head1 DESCRIPTION
-Exports C<$TOML>, a regex grammar for parsing TOML source.
+Exports various regexes for parsing TOML source.
+
+=head1 PATTERNS
-=head1 RULES
+=head2 White space and ignorables
+=head3 $WS
+=head3 $CRLF
+=head3 $EOL
+=head3 $Comment
-=head2 White space
-=head3 (?&WS)
-=head3 (?&EOL)
+=head2 Keys
+=head3 $BareKey
+=head3 $QuotedKey
+=head3 $SimpleKey
+=head3 $DottedKey
+=head3 $Key
=head2 Values
-=head3 (?&Boolean)
-=head3 (?&DateTime)
-=head3 (?&Float)
-=head3 (?&Integer)
-=head3 (?&String)
-
-=head2 (?&Key)
-=head3 (?&BareKey)
-=head3 (?&QuotedKey)
-=head3 (?&DottedKey)
+=head3 $Boolean
+
+=head3 $Escape
+=head3 $StringLiteral
+=head3 $MultiLineStringLiteral
+=head3 $BasicString
+=head3 $MultiLineString
+=head3 $String
+
+=head3 $Date
+=head3 $Time
+=head3 $DateTime
+
+=head3 $Hex
+=head3 $Oct
+=head3 $Bin
+=head3 $Dec
+=head3 $Integer
+
+=head3 $Float
=cut
my $type;
my $value;
- state $key = qr/(?&Key) $TOML/x;
- state $key_set = qr/\G ($key) [\x20 \x09]* (?= =)/x;
- state $table = qr/\G \[ [\x20 \x09]* ($key) [\x20 \x09]* \] [\x20 \x09]* (?= (:? \x23 .* )? (?: \x0D? \x0A) | $ )/x;
- state $array_table = qr/\G \[\[ [\x20 \x09]* ($key) [\x20 \x09]* \]\] [\x20 \x09]* (?= (:? \x23 .* )? (?: \x0D? \x0A) | $ )/x;
- state $string = qr/\G ((?&String)) $TOML/x;
- state $datetime = qr/\G ((?&DateTime)) $TOML/x;
- state $float = qr/\G ((?&Float)) $TOML/x;
- state $integer = qr/\G ((?&Integer)) $TOML/x;
+ state $key_set = qr/\G ($Key) $WS* (?= =)/x;
+ state $table = qr/\G \[ $WS* ($Key) $WS* \] $WS* (?:$EOL | $)/x;
+ state $array_table = qr/\G \[\[ $WS* ($Key) $WS* \]\] $WS* (?:$EOL | $)/x;
state $simple = {
'[' => 'inline_array',
my $newline = !!($prev eq 'EOL' || $prev eq 'table' || $prev eq 'array_table');
for ($self->{source}) {
- /\G[\x20\x09]+/gc; # ignore whitespace
- /\G\x23.*$/mgc && next; # ignore comments
+ /\G$WS+/gc; # ignore whitespace
+ /\G$Comment$/mgc && next; # ignore comments
- last when /\G $/xgc;
+ last when /\G$/gc;
- when (/\G \x0D? \x0A/xgc) {
+ when (/\G$EOL/gc) {
++$self->{line};
$type = 'EOL';
}
$value = $1;
}
- when (/$string/xgc) {
+ when (/\G($String)/gc) {
$type = 'string';
$value = $1;
}
- when (/$datetime/xgc) {
+ when (/\G($DateTime)/gc) {
$type = 'datetime';
$value = $1;
}
- when (/$float/xgc) {
+ when (/\G($Float)/gc) {
$type = 'float';
$value = $1;
}
- when (/$integer/xgc) {
+ when (/\G($Integer)/gc) {
$type = 'integer';
$value = $1;
}
pop @{$self->{tokens}};
}
-sub _make_token {
- my ($self, $type, $value) = @_;
- return {
- type => $type,
- line => $self->{line},
- pos => $self->{position},
- value => $self->can("tokenize_$type") ? $self->can("tokenize_$type")->($self, $value) : $value,
- };
-}
-
sub current_line {
my $self = shift;
my $rest = substr $self->{source}, $self->{position};
my $toml = shift;
my @keys;
- while ($toml =~ s/^ ((?&SimpleKey)) [.]? $TOML//x) {
+ while ($toml =~ s/^ ($SimpleKey) [.]?//x) {
push @keys, $1;
}
if ($ml) {
$str = substr $toml, 3, length($toml) - 6;
- my @newlines = $str =~ /(\x0D?\x0A)/g;
+ my @newlines = $str =~ /($CRLF)/g;
$self->{line} += scalar @newlines;
- $str =~ s/^[\x20 \x09]* (?&EOL) $TOML//x; # trim leading whitespace
- $str =~ s/\\(?&EOL)\s* $TOML//xgs; # trim newlines from lines ending in backslash
+ $str =~ s/^$WS* $EOL//x; # trim leading whitespace
+ $str =~ s/\\$EOL\s*//xgs; # trim newlines from lines ending in backslash
} else {
$str = substr($toml, 1, length($toml) - 2);
}
}
sub unescape_str {
- state $re = qr/((?&EscapeChar)) $TOML/x;
+ state $re = qr/($Escape)/;
$_[1] =~ s|$re|unescape_chars($1) // $_[0]->error(undef, "invalid unicode escape: $1")|xge;
$_[1];
}