chiark / gitweb /
Significant speedup by breaking apart primary regex into individual rules
[nailing-cargo.git] / lib / TOML / Tiny / Grammar.pm
1 package TOML::Tiny::Grammar;
2 # ABSTRACT: exports regex definition of TOML grammar
3
4 use strict;
5 use warnings;
6 use v5.18;
7
8 use parent 'Exporter';
9
# Patterns exported by default, grouped by the part of the grammar they
# describe. Helper patterns defined in this file but not listed here (for
# example $Offset or $Zero) are not exported.
our @EXPORT = (
  # white space and ignorables
  qw($WS $CRLF $EOL $Comment),

  # keys
  qw($BareKey $QuotedKey $SimpleKey $DottedKey $Key),

  # booleans
  qw($Boolean),

  # strings
  qw($Escape $StringLiteral $MultiLineStringLiteral),
  qw($BasicString $MultiLineString $String),

  # dates and times
  qw($Date $Time $DateTime),

  # integers
  qw($Hex $Oct $Bin $Dec $Integer),

  # floats
  qw($Float),
);
43
# One horizontal white space character: space or tab.
our $WS = qr{[\x20\x09]};

# Line terminator: LF, optionally preceded by CR.
our $CRLF = qr{\x0D?\x0A};

# Comment: a '#' and whatever follows it on the same line.
our $Comment = qr{\x23.*};

# End of line: an optional comment followed by the line terminator.
our $EOL = qr{$Comment?$CRLF};
48
# One backslash escape sequence as used inside double-quoted strings.
# The two unicode forms take exactly 4 or 8 hexadecimal digits. Underscores
# are not hexadecimal digits and are rejected here (they are only digit
# separators in numeric literals).
our $Escape = qr{
  \x5C                          # leading backslash
  (?>
      [\x5C"btnfr]              # simple escapes: \\ \" \b \t \n \f \r
    | (?> u [0-9a-fA-F]{4} )    # lower-case u followed by 4 hex digits
    | (?> U [0-9a-fA-F]{8} )    # upper-case U followed by 8 hex digits
  )
}x;
57
# Single-quoted literal string: no escape sequences, so the string simply
# runs to the next apostrophe.
our $StringLiteral = qr{'[^']*'};

# Triple-single-quoted literal string. The body is consumed lazily so the
# match stops at the first closing delimiter; runs of one or two apostrophes
# are allowed inside the body.
our $MultiLineStringLiteral = qr{
  '''                       # opening delimiter
  (?> [^'] | '{1,2} )*?     # body: non-apostrophes, or 1-2 apostrophes
  '''                       # closing delimiter
}x;
65
# Double-quoted basic string: a run of escape sequences and of characters
# other than the double quote and the backslash, between two double quotes.
our $BasicString = qr{
  "                     # opening quote
  (?>
      $Escape           # a backslash escape sequence
    | [^"\\]            # or any character except quote and backslash
  )*
  "                     # closing quote
}x;

# Triple-double-quoted string. The body is consumed lazily so the match ends
# at the first closing delimiter.
our $MultiLineString = qr{
  """                   # opening delimiter
  (?>
      [^"\\]            # ordinary character
    | "{1,2}            # one or two quotation marks
    | $Escape           # backslash escape sequence
    | (?: \\ $CRLF )    # backslash directly followed by a line ending
  )*?
  """                   # closing delimiter
}x;

# Any string form. The multi-line patterns are tried before their single-line
# counterparts because the single-line patterns also match an empty string
# ("" or ''), which is a prefix of the triple-quote delimiters.
our $String = qr{
    $MultiLineString
  | $BasicString
  | $MultiLineStringLiteral
  | $StringLiteral
}x;
87
# Bare key: letters, digits, dashes and underscores.
our $BareKey   = qr/[-_a-zA-Z0-9]+/;
# Quoted key: a basic (double-quoted) or literal (single-quoted) string.
our $QuotedKey = qr/$BasicString|$StringLiteral/;
# Simple key: one bare or quoted key segment.
our $SimpleKey = qr/$BareKey|$QuotedKey/;
# Dotted key: two or more simple keys joined by dots.
our $DottedKey = qr/$SimpleKey(?:\.$SimpleKey)+/;
# Any key. Alternation is ordered, so the dotted form has to be tried first:
# otherwise a match that is not anchored at its end stops after the first
# segment of a dotted key (matching only "a" out of "a.b").
our $Key       = qr/$DottedKey|$SimpleKey/;
93
# Boolean literal: the whole word "true" or "false". Both word-boundary
# assertions sit outside the alternation so that each applies to both words;
# without the shared group, "|" splits the pattern into "\btrue" and
# "false\b", which match inside longer words such as "trueish" or "xfalse".
our $Boolean   = qr/\b(?:true|false)\b/;
95
#-----------------------------------------------------------------------------
# Dates and times (RFC 3339), for example:
#   1985-04-12T23:20:50.52Z
#-----------------------------------------------------------------------------

# Calendar date: yyyy-mm-dd.
our $Date = qr{ \d{4} - \d{2} - \d{2} }x;

# Zone designator: the letter Z, or a signed hh:mm offset.
our $Offset = qr{ Z | (?: [-+] \d{2} : \d{2} ) }x;

# Time of day with optional fractional seconds and optional zone designator.
our $Time = qr{
  \d{2} : \d{2} : \d{2}     # hh:mm:ss
  (?: \. \d+ )?             # optional fraction of a second
  $Offset?                  # optional zone designator
}x;

# A date optionally followed by "T" or a space and a time, or a time alone.
our $DateTime = qr{
    (?> $Date (?> [T ] $Time )? )
  | $Time
}x;
104
#-----------------------------------------------------------------------------
# Integer
#   Hex, octal and binary literals carry a 0x / 0o / 0b prefix. A decimal is
#   either a (possibly signed) zero or an optionally signed number starting
#   with 1-9. A single underscore may sit between two adjacent digits.
#-----------------------------------------------------------------------------
our $DecFirstChar = qr/[1-9]/;
our $DecChar      = qr/[0-9]/;
# No blanks inside the class: this pattern is compiled without /x, and white
# space inside a bracketed class is literal in any case, so a blank here
# would make the space character a hex digit ("0x1 2" would scan as one
# number).
our $HexChar      = qr/[0-9a-fA-F]/;
our $OctChar      = qr/[0-7]/;
our $BinChar      = qr/[01]/;

our $Zero         = qr/[-+]? 0/x;
our $Hex          = qr/0x $HexChar (?> _? $HexChar )*/x;
our $Oct          = qr/0o $OctChar (?> _? $OctChar )*/x;
our $Bin          = qr/0b $BinChar (?> _? $BinChar )*/x;
our $Dec          = qr/$Zero | (?> [-+]? $DecFirstChar (?> _?  $DecChar )* )/x;
# Prefixed forms come first: otherwise $Dec would match just the leading "0"
# of "0x..." when the match is not anchored at its end.
our $Integer      = qr/$Hex | $Oct | $Bin | $Dec/x;
120
#-----------------------------------------------------------------------------
# Float
#   A decimal integer part followed by a fraction, an exponent, or both;
#   or one of the special values inf / nan with an optional sign.
#-----------------------------------------------------------------------------
our $Exponent     = qr/[eE] $Dec/x;
# The sign is factored out of the alternation so that it applies to both
# words. Written as "[-+]? (?:inf) | (?:nan)" the "|" splits the pattern into
# "signed inf" and "bare nan", and the sign of "-nan" / "+nan" is never
# consumed.
our $SpecialFloat = qr/[-+]? (?: inf | nan )/x;
our $Fraction     = qr/\. $DecChar (?> _? $DecChar)*/x;

our $Float = qr{
    (?> $Dec (?> (?> $Fraction $Exponent?) | $Exponent ) )
  | $SpecialFloat
}x;
132
133
134 1;
135
136 =head1 SYNOPSIS
137
138   use TOML::Tiny::Grammar;
139
140   if ($src =~ /($MultiLineString)/) {
141     ...
142   }
143
144 =head1 DESCRIPTION
145
Exports various regexes for parsing TOML source.
147
148 =head1 PATTERNS
149
=head2 White space and ignorables

=head3 $WS

=head3 $CRLF

=head3 $EOL

=head3 $Comment
155
=head2 Keys

=head3 $BareKey

=head3 $QuotedKey

=head3 $SimpleKey

=head3 $DottedKey

=head3 $Key
162
=head2 Values

=head3 $Boolean

=head3 $Escape

=head3 $StringLiteral

=head3 $MultiLineStringLiteral

=head3 $BasicString

=head3 $MultiLineString

=head3 $String

=head3 $Date

=head3 $Time

=head3 $DateTime

=head3 $Hex

=head3 $Oct

=head3 $Bin

=head3 $Dec

=head3 $Integer

=head3 $Float
184
185 =cut