From da714e3cca2344ca407e57d7bb2133b4e19b8241 Mon Sep 17 00:00:00 2001 From: Jeff Ober Date: Fri, 17 Jan 2020 13:45:12 -0500 Subject: [PATCH] Significant speedup by breaking apart primary regex into individual rules --- lib/TOML/Tiny/Grammar.pm | 294 ++++++++++++++++++------------------- lib/TOML/Tiny/Parser.pm | 6 +- lib/TOML/Tiny/Tokenizer.pm | 47 ++---- lib/TOML/Tiny/Util.pm | 8 +- lib/TOML/Tiny/Writer.pm | 2 +- 5 files changed, 171 insertions(+), 186 deletions(-) diff --git a/lib/TOML/Tiny/Grammar.pm b/lib/TOML/Tiny/Grammar.pm index a1966e9..24fb1a9 100644 --- a/lib/TOML/Tiny/Grammar.pm +++ b/lib/TOML/Tiny/Grammar.pm @@ -8,178 +8,178 @@ use v5.18; use parent 'Exporter'; our @EXPORT = qw( - $TOML + $WS + $CRLF + $EOL + $Comment + + $BareKey + $QuotedKey + $SimpleKey + $DottedKey + $Key + + $Boolean + + $Escape + $StringLiteral + $MultiLineStringLiteral + $BasicString + $MultiLineString + $String + + $Date + $Time + $DateTime + + $Hex + $Oct + $Bin + $Dec + $Integer + + $Float ); -our $TOML = qr{ - -(?(DEFINE) - #----------------------------------------------------------------------------- - # Misc - #----------------------------------------------------------------------------- - (? [ \x20 \x09 ]*) # space, tab - (? \x0D? \x0A) # cr? lf - (? (?: \x23 .*)? (?&CRLF)) # crlf or comment -> crlf - - #----------------------------------------------------------------------------- - # Key - #----------------------------------------------------------------------------- - (? (?> [-_a-zA-Z0-9]+ )) - (? (?> (?&BasicString) | (?&StringLiteral))) - (? (?> (?&BareKey) | (?&QuotedKey))) - (? (?> (?&SimpleKey) (?: \x2E (?&SimpleKey) )+)) - (? (?&BareKey) | (?&QuotedKey) | (?&DottedKey)) - - #----------------------------------------------------------------------------- - # Boolean - #----------------------------------------------------------------------------- - (? (?: \b (?:true) | (?:false) \b )) - - #----------------------------------------------------------------------------- - # Integer - #----------------------------------------------------------------------------- - (? [1-9]) - (? [0-9]) - (? [0-9 a-f A-F]) - (? [0-7]) - (? [01]) - - (? [-+]? 0) - (? 0x (?&HexChar) (?> _? (?&HexChar) )*) - (? 0o (?&OctChar) (?> _? (?&OctChar) )*) - (? 0b (?&BinChar) (?> _? (?&BinChar) )*) - (? - (?&Zero) - | (?> [-+]? (?&DecFirstChar) (?> _? (?&DecChar) )* ) - ) - - (? - (?> - (?&Hex) - | (?&Oct) - | (?&Bin) - | (?&Dec) - ) - ) - - #----------------------------------------------------------------------------- - # Float - #----------------------------------------------------------------------------- - (? [eE] (?&Dec)) - (? [-+]? (?> (?:inf) | (?:nan))) - (? [.] (?&DecChar) (?> _? (?&DecChar) )* ) - - (? - (?> - (?&Dec) - - (?> - (?> (?&Fraction) (?&Exponent)? ) - | (?&Exponent) - ) - ) - | (?&SpecialFloat) - ) - - #----------------------------------------------------------------------------- - # String - #----------------------------------------------------------------------------- - (? - \x5C # leading \ - (?> - [\x5C"btnfr] # escapes: \\ \" \b \t \n \f \r - | (?> u [_0-9a-fA-F]{4} ) # unicode (4 bytes) - | (?> U [_0-9a-fA-F]{8} ) # unicode (8 bytes) - ) - ) - - (? - (?> ' [^']* ') # single quoted string (no escaped chars allowed) +our $WS = qr/[\x20\x09]/; # space, tab +our $CRLF = qr/\x0D?\x0A/; # cr? lf +our $Comment = qr/\x23.*/; # #comment +our $EOL = qr/$Comment?$CRLF/; # crlf or comment + crlf + +our $Escape = qr{ + \x5C # leading \ + (?> + [\x5C"btnfr] # escapes: \\ \" \b \t \n \f \r + | (?> u [_0-9a-fA-F]{4}) # unicode (4 bytes) + | (?> U [_0-9a-fA-F]{8}) # unicode (8 bytes) ) +}x; - (? - (?> - ''' # opening triple-quote - (?> - [^'] - | '{1,2} - )*? - ''' # closing triple-quote - ) - ) +our $StringLiteral = qr/'[^']*'/; # single quoted string (no escaped chars allowed) - (? - (?> - " # opening quote - (?> # escape sequences or any char except " or \ - [^"\\] - | (?&EscapeChar) - )* - " # closing quote - ) - ) +our $MultiLineStringLiteral = qr{ + ''' # opening triple-quote + (?> [^'] | '{1,2} )*? + ''' # closing triple-quote +}x; - (? - """ # opening triple-quote - (?> +our $BasicString = qr{ + " # opening quote + (?> # escape sequences or any char except " or \ [^"\\] - | "{1,2} # 1-2 quotation marks - | (?&EscapeChar) # escape - | (?: \\ (?&CRLF)) # backslash-terminated line - )*? - """ # closing triple-quote - ) - - (? - (?&MultiLineString) # multi-line first or first two chars match empty basic string - | (?&BasicString) - | (?&MultiLineStringLiteral) - | (?&StringLiteral) - ) + | $Escape + )* + " # closing quote +}x; - #----------------------------------------------------------------------------- - # Dates (RFC 3339) - # 1985-04-12T23:20:50.52Z - #----------------------------------------------------------------------------- - (? \d{4}-\d{2}-\d{2} ) - (? (?: [-+] \d{2}:\d{2} ) | Z ) - (? \d{2}:\d{2}:\d{2} (?: \. \d+ )? ) - (?