1 package TOML::Tiny::Tokenizer;
2 # ABSTRACT: tokenizer used by TOML::Tiny
6 no warnings qw(experimental);
7 use charnames qw(:full);
10 use TOML::Tiny::Grammar;
13 my ($class, %param) = @_;
16 source => $param{source},
17 last_position => length $param{source},
29 if (@{$self->{tokens}}) {
30 return $self->{tokens}[-1]{type} // 'EOL';
39 return unless defined $self->{source}
40 && $self->{position} < $self->{last_position};
42 if (!@{ $self->{tokens} }) {
43 my $root = {type => 'table', pos => 0, line => 1, value => []};
44 $self->push_token($root);
48 # Update the regex engine's position marker in case some other regex
49 # attempted to match against the source string and reset it.
50 pos($self->{source}) = $self->{position};
56 state $key_set = qr/\G ($Key) $WS* (?= =)/x;
57 state $table = qr/\G \[ $WS* ($Key) $WS* \] $WS* (?:$EOL | $)/x;
58 state $array_table = qr/\G \[\[ $WS* ($Key) $WS* \]\] $WS* (?:$EOL | $)/x;
61 '[' => 'inline_array',
62 ']' => 'inline_array_close',
63 '{' => 'inline_table',
64 '}' => 'inline_table_close',
71 # More complex matches with regexps
72 while ($self->{position} < $self->{last_position} && !defined($type)) {
73 my $prev = $self->prev_token_type;
74 my $newline = !!($prev eq 'EOL' || $prev eq 'table' || $prev eq 'array_table');
76 for ($self->{source}) {
77 /\G$WS+/gc; # ignore whitespace
78 /\G$Comment$/mgc && next; # ignore comments
90 $value = $self->tokenize_key($1);
93 when (/$array_table/gc) {
94 $type = 'array_table';
95 $value = $self->tokenize_key($1);
99 when (/\G ( [\[\]{}=,] | true | false )/xgc) {
101 $type = $simple->{$value};
104 when (/$key_set/gc) {
109 when (/\G($String)/gc) {
114 when (/\G($DateTime)/gc) {
119 when (/\G($Float)/gc) {
124 when (/\G($Integer)/gc) {
130 my $substr = substr($self->{source}, $self->{position}, 30) // 'undef';
131 die "toml syntax error on line $self->{line}\n\t-->|$substr|\n";
137 line => $self->{line},
140 value => $self->can("tokenize_$type") ? $self->can("tokenize_$type")->($self, $value) : $value,
143 $self->push_token($token);
146 $self->update_position;
154 @_ && push @{$self->{tokens}}, @_;
159 pop @{$self->{tokens}};
# Slice out the text from the current position up to (but not including)
# the next newline.
my $rest = substr $self->{source}, $self->{position};
my $stop = index $rest, "\n";
# index() returns -1 when no newline remains. Passing that to substr() as a
# length would silently drop the final character, so hand back the whole
# remainder in that case.
$stop < 0 ? $rest : substr($rest, 0, $stop);
169 sub update_position {
171 $self->{position} = pos($self->{source}) // 0;
177 my $msg = shift // 'unknown';
178 my $line = $token ? $token->{line} : $self->{line};
179 die "toml: parse error at line $line: $msg\n";
187 while ($toml =~ s/^ ($SimpleKey) [.]?//x) {
204 sub tokenize_integer {
209 sub tokenize_string {
# Both delimiters must be anchored at the start of the token. With the
# alternation outside the anchor, a """ appearing anywhere in the token
# (e.g. the literal string 'a"""b') would be mistaken for a multi-line
# string and have three characters stripped from each end.
my $ml = $toml =~ /^(?:'''|""")/;
213 my $lit = $toml =~ /^'/;
217 $str = substr $toml, 3, length($toml) - 6;
218 my @newlines = $str =~ /($CRLF)/g;
219 $self->{line} += scalar @newlines;
220 $str =~ s/^$WS* $EOL//x; # trim leading whitespace
221 $str =~ s/\\$EOL\s*//xgs; # trim newlines from lines ending in backslash
223 $str = substr($toml, 1, length($toml) - 2);
227 $str = $self->unescape_str($str);
245 if (exists $esc->{$_[0]}) {
246 return $esc->{$_[0]};
249 my $hex = hex substr($_[0], 2);
251 if (charnames::viacode($hex)) {
259 state $re = qr/($Escape)/;
260 $_[1] =~ s|$re|unescape_chars($1) // $_[0]->error(undef, "invalid unicode escape: $1")|xge;