1 package TOML::Tiny::Tokenizer;
2 # ABSTRACT: tokenizer used by TOML::Tiny
6 no warnings qw(experimental);
7 use charnames qw(:full);
10 use TOML::Tiny::Grammar;
13 my ($class, %param) = @_;
16 source => $param{source},
17 last_position => length $param{source},
29 return unless defined $self->{source}
30 && $self->{position} < $self->{last_position};
32 if (!$self->{last_token}) {
33 return $self->{last_token} = {type => 'table', pos => 0, line => 1, value => []};
36 # Update the regex engine's position marker in case some other regex
37 # attempted to match against the source string and reset it.
38 pos($self->{source}) = $self->{position};
44 state $key_set = qr/\G ($Key) $WS* (?= =)/x;
45 state $table = qr/\G \[ $WS* ($Key) $WS* \] $WS* (?:$EOL | $)/x;
46 state $array_table = qr/\G \[\[ $WS* ($Key) $WS* \]\] $WS* (?:$EOL | $)/x;
49 '[' => 'inline_array',
50 ']' => 'inline_array_close',
51 '{' => 'inline_table',
52 '}' => 'inline_table_close',
59 # More complex matches with regexps
60 while ($self->{position} < $self->{last_position} && !defined($type)) {
61 my $prev = $self->{last_token} ? $self->{last_token}{type} : 'EOL';
62 my $newline = !!($prev eq 'EOL' || $prev eq 'table' || $prev eq 'array_table');
64 for ($self->{source}) {
65 /\G$WS+/gc; # ignore whitespace
66 /\G$Comment$/mgc && next; # ignore comments
78 $value = $self->tokenize_key($1);
81 when (/$array_table/gc) {
82 $type = 'array_table';
83 $value = $self->tokenize_key($1);
87 when (/\G ( [\[\]{}=,] | true | false )/xgc) {
89 $type = $simple->{$value};
97 when (/\G($String)/gc) {
102 when (/\G($DateTime)/gc) {
107 when (/\G($Float)/gc) {
112 when (/\G($Integer)/gc) {
118 my $substr = substr($self->{source}, $self->{position}, 30) // 'undef';
119 die "toml syntax error on line $self->{line}\n\t-->|$substr|\n";
124 $token = $self->{last_token} = {
125 line => $self->{line},
128 value => $self->can("tokenize_$type") ? $self->can("tokenize_$type")->($self, $value) : $value,
132 $self->update_position;
140 my $rest = substr $self->{source}, $self->{position};
141 my $stop = index $rest, "\n";
142 substr $rest, 0, $stop;
145 sub update_position {
147 $self->{position} = pos($self->{source}) // 0;
153 my $msg = shift // 'unknown';
154 my $line = $token ? $token->{line} : $self->{line};
155 die "toml: parse error at line $line: $msg\n";
163 while ($toml =~ s/^ ($SimpleKey) [.]?//x) {
180 sub tokenize_integer {
185 sub tokenize_string {
188 my $ml = $toml =~ /^(?:''')|(?:""")/;
189 my $lit = $toml =~ /^'/;
193 $str = substr $toml, 3, length($toml) - 6;
194 my @newlines = $str =~ /($CRLF)/g;
195 $self->{line} += scalar @newlines;
196 $str =~ s/^$WS* $EOL//x; # trim leading whitespace
197 $str =~ s/\\$EOL\s*//xgs; # trim newlines from lines ending in backslash
199 $str = substr($toml, 1, length($toml) - 2);
203 $str = $self->unescape_str($str);
221 if (exists $esc->{$_[0]}) {
222 return $esc->{$_[0]};
225 my $hex = hex substr($_[0], 2);
227 if (charnames::viacode($hex)) {
235 state $re = qr/($Escape)/;
236 $_[1] =~ s|$re|unescape_chars($1) // $_[0]->error(undef, "invalid unicode escape: $1")|xge;