toml_parser/decoder/
string.rs

1use core::ops::RangeInclusive;
2
3use winnow::stream::ContainsToken as _;
4use winnow::stream::Offset as _;
5use winnow::stream::Stream as _;
6
7use crate::decoder::StringBuilder;
8use crate::lexer::APOSTROPHE;
9use crate::lexer::ML_BASIC_STRING_DELIM;
10use crate::lexer::ML_LITERAL_STRING_DELIM;
11use crate::lexer::QUOTATION_MARK;
12use crate::lexer::WSCHAR;
13use crate::ErrorSink;
14use crate::Expected;
15use crate::ParseError;
16use crate::Raw;
17use crate::Span;
18
/// Error description reported through the `ErrorSink` when a `StringBuilder` push returns
/// `false`.
const ALLOCATION_ERROR: &str = "could not allocate for string";
20
21/// Parse literal string
22///
23/// ```abnf
24/// ;; Literal String
25///
26/// literal-string = apostrophe *literal-char apostrophe
27///
28/// apostrophe = %x27 ; ' apostrophe
29///
30/// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
31/// ```
32pub(crate) fn decode_literal_string<'i>(
33    raw: Raw<'i>,
34    output: &mut dyn StringBuilder<'i>,
35    error: &mut dyn ErrorSink,
36) {
37    const INVALID_STRING: &str = "invalid literal string";
38
39    output.clear();
40
41    let s = raw.as_str();
42    let s = if let Some(stripped) = s.strip_prefix(APOSTROPHE as char) {
43        stripped
44    } else {
45        error.report_error(
46            ParseError::new(INVALID_STRING)
47                .with_context(Span::new_unchecked(0, raw.len()))
48                .with_expected(&[Expected::Literal("'")])
49                .with_unexpected(Span::new_unchecked(0, 0)),
50        );
51        s
52    };
53    let s = if let Some(stripped) = s.strip_suffix(APOSTROPHE as char) {
54        stripped
55    } else {
56        error.report_error(
57            ParseError::new(INVALID_STRING)
58                .with_context(Span::new_unchecked(0, raw.len()))
59                .with_expected(&[Expected::Literal("'")])
60                .with_unexpected(Span::new_unchecked(raw.len(), raw.len())),
61        );
62        s
63    };
64
65    for (i, b) in s.as_bytes().iter().enumerate() {
66        if !LITERAL_CHAR.contains_token(b) {
67            let offset = (&s.as_bytes()[i..]).offset_from(&raw.as_bytes());
68            error.report_error(
69                ParseError::new(INVALID_STRING)
70                    .with_context(Span::new_unchecked(0, raw.len()))
71                    .with_expected(&[Expected::Description("non-single-quote visible characters")])
72                    .with_unexpected(Span::new_unchecked(offset, offset)),
73            );
74        }
75    }
76
77    if !output.push_str(s) {
78        error.report_error(
79            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
80        );
81    }
82}
83
84/// ```abnf
85/// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
86/// ```
87const LITERAL_CHAR: (
88    u8,
89    RangeInclusive<u8>,
90    RangeInclusive<u8>,
91    RangeInclusive<u8>,
92) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII);
93
/// Every byte with the high bit set, i.e. any byte of a multi-byte UTF-8 sequence
///
/// ```abnf
/// non-ascii = %x80-D7FF / %xE000-10FFFF
/// ```
/// - ASCII is 0xxxxxxx
/// - First byte for UTF-8 is 11xxxxxx
/// - Subsequent UTF-8 bytes are 10xxxxxx
const NON_ASCII: RangeInclusive<u8> = 0x80..=u8::MAX;
101
102/// Parse multi-line literal string
103///
104/// ```abnf
105/// ;; Multiline Literal String
106///
107/// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
108///                     ml-literal-string-delim
109/// ml-literal-string-delim = 3apostrophe
110/// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
111///
112/// mll-content = literal-char / newline
113/// mll-quotes = 1*2apostrophe
114/// ```
115pub(crate) fn decode_ml_literal_string<'i>(
116    raw: Raw<'i>,
117    output: &mut dyn StringBuilder<'i>,
118    error: &mut dyn ErrorSink,
119) {
120    const INVALID_STRING: &str = "invalid multi-line literal string";
121    output.clear();
122
123    let s = raw.as_str();
124    let s = if let Some(stripped) = s.strip_prefix(ML_LITERAL_STRING_DELIM) {
125        stripped
126    } else {
127        error.report_error(
128            ParseError::new(INVALID_STRING)
129                .with_context(Span::new_unchecked(0, raw.len()))
130                .with_expected(&[Expected::Literal("'")])
131                .with_unexpected(Span::new_unchecked(0, 0)),
132        );
133        s
134    };
135    let s = strip_start_newline(s);
136    let s = if let Some(stripped) = s.strip_suffix(ML_LITERAL_STRING_DELIM) {
137        stripped
138    } else {
139        error.report_error(
140            ParseError::new(INVALID_STRING)
141                .with_context(Span::new_unchecked(0, raw.len()))
142                .with_expected(&[Expected::Literal("'")])
143                .with_unexpected(Span::new_unchecked(raw.len(), raw.len())),
144        );
145        s.trim_end_matches('\'')
146    };
147
148    for (i, b) in s.as_bytes().iter().enumerate() {
149        if *b == b'\'' || *b == b'\n' {
150        } else if *b == b'\r' {
151            if s.as_bytes().get(i + 1) != Some(&b'\n') {
152                let offset = (&s.as_bytes()[i + 1..]).offset_from(&raw.as_bytes());
153                error.report_error(
154                    ParseError::new("carriage return must be followed by newline")
155                        .with_context(Span::new_unchecked(0, raw.len()))
156                        .with_expected(&[Expected::Literal("\n")])
157                        .with_unexpected(Span::new_unchecked(offset, offset)),
158                );
159            }
160        } else if !LITERAL_CHAR.contains_token(b) {
161            let offset = (&s.as_bytes()[i..]).offset_from(&raw.as_bytes());
162            error.report_error(
163                ParseError::new(INVALID_STRING)
164                    .with_context(Span::new_unchecked(0, raw.len()))
165                    .with_expected(&[Expected::Description("non-single-quote characters")])
166                    .with_unexpected(Span::new_unchecked(offset, offset)),
167            );
168        }
169    }
170
171    if !output.push_str(s) {
172        error.report_error(
173            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
174        );
175    }
176}
177
178/// Parse basic string
179///
180/// ```abnf
181/// ;; Basic String
182///
183/// basic-string = quotation-mark *basic-char quotation-mark
184///
185/// basic-char = basic-unescaped / escaped
186///
187/// escaped = escape escape-seq-char
188/// ```
189pub(crate) fn decode_basic_string<'i>(
190    raw: Raw<'i>,
191    output: &mut dyn StringBuilder<'i>,
192    error: &mut dyn ErrorSink,
193) {
194    const INVALID_STRING: &str = "invalid basic string";
195    output.clear();
196
197    let s = raw.as_str();
198    let s = if let Some(stripped) = s.strip_prefix(QUOTATION_MARK as char) {
199        stripped
200    } else {
201        error.report_error(
202            ParseError::new(INVALID_STRING)
203                .with_context(Span::new_unchecked(0, raw.len()))
204                .with_expected(&[Expected::Literal("\"")])
205                .with_unexpected(Span::new_unchecked(0, 0)),
206        );
207        s
208    };
209    let mut s = if let Some(stripped) = s.strip_suffix(QUOTATION_MARK as char) {
210        stripped
211    } else {
212        error.report_error(
213            ParseError::new(INVALID_STRING)
214                .with_context(Span::new_unchecked(0, raw.len()))
215                .with_expected(&[Expected::Literal("\"")])
216                .with_unexpected(Span::new_unchecked(raw.len(), raw.len())),
217        );
218        s
219    };
220
221    let segment = basic_unescaped(&mut s);
222    if !output.push_str(segment) {
223        error.report_error(
224            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
225        );
226    }
227    while !s.is_empty() {
228        if s.starts_with("\\") {
229            let _ = s.next_token();
230
231            let c = escape_seq_char(&mut s, raw, error);
232            if !output.push_char(c) {
233                error.report_error(
234                    ParseError::new(ALLOCATION_ERROR)
235                        .with_unexpected(Span::new_unchecked(0, raw.len())),
236                );
237            }
238        } else {
239            let invalid = basic_invalid(&mut s);
240            let start = invalid.offset_from(&raw.as_str());
241            let end = start + invalid.len();
242            error.report_error(
243                ParseError::new(INVALID_STRING)
244                    .with_context(Span::new_unchecked(0, raw.len()))
245                    .with_expected(&[
246                        Expected::Description("non-double-quote visible characters"),
247                        Expected::Literal("\\"),
248                    ])
249                    .with_unexpected(Span::new_unchecked(start, end)),
250            );
251            let _ = output.push_str(invalid);
252        }
253
254        let segment = basic_unescaped(&mut s);
255        if !output.push_str(segment) {
256            let start = segment.offset_from(&raw.as_str());
257            let end = start + segment.len();
258            error.report_error(
259                ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(start, end)),
260            );
261        }
262    }
263}
264
265/// ```abnf
266/// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
267/// ```
268fn basic_unescaped<'i>(stream: &mut &'i str) -> &'i str {
269    let offset = stream
270        .as_bytes()
271        .offset_for(|b| !BASIC_UNESCAPED.contains_token(b))
272        .unwrap_or(stream.len());
273    #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary
274    unsafe {
275        stream.next_slice_unchecked(offset)
276    }
277    #[cfg(not(feature = "unsafe"))]
278    stream.next_slice(offset)
279}
280
281fn basic_invalid<'i>(stream: &mut &'i str) -> &'i str {
282    let offset = stream
283        .as_bytes()
284        .offset_for(|b| (BASIC_UNESCAPED, ESCAPE).contains_token(b))
285        .unwrap_or(stream.len());
286    #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary
287    unsafe {
288        stream.next_slice_unchecked(offset)
289    }
290    #[cfg(not(feature = "unsafe"))]
291    stream.next_slice(offset)
292}
293
294/// ```abnf
295/// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
296/// ```
297#[allow(clippy::type_complexity)]
298const BASIC_UNESCAPED: (
299    (u8, u8),
300    u8,
301    RangeInclusive<u8>,
302    RangeInclusive<u8>,
303    RangeInclusive<u8>,
304) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII);
305
/// The backslash that introduces an escape sequence
///
/// ```abnf
/// escape = %x5C                    ; \
/// ```
const ESCAPE: u8 = 0x5C;
310
311/// ```abnf
312/// escape-seq-char =  %x22         ; "    quotation mark  U+0022
313/// escape-seq-char =/ %x5C         ; \    reverse solidus U+005C
314/// escape-seq-char =/ %x62         ; b    backspace       U+0008
315/// escape-seq-char =/ %x65         ; e    escape          U+001B
316/// escape-seq-char =/ %x66         ; f    form feed       U+000C
317/// escape-seq-char =/ %x6E         ; n    line feed       U+000A
318/// escape-seq-char =/ %x72         ; r    carriage return U+000D
319/// escape-seq-char =/ %x74         ; t    tab             U+0009
320/// escape-seq-char =/ %x78 2HEXDIG ; xHH                  U+00HH
321/// escape-seq-char =/ %x75 4HEXDIG ; uHHHH                U+HHHH
322/// escape-seq-char =/ %x55 8HEXDIG ; UHHHHHHHH            U+HHHHHHHH
323/// ```
324fn escape_seq_char(stream: &mut &str, raw: Raw<'_>, error: &mut dyn ErrorSink) -> char {
325    const EXPECTED_ESCAPES: &[Expected] = &[
326        Expected::Literal("b"),
327        Expected::Literal("e"),
328        Expected::Literal("f"),
329        Expected::Literal("n"),
330        Expected::Literal("r"),
331        Expected::Literal("\\"),
332        Expected::Literal("\""),
333        Expected::Literal("x"),
334        Expected::Literal("u"),
335        Expected::Literal("U"),
336    ];
337
338    let start = stream.checkpoint();
339    let Some(id) = stream.next_token() else {
340        let offset = stream.offset_from(&raw.as_str());
341        error.report_error(
342            ParseError::new("missing escaped value")
343                .with_context(Span::new_unchecked(0, raw.len()))
344                .with_expected(EXPECTED_ESCAPES)
345                .with_unexpected(Span::new_unchecked(offset, offset)),
346        );
347        return '\\';
348    };
349    match id {
350        'b' => '\u{8}',
351        'e' => '\u{1b}',
352        'f' => '\u{c}',
353        'n' => '\n',
354        'r' => '\r',
355        't' => '\t',
356        'x' => hexescape(stream, 2, raw, error),
357        'u' => hexescape(stream, 4, raw, error),
358        'U' => hexescape(stream, 8, raw, error),
359        '\\' => '\\',
360        '"' => '"',
361        _ => {
362            stream.reset(&start);
363            let offset = stream.offset_from(&raw.as_str());
364            error.report_error(
365                ParseError::new("missing escaped value")
366                    .with_context(Span::new_unchecked(0, raw.len()))
367                    .with_expected(EXPECTED_ESCAPES)
368                    .with_unexpected(Span::new_unchecked(offset, offset)),
369            );
370            '\\'
371        }
372    }
373}
374
375fn hexescape(
376    stream: &mut &str,
377    num_digits: usize,
378    raw: Raw<'_>,
379    error: &mut dyn ErrorSink,
380) -> char {
381    let offset = stream
382        .as_bytes()
383        .offset_for(|b| !HEXDIG.contains_token(b))
384        .unwrap_or_else(|| stream.eof_offset())
385        .min(num_digits);
386    #[cfg(feature = "unsafe")] // SAFETY: HEXDIG ensure `offset` is along UTF-8 boundary
387    let value = unsafe { stream.next_slice_unchecked(offset) };
388    #[cfg(not(feature = "unsafe"))]
389    let value = stream.next_slice(offset);
390
391    if value.len() != num_digits {
392        let offset = stream.offset_from(&raw.as_str());
393        error.report_error(
394            ParseError::new("too few unicode value digits")
395                .with_context(Span::new_unchecked(0, raw.len()))
396                .with_expected(&[Expected::Description("unicode hexadecimal value")])
397                .with_unexpected(Span::new_unchecked(offset, offset)),
398        );
399        return '�';
400    }
401
402    let Some(value) = u32::from_str_radix(value, 16).ok().and_then(char::from_u32) else {
403        let offset = value.offset_from(&raw.as_str());
404        error.report_error(
405            ParseError::new("invalid value")
406                .with_context(Span::new_unchecked(0, raw.len()))
407                .with_expected(&[Expected::Description("unicode hexadecimal value")])
408                .with_unexpected(Span::new_unchecked(offset, offset)),
409        );
410        return '�';
411    };
412
413    value
414}
415
416/// ```abnf
417/// HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
418/// ```
419const HEXDIG: (RangeInclusive<u8>, RangeInclusive<u8>, RangeInclusive<u8>) =
420    (DIGIT, b'A'..=b'F', b'a'..=b'f');
421
/// ASCII decimal digits
///
/// ```abnf
/// DIGIT = %x30-39 ; 0-9
/// ```
const DIGIT: RangeInclusive<u8> = 0x30..=0x39;
426
/// Remove a single leading newline (`\n` or `\r\n`) from `s`, if present
///
/// A lone `\r` is not a newline and is left in place.
fn strip_start_newline(s: &str) -> &str {
    for newline in ["\n", "\r\n"] {
        if let Some(rest) = s.strip_prefix(newline) {
            return rest;
        }
    }
    s
}
432
433/// Parse multi-line basic string
434///
435/// ```abnf
436/// ;; Multiline Basic String
437///
438/// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
439///                   ml-basic-string-delim
440/// ml-basic-string-delim = 3quotation-mark
441/// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
442///
443/// mlb-content = basic-char / newline / mlb-escaped-nl
444/// mlb-quotes = 1*2quotation-mark
445/// ```
446pub(crate) fn decode_ml_basic_string<'i>(
447    raw: Raw<'i>,
448    output: &mut dyn StringBuilder<'i>,
449    error: &mut dyn ErrorSink,
450) {
451    const INVALID_STRING: &str = "invalid multi-line basic string";
452
453    let s = raw.as_str();
454    let s = if let Some(stripped) = s.strip_prefix(ML_BASIC_STRING_DELIM) {
455        stripped
456    } else {
457        error.report_error(
458            ParseError::new(INVALID_STRING)
459                .with_context(Span::new_unchecked(0, raw.len()))
460                .with_expected(&[Expected::Literal("\"")])
461                .with_unexpected(Span::new_unchecked(0, 0)),
462        );
463        s
464    };
465    let s = strip_start_newline(s);
466    let mut s = if let Some(stripped) = s.strip_suffix(ML_BASIC_STRING_DELIM) {
467        stripped
468    } else {
469        error.report_error(
470            ParseError::new(INVALID_STRING)
471                .with_context(Span::new_unchecked(0, raw.len()))
472                .with_expected(&[Expected::Literal("\"")])
473                .with_unexpected(Span::new_unchecked(raw.len(), raw.len())),
474        );
475        s
476    };
477
478    let segment = mlb_unescaped(&mut s);
479    if !output.push_str(segment) {
480        error.report_error(
481            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
482        );
483    }
484    while !s.is_empty() {
485        if s.starts_with("\\") {
486            let _ = s.next_token();
487
488            if s.as_bytes()
489                .first()
490                .map(|b| (WSCHAR, b'\r', b'\n').contains_token(b))
491                .unwrap_or(false)
492            {
493                mlb_escaped_nl(&mut s, raw, error);
494            } else {
495                let c = escape_seq_char(&mut s, raw, error);
496                if !output.push_char(c) {
497                    error.report_error(
498                        ParseError::new(ALLOCATION_ERROR)
499                            .with_unexpected(Span::new_unchecked(0, raw.len())),
500                    );
501                }
502            }
503        } else if s.starts_with("\r") {
504            let offset = if s.starts_with("\r\n") {
505                "\r\n".len()
506            } else {
507                let start = s.offset_from(&raw.as_str()) + 1;
508                error.report_error(
509                    ParseError::new("carriage return must be followed by newline")
510                        .with_context(Span::new_unchecked(0, raw.len()))
511                        .with_expected(&[Expected::Literal("\n")])
512                        .with_unexpected(Span::new_unchecked(start, start)),
513                );
514                "\r".len()
515            };
516            #[cfg(feature = "unsafe")]
517            // SAFETY: Newlines ensure `offset` is along UTF-8 boundary
518            let newline = unsafe { s.next_slice_unchecked(offset) };
519            #[cfg(not(feature = "unsafe"))]
520            let newline = s.next_slice(offset);
521            if !output.push_str(newline) {
522                let start = newline.offset_from(&raw.as_str());
523                let end = start + newline.len();
524                error.report_error(
525                    ParseError::new(ALLOCATION_ERROR)
526                        .with_unexpected(Span::new_unchecked(start, end)),
527                );
528            }
529        } else {
530            let invalid = mlb_invalid(&mut s);
531            let start = invalid.offset_from(&raw.as_str());
532            let end = start + invalid.len();
533            error.report_error(
534                ParseError::new(INVALID_STRING)
535                    .with_context(Span::new_unchecked(0, raw.len()))
536                    .with_expected(&[Expected::Literal("\\"), Expected::Description("characters")])
537                    .with_unexpected(Span::new_unchecked(start, end)),
538            );
539            let _ = output.push_str(invalid);
540        }
541
542        let segment = mlb_unescaped(&mut s);
543        if !output.push_str(segment) {
544            let start = segment.offset_from(&raw.as_str());
545            let end = start + segment.len();
546            error.report_error(
547                ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(start, end)),
548            );
549        }
550    }
551}
552
553/// ```abnf
554/// mlb-escaped-nl = escape ws newline *( wschar / newline )
555/// ```
556fn mlb_escaped_nl(stream: &mut &str, raw: Raw<'_>, error: &mut dyn ErrorSink) {
557    const INVALID_STRING: &str = "invalid multi-line basic string";
558    let ws_offset = stream
559        .as_bytes()
560        .offset_for(|b| !WSCHAR.contains_token(b))
561        .unwrap_or(stream.len());
562    #[cfg(feature = "unsafe")] // SAFETY: WSCHAR ensure `offset` is along UTF-8 boundary
563    unsafe {
564        stream.next_slice_unchecked(ws_offset);
565    }
566    #[cfg(not(feature = "unsafe"))]
567    stream.next_slice(ws_offset);
568
569    let start = stream.checkpoint();
570    match stream.next_token() {
571        Some('\n') => {}
572        Some('\r') => {
573            if stream.as_bytes().first() == Some(&b'\n') {
574                let _ = stream.next_token();
575            } else {
576                let start = stream.offset_from(&raw.as_str());
577                let end = start;
578                error.report_error(
579                    ParseError::new("carriage return must be followed by newline")
580                        .with_context(Span::new_unchecked(0, raw.len()))
581                        .with_expected(&[Expected::Literal("\n")])
582                        .with_unexpected(Span::new_unchecked(start, end)),
583                );
584            }
585        }
586        _ => {
587            stream.reset(&start);
588
589            let start = stream.offset_from(&raw.as_str());
590            let end = start;
591            error.report_error(
592                ParseError::new(INVALID_STRING)
593                    .with_context(Span::new_unchecked(0, raw.len()))
594                    .with_expected(&[Expected::Literal("\n")])
595                    .with_unexpected(Span::new_unchecked(start, end)),
596            );
597        }
598    }
599
600    loop {
601        let start_offset = stream.offset_from(&raw.as_str());
602
603        let offset = stream
604            .as_bytes()
605            .offset_for(|b| !(WSCHAR, b'\n').contains_token(b))
606            .unwrap_or(stream.len());
607        #[cfg(feature = "unsafe")] // SAFETY: WSCHAR ensure `offset` is along UTF-8 boundary
608        unsafe {
609            stream.next_slice_unchecked(offset);
610        }
611        #[cfg(not(feature = "unsafe"))]
612        stream.next_slice(offset);
613
614        if stream.starts_with("\r") {
615            let offset = if stream.starts_with("\r\n") {
616                "\r\n".len()
617            } else {
618                let start = stream.offset_from(&raw.as_str()) + 1;
619                error.report_error(
620                    ParseError::new("carriage return must be followed by newline")
621                        .with_context(Span::new_unchecked(0, raw.len()))
622                        .with_expected(&[Expected::Literal("\n")])
623                        .with_unexpected(Span::new_unchecked(start, start)),
624                );
625                "\r".len()
626            };
627            #[cfg(feature = "unsafe")]
628            // SAFETY: Newlines ensure `offset` is along UTF-8 boundary
629            let _ = unsafe { stream.next_slice_unchecked(offset) };
630            #[cfg(not(feature = "unsafe"))]
631            let _ = stream.next_slice(offset);
632        }
633
634        let end_offset = stream.offset_from(&raw.as_str());
635        if start_offset == end_offset {
636            break;
637        }
638    }
639}
640
641/// `mlb-unescaped` extended with `mlb-quotes` and `LF`
642///
643/// This is a specialization of [`basic_unescaped`] to help with multi-line basic strings
644///
645/// **warning:** `newline` is not validated
646///
647/// ```abnf
648/// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
649///
650/// mlb-content = basic-cha / newline / mlb-escaped-nl
651/// mlb-quotes = 1*2quotation-mark
652/// ```
653fn mlb_unescaped<'i>(stream: &mut &'i str) -> &'i str {
654    let offset = stream
655        .as_bytes()
656        .offset_for(|b| !(BASIC_UNESCAPED, b'"', b'\n').contains_token(b))
657        .unwrap_or(stream.len());
658    #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary
659    unsafe {
660        stream.next_slice_unchecked(offset)
661    }
662    #[cfg(not(feature = "unsafe"))]
663    stream.next_slice(offset)
664}
665
666fn mlb_invalid<'i>(stream: &mut &'i str) -> &'i str {
667    let offset = stream
668        .as_bytes()
669        .offset_for(|b| (BASIC_UNESCAPED, b'"', b'\n', ESCAPE, '\r').contains_token(b))
670        .unwrap_or(stream.len());
671    #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary
672    unsafe {
673        stream.next_slice_unchecked(offset)
674    }
675    #[cfg(not(feature = "unsafe"))]
676    stream.next_slice(offset)
677}
678
679/// Parse unquoted key
680///
681/// ```abnf
682/// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
683/// ```
684pub(crate) fn decode_unquoted_key<'i>(
685    raw: Raw<'i>,
686    output: &mut dyn StringBuilder<'i>,
687    error: &mut dyn ErrorSink,
688) {
689    let s = raw.as_str();
690
691    if s.is_empty() {
692        error.report_error(
693            ParseError::new("unquoted keys cannot be empty")
694                .with_context(Span::new_unchecked(0, s.len()))
695                .with_expected(&[
696                    Expected::Description("letters"),
697                    Expected::Description("numbers"),
698                    Expected::Literal("-"),
699                    Expected::Literal("_"),
700                ])
701                .with_unexpected(Span::new_unchecked(0, s.len())),
702        );
703    }
704
705    for (i, b) in s.as_bytes().iter().enumerate() {
706        if !UNQUOTED_CHAR.contains_token(b) {
707            error.report_error(
708                ParseError::new("invalid unquoted key")
709                    .with_context(Span::new_unchecked(0, s.len()))
710                    .with_expected(&[
711                        Expected::Description("letters"),
712                        Expected::Description("numbers"),
713                        Expected::Literal("-"),
714                        Expected::Literal("_"),
715                    ])
716                    .with_unexpected(Span::new_unchecked(i, i)),
717            );
718        }
719    }
720
721    if !output.push_str(s) {
722        error.report_error(
723            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
724        );
725    }
726}
727
/// Bytes allowed in an unquoted key
///
/// ```abnf
/// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
/// ```
const UNQUOTED_CHAR: (
    RangeInclusive<u8>,
    RangeInclusive<u8>,
    RangeInclusive<u8>,
    u8,
    u8,
) = (
    0x41..=0x5A, // A-Z
    0x61..=0x7A, // a-z
    0x30..=0x39, // 0-9
    0x2D,        // -
    0x5F,        // _
);
738
739#[cfg(test)]
740#[cfg(feature = "std")]
741mod test {
742    use super::*;
743    use crate::decoder::Encoding;
744
745    use alloc::borrow::Cow;
746
747    use snapbox::assert_data_eq;
748    use snapbox::prelude::*;
749    use snapbox::str;
750
751    #[test]
752    fn literal_string() {
753        let cases = [
754            (
755                r"'C:\Users\nodejs\templates'",
756                str![[r#"C:\Users\nodejs\templates"#]].raw(),
757                str![[r#"
758[]
759
760"#]]
761                .raw(),
762            ),
763            (
764                r"'\\ServerX\admin$\system32\'",
765                str![[r#"\\ServerX\admin$\system32\"#]].raw(),
766                str![[r#"
767[]
768
769"#]]
770                .raw(),
771            ),
772            (
773                r#"'Tom "Dubs" Preston-Werner'"#,
774                str![[r#"Tom "Dubs" Preston-Werner"#]].raw(),
775                str![[r#"
776[]
777
778"#]]
779                .raw(),
780            ),
781            (
782                r"'<\i\c*\s*>'",
783                str![[r#"<\i\c*\s*>"#]].raw(),
784                str![[r#"
785[]
786
787"#]]
788                .raw(),
789            ),
790        ];
791        for (input, expected, expected_error) in cases {
792            let mut error = Vec::new();
793            let mut actual = Cow::Borrowed("");
794            decode_literal_string(
795                Raw::new_unchecked(input, Some(Encoding::LiteralString), Default::default()),
796                &mut actual,
797                &mut error,
798            );
799            assert_data_eq!(actual.as_ref(), expected);
800            assert_data_eq!(error.to_debug(), expected_error);
801        }
802    }
803
804    #[test]
805    fn ml_literal_string() {
806        let cases = [
807            (
808                r"'''I [dw]on't need \d{2} apples'''",
809                str![[r#"I [dw]on't need \d{2} apples"#]].raw(),
810                str![[r#"
811[]
812
813"#]]
814                .raw(),
815            ),
816            (
817                r#"''''one_quote''''"#,
818                str!["'one_quote'"].raw(),
819                str![[r#"
820[]
821
822"#]]
823                .raw(),
824            ),
825            (
826                r#"'''
827The first newline is
828trimmed in raw strings.
829   All other whitespace
830   is preserved.
831'''"#,
832                str![[r#"
833The first newline is
834trimmed in raw strings.
835   All other whitespace
836   is preserved.
837
838"#]]
839                .raw(),
840                str![[r#"
841[]
842
843"#]]
844                .raw(),
845            ),
846        ];
847        for (input, expected, expected_error) in cases {
848            let mut error = Vec::new();
849            let mut actual = Cow::Borrowed("");
850            decode_ml_literal_string(
851                Raw::new_unchecked(input, Some(Encoding::MlLiteralString), Default::default()),
852                &mut actual,
853                &mut error,
854            );
855            assert_data_eq!(actual.as_ref(), expected);
856            assert_data_eq!(error.to_debug(), expected_error);
857        }
858    }
859
860    #[test]
861    fn basic_string() {
862        let cases = [
863            (
864                r#""""#,
865                str![""].raw(),
866                str![[r#"
867[]
868
869"#]]
870                .raw(),
871            ),
872            (
873                r#""content\"trailing""#,
874                str![[r#"content"trailing"#]].raw(),
875                str![[r#"
876[]
877
878"#]]
879                .raw(),
880            ),
881            (
882                r#""content\""#,
883                str![[r#"content\"#]].raw(),
884                str![[r#"
885[
886    ParseError {
887        context: Some(
888            0..10,
889        ),
890        description: "missing escaped value",
891        expected: Some(
892            [
893                Literal(
894                    "b",
895                ),
896                Literal(
897                    "e",
898                ),
899                Literal(
900                    "f",
901                ),
902                Literal(
903                    "n",
904                ),
905                Literal(
906                    "r",
907                ),
908                Literal(
909                    "\\",
910                ),
911                Literal(
912                    "\"",
913                ),
914                Literal(
915                    "x",
916                ),
917                Literal(
918                    "u",
919                ),
920                Literal(
921                    "U",
922                ),
923            ],
924        ),
925        unexpected: Some(
926            9..9,
927        ),
928    },
929]
930
931"#]]
932                .raw(),
933            ),
934            (
935                r#""content
936trailing""#,
937                str![[r#"
938content
939trailing
940"#]]
941                .raw(),
942                str![[r#"
943[
944    ParseError {
945        context: Some(
946            0..18,
947        ),
948        description: "invalid basic string",
949        expected: Some(
950            [
951                Description(
952                    "non-double-quote visible characters",
953                ),
954                Literal(
955                    "\\",
956                ),
957            ],
958        ),
959        unexpected: Some(
960            8..9,
961        ),
962    },
963]
964
965"#]]
966                .raw(),
967            ),
968            (
969                r#""I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF. \U0002070E""#,
970                str![[r#"
971I'm a string. "You can quote me". Name	José
972Location	SF. 𠜎
973"#]]
974                .raw(),
975                str![[r#"
976[]
977
978"#]]
979                .raw(),
980            ),
981        ];
982        for (input, expected, expected_error) in cases {
983            let mut error = Vec::new();
984            let mut actual = Cow::Borrowed("");
985            decode_basic_string(
986                Raw::new_unchecked(input, Some(Encoding::BasicString), Default::default()),
987                &mut actual,
988                &mut error,
989            );
990            assert_data_eq!(actual.as_ref(), expected);
991            assert_data_eq!(error.to_debug(), expected_error);
992        }
993    }
994
995    #[test]
996    fn ml_basic_string() {
997        let cases = [
998            (
999                r#""""
1000Roses are red
1001Violets are blue""""#,
1002                str![[r#"
1003Roses are red
1004Violets are blue
1005"#]]
1006                .raw(),
1007                str![[r#"
1008[]
1009
1010"#]]
1011                .raw(),
1012            ),
1013            (
1014                r#"""" \""" """"#,
1015                str![[r#" """ "#]].raw(),
1016                str![[r#"
1017[]
1018
1019"#]]
1020                .raw(),
1021            ),
1022            (
1023                r#"""" \\""""#,
1024                str![[r#" \"#]].raw(),
1025                str![[r#"
1026[]
1027
1028"#]]
1029                .raw(),
1030            ),
1031            (
1032                r#""""
1033The quick brown \
1034
1035
1036  fox jumps over \
1037    the lazy dog.""""#,
1038                str!["The quick brown fox jumps over the lazy dog."].raw(),
1039                str![[r#"
1040[]
1041
1042"#]]
1043                .raw(),
1044            ),
1045            (
1046                r#""""\
1047       The quick brown \
1048       fox jumps over \
1049       the lazy dog.\
1050       """"#,
1051                str!["The quick brown fox jumps over the lazy dog."].raw(),
1052                str![[r#"
1053[]
1054
1055"#]]
1056                .raw(),
1057            ),
1058            (
1059                r#""""\
1060       """"#,
1061                str![""].raw(),
1062                str![[r#"
1063[]
1064
1065"#]]
1066                .raw(),
1067            ),
1068            (
1069                r#""""
1070\
1071  \
1072""""#,
1073                str![""].raw(),
1074                str![[r#"
1075[]
1076
1077"#]]
1078                .raw(),
1079            ),
1080            (
1081                r#""""  """#,
1082                str![[r#"  """#]].raw(),
1083                str![[r#"
1084[
1085    ParseError {
1086        context: Some(
1087            0..7,
1088        ),
1089        description: "invalid multi-line basic string",
1090        expected: Some(
1091            [
1092                Literal(
1093                    "\"",
1094                ),
1095            ],
1096        ),
1097        unexpected: Some(
1098            7..7,
1099        ),
1100    },
1101]
1102
1103"#]]
1104                .raw(),
1105            ),
1106            (
1107                r#""""  \""""#,
1108                str![[r#"  \"#]].raw(),
1109                str![[r#"
1110[
1111    ParseError {
1112        context: Some(
1113            0..9,
1114        ),
1115        description: "missing escaped value",
1116        expected: Some(
1117            [
1118                Literal(
1119                    "b",
1120                ),
1121                Literal(
1122                    "e",
1123                ),
1124                Literal(
1125                    "f",
1126                ),
1127                Literal(
1128                    "n",
1129                ),
1130                Literal(
1131                    "r",
1132                ),
1133                Literal(
1134                    "\\",
1135                ),
1136                Literal(
1137                    "\"",
1138                ),
1139                Literal(
1140                    "x",
1141                ),
1142                Literal(
1143                    "u",
1144                ),
1145                Literal(
1146                    "U",
1147                ),
1148            ],
1149        ),
1150        unexpected: Some(
1151            6..6,
1152        ),
1153    },
1154]
1155
1156"#]]
1157                .raw(),
1158            ),
1159        ];
1160        for (input, expected, expected_error) in cases {
1161            let mut error = Vec::new();
1162            let mut actual = Cow::Borrowed("");
1163            decode_ml_basic_string(
1164                Raw::new_unchecked(input, Some(Encoding::MlBasicString), Default::default()),
1165                &mut actual,
1166                &mut error,
1167            );
1168            assert_data_eq!(actual.as_ref(), expected);
1169            assert_data_eq!(error.to_debug(), expected_error);
1170        }
1171    }
1172
1173    #[test]
1174    fn unquoted_keys() {
1175        let cases = [
1176            (
1177                "a",
1178                str!["a"].raw(),
1179                str![[r#"
1180[]
1181
1182"#]]
1183                .raw(),
1184            ),
1185            (
1186                "hello",
1187                str!["hello"].raw(),
1188                str![[r#"
1189[]
1190
1191"#]]
1192                .raw(),
1193            ),
1194            (
1195                "-",
1196                str!["-"].raw(),
1197                str![[r#"
1198[]
1199
1200"#]]
1201                .raw(),
1202            ),
1203            (
1204                "_",
1205                str!["_"].raw(),
1206                str![[r#"
1207[]
1208
1209"#]]
1210                .raw(),
1211            ),
1212            (
1213                "-hello-world-",
1214                str!["-hello-world-"].raw(),
1215                str![[r#"
1216[]
1217
1218"#]]
1219                .raw(),
1220            ),
1221            (
1222                "_hello_world_",
1223                str!["_hello_world_"].raw(),
1224                str![[r#"
1225[]
1226
1227"#]]
1228                .raw(),
1229            ),
1230            (
1231                "",
1232                str![""].raw(),
1233                str![[r#"
1234[
1235    ParseError {
1236        context: Some(
1237            0..0,
1238        ),
1239        description: "unquoted keys cannot be empty",
1240        expected: Some(
1241            [
1242                Description(
1243                    "letters",
1244                ),
1245                Description(
1246                    "numbers",
1247                ),
1248                Literal(
1249                    "-",
1250                ),
1251                Literal(
1252                    "_",
1253                ),
1254            ],
1255        ),
1256        unexpected: Some(
1257            0..0,
1258        ),
1259    },
1260]
1261
1262"#]]
1263                .raw(),
1264            ),
1265        ];
1266
1267        for (input, expected, expected_error) in cases {
1268            let mut error = Vec::new();
1269            let mut actual = Cow::Borrowed("");
1270            decode_unquoted_key(
1271                Raw::new_unchecked(input, None, Default::default()),
1272                &mut actual,
1273                &mut error,
1274            );
1275            assert_data_eq!(actual.as_ref(), expected);
1276            assert_data_eq!(error.to_debug(), expected_error);
1277        }
1278    }
1279}