1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 * This file contains a bunch of UTF-8 helpers and the main ctlseq-parser. The
25 * parser is a simple state-machine that correctly parses all CSI, DCS, OSC, ST
26 * control sequences and generic escape sequences.
27 * The parser itself does not perform any actions but lets the caller react to
35 #include "term-internal.h"
/**
 * term_utf8_encode() - Encode single UCS-4 character as UTF-8
 * @out_utf8: output buffer of at least 4 bytes or NULL
 * @g: UCS-4 character to encode
 *
 * This encodes a single UCS-4 character as UTF-8 and writes it into @out_utf8.
 * The length of the character is returned. It is not zero-terminated! If the
 * output buffer is NULL, only the length is returned.
 *
 * Returns: The length in bytes that the UTF-8 representation does or would
 *          occupy (1 to 4), or 0 if @g does not fit into 21 bits and thus
 *          cannot be encoded.
 */
size_t term_utf8_encode(char *out_utf8, uint32_t g) {
        if (g < (1U << 7)) {
                /* 0xxxxxxx */
                if (out_utf8)
                        out_utf8[0] = (char) (g & 0x7f);

                return 1;
        } else if (g < (1U << 11)) {
                /* 110xxxxx 10xxxxxx */
                if (out_utf8) {
                        out_utf8[0] = (char) (0xc0 | ((g >> 6) & 0x1f));
                        out_utf8[1] = (char) (0x80 | (g & 0x3f));
                }

                return 2;
        } else if (g < (1U << 16)) {
                /* 1110xxxx 10xxxxxx 10xxxxxx */
                if (out_utf8) {
                        out_utf8[0] = (char) (0xe0 | ((g >> 12) & 0x0f));
                        out_utf8[1] = (char) (0x80 | ((g >> 6) & 0x3f));
                        out_utf8[2] = (char) (0x80 | (g & 0x3f));
                }

                return 3;
        } else if (g < (1U << 21)) {
                /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
                if (out_utf8) {
                        out_utf8[0] = (char) (0xf0 | ((g >> 18) & 0x07));
                        out_utf8[1] = (char) (0x80 | ((g >> 12) & 0x3f));
                        out_utf8[2] = (char) (0x80 | ((g >> 6) & 0x3f));
                        out_utf8[3] = (char) (0x80 | (g & 0x3f));
                }

                return 4;
        }

        /* value needs more than 21 bits; no encoding is produced */
        return 0;
}
82 * term_utf8_decode() - Try decoding the next UCS-4 character
83 * @p: decoder object to operate on or NULL
84 * @out_buf: output storage for pointer to decoded UCS-4 string or NULL
85 * @c: next char to push into decoder
87 * This decodes a UTF-8 stream. It must be called for each input-byte of the
88 * UTF-8 stream and returns a UCS-4 stream. A pointer to the parsed UCS-4
89 * string is stored in @out_buf if non-NULL. The length of this string (number
90 * of parsed UCS4 characters) is returned as result. The string is not
91 * zero-terminated! Furthermore, the string is only valid until the next
92 * invocation of this function. It is also bound to the parser state @p and
93 * must not be freed nor written to by the caller.
95 * This function is highly optimized to work with terminal-emulators. Instead
96 * of being strict about UTF-8 validity, this tries to perform a fallback to
97 * ISO-8859-1 in case a wrong series was detected. Therefore, this function
98 * might return multiple UCS-4 characters by parsing just a single UTF-8 byte.
100 * The parser state @p should be allocated and managed by the caller. There are
101 * no helpers to do that for you. To initialize it, simply reset it to all
102 * zero. You can reset or free the object at any point in time.
104 * Returns: Number of parsed UCS4 characters
/*
 * Streaming UTF-8 decoder entry point: takes one input byte @c, updates the
 * decoder state @p and publishes a result pointer through @out_buf.
 * NOTE(review): several statements of this function (local declarations, the
 * per-branch state updates, the memmove() source argument and the tail that
 * computes the return value) are not visible in this extract; the comments
 * added here only describe the lines that are.
 */
106 size_t term_utf8_decode(term_utf8 *p, uint32_t **out_buf, char c) {
/* zero-valued fallback target handed out via @out_buf when there is no result */
107 static uint32_t ucs4_null = 0;
108 uint32_t t, *res = NULL;
/* a new sequence is started if the cached one is invalid or already complete */
117 if (!p->valid || p->i_bytes >= p->n_bytes) {
119 * If the previous sequence was invalid or fully parsed, start
120 * parsing a fresh new sequence.
123 if ((byte & 0xE0) == 0xC0) {
124 /* start of two byte sequence */
129 } else if ((byte & 0xF0) == 0xE0) {
130 /* start of three byte sequence */
135 } else if ((byte & 0xF8) == 0xF0) {
136 /* start of four byte sequence */
143 * - single ASCII 7-bit char
144 * - out-of-sync continuation byte
145 * - overlong encoding
146 * All of them are treated as single byte ISO-8859-1 */
/* payload bits of the lead byte are shifted above the 6-bit groups still expected */
154 p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes));
157 * ..otherwise, try to continue the previous sequence..
160 if ((byte & 0xC0) == 0x80) {
162 * Valid continuation byte. Append to sequence and
163 * update the ucs4 cache accordingly.
167 p->chars[p->i_bytes++] = byte;
/* i_bytes was already incremented above, so the shift counts the remaining groups */
168 p->ucs4 |= t << (6 * (p->n_bytes - p->i_bytes));
171 * Invalid continuation? Treat cached sequence as
172 * ISO-8859-1, but parse the new char as valid new
173 * starting character. If it's a new single-byte UTF-8
174 * sequence, we immediately return it in the same run,
175 * otherwise, we might suffer from starvation.
178 if ((byte & 0xE0) == 0xC0 ||
179 (byte & 0xF0) == 0xE0 ||
180 (byte & 0xF8) == 0xF0) {
182 * New multi-byte sequence. Move to-be-returned
183 * data at the end and start new sequence. Only
184 * return the old sequence.
187 memmove(p->chars + 1,
189 sizeof(*p->chars) * p->i_bytes);
193 if ((byte & 0xE0) == 0xC0) {
194 /* start of two byte sequence */
199 } else if ((byte & 0xF0) == 0xE0) {
200 /* start of three byte sequence */
205 } else if ((byte & 0xF8) == 0xF0) {
206 /* start of four byte sequence */
212 assert_not_reached("Should not happen");
215 p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes));
220 * New single byte sequence, append to output
221 * and return combined sequence.
224 p->chars[p->i_bytes++] = byte;
231 * Check whether a full sequence (valid or invalid) has been parsed and
232 * then return it. Otherwise, return nothing.
235 /* still parsing? then bail out */
236 if (p->i_bytes < p->n_bytes)
/* "a ?: b" is the GNU conditional with omitted middle operand: res if non-NULL, else &ucs4_null */
252 *out_buf = res ? : &ucs4_null;
258 * The ctl-seq parser "term_parser" only detects whole sequences, it does not
259 * detect the specific command. Once a sequence is parsed, the command-parsers
260 * are used to figure out their meaning. Note that this depends on whether we
261 * run on the host or terminal side.
/*
 * Translates a single control character, taken from seq->terminator, into a
 * TERM_CMD_* value. Anything without a dedicated case ends up as
 * TERM_CMD_NONE.
 * NOTE(review): most case labels of the switch are not visible in this
 * extract; the remaining comments show that several C1 introducers are
 * deliberately left to the state-machine.
 */
264 static unsigned int term_parse_host_control(const term_seq *seq) {
/* presumably bails out with TERM_CMD_NONE when @seq is NULL -- confirm against assert_return() */
265 assert_return(seq, TERM_CMD_NONE);
267 switch (seq->terminator) {
269 return TERM_CMD_NULL;
295 /* this is already handled by the state-machine */
300 /* this is already handled by the state-machine */
303 /* this is already handled by the state-machine */
318 /* this is already handled by the state-machine */
325 /* this is already handled by the state-machine */
327 case 0x9a: /* DECID */
328 return TERM_CMD_DECID;
330 /* this is already handled by the state-machine */
335 /* this is already handled by the state-machine */
338 /* this is already handled by the state-machine */
341 /* this is already handled by the state-machine */
/* fallback for every terminator that did not return above */
345 return TERM_CMD_NONE;
/*
 * Looks up the pair (@raw, @flags) in the charset_cmds table below, where the
 * array index of an entry is the TERM_CHARSET_* id it designates. Entries at
 * index TERM_CHARSET_CNT + x (and 2 * TERM_CHARSET_CNT + x) are alternative
 * designators for charset x; the lookup folds such indices back into the
 * primary range. With @require_96 set, a match is only accepted if it lies
 * below TERM_CHARSET_96_CNT or at/above TERM_CHARSET_94_CNT.
 * NOTE(review): the struct members, the local declarations and the return
 * statements are not visible in this extract -- confirm the exact return
 * values (the caller stores the result in a charset variable).
 */
348 static inline int charset_from_cmd(uint32_t raw, unsigned int flags, bool require_96) {
349 static const struct {
353 /* 96-compat charsets */
354 [TERM_CHARSET_ISO_LATIN1_SUPPLEMENTAL] = { .raw = 'A', .flags = 0 },
355 [TERM_CHARSET_ISO_LATIN2_SUPPLEMENTAL] = { .raw = 'B', .flags = 0 },
356 [TERM_CHARSET_ISO_LATIN5_SUPPLEMENTAL] = { .raw = 'M', .flags = 0 },
357 [TERM_CHARSET_ISO_GREEK_SUPPLEMENTAL] = { .raw = 'F', .flags = 0 },
358 [TERM_CHARSET_ISO_HEBREW_SUPPLEMENTAL] = { .raw = 'H', .flags = 0 },
359 [TERM_CHARSET_ISO_LATIN_CYRILLIC] = { .raw = 'L', .flags = 0 },
361 /* 94-compat charsets */
362 [TERM_CHARSET_DEC_SPECIAL_GRAPHIC] = { .raw = '0', .flags = 0 },
363 [TERM_CHARSET_DEC_SUPPLEMENTAL] = { .raw = '5', .flags = TERM_SEQ_FLAG_PERCENT },
364 [TERM_CHARSET_DEC_TECHNICAL] = { .raw = '>', .flags = 0 },
365 [TERM_CHARSET_CYRILLIC_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_AND },
366 [TERM_CHARSET_DUTCH_NRCS] = { .raw = '4', .flags = 0 },
367 [TERM_CHARSET_FINNISH_NRCS] = { .raw = '5', .flags = 0 },
368 [TERM_CHARSET_FRENCH_NRCS] = { .raw = 'R', .flags = 0 },
369 [TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = '9', .flags = 0 },
370 [TERM_CHARSET_GERMAN_NRCS] = { .raw = 'K', .flags = 0 },
371 [TERM_CHARSET_GREEK_DEC] = { .raw = '?', .flags = TERM_SEQ_FLAG_DQUOTE },
372 [TERM_CHARSET_GREEK_NRCS] = { .raw = '>', .flags = TERM_SEQ_FLAG_DQUOTE },
373 [TERM_CHARSET_HEBREW_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_DQUOTE },
374 [TERM_CHARSET_HEBREW_NRCS] = { .raw = '=', .flags = TERM_SEQ_FLAG_PERCENT },
375 [TERM_CHARSET_ITALIAN_NRCS] = { .raw = 'Y', .flags = 0 },
376 [TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '`', .flags = 0 },
377 [TERM_CHARSET_PORTUGUESE_NRCS] = { .raw = '6', .flags = TERM_SEQ_FLAG_PERCENT },
378 [TERM_CHARSET_RUSSIAN_NRCS] = { .raw = '5', .flags = TERM_SEQ_FLAG_AND },
379 [TERM_CHARSET_SCS_NRCS] = { .raw = '3', .flags = TERM_SEQ_FLAG_PERCENT },
380 [TERM_CHARSET_SPANISH_NRCS] = { .raw = 'Z', .flags = 0 },
381 [TERM_CHARSET_SWEDISH_NRCS] = { .raw = '7', .flags = 0 },
382 [TERM_CHARSET_SWISS_NRCS] = { .raw = '=', .flags = 0 },
383 [TERM_CHARSET_TURKISH_DEC] = { .raw = '0', .flags = TERM_SEQ_FLAG_PERCENT },
384 [TERM_CHARSET_TURKISH_NRCS] = { .raw = '2', .flags = TERM_SEQ_FLAG_PERCENT },
386 /* special charsets */
387 [TERM_CHARSET_USERPREF_SUPPLEMENTAL] = { .raw = '<', .flags = 0 },
389 /* secondary choices */
390 [TERM_CHARSET_CNT + TERM_CHARSET_FINNISH_NRCS] = { .raw = 'C', .flags = 0 },
391 [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_NRCS] = { .raw = 'f', .flags = 0 },
392 [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = 'Q', .flags = 0 },
393 [TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = 'E', .flags = 0 },
394 [TERM_CHARSET_CNT + TERM_CHARSET_SWEDISH_NRCS] = { .raw = 'H', .flags = 0 }, /* unused; conflicts with ISO_HEBREW */
396 /* tertiary choices */
397 [TERM_CHARSET_CNT + TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '6', .flags = 0 },
402 * Secondary choice on SWEDISH_NRCS and primary choice on
403 * ISO_HEBREW_SUPPLEMENTAL have a conflict: raw=="H", flags==0.
404 * We always choose the ISO 96-compat set, which is what VT510 does.
/* linear scan in index order, so lower indices are examined first */
407 for (i = 0; i < ELEMENTSOF(charset_cmds); ++i) {
408 if (charset_cmds[i].raw == raw && charset_cmds[i].flags == flags) {
/* fold secondary/tertiary table rows back onto the primary charset id */
410 while (cs >= TERM_CHARSET_CNT)
411 cs -= TERM_CHARSET_CNT;
413 if (!require_96 || cs < TERM_CHARSET_96_CNT || cs >= TERM_CHARSET_94_CNT)
/*
 * Returns true if exactly one bit in @value is set, false otherwise
 * (in particular for @value == 0).
 */
static inline bool exactly_one_bit_set(unsigned int value) {
        /* a non-zero value with a single bit set shares no bit with (value - 1) */
        return value != 0 && (value & (value - 1)) == 0;
}
/*
 * Translates an escape sequence into a TERM_CMD_* value, based on
 * seq->terminator and the intermediates bitmask seq->intermediates.
 * If exactly one of the charset-designator intermediates collected in the
 * mask "t" is present, charset_from_cmd() is consulted first with the
 * remaining intermediates; otherwise (or if that lookup does not match) the
 * generic switch on the terminator decides. Unknown sequences yield
 * TERM_CMD_NONE.
 * NOTE(review): the case labels, several return statements and the code that
 * uses @cs_out are not visible in this extract.
 */
426 static unsigned int term_parse_host_escape(const term_seq *seq, unsigned int *cs_out) {
427 unsigned int t, flags;
430 assert_return(seq, TERM_CMD_NONE);
432 flags = seq->intermediates;
/* mask of all intermediates that may introduce a charset designation */
433 t = TERM_SEQ_FLAG_POPEN | TERM_SEQ_FLAG_PCLOSE | TERM_SEQ_FLAG_MULT |
434 TERM_SEQ_FLAG_PLUS | TERM_SEQ_FLAG_MINUS | TERM_SEQ_FLAG_DOT |
437 if (exactly_one_bit_set(flags & t)) {
439 case TERM_SEQ_FLAG_POPEN:
440 case TERM_SEQ_FLAG_PCLOSE:
441 case TERM_SEQ_FLAG_MULT:
442 case TERM_SEQ_FLAG_PLUS:
/* these designators do not require a 96-compat set (require_96 == false) */
443 cs = charset_from_cmd(seq->terminator, flags & ~t, false);
445 case TERM_SEQ_FLAG_MINUS:
446 case TERM_SEQ_FLAG_DOT:
447 case TERM_SEQ_FLAG_SLASH:
/* these designators are looked up with require_96 == true */
448 cs = charset_from_cmd(seq->terminator, flags & ~t, true);
461 /* looked like a charset-cmd but wasn't; continue */
464 switch (seq->terminator) {
466 if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL top-half */
467 return TERM_CMD_DECDHL_TH;
470 if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL bottom-half */
471 return TERM_CMD_DECDHL_BH;
474 if (flags == TERM_SEQ_FLAG_HASH) /* DECSWL */
475 return TERM_CMD_DECSWL;
478 if (flags == 0) /* DECBI */
479 return TERM_CMD_DECBI;
480 else if (flags == TERM_SEQ_FLAG_HASH) /* DECDWL */
481 return TERM_CMD_DECDWL;
484 if (flags == 0) /* DECSC */
485 return TERM_CMD_DECSC;
488 if (flags == 0) /* DECRC */
489 return TERM_CMD_DECRC;
490 else if (flags == TERM_SEQ_FLAG_HASH) /* DECALN */
491 return TERM_CMD_DECALN;
494 if (flags == 0) /* DECFI */
495 return TERM_CMD_DECFI;
498 if (flags == 0) /* DECANM */
499 return TERM_CMD_DECANM;
502 if (flags == 0) /* DECKPAM */
503 return TERM_CMD_DECKPAM;
506 if (flags == 0) /* DECKPNM */
507 return TERM_CMD_DECKPNM;
510 if (flags == TERM_SEQ_FLAG_PERCENT) {
511 /* Select default character set */
512 return TERM_CMD_XTERM_SDCS;
516 if (flags == 0) /* IND */
520 if (flags == 0) /* NEL */
524 if (flags == 0) /* Cursor to lower-left corner of screen */
525 return TERM_CMD_XTERM_CLLHP;
526 else if (flags == TERM_SEQ_FLAG_SPACE) /* S7C1T */
527 return TERM_CMD_S7C1T;
530 if (flags == TERM_SEQ_FLAG_SPACE) { /* S8C1T */
531 return TERM_CMD_S8C1T;
532 } else if (flags == TERM_SEQ_FLAG_PERCENT) {
533 /* Select UTF-8 character set */
534 return TERM_CMD_XTERM_SUCS;
538 if (flags == 0) /* HTS */
542 if (flags == TERM_SEQ_FLAG_SPACE) {
543 /* Set ANSI conformance level 1 */
544 return TERM_CMD_XTERM_SACL1;
548 if (flags == 0) { /* RI */
550 } else if (flags == TERM_SEQ_FLAG_SPACE) {
551 /* Set ANSI conformance level 2 */
552 return TERM_CMD_XTERM_SACL2;
556 if (flags == 0) { /* SS2 */
558 } else if (flags == TERM_SEQ_FLAG_SPACE) {
559 /* Set ANSI conformance level 3 */
560 return TERM_CMD_XTERM_SACL3;
564 if (flags == 0) /* SS3 */
568 if (flags == 0) /* DCS: this is already handled by the state-machine */
572 if (flags == 0) /* SPA */
576 if (flags == 0) /* EPA */
580 if (flags == 0) { /* SOS */
581 /* this is already handled by the state-machine */
586 if (flags == 0) /* DECID */
587 return TERM_CMD_DECID;
590 if (flags == 0) { /* CSI */
591 /* this is already handled by the state-machine */
596 if (flags == 0) /* ST */
600 if (flags == 0) { /* OSC */
601 /* this is already handled by the state-machine */
606 if (flags == 0) { /* PM */
607 /* this is already handled by the state-machine */
612 if (flags == 0) { /* APC */
613 /* this is already handled by the state-machine */
618 if (flags == 0) /* RIS */
622 if (flags == 0) /* Memory lock */
623 return TERM_CMD_XTERM_MLHP;
626 if (flags == 0) /* Memory unlock */
627 return TERM_CMD_XTERM_MUHP;
630 if (flags == 0) /* LS2 */
634 if (flags == 0) /* LS3 */
638 if (flags == 0) /* LS3R */
639 return TERM_CMD_LS3R;
642 if (flags == 0) /* LS2R */
643 return TERM_CMD_LS2R;
646 if (flags == 0) /* LS1R */
647 return TERM_CMD_LS1R;
/* no terminator/intermediate combination matched */
651 return TERM_CMD_NONE;
/*
 * Translates a CSI sequence into a TERM_CMD_* value. The outer switch selects
 * on seq->terminator, the if/else chains inside select on the intermediates
 * bitmask (seq->intermediates); a few ambiguous sequences are additionally
 * told apart by seq->n_args or seq->args[0]. Combinations that are not
 * recognized yield TERM_CMD_NONE.
 * NOTE(review): the case labels and a number of return statements are not
 * visible in this extract.
 */
654 static unsigned int term_parse_host_csi(const term_seq *seq) {
657 assert_return(seq, TERM_CMD_NONE);
659 flags = seq->intermediates;
661 switch (seq->terminator) {
663 if (flags == 0) /* CUU */
667 if (flags == 0) /* HPR */
671 if (flags == 0) /* CUD */
675 if (flags == 0) /* REP */
679 if (flags == 0) /* CUF */
683 if (flags == 0) /* DA1 */
685 else if (flags == TERM_SEQ_FLAG_GT) /* DA2 */
687 else if (flags == TERM_SEQ_FLAG_EQUAL) /* DA3 */
691 if (flags == 0) /* CUB */
695 if (flags == 0) /* VPA */
699 if (flags == 0) /* CNL */
703 if (flags == 0) /* VPR */
707 if (flags == 0) /* CPL */
711 if (flags == 0) /* HVP */
715 if (flags == 0) /* CHA */
719 if (flags == 0) /* TBC */
721 else if (flags == TERM_SEQ_FLAG_MULT) /* DECLFKC */
722 return TERM_CMD_DECLFKC;
725 if (flags == 0) /* CUP */
729 if (flags == 0) /* SM ANSI */
730 return TERM_CMD_SM_ANSI;
731 else if (flags == TERM_SEQ_FLAG_WHAT) /* SM DEC */
732 return TERM_CMD_SM_DEC;
735 if (flags == 0) /* CHT */
739 if (flags == 0) /* MC ANSI */
740 return TERM_CMD_MC_ANSI;
741 else if (flags == TERM_SEQ_FLAG_WHAT) /* MC DEC */
742 return TERM_CMD_MC_DEC;
745 if (flags == 0) /* ED */
747 else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSED */
748 return TERM_CMD_DECSED;
751 if (flags == 0) /* EL */
753 else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSEL */
754 return TERM_CMD_DECSEL;
757 if (flags == 0) /* IL */
761 if (flags == 0) /* RM ANSI */
762 return TERM_CMD_RM_ANSI;
763 else if (flags == TERM_SEQ_FLAG_WHAT) /* RM DEC */
764 return TERM_CMD_RM_DEC;
767 if (flags == 0) /* DL */
771 if (flags == 0) /* SGR */
773 else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SMR */
774 return TERM_CMD_XTERM_SRV;
777 if (flags == 0) /* DSR ANSI */
778 return TERM_CMD_DSR_ANSI;
779 else if (flags == TERM_SEQ_FLAG_GT) /* XTERM RMR */
780 return TERM_CMD_XTERM_RRV;
781 else if (flags == TERM_SEQ_FLAG_WHAT) /* DSR DEC */
782 return TERM_CMD_DSR_DEC;
785 if (flags == 0) /* DCH */
787 else if (flags == TERM_SEQ_FLAG_SPACE) /* PPA */
791 if (flags == 0) /* DECSSL */
792 return TERM_CMD_DECSSL;
793 else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSSCLS */
794 return TERM_CMD_DECSSCLS;
795 else if (flags == TERM_SEQ_FLAG_BANG) /* DECSTR */
796 return TERM_CMD_DECSTR;
797 else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCL */
798 return TERM_CMD_DECSCL;
799 else if (flags == TERM_SEQ_FLAG_CASH) /* DECRQM-ANSI */
800 return TERM_CMD_DECRQM_ANSI;
801 else if (flags == (TERM_SEQ_FLAG_CASH | TERM_SEQ_FLAG_WHAT)) /* DECRQM-DEC */
802 return TERM_CMD_DECRQM_DEC;
803 else if (flags == TERM_SEQ_FLAG_PCLOSE) /* DECSDPT */
804 return TERM_CMD_DECSDPT;
805 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSPPCS */
806 return TERM_CMD_DECSPPCS;
807 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSR */
808 return TERM_CMD_DECSR;
809 else if (flags == TERM_SEQ_FLAG_COMMA) /* DECLTOD */
810 return TERM_CMD_DECLTOD;
811 else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SPM */
812 return TERM_CMD_XTERM_SPM;
815 if (flags == TERM_SEQ_FLAG_SPACE) /* PPR */
819 if (flags == 0) /* DECLL */
820 return TERM_CMD_DECLL;
821 else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSCUSR */
822 return TERM_CMD_DECSCUSR;
823 else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCA */
824 return TERM_CMD_DECSCA;
825 else if (flags == TERM_SEQ_FLAG_CASH) /* DECSDDT */
826 return TERM_CMD_DECSDDT;
827 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSRC */
/* NOTE(review): the branch is labelled DECSRC but returns TERM_CMD_DECSR, the
 * same value the DECSR branch further up returns -- this looks like a typo for
 * TERM_CMD_DECSRC; confirm against the TERM_CMD_* enum. */
828 return TERM_CMD_DECSR;
829 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECELF */
830 return TERM_CMD_DECELF;
831 else if (flags == TERM_SEQ_FLAG_COMMA) /* DECTID */
832 return TERM_CMD_DECTID;
835 if (flags == TERM_SEQ_FLAG_SPACE) /* PPB */
841 return TERM_CMD_DECSTBM;
842 } else if (flags == TERM_SEQ_FLAG_SPACE) {
844 return TERM_CMD_DECSKCV;
845 } else if (flags == TERM_SEQ_FLAG_CASH) {
847 return TERM_CMD_DECCARA;
848 } else if (flags == TERM_SEQ_FLAG_MULT) {
850 return TERM_CMD_DECSCS;
851 } else if (flags == TERM_SEQ_FLAG_PLUS) {
853 return TERM_CMD_DECSMKR;
854 } else if (flags == TERM_SEQ_FLAG_WHAT) {
856 * There's a conflict between DECPCTERM and XTERM-RPM.
857 * XTERM-RPM takes a single argument, DECPCTERM takes 2.
858 * Split both up and forward the call to the closer
/* the two conditions below are complementary, so one of them always matches */
861 if (seq->n_args <= 1) /* XTERM RPM */
862 return TERM_CMD_XTERM_RPM;
863 else if (seq->n_args >= 2) /* DECPCTERM */
864 return TERM_CMD_DECPCTERM;
868 if (flags == 0) /* SU */
870 else if (flags == TERM_SEQ_FLAG_WHAT) /* XTERM SGFX */
871 return TERM_CMD_XTERM_SGFX;
876 * There's a conflict between DECSLRM and SC-ANSI which
877 * cannot be resolved without knowing the state of
878 * DECLRMM. We leave that decision up to the caller.
880 return TERM_CMD_DECSLRM_OR_SC;
881 } else if (flags == TERM_SEQ_FLAG_CASH) {
883 return TERM_CMD_DECSPRTT;
884 } else if (flags == TERM_SEQ_FLAG_MULT) {
886 return TERM_CMD_DECSFC;
887 } else if (flags == TERM_SEQ_FLAG_WHAT) {
889 return TERM_CMD_XTERM_SPM;
895 * Awesome: There's a conflict between SD and XTERM IHMT
896 * that we have to resolve by checking the parameter
897 * count.. XTERM_IHMT needs exactly 5 arguments, SD
898 * takes 0 or 1. We're conservative here and give both
899 * a wider range to allow unused arguments (compat...).
901 if (seq->n_args >= 5) {
903 return TERM_CMD_XTERM_IHMT;
904 } else if (seq->n_args < 5) {
908 } else if (flags == TERM_SEQ_FLAG_GT) {
910 return TERM_CMD_XTERM_RTM;
/* first argument below 24 selects XTERM_WM, everything else DECSLPP */
915 if (seq->n_args > 0 && seq->args[0] < 24) {
917 return TERM_CMD_XTERM_WM;
920 return TERM_CMD_DECSLPP;
922 } else if (flags == TERM_SEQ_FLAG_SPACE) {
924 return TERM_CMD_DECSWBV;
925 } else if (flags == TERM_SEQ_FLAG_DQUOTE) {
927 return TERM_CMD_DECSRFR;
928 } else if (flags == TERM_SEQ_FLAG_CASH) {
930 return TERM_CMD_DECRARA;
931 } else if (flags == TERM_SEQ_FLAG_GT) {
933 return TERM_CMD_XTERM_STM;
937 if (flags == 0) /* NP */
944 } else if (flags == TERM_SEQ_FLAG_SPACE) {
946 return TERM_CMD_DECSMBV;
947 } else if (flags == TERM_SEQ_FLAG_DQUOTE) {
949 return TERM_CMD_DECSTRL;
950 } else if (flags == TERM_SEQ_FLAG_WHAT) {
952 return TERM_CMD_DECRQUPSS;
953 } else if (seq->args[0] == 1 && flags == TERM_SEQ_FLAG_CASH) {
955 return TERM_CMD_DECRQTSR;
956 } else if (flags == TERM_SEQ_FLAG_MULT) {
958 return TERM_CMD_DECSCP;
959 } else if (flags == TERM_SEQ_FLAG_COMMA) {
961 return TERM_CMD_DECRQKT;
965 if (flags == 0) /* PP */
969 if (flags == TERM_SEQ_FLAG_SPACE) /* DECSLCK */
970 return TERM_CMD_DECSLCK;
971 else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECRQDE */
972 return TERM_CMD_DECRQDE;
973 else if (flags == TERM_SEQ_FLAG_CASH) /* DECCRA */
974 return TERM_CMD_DECCRA;
975 else if (flags == TERM_SEQ_FLAG_COMMA) /* DECRPKT */
976 return TERM_CMD_DECRPKT;
979 if (seq->args[0] == 5 && flags == TERM_SEQ_FLAG_WHAT) {
981 return TERM_CMD_DECST8C;
985 if (flags == TERM_SEQ_FLAG_CASH) /* DECRQPSR */
986 return TERM_CMD_DECRQPSR;
987 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECEFR */
988 return TERM_CMD_DECEFR;
989 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSPP */
990 return TERM_CMD_DECSPP;
993 if (flags == 0) /* ECH */
997 if (flags == 0) /* DECREQTPARM */
998 return TERM_CMD_DECREQTPARM;
999 else if (flags == TERM_SEQ_FLAG_CASH) /* DECFRA */
1000 return TERM_CMD_DECFRA;
1001 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSACE */
1002 return TERM_CMD_DECSACE;
1003 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECRQPKFM */
1004 return TERM_CMD_DECRQPKFM;
1007 if (flags == 0) /* DECTST */
1008 return TERM_CMD_DECTST;
1009 else if (flags == TERM_SEQ_FLAG_MULT) /* DECRQCRA */
1010 return TERM_CMD_DECRQCRA;
1011 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKFMR */
1012 return TERM_CMD_DECPKFMR;
1015 if (flags == 0) /* CBT */
1016 return TERM_CMD_CBT;
1019 if (flags == TERM_SEQ_FLAG_CASH) /* DECERA */
1020 return TERM_CMD_DECERA;
1021 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECELR */
1022 return TERM_CMD_DECELR;
1023 else if (flags == TERM_SEQ_FLAG_MULT) /* DECINVM */
1024 return TERM_CMD_DECINVM;
1025 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKA */
1026 return TERM_CMD_DECPKA;
1029 if (flags == 0) /* ICH */
1030 return TERM_CMD_ICH;
1033 if (flags == 0) /* HPA */
1034 return TERM_CMD_HPA;
1037 if (flags == TERM_SEQ_FLAG_CASH) /* DECSERA */
1038 return TERM_CMD_DECSERA;
1039 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECSLE */
1040 return TERM_CMD_DECSLE;
1043 if (flags == TERM_SEQ_FLAG_CASH) /* DECSCPP */
1044 return TERM_CMD_DECSCPP;
1045 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECRQLP */
1046 return TERM_CMD_DECRQLP;
1047 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSNLS */
1048 return TERM_CMD_DECSNLS;
1051 if (flags == TERM_SEQ_FLAG_SPACE) /* DECKBD */
1052 return TERM_CMD_DECKBD;
1053 else if (flags == TERM_SEQ_FLAG_CASH) /* DECSASD */
1054 return TERM_CMD_DECSASD;
1055 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECIC */
1056 return TERM_CMD_DECIC;
1059 if (flags == TERM_SEQ_FLAG_SPACE) /* DECTME */
1060 return TERM_CMD_DECTME;
1061 else if (flags == TERM_SEQ_FLAG_CASH) /* DECSSDT */
1062 return TERM_CMD_DECSSDT;
1063 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECDC */
1064 return TERM_CMD_DECDC;
/* no terminator/intermediate combination matched */
1068 return TERM_CMD_NONE;
1073 * This parser controls the parser-state and returns any detected sequence to
1074 * the caller. The parser is based on this state-diagram from Paul Williams:
1075 * http://vt100.net/emu/
1076 * It was written from scratch and extended where needed.
1077 * This parser is fully compatible up to the vt500 series. We expect UCS-4 as
1078 * input. It's the caller's responsibility to do any UTF-8 parsing.
/*
 * States of the control-sequence parser. STATE_NONE comes first; when passed
 * to parser_transition() as target state it leaves the current state
 * untouched.
 * NOTE(review): the enum header and its closing lines are not visible in this
 * extract.
 */
1082 STATE_NONE, /* placeholder */
1083 STATE_GROUND, /* initial state and ground */
1084 STATE_ESC, /* ESC sequence was started */
1085 STATE_ESC_INT, /* intermediate escape characters */
1086 STATE_CSI_ENTRY, /* starting CSI sequence */
1087 STATE_CSI_PARAM, /* CSI parameters */
1088 STATE_CSI_INT, /* intermediate CSI characters */
1089 STATE_CSI_IGNORE, /* CSI error; ignore this CSI sequence */
1090 STATE_DCS_ENTRY, /* starting DCS sequence */
1091 STATE_DCS_PARAM, /* DCS parameters */
1092 STATE_DCS_INT, /* intermediate DCS characters */
1093 STATE_DCS_PASS, /* DCS data passthrough */
1094 STATE_DCS_IGNORE, /* DCS error; ignore this DCS sequence */
1095 STATE_OSC_STRING, /* parsing OSC sequence */
1096 STATE_ST_IGNORE, /* unimplemented seq; ignore until ST */
/*
 * Actions the state machine can request for an input character; they are
 * carried out by parser_transition(). The DCS and OSC actions are accepted
 * there but currently do nothing.
 */
1100 enum parser_action {
1101 ACTION_NONE, /* placeholder */
1102 ACTION_CLEAR, /* clear parameters */
1103 ACTION_IGNORE, /* ignore the character entirely */
1104 ACTION_PRINT, /* print the character on the console */
1105 ACTION_EXECUTE, /* execute single control character (C0/C1) */
1106 ACTION_COLLECT, /* collect intermediate character */
1107 ACTION_PARAM, /* collect parameter character */
1108 ACTION_ESC_DISPATCH, /* dispatch escape sequence */
1109 ACTION_CSI_DISPATCH, /* dispatch csi sequence */
1110 ACTION_DCS_START, /* start of DCS data */
1111 ACTION_DCS_COLLECT, /* collect DCS data */
1112 ACTION_DCS_CONSUME, /* consume DCS terminator */
1113 ACTION_DCS_DISPATCH, /* dispatch dcs sequence */
1114 ACTION_OSC_START, /* start of OSC data */
1115 ACTION_OSC_COLLECT, /* collect OSC data */
1116 ACTION_OSC_CONSUME, /* consume OSC terminator */
1117 ACTION_OSC_DISPATCH, /* dispatch osc sequence */
1121 int term_parser_new(term_parser **out, bool host) {
1122 _term_parser_free_ term_parser *parser = NULL;
1124 assert_return(out, -EINVAL);
1126 parser = new0(term_parser, 1);
1130 parser->is_host = host;
1131 parser->st_alloc = 64;
1132 parser->seq.st = new0(char, parser->st_alloc + 1);
1133 if (!parser->seq.st)
1141 term_parser *term_parser_free(term_parser *parser) {
1145 free(parser->seq.st);
1150 static inline void parser_clear(term_parser *parser) {
1153 parser->seq.command = TERM_CMD_NONE;
1154 parser->seq.terminator = 0;
1155 parser->seq.intermediates = 0;
1156 parser->seq.charset = TERM_CHARSET_NONE;
1157 parser->seq.n_args = 0;
1158 for (i = 0; i < TERM_PARSER_ARG_MAX; ++i)
1159 parser->seq.args[i] = -1;
1161 parser->seq.n_st = 0;
1162 parser->seq.st[0] = 0;
1165 static int parser_ignore(term_parser *parser, uint32_t raw) {
1166 parser_clear(parser);
1167 parser->seq.type = TERM_SEQ_IGNORE;
1168 parser->seq.command = TERM_CMD_NONE;
1169 parser->seq.terminator = raw;
1170 parser->seq.charset = TERM_CHARSET_NONE;
1172 return parser->seq.type;
1175 static int parser_print(term_parser *parser, uint32_t raw) {
1176 parser_clear(parser);
1177 parser->seq.type = TERM_SEQ_GRAPHIC;
1178 parser->seq.command = TERM_CMD_GRAPHIC;
1179 parser->seq.terminator = raw;
1180 parser->seq.charset = TERM_CHARSET_NONE;
1182 return parser->seq.type;
1185 static int parser_execute(term_parser *parser, uint32_t raw) {
1186 parser_clear(parser);
1187 parser->seq.type = TERM_SEQ_CONTROL;
1188 parser->seq.command = TERM_CMD_GRAPHIC;
1189 parser->seq.terminator = raw;
1190 parser->seq.charset = TERM_CHARSET_NONE;
1191 if (!parser->is_host)
1192 parser->seq.command = term_parse_host_control(&parser->seq);
1194 return parser->seq.type;
1197 static void parser_collect(term_parser *parser, uint32_t raw) {
1199 * Usually, characters from 0x30 to 0x3f are only allowed as leading
1200 * markers (or as part of the parameters), characters from 0x20 to 0x2f
1201 * are only allowed as trailing markers. However, our state-machine
1202 * already verifies those restrictions so we can handle them the same
1203 * way here. Note that we safely allow markers to be specified multiple
1207 if (raw >= 0x20 && raw <= 0x3f)
1208 parser->seq.intermediates |= 1 << (raw - 0x20);
1211 static void parser_param(term_parser *parser, uint32_t raw) {
1215 if (parser->seq.n_args < TERM_PARSER_ARG_MAX)
1216 ++parser->seq.n_args;
1221 if (parser->seq.n_args >= TERM_PARSER_ARG_MAX)
1224 if (raw >= '0' && raw <= '9') {
1225 new = parser->seq.args[parser->seq.n_args];
1228 new = new * 10 + raw - '0';
1230 /* VT510 tells us to clamp all values to [0, 9999], however, it
1231 * also allows commands with values up to 2^15-1. We simply use
1232 * 2^16 as maximum here to be compatible to all commands, but
1233 * avoid overflows in any calculations. */
1237 parser->seq.args[parser->seq.n_args] = new;
1241 static int parser_esc(term_parser *parser, uint32_t raw) {
1242 parser->seq.type = TERM_SEQ_ESCAPE;
1243 parser->seq.command = TERM_CMD_NONE;
1244 parser->seq.terminator = raw;
1245 parser->seq.charset = TERM_CHARSET_NONE;
1246 if (!parser->is_host)
1247 parser->seq.command = term_parse_host_escape(&parser->seq, &parser->seq.charset);
1249 return parser->seq.type;
1252 static int parser_csi(term_parser *parser, uint32_t raw) {
1253 /* parser->seq is cleared during CSI-ENTER state, thus there's no need
1254 * to clear invalid fields here. */
1256 if (parser->seq.n_args < TERM_PARSER_ARG_MAX) {
1257 if (parser->seq.n_args > 0 ||
1258 parser->seq.args[parser->seq.n_args] >= 0)
1259 ++parser->seq.n_args;
1262 parser->seq.type = TERM_SEQ_CSI;
1263 parser->seq.command = TERM_CMD_NONE;
1264 parser->seq.terminator = raw;
1265 parser->seq.charset = TERM_CHARSET_NONE;
1266 if (!parser->is_host)
1267 parser->seq.command = term_parse_host_csi(&parser->seq);
1269 return parser->seq.type;
1272 /* perform state transition and dispatch related actions */
1273 static int parser_transition(term_parser *parser, uint32_t raw, unsigned int state, unsigned int action) {
1274 if (state != STATE_NONE)
1275 parser->state = state;
1279 return TERM_SEQ_NONE;
1281 parser_clear(parser);
1282 return TERM_SEQ_NONE;
1284 return parser_ignore(parser, raw);
1286 return parser_print(parser, raw);
1287 case ACTION_EXECUTE:
1288 return parser_execute(parser, raw);
1289 case ACTION_COLLECT:
1290 parser_collect(parser, raw);
1291 return TERM_SEQ_NONE;
1293 parser_param(parser, raw);
1294 return TERM_SEQ_NONE;
1295 case ACTION_ESC_DISPATCH:
1296 return parser_esc(parser, raw);
1297 case ACTION_CSI_DISPATCH:
1298 return parser_csi(parser, raw);
1299 case ACTION_DCS_START:
1300 /* not implemented */
1301 return TERM_SEQ_NONE;
1302 case ACTION_DCS_COLLECT:
1303 /* not implemented */
1304 return TERM_SEQ_NONE;
1305 case ACTION_DCS_CONSUME:
1306 /* not implemented */
1307 return TERM_SEQ_NONE;
1308 case ACTION_DCS_DISPATCH:
1309 /* not implemented */
1310 return TERM_SEQ_NONE;
1311 case ACTION_OSC_START:
1312 /* not implemented */
1313 return TERM_SEQ_NONE;
1314 case ACTION_OSC_COLLECT:
1315 /* not implemented */
1316 return TERM_SEQ_NONE;
1317 case ACTION_OSC_CONSUME:
1318 /* not implemented */
1319 return TERM_SEQ_NONE;
1320 case ACTION_OSC_DISPATCH:
1321 /* not implemented */
1322 return TERM_SEQ_NONE;
1324 assert_not_reached("invalid vte-parser action");
1325 return TERM_SEQ_NONE;
/*
 * parser_feed_to_state() - Classify one input character for the current state
 * @parser: parser object; parser->state selects which table below applies
 * @raw: input character to classify
 *
 * Each table maps ranges of @raw to a (target state, action) pair and hands
 * that pair to parser_transition(), returning the result of that call.
 * STATE_NONE as target presumably means "keep the current state"; that is
 * defined by parser_transition(), outside this view (TODO confirm).
 *
 * NOTE(review): several case labels, including some "case STATE_...:" lines,
 * are not visible in this view. Where a return statement below has no visible
 * label, the guarding condition has to be confirmed against the full file.
 *
 * Returns: the value returned by parser_transition() for the chosen
 * transition.
 */
1329 static int parser_feed_to_state(term_parser *parser, uint32_t raw) {
1330 switch (parser->state) {
1333 * During initialization, parser->state is cleared. Treat this
1334 * as STATE_GROUND. We will then never get to STATE_NONE again.
/*
 * Ground table: C0 and the listed C1 codes are executed. The ACTION_IGNORE
 * return has no visible label (presumably ST, given the "C1 \ { ST }" note).
 * The ACTION_PRINT return is the last one of this table.
 */
1338 case 0x00 ... 0x1f: /* C0 */
1339 case 0x80 ... 0x9b: /* C1 \ { ST } */
1341 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1343 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1346 return parser_transition(parser, raw, STATE_NONE, ACTION_PRINT);
/*
 * NOTE(review): the state label of this table is not visible. The transitions
 * (collect into STATE_ESC_INT, ACTION_ESC_DISPATCH, hand-off to the DCS, CSI
 * and OSC entry states) suggest it is the table used right after ESC; confirm.
 */
1349 case 0x00 ... 0x1f: /* C0 */
1350 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1351 case 0x20 ... 0x2f: /* [' ' - '/'] */
1352 return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT);
1353 case 0x30 ... 0x4f: /* ['0' - '~'] \ { 'P', 'X', '[', ']', '^', '_' } */
1358 return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH);
1359 case 0x50: /* 'P' */
1360 return parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR);
1361 case 0x5b: /* '[' */
1362 return parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR);
1363 case 0x5d: /* ']' */
1364 return parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR);
1365 case 0x58: /* 'X' */
1366 case 0x5e: /* '^' */
1367 case 0x5f: /* '_' */
1368 return parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE);
1369 case 0x7f: /* DEL */
1370 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1372 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1375 return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT);
/*
 * NOTE(review): state label not visible; presumably STATE_ESC_INT, since
 * intermediates are collected with target STATE_NONE and a byte in 0x30-0x7e
 * dispatches via ACTION_ESC_DISPATCH. Confirm against the full file.
 */
1378 case 0x00 ... 0x1f: /* C0 */
1379 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1380 case 0x20 ... 0x2f: /* [' ' - '/'] */
1381 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1382 case 0x30 ... 0x7e: /* ['0' - '~'] */
1383 return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH);
1384 case 0x7f: /* DEL */
1385 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1387 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1390 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
/*
 * First character of a CSI sequence: digits and ';' start parameter parsing,
 * '<'..'?' are collected, ':' sends the sequence to STATE_CSI_IGNORE, and a
 * byte in 0x40-0x7e dispatches immediately.
 */
1391 case STATE_CSI_ENTRY:
1393 case 0x00 ... 0x1f: /* C0 */
1394 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1395 case 0x20 ... 0x2f: /* [' ' - '/'] */
1396 return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT);
1397 case 0x3a: /* ':' */
1398 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1399 case 0x30 ... 0x39: /* ['0' - '9'] */
1400 case 0x3b: /* ';' */
1401 return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_PARAM);
1402 case 0x3c ... 0x3f: /* ['<' - '?'] */
1403 return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_COLLECT);
1404 case 0x40 ... 0x7e: /* ['@' - '~'] */
1405 return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
1406 case 0x7f: /* DEL */
1407 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1409 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1412 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
/*
 * Inside CSI parameters: further digits and ';' feed ACTION_PARAM, while ':'
 * or a late '<'..'?' sends the sequence to STATE_CSI_IGNORE.
 */
1413 case STATE_CSI_PARAM:
1415 case 0x00 ... 0x1f: /* C0 */
1416 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1417 case 0x20 ... 0x2f: /* [' ' - '/'] */
1418 return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT);
1419 case 0x30 ... 0x39: /* ['0' - '9'] */
1420 case 0x3b: /* ';' */
1421 return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM);
1422 case 0x3a: /* ':' */
1423 case 0x3c ... 0x3f: /* ['<' - '?'] */
1424 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1425 case 0x40 ... 0x7e: /* ['@' - '~'] */
1426 return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
1427 case 0x7f: /* DEL */
1428 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1430 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1433 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
/*
 * NOTE(review): state label not visible; presumably STATE_CSI_INT, since
 * intermediates are collected with target STATE_NONE and any byte in
 * 0x30-0x3f sends the sequence to STATE_CSI_IGNORE. Confirm.
 */
1436 case 0x00 ... 0x1f: /* C0 */
1437 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1438 case 0x20 ... 0x2f: /* [' ' - '/'] */
1439 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1440 case 0x30 ... 0x3f: /* ['0' - '?'] */
1441 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1442 case 0x40 ... 0x7e: /* ['@' - '~'] */
1443 return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
1444 case 0x7f: /* DEL */
1445 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1447 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1450 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
/*
 * C0 codes are still executed here. Bytes in 0x20-0x3f carry ACTION_NONE, and
 * a byte in 0x40-0x7e moves to STATE_GROUND with ACTION_NONE, i.e. without a
 * CSI dispatch.
 */
1451 case STATE_CSI_IGNORE:
1453 case 0x00 ... 0x1f: /* C0 */
1454 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1455 case 0x20 ... 0x3f: /* [' ' - '?'] */
1456 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
1457 case 0x40 ... 0x7e: /* ['@' - '~'] */
1458 return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE);
1459 case 0x7f: /* DEL */
1460 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1462 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1465 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
/*
 * Same character classes as the CSI entry table, but C0 codes are ignored
 * instead of executed, and a byte in 0x40-0x7e switches to STATE_DCS_PASS
 * with ACTION_DCS_CONSUME.
 */
1466 case STATE_DCS_ENTRY:
1468 case 0x00 ... 0x1f: /* C0 */
1469 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1470 case 0x20 ... 0x2f: /* [' ' - '/'] */
1471 return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT);
1472 case 0x3a: /* ':' */
1473 return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
1474 case 0x30 ... 0x39: /* ['0' - '9'] */
1475 case 0x3b: /* ';' */
1476 return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_PARAM);
1477 case 0x3c ... 0x3f: /* ['<' - '?'] */
1478 return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_COLLECT);
1479 case 0x40 ... 0x7e: /* ['@' - '~'] */
1480 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1481 case 0x7f: /* DEL */
1482 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1484 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1487 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
/*
 * Inside DCS parameters: further digits and ';' feed ACTION_PARAM, while ':'
 * or a late '<'..'?' sends the sequence to STATE_DCS_IGNORE.
 */
1488 case STATE_DCS_PARAM:
1490 case 0x00 ... 0x1f: /* C0 */
1491 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1492 case 0x20 ... 0x2f: /* [' ' - '/'] */
1493 return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT);
1494 case 0x30 ... 0x39: /* ['0' - '9'] */
1495 case 0x3b: /* ';' */
1496 return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM);
1497 case 0x3a: /* ':' */
1498 case 0x3c ... 0x3f: /* ['<' - '?'] */
1499 return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
1500 case 0x40 ... 0x7e: /* ['@' - '~'] */
1501 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1502 case 0x7f: /* DEL */
1503 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1505 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1508 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
/*
 * NOTE(review): state label not visible; presumably STATE_DCS_INT, since
 * intermediates are collected with target STATE_NONE and any byte in
 * 0x30-0x3f sends the sequence to STATE_DCS_IGNORE. Confirm.
 */
1511 case 0x00 ... 0x1f: /* C0 */
1512 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1513 case 0x20 ... 0x2f: /* [' ' - '/'] */
1514 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1515 case 0x30 ... 0x3f: /* ['0' - '?'] */
1516 return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
1517 case 0x40 ... 0x7e: /* ['@' - '~'] */
1518 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1519 case 0x7f: /* DEL */
1520 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1522 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1525 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
/*
 * DCS payload: bytes 0x00-0x7e go to ACTION_DCS_COLLECT and DEL is ignored.
 * The STATE_GROUND / ACTION_DCS_DISPATCH return ends the string; its case
 * label is not visible in this view (presumably ST; confirm).
 */
1526 case STATE_DCS_PASS:
1528 case 0x00 ... 0x7e: /* ASCII \ { DEL } */
1529 return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT);
1530 case 0x7f: /* DEL */
1531 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1533 return parser_transition(parser, raw, STATE_GROUND, ACTION_DCS_DISPATCH);
1536 return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT);
/*
 * Discarded DCS string: ASCII is ignored. The return to STATE_GROUND uses
 * ACTION_NONE, so nothing is dispatched.
 */
1537 case STATE_DCS_IGNORE:
1539 case 0x00 ... 0x7f: /* ASCII */
1540 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1542 return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE);
1545 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
/*
 * OSC string body: 0x20-0x7f is collected via ACTION_OSC_COLLECT, C0 codes
 * other than BEL are ignored, and BEL dispatches the string and returns to
 * STATE_GROUND.
 */
1546 case STATE_OSC_STRING:
1548 case 0x00 ... 0x06: /* C0 \ { BEL } */
1550 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1551 case 0x20 ... 0x7f: /* [' ' - DEL] */
1552 return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT);
1553 case 0x07: /* BEL */
1555 return parser_transition(parser, raw, STATE_GROUND, ACTION_OSC_DISPATCH);
1558 return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT);
/*
 * String types that are skipped entirely (entered with ACTION_NONE): ASCII is
 * ignored until the transition back to STATE_GROUND.
 */
1559 case STATE_ST_IGNORE:
1561 case 0x00 ... 0x7f: /* ASCII */
1562 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1564 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1567 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
/* Presumably only reachable when parser->state has no table above. */
1570 assert_not_reached("bad vte-parser state");
1574 int term_parser_feed(term_parser *parser, const term_seq **seq_out, uint32_t raw) {
1577 assert_return(parser, -EINVAL);
1578 assert_return(seq_out, -EINVAL);
1582 * * DEC treats GR codes as GL. We don't do that as we require UTF-8
1583 * as charset and, thus, it doesn't make sense to treat GR special.
1584 * * During control sequences, unexpected C1 codes cancel the sequence
1585 * and immediately start a new one. C0 codes, however, may or may not
1586 * be ignored/executed depending on the sequence.
1590 case 0x18: /* CAN */
1591 r = parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1593 case 0x1a: /* SUB */
1594 r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE);
1596 case 0x80 ... 0x8f: /* C1 \ {DCS, SOS, CSI, ST, OSC, PM, APC} */
1599 r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE);
1601 case 0x1b: /* ESC */
1602 r = parser_transition(parser, raw, STATE_ESC, ACTION_CLEAR);
1604 case 0x98: /* SOS */
1606 case 0x9f: /* APC */
1607 r = parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE);
1609 case 0x90: /* DCS */
1610 r = parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR);
1612 case 0x9d: /* OSC */
1613 r = parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR);
1615 case 0x9b: /* CSI */
1616 r = parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR);
1619 r = parser_feed_to_state(parser, raw);
1626 *seq_out = &parser->seq;