1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 * This file contains a bunch of UTF-8 helpers and the main ctlseq-parser. The
25 * parser is a simple state-machine that correctly parses all CSI, DCS, OSC, ST
26 * control sequences and generic escape sequences.
27 * The parser itself does not perform any actions but lets the caller react to
35 #include "term-internal.h"
/*
 * Built-in palette, one { red, green, blue } row per entry.
 *
 * Rows 0-7 hold the regular colors and rows 8-15 their light variants.
 * Rows 16 and 17 are the entries looked up for TERM_CCODE_DEFAULT: 16 for
 * the foreground color, 17 for any other color.
 */
static const uint8_t default_palette[18][3] = {
        /* regular colors */
        [0]  = { 0x00, 0x00, 0x00 }, /* black */
        [1]  = { 0xcd, 0x00, 0x00 }, /* red */
        [2]  = { 0x00, 0xcd, 0x00 }, /* green */
        [3]  = { 0xcd, 0xcd, 0x00 }, /* yellow */
        [4]  = { 0x00, 0x00, 0xee }, /* blue */
        [5]  = { 0xcd, 0x00, 0xcd }, /* magenta */
        [6]  = { 0x00, 0xcd, 0xcd }, /* cyan */
        [7]  = { 0xe5, 0xe5, 0xe5 }, /* light grey */

        /* light colors */
        [8]  = { 0x7f, 0x7f, 0x7f }, /* dark grey */
        [9]  = { 0xff, 0x00, 0x00 }, /* light red */
        [10] = { 0x00, 0xff, 0x00 }, /* light green */
        [11] = { 0xff, 0xff, 0x00 }, /* light yellow */
        [12] = { 0x5c, 0x5c, 0xff }, /* light blue */
        [13] = { 0xff, 0x00, 0xff }, /* light magenta */
        [14] = { 0x00, 0xff, 0xff }, /* light cyan */
        [15] = { 0xff, 0xff, 0xff }, /* white */

        /* defaults */
        [16] = { 0xe5, 0xe5, 0xe5 }, /* light grey */
        [17] = { 0x00, 0x00, 0x00 }, /* black */
};
60 static uint32_t term_color_to_argb32(const term_color *color, const term_attr *attr, const uint8_t *palette) {
61 static const uint8_t bval[] = {
70 palette = (void*)default_palette;
72 switch (color->ccode) {
82 r = palette[t * 3 + 0];
83 g = palette[t * 3 + 1];
84 b = palette[t * 3 + 2];
93 t = (t - 232) * 10 + 8;
100 case TERM_CCODE_BLACK ... TERM_CCODE_LIGHT_WHITE:
101 t = color->ccode - TERM_CCODE_BLACK;
103 /* bold causes light colors (only for foreground colors) */
104 if (t < 8 && attr->bold && color == &attr->fg)
107 r = palette[t * 3 + 0];
108 g = palette[t * 3 + 1];
109 b = palette[t * 3 + 2];
111 case TERM_CCODE_DEFAULT:
114 t = 16 + !(color == &attr->fg);
115 r = palette[t * 3 + 0];
116 g = palette[t * 3 + 1];
117 b = palette[t * 3 + 2];
121 return (0xff << 24) | (r << 16) | (g << 8) | b;
125 * term_attr_to_argb32() - Encode terminal colors as native ARGB32 value
126 * @attr: Terminal attributes to work on
127 * @fg: Storage for foreground color (or NULL)
128 * @bg: Storage for background color (or NULL)
129 * @palette: The color palette to use (or NULL for default)
131 * This encodes the colors attr->fg and attr->bg as native-endian ARGB32 values
132 * and returns them. Any color conversions are automatically applied.
134 void term_attr_to_argb32(const term_attr *attr, uint32_t *fg, uint32_t *bg, const uint8_t *palette) {
139 f = term_color_to_argb32(&attr->fg, attr, palette);
140 b = term_color_to_argb32(&attr->bg, attr, palette);
155 * term_utf8_decode() - Try decoding the next UCS-4 character
156 * @p: decoder object to operate on or NULL
157 * @out_buf: output storage for pointer to decoded UCS-4 string or NULL
158 * @c: next char to push into decoder
160 * This decodes a UTF-8 stream. It must be called for each input-byte of the
161 * UTF-8 stream and returns a UCS-4 stream. A pointer to the parsed UCS-4
162 * string is stored in @out_buf if non-NULL. The length of this string (number
163 * of parsed UCS4 characters) is returned as result. The string is not
164 * zero-terminated! Furthermore, the string is only valid until the next
165 * invocation of this function. It is also bound to the parser state @p and
166 * must not be freed nor written to by the caller.
168 * This function is highly optimized to work with terminal-emulators. Instead
169 * of being strict about UTF-8 validity, this tries to perform a fallback to
170 * ISO-8859-1 in case a wrong series was detected. Therefore, this function
171 * might return multiple UCS-4 characters by parsing just a single UTF-8 byte.
173 * The parser state @p should be allocated and managed by the caller. There're
174 * no helpers to do that for you. To initialize it, simply reset it to all
175 * zero. You can reset or free the object at any point in time.
177 * Returns: Number of parsed UCS4 characters
179 size_t term_utf8_decode(term_utf8 *p, uint32_t **out_buf, char c) {
180 static uint32_t ucs4_null = 0;
181 uint32_t t, *res = NULL;
190 if (!p->valid || p->i_bytes >= p->n_bytes) {
192 * If the previous sequence was invalid or fully parsed, start
193 * parsing a fresh new sequence.
196 if ((byte & 0xE0) == 0xC0) {
197 /* start of two byte sequence */
202 } else if ((byte & 0xF0) == 0xE0) {
203 /* start of three byte sequence */
208 } else if ((byte & 0xF8) == 0xF0) {
209 /* start of four byte sequence */
216 * - single ASCII 7-bit char
217 * - out-of-sync continuation byte
218 * - overlong encoding
219 * All of them are treated as single byte ISO-8859-1 */
227 p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes));
230 * ..otherwise, try to continue the previous sequence..
233 if ((byte & 0xC0) == 0x80) {
235 * Valid continuation byte. Append to sequence and
236 * update the ucs4 cache accordingly.
240 p->chars[p->i_bytes++] = byte;
241 p->ucs4 |= t << (6 * (p->n_bytes - p->i_bytes));
244 * Invalid continuation? Treat cached sequence as
245 * ISO-8859-1, but parse the new char as valid new
246 * starting character. If it's a new single-byte UTF-8
247 * sequence, we immediately return it in the same run,
248 * otherwise, we might suffer from starvation.
251 if ((byte & 0xE0) == 0xC0 ||
252 (byte & 0xF0) == 0xE0 ||
253 (byte & 0xF8) == 0xF0) {
255 * New multi-byte sequence. Move to-be-returned
256 * data at the end and start new sequence. Only
257 * return the old sequence.
260 memmove(p->chars + 1,
262 sizeof(*p->chars) * p->i_bytes);
266 if ((byte & 0xE0) == 0xC0) {
267 /* start of two byte sequence */
272 } else if ((byte & 0xF0) == 0xE0) {
273 /* start of three byte sequence */
278 } else if ((byte & 0xF8) == 0xF0) {
279 /* start of four byte sequence */
285 assert_not_reached("Should not happen");
288 p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes));
293 * New single byte sequence, append to output
294 * and return combined sequence.
297 p->chars[p->i_bytes++] = byte;
304 * Check whether a full sequence (valid or invalid) has been parsed and
305 * then return it. Otherwise, return nothing.
308 /* still parsing? then bail out */
309 if (p->i_bytes < p->n_bytes)
325 *out_buf = res ? : &ucs4_null;
331 * The ctl-seq parser "term_parser" only detects whole sequences, it does not
332 * detect the specific command. Once a sequence is parsed, the command-parsers
333 * are used to figure out their meaning. Note that this depends on whether we
334 * run on the host or terminal side.
337 static unsigned int term_parse_host_control(const term_seq *seq) {
338 assert_return(seq, TERM_CMD_NONE);
340 switch (seq->terminator) {
342 return TERM_CMD_NULL;
368 /* this is already handled by the state-machine */
373 /* this is already handled by the state-machine */
376 /* this is already handled by the state-machine */
391 /* this is already handled by the state-machine */
398 /* this is already handled by the state-machine */
400 case 0x9a: /* DECID */
401 return TERM_CMD_DECID;
403 /* this is already handled by the state-machine */
408 /* this is already handled by the state-machine */
411 /* this is already handled by the state-machine */
414 /* this is already handled by the state-machine */
418 return TERM_CMD_NONE;
421 static inline int charset_from_cmd(uint32_t raw, unsigned int flags, bool require_96) {
422 static const struct {
426 /* 96-compat charsets */
427 [TERM_CHARSET_ISO_LATIN1_SUPPLEMENTAL] = { .raw = 'A', .flags = 0 },
428 [TERM_CHARSET_ISO_LATIN2_SUPPLEMENTAL] = { .raw = 'B', .flags = 0 },
429 [TERM_CHARSET_ISO_LATIN5_SUPPLEMENTAL] = { .raw = 'M', .flags = 0 },
430 [TERM_CHARSET_ISO_GREEK_SUPPLEMENTAL] = { .raw = 'F', .flags = 0 },
431 [TERM_CHARSET_ISO_HEBREW_SUPPLEMENTAL] = { .raw = 'H', .flags = 0 },
432 [TERM_CHARSET_ISO_LATIN_CYRILLIC] = { .raw = 'L', .flags = 0 },
434 /* 94-compat charsets */
435 [TERM_CHARSET_DEC_SPECIAL_GRAPHIC] = { .raw = '0', .flags = 0 },
436 [TERM_CHARSET_DEC_SUPPLEMENTAL] = { .raw = '5', .flags = TERM_SEQ_FLAG_PERCENT },
437 [TERM_CHARSET_DEC_TECHNICAL] = { .raw = '>', .flags = 0 },
438 [TERM_CHARSET_CYRILLIC_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_AND },
439 [TERM_CHARSET_DUTCH_NRCS] = { .raw = '4', .flags = 0 },
440 [TERM_CHARSET_FINNISH_NRCS] = { .raw = '5', .flags = 0 },
441 [TERM_CHARSET_FRENCH_NRCS] = { .raw = 'R', .flags = 0 },
442 [TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = '9', .flags = 0 },
443 [TERM_CHARSET_GERMAN_NRCS] = { .raw = 'K', .flags = 0 },
444 [TERM_CHARSET_GREEK_DEC] = { .raw = '?', .flags = TERM_SEQ_FLAG_DQUOTE },
445 [TERM_CHARSET_GREEK_NRCS] = { .raw = '>', .flags = TERM_SEQ_FLAG_DQUOTE },
446 [TERM_CHARSET_HEBREW_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_DQUOTE },
447 [TERM_CHARSET_HEBREW_NRCS] = { .raw = '=', .flags = TERM_SEQ_FLAG_PERCENT },
448 [TERM_CHARSET_ITALIAN_NRCS] = { .raw = 'Y', .flags = 0 },
449 [TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '`', .flags = 0 },
450 [TERM_CHARSET_PORTUGUESE_NRCS] = { .raw = '6', .flags = TERM_SEQ_FLAG_PERCENT },
451 [TERM_CHARSET_RUSSIAN_NRCS] = { .raw = '5', .flags = TERM_SEQ_FLAG_AND },
452 [TERM_CHARSET_SCS_NRCS] = { .raw = '3', .flags = TERM_SEQ_FLAG_PERCENT },
453 [TERM_CHARSET_SPANISH_NRCS] = { .raw = 'Z', .flags = 0 },
454 [TERM_CHARSET_SWEDISH_NRCS] = { .raw = '7', .flags = 0 },
455 [TERM_CHARSET_SWISS_NRCS] = { .raw = '=', .flags = 0 },
456 [TERM_CHARSET_TURKISH_DEC] = { .raw = '0', .flags = TERM_SEQ_FLAG_PERCENT },
457 [TERM_CHARSET_TURKISH_NRCS] = { .raw = '2', .flags = TERM_SEQ_FLAG_PERCENT },
459 /* special charsets */
460 [TERM_CHARSET_USERPREF_SUPPLEMENTAL] = { .raw = '<', .flags = 0 },
462 /* secondary choices */
463 [TERM_CHARSET_CNT + TERM_CHARSET_FINNISH_NRCS] = { .raw = 'C', .flags = 0 },
464 [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_NRCS] = { .raw = 'f', .flags = 0 },
465 [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = 'Q', .flags = 0 },
466 [TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = 'E', .flags = 0 },
467 [TERM_CHARSET_CNT + TERM_CHARSET_SWEDISH_NRCS] = { .raw = 'H', .flags = 0 }, /* unused; conflicts with ISO_HEBREW */
469 /* tertiary choices */
470 [TERM_CHARSET_CNT + TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '6', .flags = 0 },
475 * Secondary choice on SWEDISH_NRCS and primary choice on
476 * ISO_HEBREW_SUPPLEMENTAL have a conflict: raw=="H", flags==0.
477 * We always choose the ISO 96-compat set, which is what VT510 does.
480 for (i = 0; i < ELEMENTSOF(charset_cmds); ++i) {
481 if (charset_cmds[i].raw == raw && charset_cmds[i].flags == flags) {
483 while (cs >= TERM_CHARSET_CNT)
484 cs -= TERM_CHARSET_CNT;
486 if (!require_96 || cs < TERM_CHARSET_96_CNT || cs >= TERM_CHARSET_94_CNT)
/*
 * Tell whether @value is a power of two, i.e. has exactly one bit set.
 *
 * Clearing the lowest set bit (value & (value - 1)) leaves zero only when
 * there was a single bit to begin with; zero itself is rejected explicitly.
 */
static inline bool exactly_one_bit_set(unsigned int value) {
        if (value == 0)
                return false;

        return (value & (value - 1)) == 0;
}
499 static unsigned int term_parse_host_escape(const term_seq *seq, unsigned int *cs_out) {
500 unsigned int t, flags;
503 assert_return(seq, TERM_CMD_NONE);
505 flags = seq->intermediates;
506 t = TERM_SEQ_FLAG_POPEN | TERM_SEQ_FLAG_PCLOSE | TERM_SEQ_FLAG_MULT |
507 TERM_SEQ_FLAG_PLUS | TERM_SEQ_FLAG_MINUS | TERM_SEQ_FLAG_DOT |
510 if (exactly_one_bit_set(flags & t)) {
512 case TERM_SEQ_FLAG_POPEN:
513 case TERM_SEQ_FLAG_PCLOSE:
514 case TERM_SEQ_FLAG_MULT:
515 case TERM_SEQ_FLAG_PLUS:
516 cs = charset_from_cmd(seq->terminator, flags & ~t, false);
518 case TERM_SEQ_FLAG_MINUS:
519 case TERM_SEQ_FLAG_DOT:
520 case TERM_SEQ_FLAG_SLASH:
521 cs = charset_from_cmd(seq->terminator, flags & ~t, true);
534 /* looked like a charset-cmd but wasn't; continue */
537 switch (seq->terminator) {
539 if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL top-half */
540 return TERM_CMD_DECDHL_TH;
543 if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL bottom-half */
544 return TERM_CMD_DECDHL_BH;
547 if (flags == TERM_SEQ_FLAG_HASH) /* DECSWL */
548 return TERM_CMD_DECSWL;
551 if (flags == 0) /* DECBI */
552 return TERM_CMD_DECBI;
553 else if (flags == TERM_SEQ_FLAG_HASH) /* DECDWL */
554 return TERM_CMD_DECDWL;
557 if (flags == 0) /* DECSC */
558 return TERM_CMD_DECSC;
561 if (flags == 0) /* DECRC */
562 return TERM_CMD_DECRC;
563 else if (flags == TERM_SEQ_FLAG_HASH) /* DECALN */
564 return TERM_CMD_DECALN;
567 if (flags == 0) /* DECFI */
568 return TERM_CMD_DECFI;
571 if (flags == 0) /* DECANM */
572 return TERM_CMD_DECANM;
575 if (flags == 0) /* DECKPAM */
576 return TERM_CMD_DECKPAM;
579 if (flags == 0) /* DECKPNM */
580 return TERM_CMD_DECKPNM;
583 if (flags == TERM_SEQ_FLAG_PERCENT) {
584 /* Select default character set */
585 return TERM_CMD_XTERM_SDCS;
589 if (flags == 0) /* IND */
593 if (flags == 0) /* NEL */
597 if (flags == 0) /* Cursor to lower-left corner of screen */
598 return TERM_CMD_XTERM_CLLHP;
599 else if (flags == TERM_SEQ_FLAG_SPACE) /* S7C1T */
600 return TERM_CMD_S7C1T;
603 if (flags == TERM_SEQ_FLAG_SPACE) { /* S8C1T */
604 return TERM_CMD_S8C1T;
605 } else if (flags == TERM_SEQ_FLAG_PERCENT) {
606 /* Select UTF-8 character set */
607 return TERM_CMD_XTERM_SUCS;
611 if (flags == 0) /* HTS */
615 if (flags == TERM_SEQ_FLAG_SPACE) {
616 /* Set ANSI conformance level 1 */
617 return TERM_CMD_XTERM_SACL1;
621 if (flags == 0) { /* RI */
623 } else if (flags == TERM_SEQ_FLAG_SPACE) {
624 /* Set ANSI conformance level 2 */
625 return TERM_CMD_XTERM_SACL2;
629 if (flags == 0) { /* SS2 */
631 } else if (flags == TERM_SEQ_FLAG_SPACE) {
632 /* Set ANSI conformance level 3 */
633 return TERM_CMD_XTERM_SACL3;
637 if (flags == 0) /* SS3 */
641 if (flags == 0) /* DCS: this is already handled by the state-machine */
645 if (flags == 0) /* SPA */
649 if (flags == 0) /* EPA */
653 if (flags == 0) { /* SOS */
654 /* this is already handled by the state-machine */
659 if (flags == 0) /* DECID */
660 return TERM_CMD_DECID;
663 if (flags == 0) { /* CSI */
664 /* this is already handled by the state-machine */
669 if (flags == 0) /* ST */
673 if (flags == 0) { /* OSC */
674 /* this is already handled by the state-machine */
679 if (flags == 0) { /* PM */
680 /* this is already handled by the state-machine */
685 if (flags == 0) { /* APC */
686 /* this is already handled by the state-machine */
691 if (flags == 0) /* RIS */
695 if (flags == 0) /* Memory lock */
696 return TERM_CMD_XTERM_MLHP;
699 if (flags == 0) /* Memory unlock */
700 return TERM_CMD_XTERM_MUHP;
703 if (flags == 0) /* LS2 */
707 if (flags == 0) /* LS3 */
711 if (flags == 0) /* LS3R */
712 return TERM_CMD_LS3R;
715 if (flags == 0) /* LS2R */
716 return TERM_CMD_LS2R;
719 if (flags == 0) /* LS1R */
720 return TERM_CMD_LS1R;
724 return TERM_CMD_NONE;
727 static unsigned int term_parse_host_csi(const term_seq *seq) {
730 assert_return(seq, TERM_CMD_NONE);
732 flags = seq->intermediates;
734 switch (seq->terminator) {
736 if (flags == 0) /* CUU */
740 if (flags == 0) /* HPR */
744 if (flags == 0) /* CUD */
748 if (flags == 0) /* REP */
752 if (flags == 0) /* CUF */
756 if (flags == 0) /* DA1 */
758 else if (flags == TERM_SEQ_FLAG_GT) /* DA2 */
760 else if (flags == TERM_SEQ_FLAG_EQUAL) /* DA3 */
764 if (flags == 0) /* CUB */
768 if (flags == 0) /* VPA */
772 if (flags == 0) /* CNL */
776 if (flags == 0) /* VPR */
780 if (flags == 0) /* CPL */
784 if (flags == 0) /* HVP */
788 if (flags == 0) /* CHA */
792 if (flags == 0) /* TBC */
794 else if (flags == TERM_SEQ_FLAG_MULT) /* DECLFKC */
795 return TERM_CMD_DECLFKC;
798 if (flags == 0) /* CUP */
802 if (flags == 0) /* SM ANSI */
803 return TERM_CMD_SM_ANSI;
804 else if (flags == TERM_SEQ_FLAG_WHAT) /* SM DEC */
805 return TERM_CMD_SM_DEC;
808 if (flags == 0) /* CHT */
812 if (flags == 0) /* MC ANSI */
813 return TERM_CMD_MC_ANSI;
814 else if (flags == TERM_SEQ_FLAG_WHAT) /* MC DEC */
815 return TERM_CMD_MC_DEC;
818 if (flags == 0) /* ED */
820 else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSED */
821 return TERM_CMD_DECSED;
824 if (flags == 0) /* EL */
826 else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSEL */
827 return TERM_CMD_DECSEL;
830 if (flags == 0) /* IL */
834 if (flags == 0) /* RM ANSI */
835 return TERM_CMD_RM_ANSI;
836 else if (flags == TERM_SEQ_FLAG_WHAT) /* RM DEC */
837 return TERM_CMD_RM_DEC;
840 if (flags == 0) /* DL */
844 if (flags == 0) /* SGR */
846 else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SMR */
847 return TERM_CMD_XTERM_SRV;
850 if (flags == 0) /* DSR ANSI */
851 return TERM_CMD_DSR_ANSI;
852 else if (flags == TERM_SEQ_FLAG_GT) /* XTERM RMR */
853 return TERM_CMD_XTERM_RRV;
854 else if (flags == TERM_SEQ_FLAG_WHAT) /* DSR DEC */
855 return TERM_CMD_DSR_DEC;
858 if (flags == 0) /* DCH */
860 else if (flags == TERM_SEQ_FLAG_SPACE) /* PPA */
864 if (flags == 0) /* DECSSL */
865 return TERM_CMD_DECSSL;
866 else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSSCLS */
867 return TERM_CMD_DECSSCLS;
868 else if (flags == TERM_SEQ_FLAG_BANG) /* DECSTR */
869 return TERM_CMD_DECSTR;
870 else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCL */
871 return TERM_CMD_DECSCL;
872 else if (flags == TERM_SEQ_FLAG_CASH) /* DECRQM-ANSI */
873 return TERM_CMD_DECRQM_ANSI;
874 else if (flags == (TERM_SEQ_FLAG_CASH | TERM_SEQ_FLAG_WHAT)) /* DECRQM-DEC */
875 return TERM_CMD_DECRQM_DEC;
876 else if (flags == TERM_SEQ_FLAG_PCLOSE) /* DECSDPT */
877 return TERM_CMD_DECSDPT;
878 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSPPCS */
879 return TERM_CMD_DECSPPCS;
880 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSR */
881 return TERM_CMD_DECSR;
882 else if (flags == TERM_SEQ_FLAG_COMMA) /* DECLTOD */
883 return TERM_CMD_DECLTOD;
884 else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SPM */
885 return TERM_CMD_XTERM_SPM;
888 if (flags == TERM_SEQ_FLAG_SPACE) /* PPR */
892 if (flags == 0) /* DECLL */
893 return TERM_CMD_DECLL;
894 else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSCUSR */
895 return TERM_CMD_DECSCUSR;
896 else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCA */
897 return TERM_CMD_DECSCA;
898 else if (flags == TERM_SEQ_FLAG_CASH) /* DECSDDT */
899 return TERM_CMD_DECSDDT;
900 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSRC */
901 return TERM_CMD_DECSR;
902 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECELF */
903 return TERM_CMD_DECELF;
904 else if (flags == TERM_SEQ_FLAG_COMMA) /* DECTID */
905 return TERM_CMD_DECTID;
908 if (flags == TERM_SEQ_FLAG_SPACE) /* PPB */
914 return TERM_CMD_DECSTBM;
915 } else if (flags == TERM_SEQ_FLAG_SPACE) {
917 return TERM_CMD_DECSKCV;
918 } else if (flags == TERM_SEQ_FLAG_CASH) {
920 return TERM_CMD_DECCARA;
921 } else if (flags == TERM_SEQ_FLAG_MULT) {
923 return TERM_CMD_DECSCS;
924 } else if (flags == TERM_SEQ_FLAG_PLUS) {
926 return TERM_CMD_DECSMKR;
927 } else if (flags == TERM_SEQ_FLAG_WHAT) {
929 * There's a conflict between DECPCTERM and XTERM-RPM.
930 * XTERM-RPM takes a single argument, DECPCTERM takes 2.
931 * Split both up and forward the call to the closer
934 if (seq->n_args <= 1) /* XTERM RPM */
935 return TERM_CMD_XTERM_RPM;
936 else if (seq->n_args >= 2) /* DECPCTERM */
937 return TERM_CMD_DECPCTERM;
941 if (flags == 0) /* SU */
943 else if (flags == TERM_SEQ_FLAG_WHAT) /* XTERM SGFX */
944 return TERM_CMD_XTERM_SGFX;
949 * There's a conflict between DECSLRM and SC-ANSI which
950 * cannot be resolved without knowing the state of
951 * DECLRMM. We leave that decision up to the caller.
953 return TERM_CMD_DECSLRM_OR_SC;
954 } else if (flags == TERM_SEQ_FLAG_CASH) {
956 return TERM_CMD_DECSPRTT;
957 } else if (flags == TERM_SEQ_FLAG_MULT) {
959 return TERM_CMD_DECSFC;
960 } else if (flags == TERM_SEQ_FLAG_WHAT) {
962 return TERM_CMD_XTERM_SPM;
968 * Awesome: There's a conflict between SD and XTERM IHMT
969 * that we have to resolve by checking the parameter
970 * count.. XTERM_IHMT needs exactly 5 arguments, SD
971 * takes 0 or 1. We're conservative here and give both
972 * a wider range to allow unused arguments (compat...).
974 if (seq->n_args >= 5) {
976 return TERM_CMD_XTERM_IHMT;
977 } else if (seq->n_args < 5) {
981 } else if (flags == TERM_SEQ_FLAG_GT) {
983 return TERM_CMD_XTERM_RTM;
988 if (seq->n_args > 0 && seq->args[0] < 24) {
990 return TERM_CMD_XTERM_WM;
993 return TERM_CMD_DECSLPP;
995 } else if (flags == TERM_SEQ_FLAG_SPACE) {
997 return TERM_CMD_DECSWBV;
998 } else if (flags == TERM_SEQ_FLAG_DQUOTE) {
1000 return TERM_CMD_DECSRFR;
1001 } else if (flags == TERM_SEQ_FLAG_CASH) {
1003 return TERM_CMD_DECRARA;
1004 } else if (flags == TERM_SEQ_FLAG_GT) {
1006 return TERM_CMD_XTERM_STM;
1010 if (flags == 0) /* NP */
1017 } else if (flags == TERM_SEQ_FLAG_SPACE) {
1019 return TERM_CMD_DECSMBV;
1020 } else if (flags == TERM_SEQ_FLAG_DQUOTE) {
1022 return TERM_CMD_DECSTRL;
1023 } else if (flags == TERM_SEQ_FLAG_WHAT) {
1025 return TERM_CMD_DECRQUPSS;
1026 } else if (seq->args[0] == 1 && flags == TERM_SEQ_FLAG_CASH) {
1028 return TERM_CMD_DECRQTSR;
1029 } else if (flags == TERM_SEQ_FLAG_MULT) {
1031 return TERM_CMD_DECSCP;
1032 } else if (flags == TERM_SEQ_FLAG_COMMA) {
1034 return TERM_CMD_DECRQKT;
1038 if (flags == 0) /* PP */
1042 if (flags == TERM_SEQ_FLAG_SPACE) /* DECSLCK */
1043 return TERM_CMD_DECSLCK;
1044 else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECRQDE */
1045 return TERM_CMD_DECRQDE;
1046 else if (flags == TERM_SEQ_FLAG_CASH) /* DECCRA */
1047 return TERM_CMD_DECCRA;
1048 else if (flags == TERM_SEQ_FLAG_COMMA) /* DECRPKT */
1049 return TERM_CMD_DECRPKT;
1052 if (seq->args[0] == 5 && flags == TERM_SEQ_FLAG_WHAT) {
1054 return TERM_CMD_DECST8C;
1058 if (flags == TERM_SEQ_FLAG_CASH) /* DECRQPSR */
1059 return TERM_CMD_DECRQPSR;
1060 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECEFR */
1061 return TERM_CMD_DECEFR;
1062 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSPP */
1063 return TERM_CMD_DECSPP;
1066 if (flags == 0) /* ECH */
1067 return TERM_CMD_ECH;
1070 if (flags == 0) /* DECREQTPARM */
1071 return TERM_CMD_DECREQTPARM;
1072 else if (flags == TERM_SEQ_FLAG_CASH) /* DECFRA */
1073 return TERM_CMD_DECFRA;
1074 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSACE */
1075 return TERM_CMD_DECSACE;
1076 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECRQPKFM */
1077 return TERM_CMD_DECRQPKFM;
1080 if (flags == 0) /* DECTST */
1081 return TERM_CMD_DECTST;
1082 else if (flags == TERM_SEQ_FLAG_MULT) /* DECRQCRA */
1083 return TERM_CMD_DECRQCRA;
1084 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKFMR */
1085 return TERM_CMD_DECPKFMR;
1088 if (flags == 0) /* CBT */
1089 return TERM_CMD_CBT;
1092 if (flags == TERM_SEQ_FLAG_CASH) /* DECERA */
1093 return TERM_CMD_DECERA;
1094 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECELR */
1095 return TERM_CMD_DECELR;
1096 else if (flags == TERM_SEQ_FLAG_MULT) /* DECINVM */
1097 return TERM_CMD_DECINVM;
1098 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKA */
1099 return TERM_CMD_DECPKA;
1102 if (flags == 0) /* ICH */
1103 return TERM_CMD_ICH;
1106 if (flags == 0) /* HPA */
1107 return TERM_CMD_HPA;
1110 if (flags == TERM_SEQ_FLAG_CASH) /* DECSERA */
1111 return TERM_CMD_DECSERA;
1112 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECSLE */
1113 return TERM_CMD_DECSLE;
1116 if (flags == TERM_SEQ_FLAG_CASH) /* DECSCPP */
1117 return TERM_CMD_DECSCPP;
1118 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECRQLP */
1119 return TERM_CMD_DECRQLP;
1120 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSNLS */
1121 return TERM_CMD_DECSNLS;
1124 if (flags == TERM_SEQ_FLAG_SPACE) /* DECKBD */
1125 return TERM_CMD_DECKBD;
1126 else if (flags == TERM_SEQ_FLAG_CASH) /* DECSASD */
1127 return TERM_CMD_DECSASD;
1128 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECIC */
1129 return TERM_CMD_DECIC;
1132 if (flags == TERM_SEQ_FLAG_SPACE) /* DECTME */
1133 return TERM_CMD_DECTME;
1134 else if (flags == TERM_SEQ_FLAG_CASH) /* DECSSDT */
1135 return TERM_CMD_DECSSDT;
1136 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECDC */
1137 return TERM_CMD_DECDC;
1141 return TERM_CMD_NONE;
1146 * This parser controls the parser-state and returns any detected sequence to
1147 * the caller. The parser is based on this state-diagram from Paul Williams:
1148 * http://vt100.net/emu/
1149 * It was written from scratch and extended where needed.
1150 * This parser is fully compatible up to the vt500 series. We expect UCS-4 as
1151 * input. It's the caller's responsibility to do any UTF-8 parsing.
1155 STATE_NONE, /* placeholder */
1156 STATE_GROUND, /* initial state and ground */
1157 STATE_ESC, /* ESC sequence was started */
1158 STATE_ESC_INT, /* intermediate escape characters */
1159 STATE_CSI_ENTRY, /* starting CSI sequence */
1160 STATE_CSI_PARAM, /* CSI parameters */
1161 STATE_CSI_INT, /* intermediate CSI characters */
1162 STATE_CSI_IGNORE, /* CSI error; ignore this CSI sequence */
1163 STATE_DCS_ENTRY, /* starting DCS sequence */
1164 STATE_DCS_PARAM, /* DCS parameters */
1165 STATE_DCS_INT, /* intermediate DCS characters */
1166 STATE_DCS_PASS, /* DCS data passthrough */
1167 STATE_DCS_IGNORE, /* DCS error; ignore this DCS sequence */
1168 STATE_OSC_STRING, /* parsing OSC sequence */
1169 STATE_ST_IGNORE, /* unimplemented seq; ignore until ST */
1173 enum parser_action {
1174 ACTION_NONE, /* placeholder */
1175 ACTION_CLEAR, /* clear parameters */
1176 ACTION_IGNORE, /* ignore the character entirely */
1177 ACTION_PRINT, /* print the character on the console */
1178 ACTION_EXECUTE, /* execute single control character (C0/C1) */
1179 ACTION_COLLECT, /* collect intermediate character */
1180 ACTION_PARAM, /* collect parameter character */
1181 ACTION_ESC_DISPATCH, /* dispatch escape sequence */
1182 ACTION_CSI_DISPATCH, /* dispatch csi sequence */
1183 ACTION_DCS_START, /* start of DCS data */
1184 ACTION_DCS_COLLECT, /* collect DCS data */
1185 ACTION_DCS_CONSUME, /* consume DCS terminator */
1186 ACTION_DCS_DISPATCH, /* dispatch dcs sequence */
1187 ACTION_OSC_START, /* start of OSC data */
1188 ACTION_OSC_COLLECT, /* collect OSC data */
1189 ACTION_OSC_CONSUME, /* consume OSC terminator */
1190 ACTION_OSC_DISPATCH, /* dispatch osc sequence */
1194 int term_parser_new(term_parser **out, bool host) {
1195 _term_parser_free_ term_parser *parser = NULL;
1197 assert_return(out, -EINVAL);
1199 parser = new0(term_parser, 1);
1203 parser->is_host = host;
1204 parser->st_alloc = 64;
1205 parser->seq.st = new0(char, parser->st_alloc + 1);
1206 if (!parser->seq.st)
1214 term_parser *term_parser_free(term_parser *parser) {
1218 free(parser->seq.st);
1223 static inline void parser_clear(term_parser *parser) {
1226 parser->seq.command = TERM_CMD_NONE;
1227 parser->seq.terminator = 0;
1228 parser->seq.intermediates = 0;
1229 parser->seq.charset = TERM_CHARSET_NONE;
1230 parser->seq.n_args = 0;
1231 for (i = 0; i < TERM_PARSER_ARG_MAX; ++i)
1232 parser->seq.args[i] = -1;
1234 parser->seq.n_st = 0;
1235 parser->seq.st[0] = 0;
1238 static int parser_ignore(term_parser *parser, uint32_t raw) {
1239 parser_clear(parser);
1240 parser->seq.type = TERM_SEQ_IGNORE;
1241 parser->seq.command = TERM_CMD_NONE;
1242 parser->seq.terminator = raw;
1243 parser->seq.charset = TERM_CHARSET_NONE;
1245 return parser->seq.type;
1248 static int parser_print(term_parser *parser, uint32_t raw) {
1249 parser_clear(parser);
1250 parser->seq.type = TERM_SEQ_GRAPHIC;
1251 parser->seq.command = TERM_CMD_GRAPHIC;
1252 parser->seq.terminator = raw;
1253 parser->seq.charset = TERM_CHARSET_NONE;
1255 return parser->seq.type;
1258 static int parser_execute(term_parser *parser, uint32_t raw) {
1259 parser_clear(parser);
1260 parser->seq.type = TERM_SEQ_CONTROL;
1261 parser->seq.command = TERM_CMD_GRAPHIC;
1262 parser->seq.terminator = raw;
1263 parser->seq.charset = TERM_CHARSET_NONE;
1264 if (!parser->is_host)
1265 parser->seq.command = term_parse_host_control(&parser->seq);
1267 return parser->seq.type;
1270 static void parser_collect(term_parser *parser, uint32_t raw) {
1272 * Usually, characters from 0x30 to 0x3f are only allowed as leading
1273 * markers (or as part of the parameters), characters from 0x20 to 0x2f
1274 * are only allowed as trailing markers. However, our state-machine
1275 * already verifies those restrictions so we can handle them the same
1276 * way here. Note that we safely allow markers to be specified multiple
1280 if (raw >= 0x20 && raw <= 0x3f)
1281 parser->seq.intermediates |= 1 << (raw - 0x20);
1284 static void parser_param(term_parser *parser, uint32_t raw) {
1288 if (parser->seq.n_args < TERM_PARSER_ARG_MAX)
1289 ++parser->seq.n_args;
1294 if (parser->seq.n_args >= TERM_PARSER_ARG_MAX)
1297 if (raw >= '0' && raw <= '9') {
1298 new = parser->seq.args[parser->seq.n_args];
1301 new = new * 10 + raw - '0';
1303 /* VT510 tells us to clamp all values to [0, 9999], however, it
1304 * also allows commands with values up to 2^15-1. We simply use
1305 * 2^16 as maximum here to be compatible to all commands, but
1306 * avoid overflows in any calculations. */
1310 parser->seq.args[parser->seq.n_args] = new;
1314 static int parser_esc(term_parser *parser, uint32_t raw) {
1315 parser->seq.type = TERM_SEQ_ESCAPE;
1316 parser->seq.command = TERM_CMD_NONE;
1317 parser->seq.terminator = raw;
1318 parser->seq.charset = TERM_CHARSET_NONE;
1319 if (!parser->is_host)
1320 parser->seq.command = term_parse_host_escape(&parser->seq, &parser->seq.charset);
1322 return parser->seq.type;
1325 static int parser_csi(term_parser *parser, uint32_t raw) {
1326 /* parser->seq is cleared during CSI-ENTER state, thus there's no need
1327 * to clear invalid fields here. */
1329 if (parser->seq.n_args < TERM_PARSER_ARG_MAX) {
1330 if (parser->seq.n_args > 0 ||
1331 parser->seq.args[parser->seq.n_args] >= 0)
1332 ++parser->seq.n_args;
1335 parser->seq.type = TERM_SEQ_CSI;
1336 parser->seq.command = TERM_CMD_NONE;
1337 parser->seq.terminator = raw;
1338 parser->seq.charset = TERM_CHARSET_NONE;
1339 if (!parser->is_host)
1340 parser->seq.command = term_parse_host_csi(&parser->seq);
1342 return parser->seq.type;
1345 /* perform state transition and dispatch related actions */
1346 static int parser_transition(term_parser *parser, uint32_t raw, unsigned int state, unsigned int action) {
1347 if (state != STATE_NONE)
1348 parser->state = state;
1352 return TERM_SEQ_NONE;
1354 parser_clear(parser);
1355 return TERM_SEQ_NONE;
1357 return parser_ignore(parser, raw);
1359 return parser_print(parser, raw);
1360 case ACTION_EXECUTE:
1361 return parser_execute(parser, raw);
1362 case ACTION_COLLECT:
1363 parser_collect(parser, raw);
1364 return TERM_SEQ_NONE;
1366 parser_param(parser, raw);
1367 return TERM_SEQ_NONE;
1368 case ACTION_ESC_DISPATCH:
1369 return parser_esc(parser, raw);
1370 case ACTION_CSI_DISPATCH:
1371 return parser_csi(parser, raw);
1372 case ACTION_DCS_START:
1373 /* not implemented */
1374 return TERM_SEQ_NONE;
1375 case ACTION_DCS_COLLECT:
1376 /* not implemented */
1377 return TERM_SEQ_NONE;
1378 case ACTION_DCS_CONSUME:
1379 /* not implemented */
1380 return TERM_SEQ_NONE;
1381 case ACTION_DCS_DISPATCH:
1382 /* not implemented */
1383 return TERM_SEQ_NONE;
1384 case ACTION_OSC_START:
1385 /* not implemented */
1386 return TERM_SEQ_NONE;
1387 case ACTION_OSC_COLLECT:
1388 /* not implemented */
1389 return TERM_SEQ_NONE;
1390 case ACTION_OSC_CONSUME:
1391 /* not implemented */
1392 return TERM_SEQ_NONE;
1393 case ACTION_OSC_DISPATCH:
1394 /* not implemented */
1395 return TERM_SEQ_NONE;
1397 assert_not_reached("invalid vte-parser action");
1398 return TERM_SEQ_NONE;
/*
 * Route one input value through the state machine. The outer switch selects
 * on parser->state; the case ranges below then pick a parser_transition()
 * call that names the follow-up state and the action to run. Passing
 * STATE_NONE as follow-up state keeps the parser in its current state. The
 * result of parser_transition() is returned unchanged.
 */
1402 static int parser_feed_to_state(term_parser *parser, uint32_t raw) {
1403 switch (parser->state) {
1406 * During initialization, parser->state is cleared. Treat this
1407 * as STATE_GROUND. We will then never get to STATE_NONE again.
1411 case 0x00 ... 0x1f: /* C0 */
1412 case 0x80 ... 0x9b: /* C1 \ { ST } */
1414 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1416 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1419 return parser_transition(parser, raw, STATE_NONE, ACTION_PRINT);
1422 case 0x00 ... 0x1f: /* C0 */
1423 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1424 case 0x20 ... 0x2f: /* [' ' - '/'] */
1425 return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT);
1426 case 0x30 ... 0x4f: /* ['0' - '~'] \ { 'P', 'X', '[', ']', '^', '_' } */
1431 return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH);
1432 case 0x50: /* 'P' */
1433 return parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR);
1434 case 0x5b: /* '[' */
1435 return parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR);
1436 case 0x5d: /* ']' */
1437 return parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR);
1438 case 0x58: /* 'X' */
1439 case 0x5e: /* '^' */
1440 case 0x5f: /* '_' */
1441 return parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE);
1442 case 0x7f: /* DEL */
1443 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1445 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1448 return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT);
1451 case 0x00 ... 0x1f: /* C0 */
1452 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1453 case 0x20 ... 0x2f: /* [' ' - '/'] */
1454 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1455 case 0x30 ... 0x7e: /* ['0' - '~'] */
1456 return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH);
1457 case 0x7f: /* DEL */
1458 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1460 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1463 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1464 case STATE_CSI_ENTRY:
/*
 * Digits and ';' start parameter parsing and '<'-'?' is collected, both
 * moving on to STATE_CSI_PARAM. 0x20-0x2f is collected and moves on to
 * STATE_CSI_INT, ':' switches to STATE_CSI_IGNORE, and '@'-'~' dispatches
 * the CSI sequence and returns to STATE_GROUND.
 */
1466 case 0x00 ... 0x1f: /* C0 */
1467 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1468 case 0x20 ... 0x2f: /* [' ' - '/'] */
1469 return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT);
1470 case 0x3a: /* ':' */
1471 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1472 case 0x30 ... 0x39: /* ['0' - '9'] */
1473 case 0x3b: /* ';' */
1474 return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_PARAM);
1475 case 0x3c ... 0x3f: /* ['<' - '?'] */
1476 return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_COLLECT);
1477 case 0x40 ... 0x7e: /* ['@' - '~'] */
1478 return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
1479 case 0x7f: /* DEL */
1480 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1482 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1485 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1486 case STATE_CSI_PARAM:
/*
 * Further digits and ';' are fed to ACTION_PARAM without a state change.
 * ':' and '<'-'?' are no longer accepted here and switch to
 * STATE_CSI_IGNORE.
 */
1488 case 0x00 ... 0x1f: /* C0 */
1489 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1490 case 0x20 ... 0x2f: /* [' ' - '/'] */
1491 return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT);
1492 case 0x30 ... 0x39: /* ['0' - '9'] */
1493 case 0x3b: /* ';' */
1494 return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM);
1495 case 0x3a: /* ':' */
1496 case 0x3c ... 0x3f: /* ['<' - '?'] */
1497 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1498 case 0x40 ... 0x7e: /* ['@' - '~'] */
1499 return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
1500 case 0x7f: /* DEL */
1501 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1503 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1506 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1509 case 0x00 ... 0x1f: /* C0 */
1510 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1511 case 0x20 ... 0x2f: /* [' ' - '/'] */
1512 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1513 case 0x30 ... 0x3f: /* ['0' - '?'] */
1514 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1515 case 0x40 ... 0x7e: /* ['@' - '~'] */
1516 return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
1517 case 0x7f: /* DEL */
1518 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1520 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1523 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1524 case STATE_CSI_IGNORE:
/*
 * C0 is still executed, 0x20-0x3f is dropped without any action, and
 * '@'-'~' returns to STATE_GROUND without dispatching anything.
 */
1526 case 0x00 ... 0x1f: /* C0 */
1527 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1528 case 0x20 ... 0x3f: /* [' ' - '?'] */
1529 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
1530 case 0x40 ... 0x7e: /* ['@' - '~'] */
1531 return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE);
1532 case 0x7f: /* DEL */
1533 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1535 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1538 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
1539 case STATE_DCS_ENTRY:
/*
 * Unlike STATE_CSI_ENTRY, C0 is ignored rather than executed, and '@'-'~'
 * moves on to STATE_DCS_PASS with ACTION_DCS_CONSUME.
 */
1541 case 0x00 ... 0x1f: /* C0 */
1542 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1543 case 0x20 ... 0x2f: /* [' ' - '/'] */
1544 return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT);
1545 case 0x3a: /* ':' */
1546 return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
1547 case 0x30 ... 0x39: /* ['0' - '9'] */
1548 case 0x3b: /* ';' */
1549 return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_PARAM);
1550 case 0x3c ... 0x3f: /* ['<' - '?'] */
1551 return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_COLLECT);
1552 case 0x40 ... 0x7e: /* ['@' - '~'] */
1553 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1554 case 0x7f: /* DEL */
1555 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1557 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1560 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1561 case STATE_DCS_PARAM:
/* digits and ';' keep feeding ACTION_PARAM; ':' and '<'-'?' switch to STATE_DCS_IGNORE */
1563 case 0x00 ... 0x1f: /* C0 */
1564 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1565 case 0x20 ... 0x2f: /* [' ' - '/'] */
1566 return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT);
1567 case 0x30 ... 0x39: /* ['0' - '9'] */
1568 case 0x3b: /* ';' */
1569 return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM);
1570 case 0x3a: /* ':' */
1571 case 0x3c ... 0x3f: /* ['<' - '?'] */
1572 return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
1573 case 0x40 ... 0x7e: /* ['@' - '~'] */
1574 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1575 case 0x7f: /* DEL */
1576 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1578 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1581 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1584 case 0x00 ... 0x1f: /* C0 */
1585 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1586 case 0x20 ... 0x2f: /* [' ' - '/'] */
1587 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1588 case 0x30 ... 0x3f: /* ['0' - '?'] */
1589 return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
1590 case 0x40 ... 0x7e: /* ['@' - '~'] */
1591 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1592 case 0x7f: /* DEL */
1593 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1595 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1598 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1599 case STATE_DCS_PASS:
/* everything in 0x00-0x7e is handed to ACTION_DCS_COLLECT; DEL is ignored */
1601 case 0x00 ... 0x7e: /* ASCII \ { DEL } */
1602 return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT);
1603 case 0x7f: /* DEL */
1604 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1606 return parser_transition(parser, raw, STATE_GROUND, ACTION_DCS_DISPATCH);
1609 return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT);
1610 case STATE_DCS_IGNORE:
/* all of ASCII is ignored while the parser stays in this state */
1612 case 0x00 ... 0x7f: /* ASCII */
1613 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1615 return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE);
1618 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
1619 case STATE_OSC_STRING:
/* 0x20-0x7f is handed to ACTION_OSC_COLLECT; BEL dispatches and returns to STATE_GROUND */
1621 case 0x00 ... 0x06: /* C0 \ { BEL } */
1623 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1624 case 0x20 ... 0x7f: /* [' ' - DEL] */
1625 return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT);
1626 case 0x07: /* BEL */
1628 return parser_transition(parser, raw, STATE_GROUND, ACTION_OSC_DISPATCH);
1631 return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT);
1632 case STATE_ST_IGNORE:
/* all of ASCII is ignored while the parser stays in this state */
1634 case 0x00 ... 0x7f: /* ASCII */
1635 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1637 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1640 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
1643 assert_not_reached("bad vte-parser state");
1647 int term_parser_feed(term_parser *parser, const term_seq **seq_out, uint32_t raw) {
1650 assert_return(parser, -EINVAL);
1651 assert_return(seq_out, -EINVAL);
1655 * * DEC treats GR codes as GL. We don't do that as we require UTF-8
1656 * as charset and, thus, it doesn't make sense to treat GR special.
1657 * * During control sequences, unexpected C1 codes cancel the sequence
1658 * and immediately start a new one. C0 codes, however, may or may not
1659 * be ignored/executed depending on the sequence.
1663 case 0x18: /* CAN */
1664 r = parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1666 case 0x1a: /* SUB */
1667 r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE);
1669 case 0x80 ... 0x8f: /* C1 \ {DCS, SOS, CSI, ST, OSC, PM, APC} */
1672 r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE);
1674 case 0x1b: /* ESC */
1675 r = parser_transition(parser, raw, STATE_ESC, ACTION_CLEAR);
1677 case 0x98: /* SOS */
1679 case 0x9f: /* APC */
1680 r = parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE);
1682 case 0x90: /* DCS */
1683 r = parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR);
1685 case 0x9d: /* OSC */
1686 r = parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR);
1688 case 0x9b: /* CSI */
1689 r = parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR);
1692 r = parser_feed_to_state(parser, raw);
1699 *seq_out = &parser->seq;