1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 * This file contains a bunch of UTF-8 helpers and the main ctlseq-parser. The
25 * parser is a simple state-machine that correctly parses all CSI, DCS, OSC, ST
26 * control sequences and generic escape sequences.
27 * The parser itself does not perform any actions but lets the caller react to
35 #include "term-internal.h"
/*
 * Built-in RGB palette used when the caller does not supply one. Every row is
 * one { red, green, blue } triple. Rows 0-15 hold the 16 named colors, row 16
 * is the default foreground and row 17 the default background.
 */
static const uint8_t default_palette[18][3] = {
        /* regular colors */
        [0]  = {   0,   0,   0 }, /* black */
        [1]  = { 205,   0,   0 }, /* red */
        [2]  = {   0, 205,   0 }, /* green */
        [3]  = { 205, 205,   0 }, /* yellow */
        [4]  = {   0,   0, 238 }, /* blue */
        [5]  = { 205,   0, 205 }, /* magenta */
        [6]  = {   0, 205, 205 }, /* cyan */
        [7]  = { 229, 229, 229 }, /* light grey */

        /* light colors */
        [8]  = { 127, 127, 127 }, /* dark grey */
        [9]  = { 255,   0,   0 }, /* light red */
        [10] = {   0, 255,   0 }, /* light green */
        [11] = { 255, 255,   0 }, /* light yellow */
        [12] = {  92,  92, 255 }, /* light blue */
        [13] = { 255,   0, 255 }, /* light magenta */
        [14] = {   0, 255, 255 }, /* light cyan */
        [15] = { 255, 255, 255 }, /* white */

        /* defaults */
        [16] = { 229, 229, 229 }, /* light grey */
        [17] = {   0,   0,   0 }, /* black */
};
/*
 * Convert one terminal color into a packed 32-bit value laid out as
 * 0xAARRGGBB, with the alpha byte fixed to 0xff.
 *
 * @palette is read as a flat byte array of RGB triples: entry t lives at
 * palette[t * 3 + 0 .. t * 3 + 2]. Entries 16 and 17 serve
 * TERM_CCODE_DEFAULT: 16 when @color is the foreground of @attr, 17 otherwise.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (local declarations, some case labels and the condition guarding
 * the default_palette fallback). Presumably the fallback only applies when
 * @palette is NULL -- confirm against the full file.
 */
60 static uint32_t term_color_to_argb32(const term_color *color, const term_attr *attr, const uint8_t *palette) {
61 static const uint8_t bval[] = {
70 palette = (void*)default_palette;
72 switch (color->ccode) {
82 r = palette[t * 3 + 0];
83 g = palette[t * 3 + 1];
84 b = palette[t * 3 + 2];
/* maps indices starting at 232 onto the values 8, 18, 28, ... */
93 t = (t - 232) * 10 + 8;
/* named colors: t becomes the zero-based palette row */
100 case TERM_CCODE_BLACK ... TERM_CCODE_LIGHT_WHITE:
101 t = color->ccode - TERM_CCODE_BLACK;
103 /* bold causes light colors (only for foreground colors) */
104 if (t < 8 && attr->bold && color == &attr->fg)
107 r = palette[t * 3 + 0];
108 g = palette[t * 3 + 1];
109 b = palette[t * 3 + 2];
111 case TERM_CCODE_DEFAULT:
/* row 16 for the foreground color, row 17 for anything else */
114 t = 16 + !(color == &attr->fg);
115 r = palette[t * 3 + 0];
116 g = palette[t * 3 + 1];
117 b = palette[t * 3 + 2];
/*
 * NOTE(review): 0xff has type int, so (0xff << 24) does not fit into a signed
 * int; a uint32_t operand would avoid the overflow -- TODO confirm and fix.
 */
121 return (0xff << 24) | (r << 16) | (g << 8) | b;
125 * term_attr_to_argb32() - Encode terminal colors as native ARGB32 value
126 * @attr: Terminal attributes to work on
127 * @fg: Storage for foreground color (or NULL)
128 * @bg: Storage for background color (or NULL)
129 * @palette: The color palette to use (or NULL for default)
131 * This encodes the colors attr->fg and attr->bg as native-endian ARGB32 values
132 * and returns them. Any color conversions are automatically applied.
/*
 * Resolves both colors of @attr through term_color_to_argb32() using the same
 * @palette.
 * NOTE(review): the code that stores the results into @fg / @bg is not
 * visible in this excerpt.
 */
134 void term_attr_to_argb32(const term_attr *attr, uint32_t *fg, uint32_t *bg, const uint8_t *palette) {
/* foreground color of the attribute set */
139 f = term_color_to_argb32(&attr->fg, attr, palette);
/* background color of the attribute set */
140 b = term_color_to_argb32(&attr->bg, attr, palette);
/**
 * term_utf8_encode() - Encode single UCS-4 character as UTF-8
 * @out_utf8: output buffer of at least 4 bytes or NULL
 * @g: UCS-4 character to encode
 *
 * Writes the UTF-8 representation of @g into @out_utf8. No terminating zero
 * byte is appended. If @out_utf8 is NULL, nothing is written and only the
 * length is computed.
 *
 * Returns: The length in bytes that the UTF-8 representation does or would
 *          occupy, or 0 if @g does not fit into 21 bits.
 */
size_t term_utf8_encode(char *out_utf8, uint32_t g) {
        /* marker bits and payload mask of the first byte, indexed by length */
        static const uint8_t lead_mark[] = { 0x00, 0x00, 0xc0, 0xe0, 0xf0 };
        static const uint8_t lead_mask[] = { 0x00, 0x7f, 0x1f, 0x0f, 0x07 };
        unsigned int shift;
        size_t len, pos;

        if (g < (1 << 7))
                len = 1;
        else if (g < (1 << 11))
                len = 2;
        else if (g < (1 << 16))
                len = 3;
        else if (g < (1 << 21))
                len = 4;
        else
                return 0;

        if (!out_utf8)
                return len;

        /* the first byte carries the most significant payload bits */
        shift = 6 * (len - 1);
        out_utf8[0] = lead_mark[len] | ((g >> shift) & lead_mask[len]);

        /* every following byte carries the next lower 6 bits */
        for (pos = 1; pos < len; ++pos) {
                shift -= 6;
                out_utf8[pos] = 0x80 | ((g >> shift) & 0x3f);
        }

        return len;
}
198 * term_utf8_decode() - Try decoding the next UCS-4 character
199 * @p: decoder object to operate on or NULL
200 * @out_buf: output storage for pointer to decoded UCS-4 string or NULL
201 * @c: next char to push into decoder
203 * This decodes a UTF-8 stream. It must be called for each input-byte of the
204 * UTF-8 stream and returns a UCS-4 stream. A pointer to the parsed UCS-4
205 * string is stored in @out_buf if non-NULL. The length of this string (number
206 * of parsed UCS4 characters) is returned as result. The string is not
207 * zero-terminated! Furthermore, the string is only valid until the next
208 * invocation of this function. It is also bound to the parser state @p and
209 * must not be freed nor written to by the caller.
211 * This function is highly optimized to work with terminal-emulators. Instead
212 * of being strict about UTF-8 validity, this tries to perform a fallback to
213 * ISO-8859-1 in case a wrong series was detected. Therefore, this function
214 * might return multiple UCS-4 characters by parsing just a single UTF-8 byte.
216 * The parser state @p should be allocated and managed by the caller. There're
217 * no helpers to do that for you. To initialize it, simply reset it to all
218 * zero. You can reset or free the object at any point in time.
220 * Returns: Number of parsed UCS4 characters
/*
 * Byte-wise UTF-8 decoder. Each call pushes one input byte @c into the
 * decoder state @p. A byte either starts a new sequence (when the previous
 * one was invalid or complete) or continues the sequence in progress.
 *
 * State fields used by the visible code:
 *   p->valid   - tested before starting a fresh sequence
 *   p->n_bytes - expected length of the current sequence
 *   p->i_bytes - number of bytes consumed so far
 *   p->chars   - buffer the bytes are stored in
 *   p->ucs4    - code point accumulated 6 bits at a time
 *
 * NOTE(review): many lines of this function are not visible in this excerpt
 * (declaration of `byte`, the assignments that follow each lead-byte test,
 * and the final result selection); the comments below describe only what
 * the visible lines establish.
 */
222 size_t term_utf8_decode(term_utf8 *p, uint32_t **out_buf, char c) {
/* placeholder target for *out_buf when there is no result buffer (see line 368) */
223 static uint32_t ucs4_null = 0;
224 uint32_t t, *res = NULL;
233 if (!p->valid || p->i_bytes >= p->n_bytes) {
235 * If the previous sequence was invalid or fully parsed, start
236 * parsing a fresh new sequence.
/* lead byte patterns: 110xxxxx, 1110xxxx and 11110xxx */
239 if ((byte & 0xE0) == 0xC0) {
240 /* start of two byte sequence */
245 } else if ((byte & 0xF0) == 0xE0) {
246 /* start of three byte sequence */
251 } else if ((byte & 0xF8) == 0xF0) {
252 /* start of four byte sequence */
259 * - single ASCII 7-bit char
260 * - out-of-sync continuation byte
261 * - overlong encoding
262 * All of them are treated as single byte ISO-8859-1 */
/* place the payload bits of the lead byte above the bits still to come */
270 p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes));
273 * ..otherwise, try to continue the previous sequence..
/* continuation bytes have the form 10xxxxxx */
276 if ((byte & 0xC0) == 0x80) {
278 * Valid continuation byte. Append to sequence and
279 * update the ucs4 cache accordingly.
283 p->chars[p->i_bytes++] = byte;
284 p->ucs4 |= t << (6 * (p->n_bytes - p->i_bytes));
287 * Invalid continuation? Treat cached sequence as
288 * ISO-8859-1, but parse the new char as valid new
289 * starting character. If it's a new single-byte UTF-8
290 * sequence, we immediately return it in the same run,
291 * otherwise, we might suffer from starvation.
294 if ((byte & 0xE0) == 0xC0 ||
295 (byte & 0xF0) == 0xE0 ||
296 (byte & 0xF8) == 0xF0) {
298 * New multi-byte sequence. Move to-be-returned
299 * data at the end and start new sequence. Only
300 * return the old sequence.
/* shifts the cached bytes by one slot; the source operand is not visible here */
303 memmove(p->chars + 1,
305 sizeof(*p->chars) * p->i_bytes);
309 if ((byte & 0xE0) == 0xC0) {
310 /* start of two byte sequence */
315 } else if ((byte & 0xF0) == 0xE0) {
316 /* start of three byte sequence */
321 } else if ((byte & 0xF8) == 0xF0) {
322 /* start of four byte sequence */
/* unreachable: the enclosing test already matched one of the three patterns */
328 assert_not_reached("Should not happen");
331 p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes));
336 * New single byte sequence, append to output
337 * and return combined sequence.
340 p->chars[p->i_bytes++] = byte;
347 * Check whether a full sequence (valid or invalid) has been parsed and
348 * then return it. Otherwise, return nothing.
351 /* still parsing? then bail out */
352 if (p->i_bytes < p->n_bytes)
/* never hand out a NULL pointer: fall back to the static dummy when res is NULL */
368 *out_buf = res ? : &ucs4_null;
374 * The ctl-seq parser "term_parser" only detects whole sequences, it does not
375 * detect the specific command. Once a sequence is parsed, the command-parsers
376 * are used to figure out their meaning. Note that this depends on whether we
377 * run on the host or terminal side.
/*
 * Map a single control character, read from seq->terminator, to its
 * TERM_CMD_* command. Returns TERM_CMD_NONE when @seq is NULL (via
 * assert_return) and as the last statement for characters without a command.
 *
 * NOTE(review): most case labels of this switch are not visible in this
 * excerpt; only the NUL and DECID mappings can be read here.
 */
380 static unsigned int term_parse_host_control(const term_seq *seq) {
381 assert_return(seq, TERM_CMD_NONE);
383 switch (seq->terminator) {
385 return TERM_CMD_NULL;
411 /* this is already handled by the state-machine */
416 /* this is already handled by the state-machine */
419 /* this is already handled by the state-machine */
434 /* this is already handled by the state-machine */
441 /* this is already handled by the state-machine */
443 case 0x9a: /* DECID */
444 return TERM_CMD_DECID;
446 /* this is already handled by the state-machine */
451 /* this is already handled by the state-machine */
454 /* this is already handled by the state-machine */
457 /* this is already handled by the state-machine */
461 return TERM_CMD_NONE;
/*
 * Look up the charset selected by a designator: @raw is the final character
 * and @flags the remaining intermediate flags. The table is indexed by
 * charset id. Alternative designators for the same charset are stored at
 * id + TERM_CHARSET_CNT (secondary) and id + 2 * TERM_CHARSET_CNT (tertiary);
 * the lookup loop folds such indices back into the primary range.
 *
 * With @require_96 set, a match is only accepted if its id is below
 * TERM_CHARSET_96_CNT or not below TERM_CHARSET_94_CNT.
 *
 * NOTE(review): the struct member declarations, the statements executed on a
 * match and the failure return value are not visible in this excerpt.
 */
464 static inline int charset_from_cmd(uint32_t raw, unsigned int flags, bool require_96) {
465 static const struct {
469 /* 96-compat charsets */
470 [TERM_CHARSET_ISO_LATIN1_SUPPLEMENTAL] = { .raw = 'A', .flags = 0 },
471 [TERM_CHARSET_ISO_LATIN2_SUPPLEMENTAL] = { .raw = 'B', .flags = 0 },
472 [TERM_CHARSET_ISO_LATIN5_SUPPLEMENTAL] = { .raw = 'M', .flags = 0 },
473 [TERM_CHARSET_ISO_GREEK_SUPPLEMENTAL] = { .raw = 'F', .flags = 0 },
474 [TERM_CHARSET_ISO_HEBREW_SUPPLEMENTAL] = { .raw = 'H', .flags = 0 },
475 [TERM_CHARSET_ISO_LATIN_CYRILLIC] = { .raw = 'L', .flags = 0 },
477 /* 94-compat charsets */
478 [TERM_CHARSET_DEC_SPECIAL_GRAPHIC] = { .raw = '0', .flags = 0 },
479 [TERM_CHARSET_DEC_SUPPLEMENTAL] = { .raw = '5', .flags = TERM_SEQ_FLAG_PERCENT },
480 [TERM_CHARSET_DEC_TECHNICAL] = { .raw = '>', .flags = 0 },
481 [TERM_CHARSET_CYRILLIC_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_AND },
482 [TERM_CHARSET_DUTCH_NRCS] = { .raw = '4', .flags = 0 },
483 [TERM_CHARSET_FINNISH_NRCS] = { .raw = '5', .flags = 0 },
484 [TERM_CHARSET_FRENCH_NRCS] = { .raw = 'R', .flags = 0 },
485 [TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = '9', .flags = 0 },
486 [TERM_CHARSET_GERMAN_NRCS] = { .raw = 'K', .flags = 0 },
487 [TERM_CHARSET_GREEK_DEC] = { .raw = '?', .flags = TERM_SEQ_FLAG_DQUOTE },
488 [TERM_CHARSET_GREEK_NRCS] = { .raw = '>', .flags = TERM_SEQ_FLAG_DQUOTE },
489 [TERM_CHARSET_HEBREW_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_DQUOTE },
490 [TERM_CHARSET_HEBREW_NRCS] = { .raw = '=', .flags = TERM_SEQ_FLAG_PERCENT },
491 [TERM_CHARSET_ITALIAN_NRCS] = { .raw = 'Y', .flags = 0 },
492 [TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '`', .flags = 0 },
493 [TERM_CHARSET_PORTUGUESE_NRCS] = { .raw = '6', .flags = TERM_SEQ_FLAG_PERCENT },
494 [TERM_CHARSET_RUSSIAN_NRCS] = { .raw = '5', .flags = TERM_SEQ_FLAG_AND },
495 [TERM_CHARSET_SCS_NRCS] = { .raw = '3', .flags = TERM_SEQ_FLAG_PERCENT },
496 [TERM_CHARSET_SPANISH_NRCS] = { .raw = 'Z', .flags = 0 },
497 [TERM_CHARSET_SWEDISH_NRCS] = { .raw = '7', .flags = 0 },
498 [TERM_CHARSET_SWISS_NRCS] = { .raw = '=', .flags = 0 },
499 [TERM_CHARSET_TURKISH_DEC] = { .raw = '0', .flags = TERM_SEQ_FLAG_PERCENT },
500 [TERM_CHARSET_TURKISH_NRCS] = { .raw = '2', .flags = TERM_SEQ_FLAG_PERCENT },
502 /* special charsets */
503 [TERM_CHARSET_USERPREF_SUPPLEMENTAL] = { .raw = '<', .flags = 0 },
505 /* secondary choices */
506 [TERM_CHARSET_CNT + TERM_CHARSET_FINNISH_NRCS] = { .raw = 'C', .flags = 0 },
507 [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_NRCS] = { .raw = 'f', .flags = 0 },
508 [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = 'Q', .flags = 0 },
509 [TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = 'E', .flags = 0 },
510 [TERM_CHARSET_CNT + TERM_CHARSET_SWEDISH_NRCS] = { .raw = 'H', .flags = 0 }, /* unused; conflicts with ISO_HEBREW */
512 /* tertiary choices */
513 [TERM_CHARSET_CNT + TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '6', .flags = 0 },
518 * Secondary choice on SWEDISH_NRCS and primary choice on
519 * ISO_HEBREW_SUPPLEMENTAL have a conflict: raw=="H", flags==0.
520 * We always choose the ISO 96-compat set, which is what VT510 does.
/* linear scan in index order: on a conflict the lower (primary) index is seen first */
523 for (i = 0; i < ELEMENTSOF(charset_cmds); ++i) {
524 if (charset_cmds[i].raw == raw && charset_cmds[i].flags == flags) {
/* fold secondary/tertiary slots back onto the primary charset id */
526 while (cs >= TERM_CHARSET_CNT)
527 cs -= TERM_CHARSET_CNT;
/* with require_96, ids in [TERM_CHARSET_96_CNT, TERM_CHARSET_94_CNT) are rejected */
529 if (!require_96 || cs < TERM_CHARSET_96_CNT || cs >= TERM_CHARSET_94_CNT)
/*
 * Tell whether @value has exactly one bit set, i.e. is a power of two.
 * Returns false for 0.
 */
static inline bool exactly_one_bit_set(unsigned int value) {
        /* clearing the lowest set bit leaves nothing behind iff it was the only one */
        return value != 0 && (value & (value - 1)) == 0;
}
/*
 * Classify an escape sequence by its final character (seq->terminator) and
 * its collected intermediates (seq->intermediates).
 *
 * First the charset designators are tried: if exactly one of the designator
 * intermediates collected in `t` is present, charset_from_cmd() is asked to
 * resolve the final character together with the remaining flags. For the
 * POPEN/PCLOSE/MULT/PLUS group the lookup passes require_96 == false, for
 * MINUS/DOT/SLASH it passes require_96 == true.
 *
 * Otherwise the final character selects the command. Most commands require
 * that no intermediate (flags == 0) or exactly one specific intermediate was
 * collected. TERM_CMD_NONE is returned for a NULL @seq and as the last
 * statement of the function.
 *
 * NOTE(review): the case labels of the second switch, several return
 * statements and the code that writes @cs_out are not visible in this
 * excerpt.
 */
542 static unsigned int term_parse_host_escape(const term_seq *seq, unsigned int *cs_out) {
543 unsigned int t, flags;
546 assert_return(seq, TERM_CMD_NONE);
548 flags = seq->intermediates;
/* t: mask of all intermediates that may introduce a charset designation */
549 t = TERM_SEQ_FLAG_POPEN | TERM_SEQ_FLAG_PCLOSE | TERM_SEQ_FLAG_MULT |
550 TERM_SEQ_FLAG_PLUS | TERM_SEQ_FLAG_MINUS | TERM_SEQ_FLAG_DOT |
553 if (exactly_one_bit_set(flags & t)) {
555 case TERM_SEQ_FLAG_POPEN:
556 case TERM_SEQ_FLAG_PCLOSE:
557 case TERM_SEQ_FLAG_MULT:
558 case TERM_SEQ_FLAG_PLUS:
/* the designator bit itself is stripped before the table lookup */
559 cs = charset_from_cmd(seq->terminator, flags & ~t, false);
561 case TERM_SEQ_FLAG_MINUS:
562 case TERM_SEQ_FLAG_DOT:
563 case TERM_SEQ_FLAG_SLASH:
564 cs = charset_from_cmd(seq->terminator, flags & ~t, true);
577 /* looked like a charset-cmd but wasn't; continue */
580 switch (seq->terminator) {
582 if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL top-half */
583 return TERM_CMD_DECDHL_TH;
586 if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL bottom-half */
587 return TERM_CMD_DECDHL_BH;
590 if (flags == TERM_SEQ_FLAG_HASH) /* DECSWL */
591 return TERM_CMD_DECSWL;
594 if (flags == 0) /* DECBI */
595 return TERM_CMD_DECBI;
596 else if (flags == TERM_SEQ_FLAG_HASH) /* DECDWL */
597 return TERM_CMD_DECDWL;
600 if (flags == 0) /* DECSC */
601 return TERM_CMD_DECSC;
604 if (flags == 0) /* DECRC */
605 return TERM_CMD_DECRC;
606 else if (flags == TERM_SEQ_FLAG_HASH) /* DECALN */
607 return TERM_CMD_DECALN;
610 if (flags == 0) /* DECFI */
611 return TERM_CMD_DECFI;
614 if (flags == 0) /* DECANM */
615 return TERM_CMD_DECANM;
618 if (flags == 0) /* DECKPAM */
619 return TERM_CMD_DECKPAM;
622 if (flags == 0) /* DECKPNM */
623 return TERM_CMD_DECKPNM;
626 if (flags == TERM_SEQ_FLAG_PERCENT) {
627 /* Select default character set */
628 return TERM_CMD_XTERM_SDCS;
632 if (flags == 0) /* IND */
636 if (flags == 0) /* NEL */
640 if (flags == 0) /* Cursor to lower-left corner of screen */
641 return TERM_CMD_XTERM_CLLHP;
642 else if (flags == TERM_SEQ_FLAG_SPACE) /* S7C1T */
643 return TERM_CMD_S7C1T;
646 if (flags == TERM_SEQ_FLAG_SPACE) { /* S8C1T */
647 return TERM_CMD_S8C1T;
648 } else if (flags == TERM_SEQ_FLAG_PERCENT) {
649 /* Select UTF-8 character set */
650 return TERM_CMD_XTERM_SUCS;
654 if (flags == 0) /* HTS */
658 if (flags == TERM_SEQ_FLAG_SPACE) {
659 /* Set ANSI conformance level 1 */
660 return TERM_CMD_XTERM_SACL1;
664 if (flags == 0) { /* RI */
666 } else if (flags == TERM_SEQ_FLAG_SPACE) {
667 /* Set ANSI conformance level 2 */
668 return TERM_CMD_XTERM_SACL2;
672 if (flags == 0) { /* SS2 */
674 } else if (flags == TERM_SEQ_FLAG_SPACE) {
675 /* Set ANSI conformance level 3 */
676 return TERM_CMD_XTERM_SACL3;
680 if (flags == 0) /* SS3 */
684 if (flags == 0) /* DCS: this is already handled by the state-machine */
688 if (flags == 0) /* SPA */
692 if (flags == 0) /* EPA */
696 if (flags == 0) { /* SOS */
697 /* this is already handled by the state-machine */
702 if (flags == 0) /* DECID */
703 return TERM_CMD_DECID;
706 if (flags == 0) { /* CSI */
707 /* this is already handled by the state-machine */
712 if (flags == 0) /* ST */
716 if (flags == 0) { /* OSC */
717 /* this is already handled by the state-machine */
722 if (flags == 0) { /* PM */
723 /* this is already handled by the state-machine */
728 if (flags == 0) { /* APC */
729 /* this is already handled by the state-machine */
734 if (flags == 0) /* RIS */
738 if (flags == 0) /* Memory lock */
739 return TERM_CMD_XTERM_MLHP;
742 if (flags == 0) /* Memory unlock */
743 return TERM_CMD_XTERM_MUHP;
746 if (flags == 0) /* LS2 */
750 if (flags == 0) /* LS3 */
754 if (flags == 0) /* LS3R */
755 return TERM_CMD_LS3R;
758 if (flags == 0) /* LS2R */
759 return TERM_CMD_LS2R;
762 if (flags == 0) /* LS1R */
763 return TERM_CMD_LS1R;
/* nothing matched: unknown escape sequence */
767 return TERM_CMD_NONE;
/*
 * Classify a CSI sequence by its final character (seq->terminator) and its
 * collected intermediates (seq->intermediates). Each final character accepts
 * either no intermediate (flags == 0) or one specific combination of
 * intermediate flags. A few ambiguous encodings are resolved by looking at
 * seq->n_args or seq->args[0].
 *
 * Returns the matching TERM_CMD_* value, or TERM_CMD_NONE when @seq is NULL
 * or nothing matched.
 *
 * NOTE(review): the case labels and many short return statements of this
 * switch are not visible in this excerpt.
 */
770 static unsigned int term_parse_host_csi(const term_seq *seq) {
773 assert_return(seq, TERM_CMD_NONE);
775 flags = seq->intermediates;
777 switch (seq->terminator) {
779 if (flags == 0) /* CUU */
783 if (flags == 0) /* HPR */
787 if (flags == 0) /* CUD */
791 if (flags == 0) /* REP */
795 if (flags == 0) /* CUF */
799 if (flags == 0) /* DA1 */
801 else if (flags == TERM_SEQ_FLAG_GT) /* DA2 */
803 else if (flags == TERM_SEQ_FLAG_EQUAL) /* DA3 */
807 if (flags == 0) /* CUB */
811 if (flags == 0) /* VPA */
815 if (flags == 0) /* CNL */
819 if (flags == 0) /* VPR */
823 if (flags == 0) /* CPL */
827 if (flags == 0) /* HVP */
831 if (flags == 0) /* CHA */
835 if (flags == 0) /* TBC */
837 else if (flags == TERM_SEQ_FLAG_MULT) /* DECLFKC */
838 return TERM_CMD_DECLFKC;
841 if (flags == 0) /* CUP */
845 if (flags == 0) /* SM ANSI */
846 return TERM_CMD_SM_ANSI;
847 else if (flags == TERM_SEQ_FLAG_WHAT) /* SM DEC */
848 return TERM_CMD_SM_DEC;
851 if (flags == 0) /* CHT */
855 if (flags == 0) /* MC ANSI */
856 return TERM_CMD_MC_ANSI;
857 else if (flags == TERM_SEQ_FLAG_WHAT) /* MC DEC */
858 return TERM_CMD_MC_DEC;
861 if (flags == 0) /* ED */
863 else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSED */
864 return TERM_CMD_DECSED;
867 if (flags == 0) /* EL */
869 else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSEL */
870 return TERM_CMD_DECSEL;
873 if (flags == 0) /* IL */
877 if (flags == 0) /* RM ANSI */
878 return TERM_CMD_RM_ANSI;
879 else if (flags == TERM_SEQ_FLAG_WHAT) /* RM DEC */
880 return TERM_CMD_RM_DEC;
883 if (flags == 0) /* DL */
887 if (flags == 0) /* SGR */
889 else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SMR */
890 return TERM_CMD_XTERM_SRV;
893 if (flags == 0) /* DSR ANSI */
894 return TERM_CMD_DSR_ANSI;
895 else if (flags == TERM_SEQ_FLAG_GT) /* XTERM RMR */
896 return TERM_CMD_XTERM_RRV;
897 else if (flags == TERM_SEQ_FLAG_WHAT) /* DSR DEC */
898 return TERM_CMD_DSR_DEC;
901 if (flags == 0) /* DCH */
903 else if (flags == TERM_SEQ_FLAG_SPACE) /* PPA */
907 if (flags == 0) /* DECSSL */
908 return TERM_CMD_DECSSL;
909 else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSSCLS */
910 return TERM_CMD_DECSSCLS;
911 else if (flags == TERM_SEQ_FLAG_BANG) /* DECSTR */
912 return TERM_CMD_DECSTR;
913 else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCL */
914 return TERM_CMD_DECSCL;
915 else if (flags == TERM_SEQ_FLAG_CASH) /* DECRQM-ANSI */
916 return TERM_CMD_DECRQM_ANSI;
917 else if (flags == (TERM_SEQ_FLAG_CASH | TERM_SEQ_FLAG_WHAT)) /* DECRQM-DEC */
918 return TERM_CMD_DECRQM_DEC;
919 else if (flags == TERM_SEQ_FLAG_PCLOSE) /* DECSDPT */
920 return TERM_CMD_DECSDPT;
921 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSPPCS */
922 return TERM_CMD_DECSPPCS;
923 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSR */
924 return TERM_CMD_DECSR;
925 else if (flags == TERM_SEQ_FLAG_COMMA) /* DECLTOD */
926 return TERM_CMD_DECLTOD;
927 else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SPM */
928 return TERM_CMD_XTERM_SPM;
931 if (flags == TERM_SEQ_FLAG_SPACE) /* PPR */
935 if (flags == 0) /* DECLL */
936 return TERM_CMD_DECLL;
937 else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSCUSR */
938 return TERM_CMD_DECSCUSR;
939 else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCA */
940 return TERM_CMD_DECSCA;
941 else if (flags == TERM_SEQ_FLAG_CASH) /* DECSDDT */
942 return TERM_CMD_DECSDDT;
/*
 * NOTE(review): the branch below is labelled DECSRC but returns
 * TERM_CMD_DECSR, the same value the DECSR branch at line 924 returns. This
 * looks like a typo for TERM_CMD_DECSRC -- confirm that the constant exists
 * in term-internal.h before changing it.
 */
943 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSRC */
944 return TERM_CMD_DECSR;
945 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECELF */
946 return TERM_CMD_DECELF;
947 else if (flags == TERM_SEQ_FLAG_COMMA) /* DECTID */
948 return TERM_CMD_DECTID;
951 if (flags == TERM_SEQ_FLAG_SPACE) /* PPB */
957 return TERM_CMD_DECSTBM;
958 } else if (flags == TERM_SEQ_FLAG_SPACE) {
960 return TERM_CMD_DECSKCV;
961 } else if (flags == TERM_SEQ_FLAG_CASH) {
963 return TERM_CMD_DECCARA;
964 } else if (flags == TERM_SEQ_FLAG_MULT) {
966 return TERM_CMD_DECSCS;
967 } else if (flags == TERM_SEQ_FLAG_PLUS) {
969 return TERM_CMD_DECSMKR;
970 } else if (flags == TERM_SEQ_FLAG_WHAT) {
972 * There's a conflict between DECPCTERM and XTERM-RPM.
973 * XTERM-RPM takes a single argument, DECPCTERM takes 2.
974 * Split both up and forward the call to the closer
/* the two tests below cover every possible n_args value */
977 if (seq->n_args <= 1) /* XTERM RPM */
978 return TERM_CMD_XTERM_RPM;
979 else if (seq->n_args >= 2) /* DECPCTERM */
980 return TERM_CMD_DECPCTERM;
984 if (flags == 0) /* SU */
986 else if (flags == TERM_SEQ_FLAG_WHAT) /* XTERM SGFX */
987 return TERM_CMD_XTERM_SGFX;
992 * There's a conflict between DECSLRM and SC-ANSI which
993 * cannot be resolved without knowing the state of
994 * DECLRMM. We leave that decision up to the caller.
996 return TERM_CMD_DECSLRM_OR_SC;
997 } else if (flags == TERM_SEQ_FLAG_CASH) {
999 return TERM_CMD_DECSPRTT;
1000 } else if (flags == TERM_SEQ_FLAG_MULT) {
1002 return TERM_CMD_DECSFC;
1003 } else if (flags == TERM_SEQ_FLAG_WHAT) {
1005 return TERM_CMD_XTERM_SPM;
1011 * Awesome: There's a conflict between SD and XTERM IHMT
1012 * that we have to resolve by checking the parameter
1013 * count.. XTERM_IHMT needs exactly 5 arguments, SD
1014 * takes 0 or 1. We're conservative here and give both
1015 * a wider range to allow unused arguments (compat...).
1017 if (seq->n_args >= 5) {
1019 return TERM_CMD_XTERM_IHMT;
1020 } else if (seq->n_args < 5) {
1024 } else if (flags == TERM_SEQ_FLAG_GT) {
1026 return TERM_CMD_XTERM_RTM;
/* first argument below 24 selects XTERM_WM; the fallback is DECSLPP */
1031 if (seq->n_args > 0 && seq->args[0] < 24) {
1033 return TERM_CMD_XTERM_WM;
1036 return TERM_CMD_DECSLPP;
1038 } else if (flags == TERM_SEQ_FLAG_SPACE) {
1040 return TERM_CMD_DECSWBV;
1041 } else if (flags == TERM_SEQ_FLAG_DQUOTE) {
1043 return TERM_CMD_DECSRFR;
1044 } else if (flags == TERM_SEQ_FLAG_CASH) {
1046 return TERM_CMD_DECRARA;
1047 } else if (flags == TERM_SEQ_FLAG_GT) {
1049 return TERM_CMD_XTERM_STM;
1053 if (flags == 0) /* NP */
1060 } else if (flags == TERM_SEQ_FLAG_SPACE) {
1062 return TERM_CMD_DECSMBV;
1063 } else if (flags == TERM_SEQ_FLAG_DQUOTE) {
1065 return TERM_CMD_DECSTRL;
1066 } else if (flags == TERM_SEQ_FLAG_WHAT) {
1068 return TERM_CMD_DECRQUPSS;
/* args[0] is read without an n_args check here; parser_clear() presets every slot to -1 */
1069 } else if (seq->args[0] == 1 && flags == TERM_SEQ_FLAG_CASH) {
1071 return TERM_CMD_DECRQTSR;
1072 } else if (flags == TERM_SEQ_FLAG_MULT) {
1074 return TERM_CMD_DECSCP;
1075 } else if (flags == TERM_SEQ_FLAG_COMMA) {
1077 return TERM_CMD_DECRQKT;
1081 if (flags == 0) /* PP */
1085 if (flags == TERM_SEQ_FLAG_SPACE) /* DECSLCK */
1086 return TERM_CMD_DECSLCK;
1087 else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECRQDE */
1088 return TERM_CMD_DECRQDE;
1089 else if (flags == TERM_SEQ_FLAG_CASH) /* DECCRA */
1090 return TERM_CMD_DECCRA;
1091 else if (flags == TERM_SEQ_FLAG_COMMA) /* DECRPKT */
1092 return TERM_CMD_DECRPKT;
1095 if (seq->args[0] == 5 && flags == TERM_SEQ_FLAG_WHAT) {
1097 return TERM_CMD_DECST8C;
1101 if (flags == TERM_SEQ_FLAG_CASH) /* DECRQPSR */
1102 return TERM_CMD_DECRQPSR;
1103 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECEFR */
1104 return TERM_CMD_DECEFR;
1105 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSPP */
1106 return TERM_CMD_DECSPP;
1109 if (flags == 0) /* ECH */
1110 return TERM_CMD_ECH;
1113 if (flags == 0) /* DECREQTPARM */
1114 return TERM_CMD_DECREQTPARM;
1115 else if (flags == TERM_SEQ_FLAG_CASH) /* DECFRA */
1116 return TERM_CMD_DECFRA;
1117 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSACE */
1118 return TERM_CMD_DECSACE;
1119 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECRQPKFM */
1120 return TERM_CMD_DECRQPKFM;
1123 if (flags == 0) /* DECTST */
1124 return TERM_CMD_DECTST;
1125 else if (flags == TERM_SEQ_FLAG_MULT) /* DECRQCRA */
1126 return TERM_CMD_DECRQCRA;
1127 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKFMR */
1128 return TERM_CMD_DECPKFMR;
1131 if (flags == 0) /* CBT */
1132 return TERM_CMD_CBT;
1135 if (flags == TERM_SEQ_FLAG_CASH) /* DECERA */
1136 return TERM_CMD_DECERA;
1137 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECELR */
1138 return TERM_CMD_DECELR;
1139 else if (flags == TERM_SEQ_FLAG_MULT) /* DECINVM */
1140 return TERM_CMD_DECINVM;
1141 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKA */
1142 return TERM_CMD_DECPKA;
1145 if (flags == 0) /* ICH */
1146 return TERM_CMD_ICH;
1149 if (flags == 0) /* HPA */
1150 return TERM_CMD_HPA;
1153 if (flags == TERM_SEQ_FLAG_CASH) /* DECSERA */
1154 return TERM_CMD_DECSERA;
1155 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECSLE */
1156 return TERM_CMD_DECSLE;
1159 if (flags == TERM_SEQ_FLAG_CASH) /* DECSCPP */
1160 return TERM_CMD_DECSCPP;
1161 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECRQLP */
1162 return TERM_CMD_DECRQLP;
1163 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSNLS */
1164 return TERM_CMD_DECSNLS;
1167 if (flags == TERM_SEQ_FLAG_SPACE) /* DECKBD */
1168 return TERM_CMD_DECKBD;
1169 else if (flags == TERM_SEQ_FLAG_CASH) /* DECSASD */
1170 return TERM_CMD_DECSASD;
1171 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECIC */
1172 return TERM_CMD_DECIC;
1175 if (flags == TERM_SEQ_FLAG_SPACE) /* DECTME */
1176 return TERM_CMD_DECTME;
1177 else if (flags == TERM_SEQ_FLAG_CASH) /* DECSSDT */
1178 return TERM_CMD_DECSSDT;
1179 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECDC */
1180 return TERM_CMD_DECDC;
/* nothing matched: unknown CSI sequence */
1184 return TERM_CMD_NONE;
1189 * This parser controls the parser-state and returns any detected sequence to
1190 * the caller. The parser is based on this state-diagram from Paul Williams:
1191 * http://vt100.net/emu/
1192 * It was written from scratch and extended where needed.
1193 * This parser is fully compatible up to the vt500 series. We expect UCS-4 as
1194 * input. It's the caller's responsibility to do any UTF-8 parsing.
/*
 * States of the control-sequence state machine. parser_transition() stores
 * one of these values in parser->state; passing STATE_NONE there leaves the
 * current state untouched.
 * NOTE(review): the opening line of this enum and its tail are not visible in
 * this excerpt.
 */
1198 STATE_NONE, /* placeholder */
1199 STATE_GROUND, /* initial state and ground */
1200 STATE_ESC, /* ESC sequence was started */
1201 STATE_ESC_INT, /* intermediate escape characters */
1202 STATE_CSI_ENTRY, /* starting CSI sequence */
1203 STATE_CSI_PARAM, /* CSI parameters */
1204 STATE_CSI_INT, /* intermediate CSI characters */
1205 STATE_CSI_IGNORE, /* CSI error; ignore this CSI sequence */
1206 STATE_DCS_ENTRY, /* starting DCS sequence */
1207 STATE_DCS_PARAM, /* DCS parameters */
1208 STATE_DCS_INT, /* intermediate DCS characters */
1209 STATE_DCS_PASS, /* DCS data passthrough */
1210 STATE_DCS_IGNORE, /* DCS error; ignore this DCS sequence */
1211 STATE_OSC_STRING, /* parsing OSC sequence */
1212 STATE_ST_IGNORE, /* unimplemented seq; ignore until ST */
/*
 * Actions the state machine can request on a transition. parser_transition()
 * dispatches on these values; the DCS and OSC actions are accepted there but
 * are marked "not implemented" and produce TERM_SEQ_NONE.
 * NOTE(review): the tail of this enum is not visible in this excerpt.
 */
1216 enum parser_action {
1217 ACTION_NONE, /* placeholder */
1218 ACTION_CLEAR, /* clear parameters */
1219 ACTION_IGNORE, /* ignore the character entirely */
1220 ACTION_PRINT, /* print the character on the console */
1221 ACTION_EXECUTE, /* execute single control character (C0/C1) */
1222 ACTION_COLLECT, /* collect intermediate character */
1223 ACTION_PARAM, /* collect parameter character */
1224 ACTION_ESC_DISPATCH, /* dispatch escape sequence */
1225 ACTION_CSI_DISPATCH, /* dispatch csi sequence */
1226 ACTION_DCS_START, /* start of DCS data */
1227 ACTION_DCS_COLLECT, /* collect DCS data */
1228 ACTION_DCS_CONSUME, /* consume DCS terminator */
1229 ACTION_DCS_DISPATCH, /* dispatch dcs sequence */
1230 ACTION_OSC_START, /* start of OSC data */
1231 ACTION_OSC_COLLECT, /* collect OSC data */
1232 ACTION_OSC_CONSUME, /* consume OSC terminator */
1233 ACTION_OSC_DISPATCH, /* dispatch osc sequence */
/*
 * Allocate a zero-initialized parser object, remember whether it runs on the
 * host side (@host) and pre-allocate the string buffer seq.st with room for
 * st_alloc (64) characters plus one extra byte.
 * Returns -EINVAL (via assert_return) when @out is NULL.
 * NOTE(review): the error paths for failed allocations and the code handing
 * the object over to @out are not visible in this excerpt.
 */
1237 int term_parser_new(term_parser **out, bool host) {
/* cleanup attribute: presumably frees the object automatically on early return -- confirm */
1238 _term_parser_free_ term_parser *parser = NULL;
1240 assert_return(out, -EINVAL);
1242 parser = new0(term_parser, 1);
1246 parser->is_host = host;
1247 parser->st_alloc = 64;
/* one byte more than st_alloc is allocated */
1248 parser->seq.st = new0(char, parser->st_alloc + 1);
1249 if (!parser->seq.st)
/*
 * Release a parser: frees the string buffer seq.st that term_parser_new()
 * allocated.
 * NOTE(review): the NULL check, the release of @parser itself and the return
 * value are not visible in this excerpt.
 */
1257 term_parser *term_parser_free(term_parser *parser) {
1261 free(parser->seq.st);
1266 static inline void parser_clear(term_parser *parser) {
1269 parser->seq.command = TERM_CMD_NONE;
1270 parser->seq.terminator = 0;
1271 parser->seq.intermediates = 0;
1272 parser->seq.charset = TERM_CHARSET_NONE;
1273 parser->seq.n_args = 0;
1274 for (i = 0; i < TERM_PARSER_ARG_MAX; ++i)
1275 parser->seq.args[i] = -1;
1277 parser->seq.n_st = 0;
1278 parser->seq.st[0] = 0;
1281 static int parser_ignore(term_parser *parser, uint32_t raw) {
1282 parser_clear(parser);
1283 parser->seq.type = TERM_SEQ_IGNORE;
1284 parser->seq.command = TERM_CMD_NONE;
1285 parser->seq.terminator = raw;
1286 parser->seq.charset = TERM_CHARSET_NONE;
1288 return parser->seq.type;
1291 static int parser_print(term_parser *parser, uint32_t raw) {
1292 parser_clear(parser);
1293 parser->seq.type = TERM_SEQ_GRAPHIC;
1294 parser->seq.command = TERM_CMD_GRAPHIC;
1295 parser->seq.terminator = raw;
1296 parser->seq.charset = TERM_CHARSET_NONE;
1298 return parser->seq.type;
1301 static int parser_execute(term_parser *parser, uint32_t raw) {
1302 parser_clear(parser);
1303 parser->seq.type = TERM_SEQ_CONTROL;
1304 parser->seq.command = TERM_CMD_GRAPHIC;
1305 parser->seq.terminator = raw;
1306 parser->seq.charset = TERM_CHARSET_NONE;
1307 if (!parser->is_host)
1308 parser->seq.command = term_parse_host_control(&parser->seq);
1310 return parser->seq.type;
1313 static void parser_collect(term_parser *parser, uint32_t raw) {
1315 * Usually, characters from 0x30 to 0x3f are only allowed as leading
1316 * markers (or as part of the parameters), characters from 0x20 to 0x2f
1317 * are only allowed as trailing markers. However, our state-machine
1318 * already verifies those restrictions so we can handle them the same
1319 * way here. Note that we safely allow markers to be specified multiple
1323 if (raw >= 0x20 && raw <= 0x3f)
1324 parser->seq.intermediates |= 1 << (raw - 0x20);
/*
 * Feed one parameter character into the argument list of the sequence under
 * construction. Decimal digits are accumulated into the current slot
 * seq.args[seq.n_args]; n_args is advanced while it is still below
 * TERM_PARSER_ARG_MAX, and nothing is stored once all slots are used.
 * NOTE(review): the separator test that guards the n_args increment, the
 * handling of an unset (-1) slot and the clamping mentioned in the comment
 * below are not visible in this excerpt.
 */
1327 static void parser_param(term_parser *parser, uint32_t raw) {
1331 if (parser->seq.n_args < TERM_PARSER_ARG_MAX)
1332 ++parser->seq.n_args;
/* all argument slots are taken: nothing more can be stored */
1337 if (parser->seq.n_args >= TERM_PARSER_ARG_MAX)
1340 if (raw >= '0' && raw <= '9') {
1341 new = parser->seq.args[parser->seq.n_args];
/* append the digit to the decimal value collected so far */
1344 new = new * 10 + raw - '0';
1346 /* VT510 tells us to clamp all values to [0, 9999], however, it
1347 * also allows commands with values up to 2^15-1. We simply use
1348 * 2^16 as maximum here to be compatible to all commands, but
1349 * avoid overflows in any calculations. */
1353 parser->seq.args[parser->seq.n_args] = new;
1357 static int parser_esc(term_parser *parser, uint32_t raw) {
1358 parser->seq.type = TERM_SEQ_ESCAPE;
1359 parser->seq.command = TERM_CMD_NONE;
1360 parser->seq.terminator = raw;
1361 parser->seq.charset = TERM_CHARSET_NONE;
1362 if (!parser->is_host)
1363 parser->seq.command = term_parse_host_escape(&parser->seq, &parser->seq.charset);
1365 return parser->seq.type;
1368 static int parser_csi(term_parser *parser, uint32_t raw) {
1369 /* parser->seq is cleared during CSI-ENTER state, thus there's no need
1370 * to clear invalid fields here. */
1372 if (parser->seq.n_args < TERM_PARSER_ARG_MAX) {
1373 if (parser->seq.n_args > 0 ||
1374 parser->seq.args[parser->seq.n_args] >= 0)
1375 ++parser->seq.n_args;
1378 parser->seq.type = TERM_SEQ_CSI;
1379 parser->seq.command = TERM_CMD_NONE;
1380 parser->seq.terminator = raw;
1381 parser->seq.charset = TERM_CHARSET_NONE;
1382 if (!parser->is_host)
1383 parser->seq.command = term_parse_host_csi(&parser->seq);
1385 return parser->seq.type;
1388 /* perform state transition and dispatch related actions */
1389 static int parser_transition(term_parser *parser, uint32_t raw, unsigned int state, unsigned int action) {
1390 if (state != STATE_NONE)
1391 parser->state = state;
1395 return TERM_SEQ_NONE;
1397 parser_clear(parser);
1398 return TERM_SEQ_NONE;
1400 return parser_ignore(parser, raw);
1402 return parser_print(parser, raw);
1403 case ACTION_EXECUTE:
1404 return parser_execute(parser, raw);
1405 case ACTION_COLLECT:
1406 parser_collect(parser, raw);
1407 return TERM_SEQ_NONE;
1409 parser_param(parser, raw);
1410 return TERM_SEQ_NONE;
1411 case ACTION_ESC_DISPATCH:
1412 return parser_esc(parser, raw);
1413 case ACTION_CSI_DISPATCH:
1414 return parser_csi(parser, raw);
1415 case ACTION_DCS_START:
1416 /* not implemented */
1417 return TERM_SEQ_NONE;
1418 case ACTION_DCS_COLLECT:
1419 /* not implemented */
1420 return TERM_SEQ_NONE;
1421 case ACTION_DCS_CONSUME:
1422 /* not implemented */
1423 return TERM_SEQ_NONE;
1424 case ACTION_DCS_DISPATCH:
1425 /* not implemented */
1426 return TERM_SEQ_NONE;
1427 case ACTION_OSC_START:
1428 /* not implemented */
1429 return TERM_SEQ_NONE;
1430 case ACTION_OSC_COLLECT:
1431 /* not implemented */
1432 return TERM_SEQ_NONE;
1433 case ACTION_OSC_CONSUME:
1434 /* not implemented */
1435 return TERM_SEQ_NONE;
1436 case ACTION_OSC_DISPATCH:
1437 /* not implemented */
1438 return TERM_SEQ_NONE;
1440 assert_not_reached("invalid vte-parser action");
1441 return TERM_SEQ_NONE;
/*
 * Dispatch one input character according to the current state of the parser.
 *
 * Every visible branch forwards to parser_transition(), passing the state to
 * switch to (STATE_NONE leaves parser->state untouched) together with the
 * action to perform, and returns its result.
 *
 * NOTE(review): several state labels, nested switch headers and case labels
 * of this function are not present in this copy of the file; the remarks
 * below only describe branches that are visible.
 */
1445 static int parser_feed_to_state(term_parser *parser, uint32_t raw) {
1446 switch (parser->state) {
1449 * During initialization, parser->state is cleared. Treat this
1450 * as STATE_GROUND. We will then never get to STATE_NONE again.
1454 case 0x00 ... 0x1f: /* C0 */
1455 case 0x80 ... 0x9b: /* C1 \ { ST } */
1457 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1459 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1462 return parser_transition(parser, raw, STATE_NONE, ACTION_PRINT);
1465 case 0x00 ... 0x1f: /* C0 */
1466 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1467 case 0x20 ... 0x2f: /* [' ' - '/'] */
1468 return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT);
1469 case 0x30 ... 0x4f: /* ['0' - '~'] \ { 'P', 'X', '[', ']', '^', '_' } */
1474 return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH);
1475 case 0x50: /* 'P' */
1476 return parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR);
1477 case 0x5b: /* '[' */
1478 return parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR);
1479 case 0x5d: /* ']' */
1480 return parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR);
1481 case 0x58: /* 'X' */
1482 case 0x5e: /* '^' */
1483 case 0x5f: /* '_' */
1484 return parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE);
1485 case 0x7f: /* DEL */
1486 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1488 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1491 return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT);
1494 case 0x00 ... 0x1f: /* C0 */
1495 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1496 case 0x20 ... 0x2f: /* [' ' - '/'] */
1497 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1498 case 0x30 ... 0x7e: /* ['0' - '~'] */
1499 return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH);
1500 case 0x7f: /* DEL */
1501 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1503 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1506 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
/* CSI_ENTRY: target state of the 0x5b and CSI (0x9b) transitions, both of
 * which run ACTION_CLEAR first. */
1507 case STATE_CSI_ENTRY:
1509 case 0x00 ... 0x1f: /* C0 */
1510 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1511 case 0x20 ... 0x2f: /* [' ' - '/'] */
1512 return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT);
1513 case 0x3a: /* ':' */
1514 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1515 case 0x30 ... 0x39: /* ['0' - '9'] */
1516 case 0x3b: /* ';' */
1517 return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_PARAM);
1518 case 0x3c ... 0x3f: /* ['<' - '?'] */
1519 return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_COLLECT);
1520 case 0x40 ... 0x7e: /* ['@' - '~'] */
1521 return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
1522 case 0x7f: /* DEL */
1523 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1525 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1528 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
/* CSI_PARAM: digits and 0x3b keep feeding ACTION_PARAM; 0x3a or any of
 * 0x3c-0x3f at this point switches to STATE_CSI_IGNORE. */
1529 case STATE_CSI_PARAM:
1531 case 0x00 ... 0x1f: /* C0 */
1532 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1533 case 0x20 ... 0x2f: /* [' ' - '/'] */
1534 return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT);
1535 case 0x30 ... 0x39: /* ['0' - '9'] */
1536 case 0x3b: /* ';' */
1537 return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM);
1538 case 0x3a: /* ':' */
1539 case 0x3c ... 0x3f: /* ['<' - '?'] */
1540 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1541 case 0x40 ... 0x7e: /* ['@' - '~'] */
1542 return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
1543 case 0x7f: /* DEL */
1544 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1546 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1549 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1552 case 0x00 ... 0x1f: /* C0 */
1553 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1554 case 0x20 ... 0x2f: /* [' ' - '/'] */
1555 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1556 case 0x30 ... 0x3f: /* ['0' - '?'] */
1557 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1558 case 0x40 ... 0x7e: /* ['@' - '~'] */
1559 return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
1560 case 0x7f: /* DEL */
1561 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1563 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1566 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
/* CSI_IGNORE: C0 controls are still executed, 0x20-0x3f is dropped and a
 * byte in 0x40-0x7e returns to STATE_GROUND without dispatching anything. */
1567 case STATE_CSI_IGNORE:
1569 case 0x00 ... 0x1f: /* C0 */
1570 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1571 case 0x20 ... 0x3f: /* [' ' - '?'] */
1572 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
1573 case 0x40 ... 0x7e: /* ['@' - '~'] */
1574 return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE);
1575 case 0x7f: /* DEL */
1576 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1578 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1581 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
/* DCS_ENTRY: target state of the 0x50 and DCS (0x90) transitions. Unlike
 * the CSI states, C0 controls are ignored here instead of executed. */
1582 case STATE_DCS_ENTRY:
1584 case 0x00 ... 0x1f: /* C0 */
1585 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1586 case 0x20 ... 0x2f: /* [' ' - '/'] */
1587 return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT);
1588 case 0x3a: /* ':' */
1589 return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
1590 case 0x30 ... 0x39: /* ['0' - '9'] */
1591 case 0x3b: /* ';' */
1592 return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_PARAM);
1593 case 0x3c ... 0x3f: /* ['<' - '?'] */
1594 return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_COLLECT);
1595 case 0x40 ... 0x7e: /* ['@' - '~'] */
1596 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1597 case 0x7f: /* DEL */
1598 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1600 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1603 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
/* DCS_PARAM: parameters are accumulated as in CSI_PARAM, but a byte in
 * 0x40-0x7e moves on to STATE_DCS_PASS instead of dispatching. */
1604 case STATE_DCS_PARAM:
1606 case 0x00 ... 0x1f: /* C0 */
1607 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1608 case 0x20 ... 0x2f: /* [' ' - '/'] */
1609 return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT);
1610 case 0x30 ... 0x39: /* ['0' - '9'] */
1611 case 0x3b: /* ';' */
1612 return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM);
1613 case 0x3a: /* ':' */
1614 case 0x3c ... 0x3f: /* ['<' - '?'] */
1615 return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
1616 case 0x40 ... 0x7e: /* ['@' - '~'] */
1617 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1618 case 0x7f: /* DEL */
1619 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1621 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1624 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1627 case 0x00 ... 0x1f: /* C0 */
1628 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1629 case 0x20 ... 0x2f: /* [' ' - '/'] */
1630 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1631 case 0x30 ... 0x3f: /* ['0' - '?'] */
1632 return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
1633 case 0x40 ... 0x7e: /* ['@' - '~'] */
1634 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1635 case 0x7f: /* DEL */
1636 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1638 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1641 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
/* DCS_PASS: data bytes are handed to ACTION_DCS_COLLECT, which
 * parser_transition() currently treats as not implemented. */
1642 case STATE_DCS_PASS:
1644 case 0x00 ... 0x7e: /* ASCII \ { DEL } */
1645 return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT);
1646 case 0x7f: /* DEL */
1647 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1649 return parser_transition(parser, raw, STATE_GROUND, ACTION_DCS_DISPATCH);
1652 return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT);
/* DCS_IGNORE: all ASCII input is ignored while in this state. */
1653 case STATE_DCS_IGNORE:
1655 case 0x00 ... 0x7f: /* ASCII */
1656 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1658 return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE);
1661 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
/* OSC_STRING: input in 0x20-0x7f goes to ACTION_OSC_COLLECT; BEL (0x07)
 * leads back to STATE_GROUND with ACTION_OSC_DISPATCH. */
1662 case STATE_OSC_STRING:
1664 case 0x00 ... 0x06: /* C0 \ { BEL } */
1666 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1667 case 0x20 ... 0x7f: /* [' ' - DEL] */
1668 return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT);
1669 case 0x07: /* BEL */
1671 return parser_transition(parser, raw, STATE_GROUND, ACTION_OSC_DISPATCH);
1674 return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT);
/* ST_IGNORE: target state of the 0x58, 0x5e, 0x5f and SOS/APC transitions;
 * all ASCII input is ignored while in this state. */
1675 case STATE_ST_IGNORE:
1677 case 0x00 ... 0x7f: /* ASCII */
1678 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1680 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1683 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
1686 assert_not_reached("bad vte-parser state");
1690 int term_parser_feed(term_parser *parser, const term_seq **seq_out, uint32_t raw) {
1693 assert_return(parser, -EINVAL);
1694 assert_return(seq_out, -EINVAL);
1698 * * DEC treats GR codes as GL. We don't do that as we require UTF-8
1699 * as charset and, thus, it doesn't make sense to treat GR special.
1700 * * During control sequences, unexpected C1 codes cancel the sequence
1701 * and immediately start a new one. C0 codes, however, may or may not
1702 * be ignored/executed depending on the sequence.
1706 case 0x18: /* CAN */
1707 r = parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1709 case 0x1a: /* SUB */
1710 r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE);
1712 case 0x80 ... 0x8f: /* C1 \ {DCS, SOS, CSI, ST, OSC, PM, APC} */
1715 r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE);
1717 case 0x1b: /* ESC */
1718 r = parser_transition(parser, raw, STATE_ESC, ACTION_CLEAR);
1720 case 0x98: /* SOS */
1722 case 0x9f: /* APC */
1723 r = parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE);
1725 case 0x90: /* DCS */
1726 r = parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR);
1728 case 0x9d: /* OSC */
1729 r = parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR);
1731 case 0x9b: /* CSI */
1732 r = parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR);
1735 r = parser_feed_to_state(parser, raw);
1742 *seq_out = &parser->seq;