From: David Herrmann Date: Thu, 12 Jun 2014 15:51:14 +0000 (+0200) Subject: ui/term: add line/cell/char handling for terminal pages X-Git-Tag: v216~534 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=commitdiff_plain;h=84da4a3022bc599b26d9601cf1b7bf51d1d9f915 ui/term: add line/cell/char handling for terminal pages This commit introduces libsystemd-ui, a systemd-internal helper library that will contain all the UI related functionality. It is going to be used by systemd-welcomed, systemd-consoled, systemd-greeter and systemd-er. Further use-cases may follow. For now, this commit only adds terminal-page handling based on lines only. Follow-up commits will add more functionality. --- diff --git a/.gitignore b/.gitignore index aecc6ae0f..0ba01daeb 100644 --- a/.gitignore +++ b/.gitignore @@ -219,6 +219,7 @@ /test-strv /test-strxcpyx /test-tables +/test-term-page /test-time /test-tmpfiles /test-udev diff --git a/Makefile.am b/Makefile.am index a9ee8b070..0b9491ade 100644 --- a/Makefile.am +++ b/Makefile.am @@ -209,6 +209,7 @@ AM_CPPFLAGS = \ -I $(top_srcdir)/src/libsystemd/sd-event \ -I $(top_srcdir)/src/libsystemd/sd-rtnl \ -I $(top_srcdir)/src/libsystemd-network \ + -I $(top_srcdir)/src/libsystemd-terminal \ $(OUR_CPPFLAGS) AM_CFLAGS = $(OUR_CFLAGS) @@ -2826,6 +2827,33 @@ tests += \ test-icmp6-rs \ test-dhcp6-client +# ------------------------------------------------------------------------------ +noinst_LTLIBRARIES += \ + libsystemd-terminal.la + +libsystemd_terminal_la_CFLAGS = \ + $(AM_CFLAGS) + +libsystemd_terminal_la_SOURCES = \ + src/libsystemd-terminal/term-internal.h \ + src/libsystemd-terminal/term-page.c \ + src/libsystemd-terminal/term-wcwidth.c + +libsystemd_terminal_la_LIBADD = \ + libsystemd-internal.la \ + libsystemd-shared.la + +test_term_page_SOURCES = \ + src/libsystemd-terminal/test-term-page.c + +test_term_page_LDADD = \ + libsystemd-terminal.la \ + libsystemd-internal.la \ + libsystemd-shared.la + +tests += \ + 
test-term-page + # ------------------------------------------------------------------------------ if ENABLE_GTK_DOC SUBDIRS += \ diff --git a/configure.ac b/configure.ac index be05211d0..6f888a278 100644 --- a/configure.ac +++ b/configure.ac @@ -1043,6 +1043,15 @@ if test "x$enable_multi_seat_x" != "xno"; then fi AM_CONDITIONAL(ENABLE_MULTI_SEAT_X, [test "$have_multi_seat_x" = "yes"]) +# ------------------------------------------------------------------------------ +have_terminal=no +AC_ARG_ENABLE(terminal, AS_HELP_STRING([--enable-terminal], [enable terminal support])) +if test "x$enable_terminal" = "xyes"; then + AC_DEFINE(ENABLE_TERMINAL, 1, [Define if terminal support is to be enabled]) + have_terminal=yes +fi +AM_CONDITIONAL(ENABLE_TERMINAL, [test "x$have_terminal" = "xyes"]) + # ------------------------------------------------------------------------------ have_kdbus=no AC_ARG_ENABLE(kdbus, AS_HELP_STRING([--enable-kdbus], [do connect to kdbus by default])) @@ -1348,6 +1357,7 @@ AC_MSG_RESULT([ gudev: ${enable_gudev} gintrospection: ${enable_introspection} multi-seat-x: ${have_multi_seat_x} + terminal: ${have_terminal} kdbus: ${have_kdbus} Python: ${have_python} Python Headers: ${have_python_devel} diff --git a/src/libsystemd-terminal/term-internal.h b/src/libsystemd-terminal/term-internal.h new file mode 100644 index 000000000..af1c723ad --- /dev/null +++ b/src/libsystemd-terminal/term-internal.h @@ -0,0 +1,253 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright (C) 2014 David Herrmann + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. 
+ + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +#pragma once + +#include +#include +#include +#include "util.h" + +typedef struct term_char term_char_t; +typedef struct term_charbuf term_charbuf_t; + +typedef struct term_color term_color; +typedef struct term_attr term_attr; +typedef struct term_cell term_cell; +typedef struct term_line term_line; + +/* + * Miscellaneous + * Sundry things and external helpers. + */ + +int mk_wcwidth(wchar_t ucs4); +int mk_wcwidth_cjk(wchar_t ucs4); +int mk_wcswidth(const wchar_t *str, size_t len); +int mk_wcswidth_cjk(const wchar_t *str, size_t len); + +/* + * Ageing + * Redrawing terminals is quite expensive. Therefore, we avoid redrawing on + * each single modification and mark modified cells instead. This way, we know + * which cells to redraw on the next frame. However, a single DIRTY flag is not + * enough for double/triple buffered screens, hence, we use an AGE field for + * each cell. If the cell is modified, we simply increase the age by one. Each + * framebuffer can then remember its last rendered age and request an update of + * all newer cells. + * TERM_AGE_NULL is special. If used as cell age, the cell must always be + * redrawn (forced update). If used as framebuffer age, all cells are drawn. + * This way, we can allow integer wrap-arounds. + */ + +typedef uint64_t term_age_t; + +#define TERM_AGE_NULL 0 + +/* + * Characters + * Each cell in a terminal page contains only a single character. This is + * usually a single UCS-4 value. However, Unicode allows combining-characters, + * therefore, the number of UCS-4 characters per cell must be unlimited. 
The + * term_char_t object wraps the internal combining char API so it can be + * treated as a single object. + */ + +struct term_char { + /* never access this value directly */ + uint64_t _value; +}; + +struct term_charbuf { + /* 3 UCS-4 characters + zero-terminator */ + uint32_t buf[4]; +}; + +#define TERM_CHAR_INIT(_val) ((term_char_t){ ._value = (_val) }) +#define TERM_CHAR_NULL TERM_CHAR_INIT(0) + +term_char_t term_char_set(term_char_t previous, uint32_t append_ucs4); +term_char_t term_char_merge(term_char_t base, uint32_t append_ucs4); +term_char_t term_char_dup(term_char_t ch); +term_char_t term_char_dup_append(term_char_t base, uint32_t append_ucs4); + +const uint32_t *term_char_resolve(term_char_t ch, size_t *s, term_charbuf_t *b); +unsigned int term_char_lookup_width(term_char_t ch); + +/* true if @ch is TERM_CHAR_NULL, otherwise false */ +static inline bool term_char_is_null(term_char_t ch) { + return ch._value == 0; +} + +/* true if @ch is dynamically allocated and needs to be freed */ +static inline bool term_char_is_allocated(term_char_t ch) { + return !term_char_is_null(ch) && !(ch._value & 0x1); +} + +/* true if (a == b), otherwise false; this is (a == b), NOT (*a == *b) */ +static inline bool term_char_same(term_char_t a, term_char_t b) { + return a._value == b._value; +} + +/* true if (*a == *b), otherwise false; this is implied by (a == b) */ +static inline bool term_char_equal(term_char_t a, term_char_t b) { + const uint32_t *sa, *sb; + term_charbuf_t ca, cb; + size_t na, nb; + + sa = term_char_resolve(a, &na, &ca); + sb = term_char_resolve(b, &nb, &cb); + return na == nb && !memcmp(sa, sb, sizeof(*sa) * na); +} + +/* free @ch in case it is dynamically allocated */ +static inline term_char_t term_char_free(term_char_t ch) { + if (term_char_is_allocated(ch)) + term_char_set(ch, 0); + + return TERM_CHAR_NULL; +} + +/* gcc _cleanup_ helpers */ +#define _term_char_free_ _cleanup_(term_char_freep) +static inline void term_char_freep(term_char_t *p) { + 
term_char_free(*p); +} + +/* + * Attributes + * Each cell in a terminal page can have its own set of attributes. These alter + * the behavior of the renderer for this single cell. We use term_attr to + * specify attributes. + * The only non-obvious field is "ccode" for foreground and background colors. + * This field contains the terminal color-code in case no full RGB information + * was given by the host. It is also required for dynamic color palettes. If it + * is set to TERM_CCODE_RGB, the "red", "green" and "blue" fields contain the + * full RGB color. + */ + +enum { + /* dark color-codes */ + TERM_CCODE_BLACK, + TERM_CCODE_RED, + TERM_CCODE_GREEN, + TERM_CCODE_YELLOW, + TERM_CCODE_BLUE, + TERM_CCODE_MAGENTA, + TERM_CCODE_CYAN, + TERM_CCODE_WHITE, /* technically: light grey */ + + /* light color-codes */ + TERM_CCODE_LIGHT_BLACK = TERM_CCODE_BLACK + 8, /* technically: dark grey */ + TERM_CCODE_LIGHT_RED = TERM_CCODE_RED + 8, + TERM_CCODE_LIGHT_GREEN = TERM_CCODE_GREEN + 8, + TERM_CCODE_LIGHT_YELLOW = TERM_CCODE_YELLOW + 8, + TERM_CCODE_LIGHT_BLUE = TERM_CCODE_BLUE + 8, + TERM_CCODE_LIGHT_MAGENTA = TERM_CCODE_MAGENTA + 8, + TERM_CCODE_LIGHT_CYAN = TERM_CCODE_CYAN + 8, + TERM_CCODE_LIGHT_WHITE = TERM_CCODE_WHITE + 8, + + /* pseudo colors */ + TERM_CCODE_FG, /* selected foreground color */ + TERM_CCODE_BG, /* selected background color */ + TERM_CCODE_RGB, /* color is specified as RGB */ + + TERM_CCODE_CNT, +}; + +struct term_color { + uint8_t ccode; + uint8_t red; + uint8_t green; + uint8_t blue; +}; + +struct term_attr { + term_color fg; /* foreground color */ + term_color bg; /* background color */ + + unsigned int bold : 1; /* bold font */ + unsigned int italic : 1; /* italic font */ + unsigned int underline : 1; /* underline text */ + unsigned int inverse : 1; /* inverse fg/bg */ + unsigned int protect : 1; /* protect from erase */ + unsigned int blink : 1; /* blink text */ +}; + +/* + * Cells + * The term_cell structure represents a single cell in a 
terminal page. It + * contains the stored character, the age of the cell and all its attributes. + */ + +struct term_cell { + term_char_t ch; /* stored char or TERM_CHAR_NULL */ + term_age_t age; /* cell age or TERM_AGE_NULL */ + term_attr attr; /* cell attributes */ + unsigned int cwidth; /* cached term_char_lookup_width(cell->ch) */ +}; + +/* + * Lines + * Instead of storing cells in a 2D array, we store them in an array of + * dynamically allocated lines. This way, scrolling can be implemented very + * fast without moving any cells at all. Similarly, the scrollback-buffer is + * much simpler to implement. + * We use term_line to store a single line. It contains an array of cells, a + * fill-state which remembers the amount of blanks on the right side, a + * separate age just for the line which can overwrite the age for all cells, + * and some management data. + */ + +struct term_line { + term_line *lines_next; /* linked-list for histories */ + term_line *lines_prev; /* linked-list for histories */ + + unsigned int width; /* visible width of line */ + unsigned int n_cells; /* # of allocated cells */ + term_cell *cells; /* cell-array */ + + term_age_t age; /* line age */ + unsigned int fill; /* # of valid cells; starting left */ +}; + +int term_line_new(term_line **out); +term_line *term_line_free(term_line *line); + +#define _term_line_free_ _cleanup_(term_line_freep) +DEFINE_TRIVIAL_CLEANUP_FUNC(term_line*, term_line_free); + +int term_line_reserve(term_line *line, unsigned int width, const term_attr *attr, term_age_t age, unsigned int protect_width); +void term_line_set_width(term_line *line, unsigned int width); +void term_line_write(term_line *line, unsigned int pos_x, term_char_t ch, unsigned int cwidth, const term_attr *attr, term_age_t age, bool insert_mode); +void term_line_insert(term_line *line, unsigned int from, unsigned int num, const term_attr *attr, term_age_t age); +void term_line_delete(term_line *line, unsigned int from, unsigned int num, const 
term_attr *attr, term_age_t age); +void term_line_append_combchar(term_line *line, unsigned int pos_x, uint32_t ucs4, term_age_t age); +void term_line_erase(term_line *line, unsigned int from, unsigned int num, const term_attr *attr, term_age_t age, bool keep_protected); +void term_line_reset(term_line *line, const term_attr *attr, term_age_t age); + +void term_line_link(term_line *line, term_line **first, term_line **last); +void term_line_link_tail(term_line *line, term_line **first, term_line **last); +void term_line_unlink(term_line *line, term_line **first, term_line **last); + +#define TERM_LINE_LINK(_line, _head) term_line_link((_line), &(_head)->lines_first, &(_head)->lines_last) +#define TERM_LINE_LINK_TAIL(_line, _head) term_line_link_tail((_line), &(_head)->lines_first, &(_head)->lines_last) +#define TERM_LINE_UNLINK(_line, _head) term_line_unlink((_line), &(_head)->lines_first, &(_head)->lines_last) diff --git a/src/libsystemd-terminal/term-page.c b/src/libsystemd-terminal/term-page.c new file mode 100644 index 000000000..bfff3b171 --- /dev/null +++ b/src/libsystemd-terminal/term-page.c @@ -0,0 +1,1142 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright (C) 2014 David Herrmann + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +/* + * Terminal Page/Line/Cell/Char Handling + * This file implements page handling of a terminal. 
It is split into pages, + * lines, cells and characters. Each object is independent of the next upper + * object. + * + * The Terminal layer keeps each line of a terminal separate and dynamically + * allocated. This allows us to move lines from main-screen to history-buffers + * very fast. Same is true for scrolling, top/bottom borders and other buffer + * operations. + * + * While lines are dynamically allocated, cells are not. This would be a waste + * of memory and causes heavy fragmentation. Furthermore, cells are moved much + * less frequently than lines so the performance-penalty is pretty small. + * However, to support combining-characters, we have to initialize and cleanup + * cells properly and cannot just release the underlying memory. Therefore, + * cells are treated as proper objects despite being allocated in arrays. + * + * Each cell has a set of attributes and a stored character. This is usually a + * single Unicode character stored as 32bit UCS-4 char. However, we need to + * support Unicode combining-characters, therefore this gets more complicated. + * Characters themselves are represented by a "term_char_t" object. It + * should be treated as a normal integer and passed by value. The + * surrounding struct is just to hide the internals. A term-char can contain a + * base character together with up to 2 combining-chars in a single integer. + * Only if you need more combining-chars (very unlikely!) a term-char is a + * pointer to an allocated storage. This requires you to always free term-char + * objects once no longer used (even though this is a no-op most of the time). + * Furthermore, term-char objects are not ref-counted so you must duplicate them + * in case you want to store it somewhere and retain a copy yourself. By + * convention, all functions that take a term-char object will not duplicate + * it but implicitly take ownership of the passed value. It's up to the caller + * to duplicate it beforehand, in case it wants to retain a copy. 
+ * + * If it turns out, that more than 2 comb-chars become common in specific + * languages, we can try to optimize this. One idea is to ref-count allocated + * characters and store them in a hash-table (like gnome's libvte3 does). This + * way we will never have two allocated chars for the same content. Or we can + * simply put two uint64_t into a "term_char_t". This will slow down operations + * on systems that don't need that many comb-chars, but avoid the dynamic + * allocations on others. + * Anyhow, until we have proper benchmarks, we will keep the current code. It + * seems to compete very well with other solutions so far. + */ + +#include +#include +#include +#include +#include "macro.h" +#include "term-internal.h" +#include "util.h" + +/* maximum UCS-4 character */ +#define CHAR_UCS4_MAX (0x10ffff) +/* mask for valid UCS-4 characters (21bit) */ +#define CHAR_UCS4_MASK (0x1fffff) +/* UCS-4 replacement character */ +#define CHAR_UCS4_REPLACEMENT (0xfffd) + +/* real storage behind "term_char_t" in case it's not packed */ +typedef struct term_character { + uint8_t n; + uint32_t codepoints[]; +} term_character; + +/* + * char_pack() takes 3 UCS-4 values and packs them into a term_char_t object. + * Note that UCS-4 chars only take 21 bits, so we still have the LSB as marker. + * We set it to 1 so others can distinguish it from pointers. + */ +static inline term_char_t char_pack(uint32_t v1, uint32_t v2, uint32_t v3) { + uint64_t packed, u1, u2, u3; + + u1 = v1; + u2 = v2; + u3 = v3; + + packed = 0x01; + packed |= (u1 & (uint64_t)CHAR_UCS4_MASK) << 43; + packed |= (u2 & (uint64_t)CHAR_UCS4_MASK) << 22; + packed |= (u3 & (uint64_t)CHAR_UCS4_MASK) << 1; + + return TERM_CHAR_INIT(packed); +} + +#define char_pack1(_v1) char_pack2((_v1), CHAR_UCS4_MAX + 1) +#define char_pack2(_v1, _v2) char_pack3((_v1), (_v2), CHAR_UCS4_MAX + 1) +#define char_pack3(_v1, _v2, _v3) char_pack((_v1), (_v2), (_v3)) + +/* + * char_unpack() is the inverse of char_pack(). 
It extracts the 3 stored UCS-4 + * characters and returns them. Note that this does not validate the passed + * term_char_t. That's the responsibility of the caller. + * This returns the number of characters actually packed. This obviously is a + * number between 0 and 3 (inclusive). + */ +static inline uint8_t char_unpack(term_char_t packed, uint32_t *out_v1, uint32_t *out_v2, uint32_t *out_v3) { + uint32_t v1, v2, v3; + + v1 = (packed._value >> 43) & (uint64_t)CHAR_UCS4_MASK; + v2 = (packed._value >> 22) & (uint64_t)CHAR_UCS4_MASK; + v3 = (packed._value >> 1) & (uint64_t)CHAR_UCS4_MASK; + + if (out_v1) + *out_v1 = v1; + if (out_v2) + *out_v2 = v2; + if (out_v3) + *out_v3 = v3; + + return (v1 > CHAR_UCS4_MAX) ? 0 : + ((v2 > CHAR_UCS4_MAX) ? 1 : + ((v3 > CHAR_UCS4_MAX) ? 2 : + 3)); +} + +/* cast a term_char_t to a term_character* */ +static inline term_character *char_to_ptr(term_char_t ch) { + return (term_character*)(unsigned long)ch._value; +} + +/* cast a term_character* to a term_char_t */ +static inline term_char_t char_from_ptr(term_character *c) { + return TERM_CHAR_INIT((unsigned long)c); +} + +/* + * char_alloc() allocates a properly aligned term_character object and returns + * a pointer to it. NULL is returned on allocation errors. The object will have + * enough room for @n following UCS-4 chars. + * Note that we allocate (n+1) characters and set the last one to 0 in case + * anyone prints this string for debugging. + */ +static term_character *char_alloc(uint8_t n) { + term_character *c; + int r; + + r = posix_memalign((void**)&c, + MAX(sizeof(void*), (size_t)2), + sizeof(*c) + sizeof(*c->codepoints) * (n + 1)); + if (r) + return NULL; + + c->n = n; + c->codepoints[n] = 0; + + return c; +} + +/* + * char_free() frees the memory allocated via char_alloc(). It is safe to call + * this on any term_char_t, only allocated characters are freed. 
+ */ +static inline void char_free(term_char_t ch) { + if (term_char_is_allocated(ch)) + free(char_to_ptr(ch)); +} + +/* + * This appends @append_ucs4 to the existing character @base and returns + * it as a new character. In case that's not possible, @base is returned. The + * caller can use term_char_same() to test whether the returned character was + * freshly allocated or not. + */ +static term_char_t char_build(term_char_t base, uint32_t append_ucs4) { + /* soft-limit for combining-chars; hard-limit is currently 255 */ + const size_t climit = 64; + term_character *c; + uint32_t buf[3], *t; + uint8_t n; + + /* ignore invalid UCS-4 */ + if (append_ucs4 > CHAR_UCS4_MAX) + return base; + + if (term_char_is_null(base)) { + return char_pack1(append_ucs4); + } else if (!term_char_is_allocated(base)) { + /* unpack and try extending the packed character */ + n = char_unpack(base, &buf[0], &buf[1], &buf[2]); + + switch (n) { + case 0: + return char_pack1(append_ucs4); + case 1: + if (climit < 2) + return base; + + return char_pack2(buf[0], append_ucs4); + case 2: + if (climit < 3) + return base; + + return char_pack3(buf[0], buf[1], append_ucs4); + default: + /* fallthrough */ + break; + } + + /* already fully packed, we need to allocate a new one */ + t = buf; + } else { + /* already an allocated type, we need to allocate a new one */ + c = char_to_ptr(base); + t = c->codepoints; + n = c->n; + } + + /* bail out if soft-limit is reached */ + if (n >= climit) + return base; + + /* allocate new char */ + c = char_alloc(n + 1); + if (!c) + return base; + + memcpy(c->codepoints, t, sizeof(*t) * n); + c->codepoints[n] = append_ucs4; + + return char_from_ptr(c); +} + +/** + * term_char_set() - Reset character to a single UCS-4 character + * @previous: term-char to reset + * @append_ucs4: UCS-4 char to set + * + * This frees all resources in @previous and re-initializes it to @append_ucs4. + * The new char is returned. 
+ * + * Usually, this is used like this: + * obj->ch = term_char_set(obj->ch, ucs4); + * + * Returns: The previous character reset to @append_ucs4. + */ +term_char_t term_char_set(term_char_t previous, uint32_t append_ucs4) { + char_free(previous); + return char_build(TERM_CHAR_NULL, append_ucs4); +} + +/** + * term_char_merge() - Merge UCS-4 char at the end of an existing char + * @base: existing term-char + * @append_ucs4: UCS-4 character to append + * + * This appends @append_ucs4 to @base and returns the result. @base is + * invalidated by this function and must no longer be used. The returned value + * replaces the old one. + * + * Usually, this is used like this: + * obj->ch = term_char_merge(obj->ch, ucs4); + * + * Returns: The new merged character. + */ +term_char_t term_char_merge(term_char_t base, uint32_t append_ucs4) { + term_char_t ch; + + ch = char_build(base, append_ucs4); + if (!term_char_same(ch, base)) + term_char_free(base); + + return ch; +} + +/** + * term_char_dup() - Duplicate character + * @ch: character to duplicate + * + * This duplicates a term-character. In case the character is not allocated, + * nothing is done. Otherwise, the underlying memory is copied and returned. You + * need to call term_char_free() on the returned character to release it again. + * On allocation errors, a replacement character is returned. Therefore, the + * caller can safely assume that this function always succeeds. + * + * Returns: The duplicated term-character. 
+ */ +term_char_t term_char_dup(term_char_t ch) { + term_character *c, *newc; + + if (!term_char_is_allocated(ch)) + return ch; + + c = char_to_ptr(ch); + newc = char_alloc(c->n); + if (!newc) + return char_pack1(CHAR_UCS4_REPLACEMENT); + + memcpy(newc->codepoints, c->codepoints, sizeof(*c->codepoints) * c->n); + return char_from_ptr(newc); +} + +/** + * term_char_dup_append() - Duplicate term-char with UCS-4 character appended + * @base: existing term-char + * @append_ucs4: UCS-4 character to append + * + * This is similar to term_char_merge(), but it returns a separately allocated + * character. That is, @base will stay valid after this returns and is not + * touched. In case the append-operation fails, @base is duplicated and + * returned. That is, the returned char is always independent of @base. + * + * Returns: Newly allocated character with @append_ucs4 appended to @base. + */ +term_char_t term_char_dup_append(term_char_t base, uint32_t append_ucs4) { + term_char_t ch; + + ch = char_build(base, append_ucs4); + if (term_char_same(ch, base)) + ch = term_char_dup(base); + + return ch; +} + +/** + * term_char_resolve() - Retrieve the UCS-4 string for a term-char + * @ch: character to resolve + * @s: storage for size of string or NULL + * @b: storage for string or NULL + * + * This takes a term-character and returns the UCS-4 string associated with it. + * In case @ch is not allocated, the string is stored in @b (in case @b is NULL + * static storage is used). Otherwise, a pointer to the allocated storage is + * returned. + * + * The returned string is only valid as long as @ch and @b are valid. The string + * is zero-terminated and can safely be printed via long-character printf(). + * The length of the string excluding the zero-character is returned in @s. + * + * This never returns NULL. Even if the size is 0, this points to a buffer of at + * least a zero-terminator. + * + * Returns: The UCS-4 string-representation of @ch, and its size in @s. 
+ */ +const uint32_t *term_char_resolve(term_char_t ch, size_t *s, term_charbuf_t *b) { + static term_charbuf_t static_b; + term_character *c; + uint32_t *cache; + size_t len; + + if (b) + cache = b->buf; + else + cache = static_b.buf; + + if (term_char_is_null(ch)) { + len = 0; + cache[0] = 0; + } else if (term_char_is_allocated(ch)) { + c = char_to_ptr(ch); + len = c->n; + cache = c->codepoints; + } else { + len = char_unpack(ch, &cache[0], &cache[1], &cache[2]); + cache[len] = 0; + } + + if (s) + *s = len; + + return cache; +} + +/** + * term_char_lookup_width() - Lookup cell-width of a character + * @ch: character to return cell-width for + * + * This is an equivalent of wcwidth() for term_char_t. It can deal directly + * with UCS-4 and combining-characters and avoids the mess that is wchar_t and + * locale handling. + * + * Returns: 0 for unprintable characters, >0 for everything else. + */ +unsigned int term_char_lookup_width(term_char_t ch) { + term_charbuf_t b; + const uint32_t *str; + unsigned int max; + size_t i, len; + int r; + + max = 0; + str = term_char_resolve(ch, &len, &b); + + for (i = 0; i < len; ++i) { + /* + * Oh god, C99 locale handling strikes again: wcwidth() expects + * wchar_t, but there is no way for us to know the + * internal encoding of wchar_t. Moreover, it is nearly + * impossible to convert UCS-4 into wchar_t (except for iconv, + * which is way too much overhead). + * Therefore, we use our own copy of wcwidth(). Let's just hope + * that glibc will one day export its internal UCS-4 and UTF-8 + * helpers for direct use. 
+ */ + assert_cc(sizeof(wchar_t) >= 4); + r = mk_wcwidth((wchar_t)str[i]); + if (r > 0 && (unsigned int)r > max) + max = r; + } + + return max; +} + +/** + * term_cell_init() - Initialize a new cell + * @cell: cell to initialize + * @ch: character to set on the cell or TERM_CHAR_NULL + * @cwidth: character width of @ch + * @attr: attributes to set on the cell or NULL + * @age: age to set on the cell or TERM_AGE_NULL + * + * This initializes a new cell. The backing-memory of the cell must be allocated + * by the caller beforehand. The caller is responsible to destroy the cell via + * term_cell_destroy() before freeing the backing-memory. + * + * It is safe (and supported!) to use: + * zero(*c); + * instead of: + * term_cell_init(c, TERM_CHAR_NULL, 0, NULL, TERM_AGE_NULL); + * + * Note that this call takes ownership of @ch. If you want to use it yourself + * after this call, you need to duplicate it before calling this. + */ +static void term_cell_init(term_cell *cell, term_char_t ch, unsigned int cwidth, const term_attr *attr, term_age_t age) { + assert(cell); + + cell->ch = ch; + cell->cwidth = cwidth; + cell->age = age; + + if (attr) + memcpy(&cell->attr, attr, sizeof(*attr)); + else + zero(cell->attr); +} + +/** + * term_cell_destroy() - Destroy previously initialized cell + * @cell: cell to destroy or NULL + * + * This releases all resources associated with a cell. The backing memory is + * kept as-is. It's the responsibility of the caller to manage it. + * + * You must not call any other cell operations on this cell after this call + * returns. You must re-initialize the cell via term_cell_init() before you can + * use it again. + * + * If @cell is NULL, this is a no-op. 
+ */ +static void term_cell_destroy(term_cell *cell) { + if (!cell) + return; + + term_char_free(cell->ch); +} + +/** + * term_cell_set() - Change contents of a cell + * @cell: cell to modify + * @ch: character to set on the cell or cell->ch + * @cwidth: character width of @ch or cell->cwidth + * @attr: attributes to set on the cell or NULL + * @age: age to set on the cell or cell->age + * + * This changes the contents of a cell. It can be used to change the character, + * attributes and age. To keep the current character, pass cell->ch as @ch. To + * reset the current attributes, pass NULL. To keep the current age, pass + * cell->age. + * + * This call takes ownership of @ch. You need to duplicate it first, in case you + * want to use it for your own purposes after this call. + * + * The cell must have been initialized properly before calling this. See + * term_cell_init(). + */ +static void term_cell_set(term_cell *cell, term_char_t ch, unsigned int cwidth, const term_attr *attr, term_age_t age) { + assert(cell); + + if (!term_char_same(ch, cell->ch)) { + term_char_free(cell->ch); + cell->ch = ch; + } + + cell->cwidth = cwidth; + cell->age = age; + + if (attr) + memcpy(&cell->attr, attr, sizeof(*attr)); + else + zero(cell->attr); +} + +/** + * term_cell_append() - Append a combining-char to a cell + * @cell: cell to modify + * @ucs4: UCS-4 character to append to the cell + * @age: new age to set on the cell or cell->age + * + * This appends a combining-character to a cell. No validation of the UCS-4 + * character is done, so this can be used to append any character. Additionally, + * this can update the age of the cell. + * + * The cell must have been initialized properly before calling this. See + * term_cell_init(). 
+ */ +static void term_cell_append(term_cell *cell, uint32_t ucs4, term_age_t age) { + assert(cell); + + cell->ch = term_char_merge(cell->ch, ucs4); + cell->age = age; +} + +/** + * term_cell_init_n() - Initialize an array of cells + * @cells: pointer to an array of cells to initialize + * @n: number of cells + * @attr: attributes to set on all cells or NULL + * @age: age to set on all cells + * + * This is the same as term_cell_init() but initializes an array of cells. + * Furthermore, this always sets the character to TERM_CHAR_NULL. + * If you want to set a specific character on all cells, you need to hard-code + * this loop and duplicate the character for each cell. + */ +static void term_cell_init_n(term_cell *cells, unsigned int n, const term_attr *attr, term_age_t age) { + for ( ; n > 0; --n, ++cells) + term_cell_init(cells, TERM_CHAR_NULL, 0, attr, age); +} + +/** + * term_cell_destroy_n() - Destroy an array of cells + * @cells: pointer to an array of cells to destroy + * @n: number of cells + * + * This is the same as term_cell_destroy() but destroys an array of cells. + */ +static void term_cell_destroy_n(term_cell *cells, unsigned int n) { + for ( ; n > 0; --n, ++cells) + term_cell_destroy(cells); +} + +/** + * term_cell_clear_n() - Clear contents of an array of cells + * @cells: pointer to an array of cells to modify + * @n: number of cells + * @attr: attributes to set on all cells or NULL + * @age: age to set on all cells + * + * This is the same as term_cell_set() but operates on an array of cells. Note + * that all characters are always set to TERM_CHAR_NULL, unlike term_cell_set() + * which takes the character as argument. + * If you want to set a specific character on all cells, you need to hard-code + * this loop and duplicate the character for each cell. 
+ */ +static void term_cell_clear_n(term_cell *cells, unsigned int n, const term_attr *attr, term_age_t age) { + for ( ; n > 0; --n, ++cells) + term_cell_set(cells, TERM_CHAR_NULL, 0, attr, age); +} + +/** + * term_line_new() - Allocate a new line + * @out: place to store pointer to new line + * + * This allocates and initializes a new line. The line is unlinked and + * independent of any page. It can be used for any purpose. The initial + * cell-count is set to 0. + * + * The line has to be freed via term_line_free() once it's no longer needed. + * + * Returns: 0 on success, negative error code on failure. + */ +int term_line_new(term_line **out) { + _term_line_free_ term_line *line = NULL; + + assert_return(out, -EINVAL); + + line = new0(term_line, 1); + if (!line) + return -ENOMEM; + + *out = line; + line = NULL; + return 0; +} + +/** + * term_line_free() - Free a line + * @line: line to free or NULL + * + * This frees a line that was previously allocated via term_line_new(). All its + * cells are released, too. + * + * If @line is NULL, this is a no-op. + */ +term_line *term_line_free(term_line *line) { + if (!line) + return NULL; + + term_cell_destroy_n(line->cells, line->n_cells); + free(line->cells); + free(line); + + return NULL; +} + +/** + * term_line_reserve() - Pre-allocate cells for a line + * @line: line to pre-allocate cells for + * @width: number of cells the line shall have pre-allocated + * @attr: attribute for all allocated cells or NULL + * @age: current age for all modifications + * @protect_width: width to protect from erasure + * + * This pre-allocates cells for this line. Please note that @width is the number + * of cells the line is guaranteed to have allocated after this call returns. + * It's not the number of cells that are added, neither is it the new width of + * the line. + * + * This function never frees memory. 
That is, reducing the line-width will + * always succeed, same is true for increasing the width to a previously set + * width. + * + * @attr and @age are used to initialize new cells. Additionally, any + * existing cell outside of the protected area specified by @protect_width are + * cleared and reset with @attr and @age. + * + * Returns: 0 on success, negative error code on failure. + */ +int term_line_reserve(term_line *line, unsigned int width, const term_attr *attr, term_age_t age, unsigned int protect_width) { + unsigned int min_width; + term_cell *t; + + assert_return(line, -EINVAL); + + /* reset existing cells if required */ + min_width = MIN(line->n_cells, width); + if (min_width > protect_width) + term_cell_clear_n(line->cells + protect_width, + min_width - protect_width, + attr, + age); + + /* allocate new cells if required */ + + if (width > line->n_cells) { + t = realloc_multiply(line->cells, sizeof(*t), width); + if (!t) + return -ENOMEM; + + if (!attr && !age) + memzero(t + line->n_cells, + sizeof(*t) * (width - line->n_cells)); + else + term_cell_init_n(t + line->n_cells, + width - line->n_cells, + attr, + age); + + line->cells = t; + line->n_cells = width; + } + + line->fill = MIN(line->fill, protect_width); + + return 0; +} + +/** + * term_line_set_width() - Change width of a line + * @line: line to modify + * @width: new width + * + * This changes the actual width of a line. It is the caller's responsibility + * to use term_line_reserve() to make sure enough space is allocated. If @width + * is greater than the allocated size, it is cropped. + * + * This does not modify any cells. Use term_line_reserve() or term_line_erase() + * to clear any newly added cells. + * + * NOTE: The fill state is cropped at line->width. Therefore, if you increase + * the line-width afterwards, but there is a multi-cell character at the + * end of the line that got cropped, then the fill-state will _not_ be + * adjusted. 
+ * This means, the fill-state always includes the cells up to the start + * of the right-most character, but it might or might not cover it until + * its end. This should be totally fine, though. You should never access + * multi-cell tails directly, anyway. + */ +void term_line_set_width(term_line *line, unsigned int width) { + assert(line); + + if (width > line->n_cells) + width = line->n_cells; + + line->width = width; + line->fill = MIN(line->fill, width); +} + +/** + * line_insert() - Insert characters and move existing cells to the right + * @line: line to insert cells into + * @from: position to insert cells at + * @num: number of cells to insert + * @head_char: character that is set on the first cell + * @head_cwidth: character-length of @head_char + * @attr: attribute for all inserted cells or NULL + * @age: current age for all modifications + * + * The INSERT operation (or writes with INSERT_MODE) writes data at a specific + * position on a line and shifts the existing cells to the right. Cells that are + * moved beyond the right hand border are discarded. + * + * This helper contains the actual INSERT implementation which is independent of + * the data written. It works on cells, not on characters. The first cell is set + * to @head_char, all others are reset to TERM_CHAR_NULL. See each caller for a + * more detailed description. + */ +static inline void line_insert(term_line *line, unsigned int from, unsigned int num, term_char_t head_char, unsigned int head_cwidth, const term_attr *attr, term_age_t age) { + unsigned int i, rem, move; + + if (from >= line->width) + return; + if (from + num < from || from + num > line->width) + num = line->width - from; + if (!num) + return; + + move = line->width - from - num; + rem = MIN(num, move); + + if (rem > 0) { + /* + * Make room for @num cells; shift cells to the right if + * required. @rem is the number of cells on the right hand border + * that get overwritten by the right shift. If @num exceeds @move, + * the @num - @move cells in front of them are knocked off, too, + * but without being overwritten by the shift.
+ * + * For INSERT_MODE, @num/@rem are usually 1 or 2, @move is 50% + * of the line on average. Therefore, the actual move is quite + * heavy and we can safely invalidate cells manually instead of + * the whole line. + * However, for INSERT operations, any parameters are + * possible. But we cannot place any assumption on its usage + * across applications, so we just handle it the same as + * INSERT_MODE and do per-cell invalidation. + */ + + /* Destroy all @num cells that are knocked off on the right, that + * is, the cells in [from + move, width). The last @rem of them are + * overwritten by the move below; the others (if @num > @move) are + * re-initialized via term_cell_init*() below, which is not + * expected to release old contents. Destroying only @rem cells + * here would therefore leave those cells un-released. */ + term_cell_destroy_n(line->cells + from + move, num); + + /* move remaining bulk of cells */ + memmove(line->cells + from + num, + line->cells + from, + sizeof(*line->cells) * move); + + /* invalidate cells */ + for (i = 0; i < move; ++i) + line->cells[from + num + i].age = age; + + /* initialize fresh head-cell */ + term_cell_init(line->cells + from, + head_char, + head_cwidth, + attr, + age); + + /* initialize fresh tail-cells */ + term_cell_init_n(line->cells + from + 1, + num - 1, + attr, + age); + + /* adjust fill-state */ + DISABLE_WARNING_SHADOW; + line->fill = MIN(line->width, + MAX(line->fill + num, + from + num)); + REENABLE_WARNING; + } else { + /* modify head-cell */ + term_cell_set(line->cells + from, + head_char, + head_cwidth, + attr, + age); + + /* reset tail-cells */ + term_cell_clear_n(line->cells + from + 1, + num - 1, + attr, + age); + + /* adjust fill-state */ + line->fill = line->width; + } +} + +/** + * term_line_write() - Write to a single, specific cell + * @line: line to write to + * @pos_x: x-position of cell in @line to write to + * @ch: character to write to the cell + * @cwidth: character width of @ch + * @attr: attributes to set on the cell or NULL + * @age: current age for all modifications + * @insert_mode: true if INSERT-MODE is enabled + * + * This writes to a specific cell in a line. The cell is addressed by its + * X-position @pos_x. If that cell does not exist, this is a no-op. + * + * @ch and @attr are set on this cell.
+ * + * If @insert_mode is true, this inserts the character instead of overwriting + * existing data (existing data is now moved to the right before writing). + * + * This function is the low-level handler of normal writes to a terminal. + */ +void term_line_write(term_line *line, unsigned int pos_x, term_char_t ch, unsigned int cwidth, const term_attr *attr, term_age_t age, bool insert_mode) { + unsigned int len; + + assert(line); + + if (pos_x >= line->width) + return; + + len = MAX(1U, cwidth); + if (pos_x + len < pos_x || pos_x + len > line->width) + len = line->width - pos_x; + if (!len) + return; + + if (insert_mode) { + /* Use line_insert() to insert the character-head and fill + * the remains with NULLs. */ + line_insert(line, pos_x, len, ch, cwidth, attr, age); + } else { + /* modify head-cell */ + term_cell_set(line->cells + pos_x, ch, cwidth, attr, age); + + /* reset tail-cells */ + term_cell_clear_n(line->cells + pos_x + 1, + len - 1, + attr, + age); + + /* adjust fill-state */ + DISABLE_WARNING_SHADOW; + line->fill = MIN(line->width, + MAX(line->fill, + pos_x + len)); + REENABLE_WARNING; + } +} + +/** + * term_line_insert() - Insert empty cells + * @line: line to insert empty cells into + * @from: x-position where to insert cells + * @num: number of cells to insert + * @attr: attributes to set on the cells or NULL + * @age: current age for all modifications + * + * This inserts @num empty cells at position @from in line @line. All existing + * cells to the right are shifted to make room for the new cells. Cells that get + * pushed beyond the right hand border are discarded. 
+ */ +void term_line_insert(term_line *line, unsigned int from, unsigned int num, const term_attr *attr, term_age_t age) { + assert(line); + + /* use line_insert() to insert @num empty cells; no "return" here as + * returning an expression from a void function is not valid ISO C */ + line_insert(line, from, num, TERM_CHAR_NULL, 0, attr, age); +} + +/** + * term_line_delete() - Delete cells from line + * @line: line to delete cells from + * @from: position to delete cells at + * @num: number of cells to delete + * @attr: attributes to set on any new cells + * @age: current age for all modifications + * + * Delete cells from a line. All cells to the right of the deleted cells are + * shifted to the left to fill the empty space. New cells appearing on the right + * hand border are cleared and initialized with @attr. + */ +void term_line_delete(term_line *line, unsigned int from, unsigned int num, const term_attr *attr, term_age_t age) { + unsigned int rem, move, i; + + assert(line); + + if (from >= line->width) + return; + if (from + num < from || from + num > line->width) + num = line->width - from; + if (!num) + return; + + /* destroy and move as many upfront as possible */ + move = line->width - from - num; + rem = MIN(num, move); + if (rem > 0) { + /* destroy to be removed cells */ + term_cell_destroy_n(line->cells + from, rem); + + /* move tail upfront */ + memmove(line->cells + from, + line->cells + from + num, + sizeof(*line->cells) * move); + + /* invalidate copied cells */ + for (i = 0; i < move; ++i) + line->cells[from + i].age = age; + + /* initialize tail that was moved away */ + term_cell_init_n(line->cells + line->width - rem, + rem, + attr, + age); + + /* reset remaining cells in case the move was too small */ + if (num > move) + term_cell_clear_n(line->cells + from + move, + num - move, + attr, + age); + } else { + /* reset cells */ + term_cell_clear_n(line->cells + from, + num, + attr, + age); + } + + /* adjust fill-state */ + if (from + num < line->fill) + line->fill -= num; + else if (from < line->fill) + line->fill = from; +} + +/** + *
term_line_append_combchar() - Append combining char to existing cell + * @line: line to modify + * @pos_x: position of cell to append combining char to + * @ucs4: combining character to append + * @age: current age for all modifications + * + * Unicode allows trailing combining characters, which belong to the + * char in front of them. The caller is responsible of detecting + * combining characters and calling term_line_append_combchar() instead of + * term_line_write(). This simply appends the char to the correct cell then. + * If the cell is not in the visible area, this call is skipped. + * + * Note that control-sequences are not 100% compatible with combining + * characters as they require delayed parsing. However, we must handle + * control-sequences immediately. Therefore, there might be trailing + * combining chars that should be discarded by the parser. + * However, to prevent programming errors, we're also being pedantic + * here and discard weirdly placed combining chars. This prevents + * situations were invalid content is parsed into the terminal and you + * might end up with cells containing only combining chars. + * + * Long story short: To get combining-characters working with old-fashioned + * terminal-emulation, we parse them exclusively for direct cell-writes. Other + * combining-characters are usually simply discarded and ignored. + */ +void term_line_append_combchar(term_line *line, unsigned int pos_x, uint32_t ucs4, term_age_t age) { + assert(line); + + if (pos_x >= line->width) + return; + + /* Unused cell? Skip appending any combining chars then. 
*/ + if (term_char_is_null(line->cells[pos_x].ch)) + return; + + term_cell_append(line->cells + pos_x, ucs4, age); +} + +/** + * term_line_erase() - Erase parts of a line + * @line: line to modify + * @from: position to start the erase + * @num: number of cells to erase + * @attr: attributes to initialize erased cells with + * @age: current age for all modifications + * @keep_protected: true if protected cells should be kept + * + * This is the standard erase operation. It clears all cells in the targeted + * area and re-initializes them. Cells to the right are not shifted left, you + * must use DELETE to achieve that. Cells outside the visible area are skipped. + * + * If @keep_protected is true, protected cells will not be erased. + */ +void term_line_erase(term_line *line, unsigned int from, unsigned int num, const term_attr *attr, term_age_t age, bool keep_protected) { + term_cell *cell; + unsigned int i, last_protected; + + assert(line); + + if (from >= line->width) + return; + if (from + num < from || from + num > line->width) + num = line->width - from; + if (!num) + return; + + last_protected = 0; + for (i = 0; i < num; ++i) { + cell = line->cells + from + i; + if (keep_protected && cell->attr.protect) { + /* only count protected-cells inside the fill-region */ + if (from + i < line->fill) + last_protected = from + i; + + continue; + } + + term_cell_set(cell, TERM_CHAR_NULL, 0, attr, age); + } + + /* Adjust fill-state. This is a bit tricks, we can only adjust it in + * case the erase-region starts inside the fill-region and ends at the + * tail or beyond the fill-region. Otherwise, the current fill-state + * stays as it was. + * Furthermore, we must account for protected cells. The loop above + * ensures that protected-cells are only accounted for if they're + * inside the fill-region.
*/ + if (from < line->fill && from + num >= line->fill) + line->fill = MAX(from, last_protected); +} + +/** + * term_line_reset() - Reset a line + * @line: line to reset + * @attr: attributes to initialize all cells with + * @age: current age for all modifications + * + * This resets all visible cells of a line and sets their attributes and ages + * to @attr and @age. This is equivalent to erasing a whole line via + * term_line_erase() without keeping protected cells. + */ +void term_line_reset(term_line *line, const term_attr *attr, term_age_t age) { + assert(line); + + /* erase [0, width) and do not keep protected cells; no "return" here + * as returning an expression from a void function is not valid ISO C */ + term_line_erase(line, 0, line->width, attr, age, false); +} + +/** + * term_line_link() - Link line in front of a list + * @line: line to link + * @first: member pointing to first entry + * @last: member pointing to last entry + * + * This links a line into a list of lines. The line is inserted at the front and + * must not be linked, yet. See the TERM_LINE_LINK() macro for an easier usage of + * this. + */ +void term_line_link(term_line *line, term_line **first, term_line **last) { + assert(line); + assert(first); + assert(last); + assert(!line->lines_prev); + assert(!line->lines_next); + + line->lines_prev = NULL; + line->lines_next = *first; + if (*first) + (*first)->lines_prev = line; + else + *last = line; + *first = line; +} + +/** + * term_line_link_tail() - Link line at tail of a list + * @line: line to link + * @first: member pointing to first entry + * @last: member pointing to last entry + * + * Same as term_line_link() but links the line at the tail.
+ */ +void term_line_link_tail(term_line *line, term_line **first, term_line **last) { + assert(line); + assert(first); + assert(last); + assert(!line->lines_prev); + assert(!line->lines_next); + + line->lines_next = NULL; + line->lines_prev = *last; + if (*last) + (*last)->lines_next = line; + else + *first = line; + *last = line; +} + +/** + * term_line_unlink() - Unlink line from a list + * @line: line to unlink + * @first: member pointing to first entry + * @last: member pointing to last entry + * + * This unlinks a previously linked line. See TERM_LINE_UNLINK() for an easier to + * use macro. + */ +void term_line_unlink(term_line *line, term_line **first, term_line **last) { + assert(line); + assert(first); + assert(last); + + if (line->lines_prev) + line->lines_prev->lines_next = line->lines_next; + else + *first = line->lines_next; + if (line->lines_next) + line->lines_next->lines_prev = line->lines_prev; + else + *last = line->lines_prev; + + line->lines_prev = NULL; + line->lines_next = NULL; +} diff --git a/src/libsystemd-terminal/term-wcwidth.c b/src/libsystemd-terminal/term-wcwidth.c new file mode 100644 index 000000000..443161cfb --- /dev/null +++ b/src/libsystemd-terminal/term-wcwidth.c @@ -0,0 +1,313 @@ +/* + * (Minimal changes made by David Herrmann, to make clean for inclusion in + * systemd. Original header follows.) + * + * This is an implementation of wcwidth() and wcswidth() (defined in + * IEEE Std 1002.1-2001) for Unicode. + * + * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html + * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html + * + * In fixed-width output devices, Latin characters all occupy a single + * "cell" position of equal width, whereas ideographic CJK characters + * occupy two such cells. 
Interoperability between terminal-line + * applications and (teletype-style) character terminals using the + * UTF-8 encoding requires agreement on which character should advance + * the cursor by how many cell positions. No established formal + * standards exist at present on which Unicode character shall occupy + * how many cell positions on character terminals. These routines are + * a first attempt of defining such behavior based on simple rules + * applied to data provided by the Unicode Consortium. + * + * For some graphical characters, the Unicode standard explicitly + * defines a character-cell width via the definition of the East Asian + * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes. + * In all these cases, there is no ambiguity about which width a + * terminal shall use. For characters in the East Asian Ambiguous (A) + * class, the width choice depends purely on a preference of backward + * compatibility with either historic CJK or Western practice. + * Choosing single-width for these characters is easy to justify as + * the appropriate long-term solution, as the CJK practice of + * displaying these characters as double-width comes from historic + * implementation simplicity (8-bit encoded characters were displayed + * single-width and 16-bit ones double-width, even for Greek, + * Cyrillic, etc.) and not any typographic considerations. + * + * Much less clear is the choice of width for the Not East Asian + * (Neutral) class. Existing practice does not dictate a width for any + * of these characters. It would nevertheless make sense + * typographically to allocate two character cells to characters such + * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be + * represented adequately with a single-width glyph. The following + * routines at present merely assign a single-cell width to all + * neutral characters, in the interest of simplicity. 
This is not + * entirely satisfactory and should be reconsidered before + * establishing a formal standard in this area. At the moment, the + * decision which Not East Asian (Neutral) characters should be + * represented by double-width glyphs cannot yet be answered by + * applying a simple rule from the Unicode database content. Setting + * up a proper standard for the behavior of UTF-8 character terminals + * will require a careful analysis not only of each Unicode character, + * but also of each presentation form, something the author of these + * routines has avoided to do so far. + * + * http://www.unicode.org/unicode/reports/tr11/ + * + * Markus Kuhn -- 2007-05-26 (Unicode 5.0) + * + * Permission to use, copy, modify, and distribute this software + * for any purpose and without fee is hereby granted. The author + * disclaims all warranties with regard to this software. + * + * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + */ + +#include +#include "term-internal.h" + +struct interval { + int first; + int last; +}; + +/* auxiliary function for binary search in interval table */ +static int bisearch(wchar_t ucs, const struct interval *table, int max) { + int min = 0; + int mid; + + if (ucs < table[0].first || ucs > table[max].last) + return 0; + while (max >= min) { + mid = (min + max) / 2; + if (ucs > table[mid].last) + min = mid + 1; + else if (ucs < table[mid].first) + max = mid - 1; + else + return 1; + } + + return 0; +} + + +/* The following two functions define the column width of an ISO 10646 + * character as follows: + * + * - The null character (U+0000) has a column width of 0. + * + * - Other C0/C1 control characters and DEL will lead to a return + * value of -1. + * + * - Non-spacing and enclosing combining characters (general + * category code Mn or Me in the Unicode database) have a + * column width of 0. + * + * - SOFT HYPHEN (U+00AD) has a column width of 1. 
+ * + * - Other format characters (general category code Cf in the Unicode + * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0. + * + * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF) + * have a column width of 0. + * + * - Spacing characters in the East Asian Wide (W) or East Asian + * Full-width (F) category as defined in Unicode Technical + * Report #11 have a column width of 2. + * + * - All remaining characters (including all printable + * ISO 8859-1 and WGL4 characters, Unicode control characters, + * etc.) have a column width of 1. + * + * This implementation assumes that wchar_t characters are encoded + * in ISO 10646. + */ + +int mk_wcwidth(wchar_t ucs) +{ + /* sorted list of non-overlapping intervals of non-spacing characters */ + /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ + static const struct interval combining[] = { + { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 }, + { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, + { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 }, + { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 }, + { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, + { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, + { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 }, + { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D }, + { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, + { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, + { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, + { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, + { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, + { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, + { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C }, + { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D }, + { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, + { 0x0BCD, 0x0BCD }, { 0x0C3E, 
0x0C40 }, { 0x0C46, 0x0C48 }, + { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC }, + { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, + { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D }, + { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, + { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, + { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, + { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, + { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, + { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 }, + { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, + { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 }, + { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F }, + { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, + { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, + { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, + { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, + { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B }, + { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 }, + { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 }, + { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF }, + { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 }, + { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F }, + { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, + { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, + { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB }, + { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, + { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 }, + { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD }, + { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F }, + { 0xE0100, 0xE01EF } + }; + + /* test for 8-bit control characters */ + if (ucs == 0) + return 0; + if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) 
+ return -1; + + /* binary search in table of non-spacing characters */ + if (bisearch(ucs, combining, + sizeof(combining) / sizeof(struct interval) - 1)) + return 0; + + /* if we arrive here, ucs is not a combining or C0/C1 control character */ + + return 1 + + (ucs >= 0x1100 && + (ucs <= 0x115f || /* Hangul Jamo init. consonants */ + ucs == 0x2329 || ucs == 0x232a || + (ucs >= 0x2e80 && ucs <= 0xa4cf && + ucs != 0x303f) || /* CJK ... Yi */ + (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */ + (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */ + (ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */ + (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */ + (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */ + (ucs >= 0xffe0 && ucs <= 0xffe6) || + (ucs >= 0x20000 && ucs <= 0x2fffd) || + (ucs >= 0x30000 && ucs <= 0x3fffd))); +} + + +/* + * Sum up the column widths of at most @n characters of @pwcs, stopping early + * at a NUL character. Returns -1 if mk_wcwidth() returns a negative value + * for any of these characters. + */ +int mk_wcswidth(const wchar_t *pwcs, size_t n) +{ + int w, width = 0; + + /* check the remaining length first so we never read beyond @n characters */ + for (; n > 0 && *pwcs; pwcs++, n--) + if ((w = mk_wcwidth(*pwcs)) < 0) + return -1; + else + width += w; + + return width; +} + + +/* + * The following functions are the same as mk_wcwidth() and + * mk_wcswidth(), except that spacing characters in the East Asian + * Ambiguous (A) category as defined in Unicode Technical Report #11 + * have a column width of 2. This variant might be useful for users of + * CJK legacy encodings who want to migrate to UCS without changing + * the traditional terminal character-width behaviour. It is not + * otherwise recommended for general use.
+ */ +int mk_wcwidth_cjk(wchar_t ucs) +{ + /* sorted list of non-overlapping intervals of East Asian Ambiguous + * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */ + static const struct interval ambiguous[] = { + { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 }, + { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 }, + { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 }, + { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 }, + { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED }, + { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA }, + { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 }, + { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B }, + { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 }, + { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 }, + { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 }, + { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE }, + { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 }, + { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA }, + { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 }, + { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB }, + { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB }, + { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 }, + { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 }, + { 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 }, + { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 }, + { 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 }, + { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 }, + { 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 }, + { 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC }, + { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 }, + { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 }, + { 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 }, + { 0x215B, 0x215E }, { 
0x2160, 0x216B }, { 0x2170, 0x2179 }, + { 0x2190, 0x2199 }, { 0x21B8, 0x21B9 }, { 0x21D2, 0x21D2 }, + { 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 }, { 0x2200, 0x2200 }, + { 0x2202, 0x2203 }, { 0x2207, 0x2208 }, { 0x220B, 0x220B }, + { 0x220F, 0x220F }, { 0x2211, 0x2211 }, { 0x2215, 0x2215 }, + { 0x221A, 0x221A }, { 0x221D, 0x2220 }, { 0x2223, 0x2223 }, + { 0x2225, 0x2225 }, { 0x2227, 0x222C }, { 0x222E, 0x222E }, + { 0x2234, 0x2237 }, { 0x223C, 0x223D }, { 0x2248, 0x2248 }, + { 0x224C, 0x224C }, { 0x2252, 0x2252 }, { 0x2260, 0x2261 }, + { 0x2264, 0x2267 }, { 0x226A, 0x226B }, { 0x226E, 0x226F }, + { 0x2282, 0x2283 }, { 0x2286, 0x2287 }, { 0x2295, 0x2295 }, + { 0x2299, 0x2299 }, { 0x22A5, 0x22A5 }, { 0x22BF, 0x22BF }, + { 0x2312, 0x2312 }, { 0x2460, 0x24E9 }, { 0x24EB, 0x254B }, + { 0x2550, 0x2573 }, { 0x2580, 0x258F }, { 0x2592, 0x2595 }, + { 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 }, { 0x25B2, 0x25B3 }, + { 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD }, { 0x25C0, 0x25C1 }, + { 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB }, { 0x25CE, 0x25D1 }, + { 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF }, { 0x2605, 0x2606 }, + { 0x2609, 0x2609 }, { 0x260E, 0x260F }, { 0x2614, 0x2615 }, + { 0x261C, 0x261C }, { 0x261E, 0x261E }, { 0x2640, 0x2640 }, + { 0x2642, 0x2642 }, { 0x2660, 0x2661 }, { 0x2663, 0x2665 }, + { 0x2667, 0x266A }, { 0x266C, 0x266D }, { 0x266F, 0x266F }, + { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF }, + { 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD } + }; + + /* binary search in table of East Asian Ambiguous characters */ + if (bisearch(ucs, ambiguous, + sizeof(ambiguous) / sizeof(struct interval) - 1)) + return 2; + + return mk_wcwidth(ucs); +} + + +/* + * Same as mk_wcswidth() but sums up the widths returned by mk_wcwidth_cjk() + * for at most @n characters of @pwcs, stopping early at a NUL character. + * Returns -1 if mk_wcwidth_cjk() returns a negative value for any of them. + */ +int mk_wcswidth_cjk(const wchar_t *pwcs, size_t n) +{ + int w, width = 0; + + /* check the remaining length first so we never read beyond @n characters */ + for (; n > 0 && *pwcs; pwcs++, n--) + if ((w = mk_wcwidth_cjk(*pwcs)) < 0) + return -1; + else + width += w; + + return width; +} diff --git a/src/libsystemd-terminal/test-term-page.c b/src/libsystemd-terminal/test-term-page.c new file mode 100644 index
000000000..bba83ee40 --- /dev/null +++ b/src/libsystemd-terminal/test-term-page.c @@ -0,0 +1,463 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ +/*** + This file is part of systemd. + + Copyright (C) 2014 David Herrmann + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see . +***/ + +/* + * Terminal Page/Line/Cell/Char Tests + * This tests internals of terminal page, line, cell and char handling. It + * relies on some implementation details, so it might need to be updated if + * those internals are changed. They should be fairly obvious, though. + */ + +#include +#include +#include +#include +#include +#include "macro.h" +#include "term-internal.h" +#include "util.h" + +#define MY_ASSERT_VALS __FILE__, __LINE__, __PRETTY_FUNCTION__ +#define MY_ASSERT_FORW _FILE, _LINE, _FUNC +#define MY_ASSERT_ARGS const char *_FILE, int _LINE, const char *_FUNC +#define MY_ASSERT(expr) \ + do { \ + if (_unlikely_(!(expr))) \ + log_assert_failed(#expr, _FILE, _LINE, _FUNC); \ + } while (false) \ + +/* + * Character Tests + * + * These tests rely on some implementation details of term_char_t, including + * the way we pack characters and the internal layout of "term_char_t". These + * tests have to be updated once we change the implementation. 
+ */ + +#define PACK(v1, v2, v3) \ + TERM_CHAR_INIT( \ + (((((uint64_t)v1) & 0x1fffffULL) << 43) | \ + ((((uint64_t)v2) & 0x1fffffULL) << 22) | \ + ((((uint64_t)v3) & 0x1fffffULL) << 1) | \ + 0x1) \ + ) +#define PACK1(v1) PACK2((v1), 0x110000) +#define PACK2(v1, v2) PACK3((v1), (v2), 0x110000) +#define PACK3(v1, v2, v3) PACK((v1), (v2), (v3)) + +static void test_term_char_misc(void) { + term_char_t c, t; + + /* test TERM_CHAR_NULL handling */ + + c = TERM_CHAR_NULL; /* c is NULL */ + assert_se(term_char_same(c, TERM_CHAR_NULL)); + assert_se(term_char_equal(c, TERM_CHAR_NULL)); + assert_se(term_char_is_null(c)); + assert_se(term_char_is_null(TERM_CHAR_NULL)); + assert_se(!term_char_is_allocated(c)); + + /* test single char handling */ + + t = term_char_dup_append(c, 'A'); /* t is >A< now */ + assert_se(!term_char_same(c, t)); + assert_se(!term_char_equal(c, t)); + assert_se(!term_char_is_allocated(t)); + assert_se(!term_char_is_null(t)); + + /* test basic combined char handling */ + + t = term_char_dup_append(t, '~'); + t = term_char_dup_append(t, '^'); /* t is >A~^< now */ + assert_se(!term_char_same(c, t)); + assert_se(!term_char_is_allocated(t)); + assert_se(!term_char_is_null(t)); + + c = term_char_dup_append(c, 'A'); + c = term_char_dup_append(c, '~'); + c = term_char_dup_append(c, '^'); /* c is >A~^< now */ + assert_se(term_char_same(c, t)); + assert_se(term_char_equal(c, t)); + + /* test more than 2 comb-chars so the chars are allocated */ + + t = term_char_dup_append(t, '`'); /* t is >A~^`< now */ + c = term_char_dup_append(c, '`'); /* c is >A~^`< now */ + assert_se(!term_char_same(c, t)); + assert_se(term_char_equal(c, t)); + + /* test dup_append() on allocated chars */ + + term_char_free(t); + t = term_char_dup_append(c, '"'); /* t is >A~^`"< now */ + assert_se(!term_char_same(c, t)); + assert_se(!term_char_equal(c, t)); + c = term_char_merge(c, '"'); /* c is >A~^`"< now */ + assert_se(!term_char_same(c, t)); + assert_se(term_char_equal(c, t)); + + 
term_char_free(t); + term_char_free(c); +} + +static void test_term_char_packing(void) { + uint32_t seqs[][1024] = { + { -1 }, + { 0, -1 }, + { 'A', '~', -1 }, + { 'A', '~', 0, -1 }, + { 'A', '~', 'a', -1 }, + }; + term_char_t res[] = { + TERM_CHAR_NULL, + PACK1(0), + PACK2('A', '~'), + PACK3('A', '~', 0), + PACK3('A', '~', 'a'), + }; + uint32_t next; + unsigned int i, j; + term_char_t c = TERM_CHAR_NULL; + + /* + * This creates term_char_t objects based on the data in @seqs and + * compares the result to @res. Only basic packed types are tested, no + * allocations are done. + */ + + for (i = 0; i < ELEMENTSOF(seqs); ++i) { + for (j = 0; j < ELEMENTSOF(seqs[i]); ++j) { + next = seqs[i][j]; + if (next == (uint32_t)-1) + break; + + c = term_char_merge(c, next); + } + + assert_se(!memcmp(&c, &res[i], sizeof(c))); + c = term_char_free(c); + } +} + +static void test_term_char_allocating(void) { + uint32_t seqs[][1024] = { + { 0, -1 }, + { 'A', '~', -1 }, + { 'A', '~', 0, -1 }, + { 'A', '~', 'a', -1 }, + { 'A', '~', 'a', 'b', 'c', 'd', -1 }, + { 'A', '~', 'a', 'b', 'c', 'd', 0, '^', -1 }, + /* exceeding implementation-defined soft-limit of 64 */ + { 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', + 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', + 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', + 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', + 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', + 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', + 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', + 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', -1 }, + }; + term_char_t res[] = { + PACK1(0), + PACK2('A', '~'), + PACK3('A', '~', 0), + PACK3('A', '~', 'a'), + TERM_CHAR_NULL, /* allocated */ + TERM_CHAR_NULL, /* allocated */ + TERM_CHAR_NULL, /* allocated */ + }; + uint32_t str[][1024] = { + { 0, -1 }, + { 'A', '~', -1 }, + { 'A', '~', 0, -1 }, + { 'A', '~', 'a', -1 }, + { 'A', '~', 'a', 'b', 'c', 'd', -1 }, + { 'A', '~', 'a', 'b', 'c', 'd', 0, '^', -1 }, + { 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', + 'd', 'd', 'd', 'd', 'd', 'd', 'd', 
'd', + 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', + 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', + 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', + 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', + 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', + 'd', 'd', 'd', 'd', 'd', 'd', 'd', 'd', -1 }, + }; + size_t n; + uint32_t next; + unsigned int i, j; + const uint32_t *t; + + /* + * This builds term_char_t objects based on the data in @seqs. It + * compares the result to @res for packed chars, otherwise it requires + * them to be allocated. + * After that, we resolve the UCS-4 string and compare it to the + * expected strings in @str. + */ + + for (i = 0; i < ELEMENTSOF(seqs); ++i) { + _term_char_free_ term_char_t c = TERM_CHAR_NULL; + + for (j = 0; j < ELEMENTSOF(seqs[i]); ++j) { + next = seqs[i][j]; + if (next == (uint32_t)-1) + break; + + c = term_char_merge(c, next); + } + + /* we use TERM_CHAR_NULL as marker for allocated chars here */ + if (term_char_is_null(res[i])) + assert_se(term_char_is_allocated(c)); + else + assert_se(!memcmp(&c, &res[i], sizeof(c))); + + t = term_char_resolve(c, &n, NULL); + for (j = 0; j < ELEMENTSOF(str[i]); ++j) { + next = str[i][j]; + if (next == (uint32_t)-1) + break; + + assert_se(t[j] == next); + } + + assert_se(n == j); + } +} + +/* + * Line Tests + * + * The following tests work on term_line objects and verify their behavior when + * we modify them. To verify and set line layouts, we have two simple helpers + * to avoid harcoding the cell-verification all the time: + * line_set(): Set a line to a given layout + * line_assert(): Verify that a line has a given layout + * + * These functions take the line-layout encoded as a string and verify it + * against, or set it on, a term_line object. The format used to describe a + * line looks like this: + * example: "| | A | | | | | | 10 *AB* |" + * + * The string describes the contents of all cells of a line, separated by + * pipe-symbols ('|'). Whitespace are ignored, the leading pipe-symbol is + * optional. 
+ * The description of each cell can contain an arbitrary amount of characters + * in the range 'A'-'Z', 'a'-'z'. All those are combined and used as term_char_t + * on this cell. Any numbers in the description are combined and are used as + * cell-age. + * The occurance of a '*'-symbol marks the cell as bold, '/' marks it as italic. + * You can use those characters multiple times, but only the first one has an + * effect. + * For further symbols, see parse_attr(). + * + * Therefore, the following descriptions are equivalent: + * 1) "| | /A* | | | | | | 10 *AB* |" + * 2) "| | /A** | | | | | | 10 *AB* |" + * 3) "| | A* // | | | | | | 10 *AB* |" + * 4) "| | A* // | | | | | | 1 *AB* 0 |" + * 5) "| | A* // | | | | | | A1B0* |" + * + * The parser isn't very strict about placement of alpha/numerical characters, + * but simply appends all found chars. Don't make use of that feature! It's + * just a stupid parser to simplify these tests. Make them readable! + */ + +static void parse_attr(char c, term_char_t *ch, term_attr *attr, term_age_t *age) { + switch (c) { + case ' ': + /* ignore */ + break; + case '0' ... '9': + /* increase age */ + *age = *age * 10; + *age = *age + c - '0'; + break; + case 'A' ... 'Z': + case 'a' ... 
'z': + /* add to character */ + *ch = term_char_merge(*ch, c); + break; + case '*': + attr->bold = true; + break; + case '/': + attr->italic = true; + break; + default: + assert_se(0); + break; + } +} + +static void cell_assert(MY_ASSERT_ARGS, term_cell *c, term_char_t ch, const term_attr *attr, term_age_t age) { + MY_ASSERT(term_char_equal(c->ch, ch)); + MY_ASSERT(!memcmp(&c->attr, attr, sizeof(*attr))); + MY_ASSERT(c->age == age); +} +#define CELL_ASSERT(_cell, _ch, _attr, _age) cell_assert(MY_ASSERT_VALS, (_cell), (_ch), (_attr), (_age)) + +static void line_assert(MY_ASSERT_ARGS, term_line *l, const char *str, unsigned int fill) { + unsigned int cell_i; + term_char_t ch = TERM_CHAR_NULL; + term_attr attr = { }; + term_age_t age = TERM_AGE_NULL; + char c; + + assert_se(l->fill == fill); + + /* skip leading whitespace */ + while (*str == ' ') + ++str; + + /* skip leading '|' */ + if (*str == '|') + ++str; + + cell_i = 0; + while ((c = *str++)) { + switch (c) { + case '|': + /* end of cell-description; compare it */ + assert_se(cell_i < l->n_cells); + cell_assert(MY_ASSERT_FORW, + &l->cells[cell_i], + ch, + &attr, + age); + + ++cell_i; + ch = term_char_free(ch); + zero(attr); + age = TERM_AGE_NULL; + break; + default: + parse_attr(c, &ch, &attr, &age); + break; + } + } + + assert_se(cell_i == l->n_cells); +} +#define LINE_ASSERT(_line, _str, _fill) line_assert(MY_ASSERT_VALS, (_line), (_str), (_fill)) + +static void line_set(term_line *l, unsigned int pos, const char *str, bool insert_mode) { + term_char_t ch = TERM_CHAR_NULL; + term_attr attr = { }; + term_age_t age = TERM_AGE_NULL; + char c; + + while ((c = *str++)) + parse_attr(c, &ch, &attr, &age); + + term_line_write(l, pos, ch, 1, &attr, age, insert_mode); +} + +static void line_resize(term_line *l, unsigned int width, const term_attr *attr, term_age_t age) { + assert_se(term_line_reserve(l, width, attr, age, width) >= 0); + term_line_set_width(l, width); +} + +static void test_term_line_misc(void) { + 
term_line *l; + + assert_se(term_line_new(&l) >= 0); + assert_se(!term_line_free(l)); + + assert_se(term_line_new(NULL) < 0); + assert_se(!term_line_free(NULL)); + + assert_se(term_line_new(&l) >= 0); + assert_se(l->n_cells == 0); + assert_se(l->fill == 0); + assert_se(term_line_reserve(l, 16, NULL, 0, 0) >= 0); + assert_se(l->n_cells == 16); + assert_se(l->fill == 0); + assert_se(term_line_reserve(l, 512, NULL, 0, 0) >= 0); + assert_se(l->n_cells == 512); + assert_se(l->fill == 0); + assert_se(term_line_reserve(l, 16, NULL, 0, 0) >= 0); + assert_se(l->n_cells == 512); + assert_se(l->fill == 0); + assert_se(!term_line_free(l)); +} + +static void test_term_line_ops(void) { + term_line *l; + term_attr attr_regular = { }; + term_attr attr_bold = { .bold = true }; + term_attr attr_italic = { .italic = true }; + + assert_se(term_line_new(&l) >= 0); + line_resize(l, 8, NULL, 0); + assert_se(l->n_cells == 8); + + LINE_ASSERT(l, "| | | | | | | | |", 0); + + term_line_write(l, 4, TERM_CHAR_NULL, 0, NULL, TERM_AGE_NULL, 0); + LINE_ASSERT(l, "| | | | | | | | |", 5); + + term_line_write(l, 1, PACK1('A'), 1, NULL, TERM_AGE_NULL, 0); + LINE_ASSERT(l, "| |A| | | | | | |", 5); + + term_line_write(l, 8, PACK2('A', 'B'), 1, NULL, TERM_AGE_NULL, 0); + LINE_ASSERT(l, "| |A| | | | | | |", 5); + + term_line_write(l, 7, PACK2('A', 'B'), 1, &attr_regular, 10, 0); + LINE_ASSERT(l, "| |A| | | | | | 10 AB |", 8); + + term_line_write(l, 7, PACK2('A', 'B'), 1, &attr_bold, 10, 0); + LINE_ASSERT(l, "| |A| | | | | | 10 *AB* |", 8); + + term_line_reset(l, NULL, TERM_AGE_NULL); + + LINE_ASSERT(l, "| | | | | | | | |", 0); + line_set(l, 2, "*wxyz* 8", 0); + line_set(l, 3, "/wxyz/ 8", 0); + LINE_ASSERT(l, "| | | *wxyz* 8 | /wxyz/ 8 | | | | |", 4); + line_set(l, 2, "*abc* 9", true); + LINE_ASSERT(l, "| | | *abc* 9 | *wxyz* 9 | /wxyz/ 9 | 9 | 9 | 9 |", 5); + line_set(l, 7, "*abc* 10", true); + LINE_ASSERT(l, "| | | *abc* 9 | *wxyz* 9 | /wxyz/ 9 | 9 | 9 | *abc* 10 |", 8); + + term_line_erase(l, 6, 1, 
NULL, 11, 0); + LINE_ASSERT(l, "| | | *abc* 9 | *wxyz* 9 | /wxyz/ 9 | 9 | 11 | *abc* 10 |", 8); + term_line_erase(l, 6, 2, &attr_italic, 12, 0); + LINE_ASSERT(l, "| | | *abc* 9 | *wxyz* 9 | /wxyz/ 9 | 9 | 12 // | 12 // |", 6); + term_line_erase(l, 7, 2, &attr_regular, 13, 0); + LINE_ASSERT(l, "| | | *abc* 9 | *wxyz* 9 | /wxyz/ 9 | 9 | 12 // | 13 |", 6); + term_line_delete(l, 1, 3, &attr_bold, 14); + LINE_ASSERT(l, "| | /wxyz/ 14 | 14 | 14 // | 14 | 14 ** | 14 ** | 14 ** |", 3); + term_line_insert(l, 2, 2, &attr_regular, 15); + LINE_ASSERT(l, "| | /wxyz/ 14 | 15 | 15 | 15 | 15 // | 15 | 15 ** |", 5); + + assert_se(!term_line_free(l)); +} + +int main(int argc, char *argv[]) { + test_term_char_misc(); + test_term_char_packing(); + test_term_char_allocating(); + + test_term_line_misc(); + test_term_line_ops(); + + return 0; +}