chiark / gitweb /
journalctl: show any printable Unicode character
[elogind.git] / src / shared / utf8.c
index 13f0521..a6f5b3f 100644 (file)
@@ -78,6 +78,77 @@ static inline void merge_continuation_char(uint32_t *u_ch, uint8_t ch) {
         *u_ch |= ch & 0x3f;
 }
 
+/*
+ * Tells whether a code point is a control character that should not be
+ * shown as-is.
+ *
+ * Code points below ' ' form the C0 range; DEL is 0x7F and the C1 range
+ * runs from DEL+1 up to 0x9F. '\t' sits inside C0 but is exempted, as it
+ * is more or less harmless and commonly used.
+ */
+static bool is_unicode_control(uint32_t ch) {
+
+        /* Tab is the one C0 character that is let through. */
+        if (ch == '\t')
+                return false;
+
+        /* Remainder of the C0 range. */
+        if (ch < ' ')
+                return true;
+
+        /* DEL followed by the C1 range. */
+        return ch >= 0x7F && ch <= 0x9F;
+}
+
+/*
+ * Checks that the first `length` bytes of `str` decode as UTF-8 and that
+ * none of the decoded code points is rejected by is_unicode_control().
+ *
+ * Returns `str` (cast to non-const) when every sequence is accepted, or
+ * NULL on the first malformed, truncated or overlong sequence, or on the
+ * first control character. `str` must not be NULL; it does not need to be
+ * NUL-terminated, since no byte at or beyond `str + length` is read.
+ */
+char* utf8_is_printable_n(const char* str, size_t length) {
+        uint32_t val = 0;
+        uint32_t min = 0;
+        unsigned remaining = 0;
+        const uint8_t *p;
+
+        assert(str);
+
+        /* Invariant at the top of each iteration: `length` counts the bytes
+         * still available starting at (and including) *p, and is non-zero. */
+        for (p = (const uint8_t*) str; length; p++, length--) {
+                if (*p < 128) {
+                        /* Plain ASCII, no continuation bytes. */
+                        val = *p;
+                        min = 0;
+                        remaining = 0;
+                } else if ((*p & 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */
+                        min = 128;
+                        /* The lead byte carries five payload bits; masking
+                         * with 0x1e would drop the lowest one and turn e.g.
+                         * U+00C5 into 0x85, a C1 control. */
+                        val = (uint32_t) (*p & 0x1f);
+                        remaining = 1;
+                } else if ((*p & 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/
+                        min = (1 << 11);
+                        val = (uint32_t) (*p & 0x0f);
+                        remaining = 2;
+                } else if ((*p & 0xf8) == 0xf0) { /* 11110xxx four-char seq */
+                        min = (1 << 16);
+                        val = (uint32_t) (*p & 0x07);
+                        remaining = 3;
+                } else
+                        return NULL;
+
+                for (; remaining > 0; remaining--) {
+                        p++;
+                        length--;
+
+                        /* Test the remaining length before dereferencing, so
+                         * that a sequence cut off by the end of the buffer is
+                         * rejected instead of read past, and so that `length`
+                         * cannot wrap around in the outer loop's decrement. */
+                        if (!length || !is_continuation_char(*p))
+                                return NULL;
+
+                        merge_continuation_char(&val, *p);
+                }
+
+                /* Overlong encoding: the value would have fit a shorter form. */
+                if (val < min)
+                        return NULL;
+
+                if (is_unicode_control(val))
+                        return NULL;
+        }
+
+        return (char*) str;
+}
+
 static char* utf8_validate(const char *str, char *output) {
         uint32_t val = 0;
         uint32_t min = 0;