chiark / gitweb /
journalctl: show any printable Unicode character
[elogind.git] / src / shared / utf8.c
index 11619dce2fa0b9e82d4d680af4e8f389433a881d..a6f5b3f9e5dff38fc28efd4eb1d47943a2f49a87 100644 (file)
@@ -6,16 +6,16 @@
   Copyright 2012 Lennart Poettering
 
   systemd is free software; you can redistribute it and/or modify it
-  under the terms of the GNU General Public License as published by
-  the Free Software Foundation; either version 2 of the License, or
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
   (at your option) any later version.
 
   systemd is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-  General Public License for more details.
+  Lesser General Public License for more details.
 
-  You should have received a copy of the GNU General Public License
+  You should have received a copy of the GNU Lesser General Public License
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
@@ -78,6 +78,77 @@ static inline void merge_continuation_char(uint32_t *u_ch, uint8_t ch) {
         *u_ch |= ch & 0x3f;
 }
 
+/*
+ * Tells whether a code point lies in the C0 range (below ' '), is DEL
+ * (0x7F), or lies in the C1 range (0x80 to 0x9F). TAB sits in C0 but is
+ * let through because it is more or less harmless and commonly used.
+ */
+static bool is_unicode_control(uint32_t ch) {
+        if (ch == '\t')
+                return false;
+
+        /* Remainder of C0: everything below the space character. */
+        if (ch < ' ')
+                return true;
+
+        /* DEL followed directly by the C1 range. */
+        return ch >= 0x7F && ch <= 0x9F;
+}
+
+/*
+ * Checks that the first 'length' bytes of 'str' decode as UTF-8 sequences of
+ * one to four bytes and that none of the decoded code points is a control
+ * character according to is_unicode_control().
+ *
+ * Returns 'str' (with const cast away) if every byte passes, NULL on the
+ * first invalid lead byte, missing or truncated continuation byte, overlong
+ * encoding, or control character. A length of 0 is accepted.
+ *
+ * No byte at or beyond str + length is ever read.
+ */
+char* utf8_is_printable_n(const char* str, size_t length) {
+        uint32_t val = 0;
+        uint32_t min = 0;
+        const uint8_t *p;
+
+        assert(str);
+
+        /* Invariant at the top of each iteration: 'length' counts the bytes
+         * still available starting at (and including) *p. */
+        for (p = (const uint8_t*) str; length; p++, length--) {
+                if (*p < 128) {
+                        val = *p;
+                } else {
+                        /* 'min' is the smallest code point that needs a
+                         * sequence of this size; anything lower is an
+                         * overlong encoding and is rejected below. */
+                        if ((*p & 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */
+                                min = 128;
+                                /* All five payload bits of the lead byte. */
+                                val = (uint32_t) (*p & 0x1f);
+                                goto ONE_REMAINING;
+                        } else if ((*p & 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/
+                                min = (1 << 11);
+                                val = (uint32_t) (*p & 0x0f);
+                                goto TWO_REMAINING;
+                        } else if ((*p & 0xf8) == 0xf0) { /* 11110xxx four-char seq */
+                                min = (1 << 16);
+                                val = (uint32_t) (*p & 0x07);
+                        } else
+                                goto error;
+
+                        /* Each stage consumes one continuation byte. The
+                         * length test must come before the dereference so a
+                         * sequence cut off at the end of the buffer is an
+                         * error rather than an out-of-bounds read. */
+                        p++;
+                        length--;
+                        if (!length || !is_continuation_char(*p))
+                                goto error;
+                        merge_continuation_char(&val, *p);
+
+                TWO_REMAINING:
+                        p++;
+                        length--;
+                        if (!length || !is_continuation_char(*p))
+                                goto error;
+                        merge_continuation_char(&val, *p);
+
+                ONE_REMAINING:
+                        p++;
+                        length--;
+                        if (!length || !is_continuation_char(*p))
+                                goto error;
+                        merge_continuation_char(&val, *p);
+
+                        if (val < min)
+                                goto error;
+                }
+
+                if (is_unicode_control(val))
+                        goto error;
+        }
+
+        return (char*) str;
+
+error:
+        return NULL;
+}
+
 static char* utf8_validate(const char *str, char *output) {
         uint32_t val = 0;
         uint32_t min = 0;