#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

/*
 * Convert a little-endian UTF-16 buffer into a newly allocated,
 * NUL-terminated UTF-8 string.
 *
 * s       Input buffer holding UTF-16LE code units. May be NULL only
 *         when length is 0.
 * length  Size of the input in BYTES (not code units). A trailing odd
 *         byte cannot form a code unit and is ignored.
 *
 * Conversion stops at the first 0x0000 code unit or at the end of the
 * buffer, whichever comes first. A valid surrogate pair is combined
 * into a single four-byte UTF-8 sequence. An unpaired surrogate is
 * replaced by U+FFFD, so the result is always well-formed UTF-8.
 *
 * Returns the string, which the caller must release with free(), or
 * NULL if memory could not be allocated.
 */
char *utf16_to_utf8(const void *s, size_t length) {
        const uint8_t *in = s;
        size_t n_units = length / 2; /* whole 16-bit units only */
        size_t i = 0;
        uint8_t *t;
        char *r;

        /* One code unit yields at most three bytes of UTF-8; a surrogate
         * pair (two units) yields four. Hence 3 * n_units + 1 bytes is
         * always sufficient, provided the product does not wrap. */
        if (n_units > (SIZE_MAX - 1) / 3) {
                return NULL;
        }

        r = malloc(n_units * 3 + 1);
        if (!r) {
                return NULL;
        }

        t = (uint8_t*) r;

        while (i < n_units) {
                uint32_t c;

                c = (uint32_t) in[2 * i] | ((uint32_t) in[2 * i + 1] << 8);
                i++;

                if (c == 0) {
                        break;
                }

                if (c >= 0xd800 && c <= 0xdbff) {
                        /* Leading surrogate: only meaningful when directly
                         * followed by a trailing surrogate. */
                        uint32_t lo = 0;

                        if (i < n_units) {
                                lo = (uint32_t) in[2 * i] | ((uint32_t) in[2 * i + 1] << 8);
                        }

                        if (lo >= 0xdc00 && lo <= 0xdfff) {
                                c = 0x10000 + ((c - 0xd800) << 10) + (lo - 0xdc00);
                                i++;
                        } else {
                                /* Leave the following unit to be decoded
                                 * on its own in the next iteration. */
                                c = 0xfffd;
                        }
                } else if (c >= 0xdc00 && c <= 0xdfff) {
                        /* Trailing surrogate without a leading one. */
                        c = 0xfffd;
                }

                if (c < 0x80) {
                        *(t++) = (uint8_t) c;
                } else if (c < 0x800) {
                        *(t++) = (uint8_t) (0xc0 | (c >> 6));
                        *(t++) = (uint8_t) (0x80 | (c & 0x3f));
                } else if (c < 0x10000) {
                        *(t++) = (uint8_t) (0xe0 | (c >> 12));
                        *(t++) = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
                        *(t++) = (uint8_t) (0x80 | (c & 0x3f));
                } else {
                        *(t++) = (uint8_t) (0xf0 | (c >> 18));
                        *(t++) = (uint8_t) (0x80 | ((c >> 12) & 0x3f));
                        *(t++) = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
                        *(t++) = (uint8_t) (0x80 | (c & 0x3f));
                }
        }

        *t = 0;

        return r;
}