X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=blobdiff_plain;f=src%2Fshared%2Futf8.c;h=c559c13678d62b7cd275f05721febe73b5205924;hb=8d2a6145334257c8a9ceabc9dd52dff06cca818e;hp=a8e28accd3b20e41aad631bda2d08a371cd5ee4a;hpb=036ae95ac4a425475b58e1a8e53d5c52b2c8a218;p=elogind.git diff --git a/src/shared/utf8.c b/src/shared/utf8.c index a8e28accd..c559c1367 100644 --- a/src/shared/utf8.c +++ b/src/shared/utf8.c @@ -98,7 +98,7 @@ static int utf8_encoded_expected_len(const char *str) { } /* decode one unicode char */ -static int utf8_encoded_to_unichar(const char *str) { +int utf8_encoded_to_unichar(const char *str) { int unichar; int len; int i; @@ -136,19 +136,21 @@ static int utf8_encoded_to_unichar(const char *str) { return unichar; } -bool utf8_is_printable(const char* str, size_t length) { +bool utf8_is_printable_newline(const char* str, size_t length, bool newline) { const uint8_t *p; assert(str); - for (p = (const uint8_t*) str; length; p++) { + for (p = (const uint8_t*) str; length;) { int encoded_len = utf8_encoded_valid_unichar((const char *)p); - int32_t val = utf8_encoded_to_unichar((const char*)p); + int val = utf8_encoded_to_unichar((const char*)p); - if (encoded_len < 0 || val < 0 || is_unicode_control(val)) + if (encoded_len < 0 || val < 0 || is_unicode_control(val) || + (!newline && val == '\n')) return false; length -= encoded_len; + p += encoded_len; } return true; @@ -160,7 +162,9 @@ const char *utf8_is_valid(const char *str) { assert(str); for (p = (const uint8_t*) str; *p; ) { - int len = utf8_encoded_valid_unichar((const char *)p); + int len; + + len = utf8_encoded_valid_unichar((const char *)p); if (len < 0) return NULL; @@ -171,37 +175,42 @@ const char *utf8_is_valid(const char *str) { return str; } -char *ascii_is_valid(const char *str) { - const char *p; +char *utf8_escape_invalid(const char *str) { + char *p, *s; assert(str); - for (p = str; *p; p++) - if ((unsigned char) *p >= 128) - return NULL; + p = s = malloc(strlen(str) * 4 + 1); + if (!p) + return NULL; - return (char*) str; -} + while (*str) { + int len; -char *ascii_filter(const char *str) { - const char *s; - char *r, *d; - size_t l; + len = utf8_encoded_valid_unichar(str); + if (len > 0) { + s = mempcpy(s, str, len); + str += len; + } else { + s = mempcpy(s, UTF8_REPLACEMENT_CHARACTER, strlen(UTF8_REPLACEMENT_CHARACTER)); + str += 1; + } + } + *s = '\0'; - assert(str); + return p; +} - l = strlen(str); - r = malloc(l + 1); - if (!r) - return NULL; +char *ascii_is_valid(const char *str) { + const char *p; - for (s = str, d = r; *s; s++) - if ((unsigned char) *s < 128) - *(d++) = *s; + assert(str); - *d = 0; + for (p = str; *p; p++) + if ((unsigned char) *p >= 128) + return NULL; - return r; + return (char*) str; } char *utf16_to_utf8(const void *s, size_t length) {