X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=blobdiff_plain;f=src%2Fshared%2Futf8.c;h=c559c13678d62b7cd275f05721febe73b5205924;hb=d9c67ea112724e271c39553d966eae612e272e34;hp=c3d97cc783f06e2805a4a2394a162e87a3b8b0e1;hpb=8f6ce71fe79d897b67157d92869db87ee2042af6;p=elogind.git diff --git a/src/shared/utf8.c b/src/shared/utf8.c index c3d97cc78..c559c1367 100644 --- a/src/shared/utf8.c +++ b/src/shared/utf8.c @@ -3,6 +3,7 @@ /*** This file is part of systemd. + Copyright 2008-2011 Kay Sievers Copyright 2012 Lennart Poettering systemd is free software; you can redistribute it and/or modify it @@ -19,7 +20,7 @@ along with systemd; If not, see . ***/ -/* This file is based on the GLIB utf8 validation functions. The +/* Parts of this file are based on the GLIB utf8 validation functions. The * original license text follows. */ /* gutf8.c - Operations on UTF-8 strings. @@ -97,7 +98,7 @@ static int utf8_encoded_expected_len(const char *str) { } /* decode one unicode char */ -static int utf8_encoded_to_unichar(const char *str) { +int utf8_encoded_to_unichar(const char *str) { int unichar; int len; int i; @@ -135,19 +136,21 @@ static int utf8_encoded_to_unichar(const char *str) { return unichar; } -bool utf8_is_printable(const char* str, size_t length) { +bool utf8_is_printable_newline(const char* str, size_t length, bool newline) { const uint8_t *p; assert(str); - for (p = (const uint8_t*) str; length; p++) { + for (p = (const uint8_t*) str; length;) { int encoded_len = utf8_encoded_valid_unichar((const char *)p); - int32_t val = utf8_encoded_to_unichar((const char*)p); + int val = utf8_encoded_to_unichar((const char*)p); - if (encoded_len < 0 || val < 0 || is_unicode_control(val)) + if (encoded_len < 0 || val < 0 || is_unicode_control(val) || + (!newline && val == '\n')) return false; length -= encoded_len; + p += encoded_len; } return true; @@ -159,7 +162,9 @@ const char *utf8_is_valid(const char *str) { assert(str); for (p = (const uint8_t*) str; *p; ) { - int len = utf8_encoded_valid_unichar((const char *)p); + int len; + + len = utf8_encoded_valid_unichar((const char *)p); if (len < 0) return NULL; @@ -170,37 +175,42 @@ const char *utf8_is_valid(const char *str) { return str; } -char *ascii_is_valid(const char *str) { - const char *p; +char *utf8_escape_invalid(const char *str) { + char *p, *s; assert(str); - for (p = str; *p; p++) - if ((unsigned char) *p >= 128) - return NULL; + p = s = malloc(strlen(str) * 4 + 1); + if (!p) + return NULL; - return (char*) str; -} + while (*str) { + int len; -char *ascii_filter(const char *str) { - const char *s; - char *r, *d; - size_t l; + len = utf8_encoded_valid_unichar(str); + if (len > 0) { + s = mempcpy(s, str, len); + str += len; + } else { + s = mempcpy(s, UTF8_REPLACEMENT_CHARACTER, strlen(UTF8_REPLACEMENT_CHARACTER)); + str += 1; + } + } + *s = '\0'; - assert(str); + return p; +} - l = strlen(str); - r = malloc(l + 1); - if (!r) - return NULL; +char *ascii_is_valid(const char *str) { + const char *p; - for (s = str, d = r; *s; s++) - if ((unsigned char) *s < 128) - *(d++) = *s; + assert(str); - *d = 0; + for (p = str; *p; p++) + if ((unsigned char) *p >= 128) + return NULL; - return r; + return (char*) str; } char *utf16_to_utf8(const void *s, size_t length) {