X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=blobdiff_plain;f=src%2Fshared%2Futf8.c;h=c559c13678d62b7cd275f05721febe73b5205924;hb=dc92e62c6c34f242aa54aa187e50a94ed7695c51;hp=732f0f00ca20d2003cd3c96122cf0099abec15bf;hpb=7991ac34ab08421415b907e42775c5539a4a5bbb;p=elogind.git diff --git a/src/shared/utf8.c b/src/shared/utf8.c index 732f0f00c..c559c1367 100644 --- a/src/shared/utf8.c +++ b/src/shared/utf8.c @@ -3,6 +3,7 @@ /*** This file is part of systemd. + Copyright 2008-2011 Kay Sievers Copyright 2012 Lennart Poettering systemd is free software; you can redistribute it and/or modify it @@ -19,7 +20,7 @@ along with systemd; If not, see . ***/ -/* This file is based on the GLIB utf8 validation functions. The +/* Parts of this file are based on the GLIB utf8 validation functions. The * original license text follows. */ /* gutf8.c - Operations on UTF-8 strings. @@ -97,7 +98,7 @@ static int utf8_encoded_expected_len(const char *str) { } /* decode one unicode char */ -static int utf8_encoded_to_unichar(const char *str) { +int utf8_encoded_to_unichar(const char *str) { int unichar; int len; int i; @@ -135,19 +136,21 @@ static int utf8_encoded_to_unichar(const char *str) { return unichar; } -bool utf8_is_printable(const char* str, size_t length) { +bool utf8_is_printable_newline(const char* str, size_t length, bool newline) { const uint8_t *p; assert(str); - for (p = (const uint8_t*) str; length; p++) { + for (p = (const uint8_t*) str; length;) { int encoded_len = utf8_encoded_valid_unichar((const char *)p); - int32_t val = utf8_encoded_to_unichar((const char*)p); + int val = utf8_encoded_to_unichar((const char*)p); - if (encoded_len < 0 || val < 0 || is_unicode_control(val)) + if (encoded_len < 0 || val < 0 || is_unicode_control(val) || + (!newline && val == '\n')) return false; length -= encoded_len; + p += encoded_len; } return true; @@ -159,7 +162,9 @@ const char *utf8_is_valid(const char *str) { assert(str); for (p = (const uint8_t*) str; *p; ) { - int len = utf8_encoded_valid_unichar((const char *)p); + int len; + + len = utf8_encoded_valid_unichar((const char *)p); if (len < 0) return NULL; @@ -170,37 +175,42 @@ const char *utf8_is_valid(const char *str) { return str; } -char *ascii_is_valid(const char *str) { - const char *p; +char *utf8_escape_invalid(const char *str) { + char *p, *s; assert(str); - for (p = str; *p; p++) - if ((unsigned char) *p >= 128) - return NULL; + p = s = malloc(strlen(str) * 4 + 1); + if (!p) + return NULL; - return (char*) str; -} + while (*str) { + int len; -char *ascii_filter(const char *str) { - const char *s; - char *r, *d; - size_t l; + len = utf8_encoded_valid_unichar(str); + if (len > 0) { + s = mempcpy(s, str, len); + str += len; + } else { + s = mempcpy(s, UTF8_REPLACEMENT_CHARACTER, strlen(UTF8_REPLACEMENT_CHARACTER)); + str += 1; + } + } + *s = '\0'; - assert(str); + return p; +} - l = strlen(str); - r = malloc(l + 1); - if (!r) - return NULL; +char *ascii_is_valid(const char *str) { + const char *p; - for (s = str, d = r; *s; s++) - if ((unsigned char) *s < 128) - *(d++) = *s; + assert(str); - *d = 0; + for (p = str; *p; p++) + if ((unsigned char) *p >= 128) + return NULL; - return r; + return (char*) str; } char *utf16_to_utf8(const void *s, size_t length) { @@ -285,49 +295,3 @@ int utf8_encoded_valid_unichar(const char *str) { return len; } - -int is_utf8_encoding_whitelisted(char c, const char *white) { - if ((c >= '0' && c <= '9') || - (c >= 'A' && c <= 'Z') || - (c >= 'a' && c <= 'z') || - strchr("#+-.:=@_", c) != NULL || - (white != NULL && strchr(white, c) != NULL)) - return 1; - return 0; -} - -int udev_encode_string(const char *str, char *str_enc, size_t len) { - size_t i, j; - - if (str == NULL || str_enc == NULL) - return -1; - - for (i = 0, j = 0; str[i] != '\0'; i++) { - int seqlen; - - seqlen = utf8_encoded_valid_unichar(&str[i]); - if (seqlen > 1) { - if (len-j < (size_t)seqlen) - goto err; - memcpy(&str_enc[j], &str[i], seqlen); - j += seqlen; - i += (seqlen-1); - } else if (str[i] == '\\' || !is_utf8_encoding_whitelisted(str[i], NULL)) { - if (len-j < 4) - goto err; - sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]); - j += 4; - } else { - if (len-j < 1) - goto err; - str_enc[j] = str[i]; - j++; - } - } - if (len-j < 1) - goto err; - str_enc[j] = '\0'; - return 0; -err: - return -1; -}