/***
  Copyright 2012 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

/*
 * Copyright (C) 2000 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

/*
 * Returns true if ch is a control character that should not be printed.
 *
 *   0 to ' '-1 is the C0 range.
 *   DEL=0x7F, and DEL+1 to 0x9F is the C1 range.
 *
 * '\t' and '\n' are in the C0 range, but are treated as harmless and are
 * therefore not reported as control characters.
 */
static bool is_unicode_control(uint32_t ch) {

        if (ch == '\t' || ch == '\n')
                return false;

        return ch < ' ' || (0x7F <= ch && ch <= 0x9F);
}

/*
 * Checks whether the first 'length' bytes of 'str' are printable UTF-8.
 *
 * Returns false if the buffer contains:
 *   - a byte that cannot start a sequence (stray continuation byte, or a
 *     lead byte announcing more than four bytes),
 *   - a sequence that is cut short by the end of the buffer or whose
 *     trailing bytes are not 10xxxxxx continuation bytes,
 *   - an overlong encoding (value below the minimum for its length),
 *   - a control character as defined by is_unicode_control(); this
 *     includes an embedded NUL byte.
 *
 * Returns true otherwise, including for length == 0. 'str' must not be
 * NULL. No check is made for UTF-16 surrogate code points or for values
 * above U+10FFFF.
 */
bool utf8_is_printable(const char* str, size_t length) {
        const uint8_t *bytes = (const uint8_t*) str;
        size_t i = 0;

        assert(str);

        while (i < length) {
                uint8_t lead = bytes[i++];
                unsigned trailing;
                uint32_t val, min;

                if (lead < 128) {
                        /* 0xxxxxxx plain ASCII */
                        val = lead;
                        min = 0;
                        trailing = 0;
                } else if ((lead & 0xe0) == 0xc0) {
                        /* 110xxxxx two-char seq.; all five payload bits count */
                        val = (uint32_t) (lead & 0x1f);
                        min = 128;
                        trailing = 1;
                } else if ((lead & 0xf0) == 0xe0) {
                        /* 1110xxxx three-char seq. */
                        val = (uint32_t) (lead & 0x0f);
                        min = (1 << 11);
                        trailing = 2;
                } else if ((lead & 0xf8) == 0xf0) {
                        /* 11110xxx four-char seq. */
                        val = (uint32_t) (lead & 0x07);
                        min = (1 << 16);
                        trailing = 3;
                } else
                        return false;

                for (; trailing > 0; trailing--) {
                        uint8_t cont;

                        /* Never read past the caller-supplied length */
                        if (i >= length)
                                return false;

                        cont = bytes[i++];
                        if ((cont & 0xc0) != 0x80)
                                return false;

                        val = (val << 6) | (uint32_t) (cont & 0x3f);
                }

                /* Reject overlong encodings */
                if (val < min)
                        return false;

                if (is_unicode_control(val))
                        return false;
        }

        return true;
}

/* Reads the little-endian 16 bit code unit number 'idx' from 'p'. */
static uint32_t utf16le_unit_at(const uint8_t *p, size_t idx) {
        return (uint32_t) p[2 * idx] | ((uint32_t) p[2 * idx + 1] << 8);
}

/* Writes 'c' as UTF-8 at 't' (1 to 4 bytes) and returns the new write position. */
static uint8_t *utf8_put_codepoint(uint8_t *t, uint32_t c) {
        if (c < 0x80) {
                *(t++) = (uint8_t) c;
        } else if (c < 0x800) {
                *(t++) = (uint8_t) (0xc0 | (c >> 6));
                *(t++) = (uint8_t) (0x80 | (c & 0x3f));
        } else if (c < 0x10000) {
                *(t++) = (uint8_t) (0xe0 | (c >> 12));
                *(t++) = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
                *(t++) = (uint8_t) (0x80 | (c & 0x3f));
        } else {
                *(t++) = (uint8_t) (0xf0 | (c >> 18));
                *(t++) = (uint8_t) (0x80 | ((c >> 12) & 0x3f));
                *(t++) = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
                *(t++) = (uint8_t) (0x80 | (c & 0x3f));
        }

        return t;
}

/*
 * Converts little-endian UTF-16 to a newly allocated, NUL-terminated
 * UTF-8 string.
 *
 * 's' points to 'length' bytes (not code units) of input. Conversion
 * stops at the first zero code unit or at the end of the input,
 * whichever comes first. A dangling odd byte at the end is ignored.
 *
 * A leading surrogate directly followed by a trailing surrogate is
 * combined into one code point and written as a four byte sequence.
 * A surrogate without a partner is written as a three byte sequence
 * on its own.
 *
 * Returns NULL if the output buffer cannot be allocated. The caller
 * owns the result and releases it with free().
 */
char *utf16_to_utf8(const void *s, size_t length) {
        const uint8_t *in = s;
        size_t units = length / 2;
        uint8_t *out, *t;
        size_t i;

        /* Up to three output bytes per code unit plus the terminator;
         * a surrogate pair turns two units into four bytes, which fits. */
        if (units > (SIZE_MAX - 1) / 3)
                return NULL;

        out = malloc(units * 3 + 1);
        if (!out)
                return NULL;

        t = out;

        for (i = 0; i < units; i++) {
                uint32_t c = utf16le_unit_at(in, i);

                if (c == 0)
                        break;

                if (c >= 0xd800 && c <= 0xdbff && i + 1 < units) {
                        uint32_t low = utf16le_unit_at(in, i + 1);

                        if (low >= 0xdc00 && low <= 0xdfff) {
                                c = 0x10000 + ((c - 0xd800) << 10) + (low - 0xdc00);
                                i++; /* the trailing surrogate is consumed as well */
                        }
                }

                t = utf8_put_codepoint(t, c);
        }

        *t = 0;

        return (char*) out;
}