1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2012 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 /* This file is based on the GLIB utf8 validation functions. The
23 * original license text follows. */
25 /* gutf8.c - Operations on UTF-8 strings.
27 * Copyright (C) 1999 Tom Tromey
28 * Copyright (C) 2000 Red Hat, Inc.
30 * This library is free software; you can redistribute it and/or
31 * modify it under the terms of the GNU Lesser General Public
32 * License as published by the Free Software Foundation; either
33 * version 2 of the License, or (at your option) any later version.
35 * This library is distributed in the hope that it will be useful,
36 * but WITHOUT ANY WARRANTY; without even the implied warranty of
37 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
38 * Lesser General Public License for more details.
40 * You should have received a copy of the GNU Lesser General Public
41 * License along with this library; if not, write to the
42 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
43 * Boston, MA 02111-1307, USA.
/* Single-character constant '_'.
 * NOTE(review): its use site is not visible in this excerpt; presumably it is
 * the replacement byte written in place of invalid input when filtering --
 * confirm against the full file. */
54 #define FILTER_CHAR '_'
/* Range checks on a decoded code point `ch`.
 *
 * Each condition below singles out one class of values. The statement executed
 * when a condition matches, and the final return, are not visible in this
 * excerpt (presumably "return false" per match and "return true" otherwise --
 * TODO confirm against the full file). */
56 static inline bool is_unicode_valid(uint32_t ch) {
58 if (ch >= 0x110000) /* End of unicode space: anything above U+10FFFF */
60 if ((ch & 0xFFFFF800) == 0xD800) /* Reserved area for UTF-16: surrogates U+D800..U+DFFF */
62 if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) /* Reserved: noncharacter block U+FDD0..U+FDEF */
64 if ((ch & 0xFFFE) == 0xFFFE) /* Low 16 bits are 0xFFFE or 0xFFFF, in any plane (noncharacters); this is not the BOM, which is U+FEFF */
/* Tests the two most significant bits of `ch` against the UTF-8 continuation
 * byte pattern 10xxxxxx.
 *
 * The condition below is true when `ch` does NOT have that pattern. The
 * statements that follow it are not visible in this excerpt (presumably
 * "return false" on mismatch, "return true" otherwise -- TODO confirm). */
70 static inline bool is_continuation_char(uint8_t ch) {
71 if ((ch & 0xc0) != 0x80) /* 10xxxxxx */
/* Updates the accumulator `*u_ch` using the byte `ch`.
 *
 * NOTE(review): the body is not visible in this excerpt. Judging from the name
 * and from how utf8_validate() calls it after is_continuation_char(), it
 * presumably shifts `*u_ch` left by six bits and ORs in the low six bits of
 * `ch` -- confirm against the full file. */
76 static inline void merge_continuation_char(uint32_t *u_ch, uint8_t ch) {
/* Walks the NUL-terminated byte string `str` and decodes multi-byte UTF-8
 * sequences, optionally writing to `output`.
 *
 * `output` may be NULL (utf8_is_valid() passes NULL); utf8_filter() passes a
 * buffer of strlen(str) + 1 bytes.
 *
 * NOTE(review): several statements of this function (local declarations,
 * labels, error handling, return statements) are not visible in this excerpt.
 * The comments below describe only the visible lines; return values and the
 * exact error path must be confirmed against the full file. */
81 static char* utf8_validate(const char *str, char *output) {
84 const uint8_t *p, *last;
/* Write cursor into `output`, viewed as raw bytes. */
90 o = (uint8_t*) output;
/* Iterate byte by byte until the terminating NUL. */
91 for (p = (const uint8_t*) str; *p; p++) {
/* Classify the lead byte by its high bits, and seed `val` with the bits kept
 * by the mask on each branch. */
98 if ((*p & 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */
/* NOTE(review): a 110xxxxx lead byte carries five payload bits (mask 0x1f);
 * the mask 0x1e used here drops the lowest one. Confirm whether `val` is only
 * meant for range checks for two-byte sequences, or whether this is a bug. */
101 val = (uint32_t) (*p & 0x1e);
103 } else if ((*p & 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/
106 val = (uint32_t) (*p & 0x0f);
108 } else if ((*p & 0xf8) == 0xf0) { /* 11110xxx four-char seq */
111 val = (uint32_t) (*p & 0x07);
/* Up to three continuation bytes follow; each is checked and then merged into
 * `val`. How `p` is advanced between these steps, and how shorter sequences
 * skip the earlier steps, is on lines not visible here. */
116 if (!is_continuation_char(*p))
118 merge_continuation_char(&val, *p);
122 if (!is_continuation_char(*p))
124 merge_continuation_char(&val, *p);
128 if (!is_continuation_char(*p))
130 merge_continuation_char(&val, *p);
/* Check the accumulated value with is_unicode_valid(). */
135 if (!is_unicode_valid(val))
/* Copy `size` bytes starting at `last` to the write cursor; presumably `last`
 * marks the start of the current sequence and `size` its length -- their
 * assignments are not visible here, confirm. */
139 memcpy(o, last, (size_t) size);
148 p = last; /* We retry at the next character */
/* Runs utf8_validate() on `str` with a NULL output buffer and returns its
 * result unchanged.
 *
 * NOTE(review): the meaning of the returned pointer depends on
 * utf8_validate()'s return statements, which are not visible in this excerpt
 * (presumably NULL on invalid input, non-NULL otherwise -- confirm). */
168 char* utf8_is_valid (const char *str) {
169 return utf8_validate(str, NULL);
/* Allocates a buffer of strlen(str) + 1 bytes with malloc() and returns the
 * result of utf8_validate(str, new_str).
 *
 * NOTE(review): handling of a failed malloc() is on lines not visible in this
 * excerpt -- confirm it is checked before the buffer is used. The result
 * presumably points into the allocated buffer, so the caller would have to
 * free() it -- confirm against utf8_validate()'s return statements. */
172 char* utf8_filter (const char *str) {
177 new_str = malloc(strlen(str) + 1);
181 return utf8_validate(str, new_str);
/* Scans the NUL-terminated string `str` for bytes outside the 7-bit ASCII
 * range.
 *
 * Each byte is compared as unsigned char, so the test does not depend on the
 * signedness of plain char. What happens when such a byte is found, and the
 * final return value, are not visible in this excerpt (presumably NULL on the
 * first byte >= 128 and `str` otherwise -- TODO confirm). */
184 char *ascii_is_valid(const char *str) {
189 for (p = str; *p; p++)
190 if ((unsigned char) *p >= 128)
/* Walks a buffer `r` with a read cursor `s` and a second cursor `d`, both
 * starting at `r`, and selects the bytes below 128 (compared as unsigned
 * char).
 *
 * NOTE(review): where `r` comes from, what is done with each selected byte,
 * and the return value are on lines not visible in this excerpt. Presumably
 * `r` is a writable copy of `str`, selected bytes are written through `d` to
 * compact the string in place, and `r` is returned -- confirm against the
 * full file. */
196 char *ascii_filter(const char *str) {
207 for (s = r, d = r; *s; s++)
208 if ((unsigned char) *s < 128)