1 /* SPDX-License-Identifier: LGPL-2.1+ */
/***
  This file is part of systemd.

  Copyright 2013 Dave Reisner

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21 #include "alloc-util.h"
22 #include "string-util.h"
/* Exercises utf8_is_printable() with an explicit byte length for each input:
 *   - ASCII text containing a tab (22 bytes) is expected to be printable;
 *   - a complete three-byte sequence is expected to be printable;
 *   - a 0xE1 lead byte followed by a single continuation byte is expected to be rejected;
 *   - two non-ASCII letters passed with a length of 4 bytes are expected to be printable. */
static void test_utf8_is_printable(void) {
        assert_se(utf8_is_printable("ascii is valid\tunicode", 22));
        assert_se(utf8_is_printable("\342\204\242", 3));
        assert_se(!utf8_is_printable("\341\204", 2));
        assert_se(utf8_is_printable("ąę", 4));
}
/* Exercises utf8_is_valid() on NUL-terminated strings: plain ASCII and a complete
 * three-byte sequence must be accepted, while a 0xE1 lead byte followed by only one
 * continuation byte must be rejected. */
static void test_utf8_is_valid(void) {
        assert_se(utf8_is_valid("ascii is valid unicode"));
        assert_se(utf8_is_valid("\342\204\242"));
        assert_se(!utf8_is_valid("\341\204"));
}
/* Exercises ascii_is_valid(): text made of letters, spaces and the control characters
 * \t, \v and \n must be accepted; any string containing bytes with the high bit set
 * (both inputs below consist solely of such bytes) must be rejected. */
static void test_ascii_is_valid(void) {
        assert_se(ascii_is_valid("alsdjf\t\vbarr\nba z"));
        assert_se(!ascii_is_valid("\342\204\242"));
        assert_se(!ascii_is_valid("\341\204"));
}
/* Exercises utf8_encoded_valid_unichar(), which is expected to return the byte length
 * of the first encoded character (3, 2 and 1 for the inputs below) and a negative
 * value when the string starts with an incomplete sequence. */
static void test_utf8_encoded_valid_unichar(void) {
        /* Well-formed leading characters of decreasing encoded length. */
        assert_se(utf8_encoded_valid_unichar("\342\204\242") == 3);
        assert_se(utf8_encoded_valid_unichar("\302\256") == 2);
        assert_se(utf8_encoded_valid_unichar("a") == 1);

        /* 0xE1 announces a three-byte sequence but only one continuation byte follows,
         * either before the terminating NUL or before another lead byte. */
        assert_se(utf8_encoded_valid_unichar("\341\204") < 0);
        assert_se(utf8_encoded_valid_unichar("\341\204\341\204") < 0);
}
53 static void test_utf8_escaping(void) {
54 _cleanup_free_ char *p1, *p2, *p3;
56 p1 = utf8_escape_invalid("goo goo goo");
58 assert_se(utf8_is_valid(p1));
60 p2 = utf8_escape_invalid("\341\204\341\204");
62 assert_se(utf8_is_valid(p2));
64 p3 = utf8_escape_invalid("\341\204");
66 assert_se(utf8_is_valid(p3));
69 static void test_utf8_escaping_printable(void) {
70 _cleanup_free_ char *p1, *p2, *p3, *p4, *p5, *p6;
72 p1 = utf8_escape_non_printable("goo goo goo");
74 assert_se(utf8_is_valid(p1));
76 p2 = utf8_escape_non_printable("\341\204\341\204");
78 assert_se(utf8_is_valid(p2));
80 p3 = utf8_escape_non_printable("\341\204");
82 assert_se(utf8_is_valid(p3));
84 p4 = utf8_escape_non_printable("ąę\n가너도루\n1234\n\341\204\341\204\n\001 \019\20\a");
86 assert_se(utf8_is_valid(p4));
88 p5 = utf8_escape_non_printable("\001 \019\20\a");
90 assert_se(utf8_is_valid(p5));
92 p6 = utf8_escape_non_printable("\xef\xbf\x30\x13");
94 assert_se(utf8_is_valid(p6));
97 static void test_utf16_to_utf8(void) {
99 const uint16_t utf16[] = { htole16('a'), htole16(0xd800), htole16('b'), htole16(0xdc00), htole16('c'), htole16(0xd801), htole16(0xdc37) };
100 const char utf8[] = { 'a', 'b', 'c', 0xf0, 0x90, 0x90, 0xb7, 0 };
102 a = utf16_to_utf8(utf16, 14);
104 assert_se(streq(a, utf8));
/* Exercises utf8_n_codepoints(), which is expected to count characters rather than
 * bytes and to return (size_t) -1 for input that is not valid UTF-8. */
static void test_utf8_n_codepoints(void) {
        /* ASCII only: one code point per byte. */
        assert_se(utf8_n_codepoints("abc") == 3);
        /* Text mixing ASCII and non-ASCII letters: 19 code points including the two spaces. */
        assert_se(utf8_n_codepoints("zażółcić gęślą jaźń") == 19);
        /* A single non-ASCII character counts once. */
        assert_se(utf8_n_codepoints("串") == 1);
        /* Empty string. */
        assert_se(utf8_n_codepoints("") == 0);
        /* Five non-ASCII characters: two ellipses and three emoji. */
        assert_se(utf8_n_codepoints("…👊🔪💐…") == 5);
        /* A lone lead byte with no continuation bytes is invalid. */
        assert_se(utf8_n_codepoints("\xF1") == (size_t) -1);
}
118 int main(int argc, char *argv[]) {
119 test_utf8_is_valid();
120 test_utf8_is_printable();
121 test_ascii_is_valid();
122 test_utf8_encoded_valid_unichar();
123 test_utf8_escaping();
124 test_utf8_escaping_printable();
125 test_utf16_to_utf8();
126 test_utf8_n_codepoints();