chiark / gitweb /
Some minor debianization fixes
[disorder] / lib / t-unicode.c
CommitLineData
b90f122b
RK
1/*
2 * This file is part of DisOrder.
3 * Copyright (C) 2005, 2007, 2008 Richard Kettlewell
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
18 * USA
19 */
20#include "test.h"
21
/** @brief Open a Unicode test file
 * @param path Test file name relative to the Unicode 5.0.0 UCD directory
 * @return Stream open for reading on the local copy of the file
 *
 * The local copy is looked up in the current directory under the last
 * component of @p path.  If it cannot be opened it is fetched with wget,
 * made read-only and then opened; any failure along the way is reported
 * through fatal().
 */
static FILE *open_unicode_test(const char *path) {
  char command[1024];
  const char *slash = strrchr(path, '/');
  /* Local file name: whatever follows the final '/', or all of path */
  const char *leaf = slash ? slash + 1 : path;
  FILE *stream = fopen(leaf, "r");
  int status;

  if(stream)
    return stream;                      /* already downloaded */
  snprintf(command, sizeof command,
           "wget http://www.unicode.org/Public/5.0.0/ucd/%s", path);
  status = system(command);
  if(status)
    fatal(0, "%s: %s", command, wstat(status));
  if(chmod(leaf, 0444) < 0)
    fatal(errno, "chmod %s", leaf);
  stream = fopen(leaf, "r");
  if(!stream)
    fatal(errno, "%s", leaf);
  return stream;
}
45
46/** @brief Run breaking tests for utf32_grapheme_boundary() etc */
47static void breaktest(const char *path,
48 int (*breakfn)(const uint32_t *, size_t, size_t)) {
49 FILE *fp = open_unicode_test(path);
50 int lineno = 0;
51 char *l, *lp;
52 size_t bn, n;
53 char break_allowed[1024];
54 uint32_t buffer[1024];
55
56 while(!inputline(path, fp, &l, '\n')) {
57 ++lineno;
58 if(l[0] == '#') continue;
59 bn = 0;
60 lp = l;
61 while(*lp) {
62 if(*lp == ' ' || *lp == '\t') {
63 ++lp;
64 continue;
65 }
66 if(*lp == '#')
67 break;
68 if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0xB7) {
69 /* 00F7 DIVISION SIGN */
70 break_allowed[bn] = 1;
71 lp += 2;
72 continue;
73 }
74 if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0x97) {
75 /* 00D7 MULTIPLICATION SIGN */
76 break_allowed[bn] = 0;
77 lp += 2;
78 continue;
79 }
80 if(isxdigit((unsigned char)*lp)) {
81 buffer[bn++] = strtoul(lp, &lp, 16);
82 continue;
83 }
84 fatal(0, "%s:%d: evil line: %s", path, lineno, l);
85 }
86 for(n = 0; n <= bn; ++n) {
87 if(breakfn(buffer, bn, n) != break_allowed[n]) {
88 fprintf(stderr,
89 "%s:%d: offset %zu: mismatch\n"
90 "%s\n"
91 "\n",
92 path, lineno, n, l);
93 count_error();
94 }
95 ++tests;
96 }
97 xfree(l);
98 }
99 fclose(fp);
100}
101
102/** @brief Tests for @ref lib/unicode.h */
103void test_unicode(void) {
104 FILE *fp;
105 int lineno = 0;
106 char *l, *lp;
107 uint32_t buffer[1024];
108 uint32_t *c[6], *NFD_c[6], *NFKD_c[6], *NFC_c[6], *NFKC_c[6]; /* 1-indexed */
109 int cn, bn;
110
111 fprintf(stderr, "test_unicode\n");
112 fp = open_unicode_test("NormalizationTest.txt");
113 while(!inputline("NormalizationTest.txt", fp, &l, '\n')) {
114 ++lineno;
115 if(*l == '#' || *l == '@')
116 continue;
117 bn = 0;
118 cn = 1;
119 lp = l;
120 c[cn++] = &buffer[bn];
121 while(*lp && *lp != '#') {
122 if(*lp == ' ') {
123 ++lp;
124 continue;
125 }
126 if(*lp == ';') {
127 buffer[bn++] = 0;
128 if(cn == 6)
129 break;
130 c[cn++] = &buffer[bn];
131 ++lp;
132 continue;
133 }
134 buffer[bn++] = strtoul(lp, &lp, 16);
135 }
136 buffer[bn] = 0;
137 assert(cn == 6);
138 for(cn = 1; cn <= 5; ++cn) {
139 NFD_c[cn] = utf32_decompose_canon(c[cn], utf32_len(c[cn]), 0);
140 NFKD_c[cn] = utf32_decompose_compat(c[cn], utf32_len(c[cn]), 0);
141 NFC_c[cn] = utf32_compose_canon(c[cn], utf32_len(c[cn]), 0);
142 NFKC_c[cn] = utf32_compose_compat(c[cn], utf32_len(c[cn]), 0);
143 }
144#define unt_check(T, A, B) do { \
145 ++tests; \
146 if(utf32_cmp(c[A], T##_c[B])) { \
147 fprintf(stderr, \
148 "NormalizationTest.txt:%d: c%d != "#T"(c%d)\n", \
149 lineno, A, B); \
150 fprintf(stderr, " c%d:%s\n", \
151 A, format_utf32(c[A])); \
152 fprintf(stderr, " c%d:%s\n", \
153 B, format_utf32(c[B])); \
154 fprintf(stderr, "%4s(c%d):%s\n", \
155 #T, B, format_utf32(T##_c[B])); \
156 count_error(); \
157 } \
158 } while(0)
159 unt_check(NFD, 3, 1);
160 unt_check(NFD, 3, 2);
161 unt_check(NFD, 3, 3);
162 unt_check(NFD, 5, 4);
163 unt_check(NFD, 5, 5);
164 unt_check(NFKD, 5, 1);
165 unt_check(NFKD, 5, 2);
166 unt_check(NFKD, 5, 3);
167 unt_check(NFKD, 5, 4);
168 unt_check(NFKD, 5, 5);
169 unt_check(NFC, 2, 1);
170 unt_check(NFC, 2, 2);
171 unt_check(NFC, 2, 3);
172 unt_check(NFC, 4, 4);
173 unt_check(NFC, 4, 5);
174 unt_check(NFKC, 4, 1);
175 unt_check(NFKC, 4, 2);
176 unt_check(NFKC, 4, 3);
177 unt_check(NFKC, 4, 4);
178 unt_check(NFKC, 4, 5);
179 for(cn = 1; cn <= 5; ++cn) {
180 xfree(NFD_c[cn]);
181 xfree(NFKD_c[cn]);
182 }
183 xfree(l);
184 }
185 fclose(fp);
186 breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_grapheme_boundary);
187 breaktest("auxiliary/WordBreakTest.txt", utf32_is_word_boundary);
188 insist(utf32_combining_class(0x40000) == 0);
189 insist(utf32_combining_class(0xE0000) == 0);
190}
191
192/*
193Local Variables:
194c-basic-offset:2
195comment-column:40
196fill-column:79
197indent-tabs-mode:nil
198End:
199*/