| 1 | /* |
| 2 | * This file is part of DisOrder. |
| 3 | * Copyright (C) 2005, 2007-2009, 2011 Richard Kettlewell |
| 4 | * |
| 5 | * This program is free software: you can redistribute it and/or modify |
| 6 | * it under the terms of the GNU General Public License as published by |
| 7 | * the Free Software Foundation, either version 3 of the License, or |
| 8 | * (at your option) any later version. |
| 9 | * |
| 10 | * This program is distributed in the hope that it will be useful, |
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | * GNU General Public License for more details. |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License |
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 17 | */ |
| 18 | #include "test.h" |
| 19 | |
/** @brief Open a Unicode test file
 * @param path Name of the test file below the UCD 6.0.0 directory on
 *             www.unicode.org (may contain directory components)
 * @return Stream open for reading
 *
 * The file is opened by the last component of @p path, relative to the
 * current directory.  If that fails, a wget command for
 * http://www.unicode.org/Public/6.0.0/ucd/@p path is run through system(),
 * the file is chmod'ed to 0444 and the open is retried.  A failing command,
 * chmod() or second fopen() is reported through disorder_fatal().
 */
static FILE *open_unicode_test(const char *path) {
  const char *slash = strrchr(path, '/');
  const char *leaf = slash ? slash + 1 : path;
  FILE *stream = fopen(leaf, "r");
  char command[1024];
  int status;

  /* Fast path: a copy is already present */
  if(stream)
    return stream;

  /* No local copy could be opened, so try to download one */
  snprintf(command, sizeof command,
           "wget http://www.unicode.org/Public/6.0.0/ucd/%s", path);
  status = system(command);
  if(status)
    disorder_fatal(0, "%s: %s", command, wstat(status));
  if(chmod(leaf, 0444) < 0)
    disorder_fatal(errno, "chmod %s", leaf);
  stream = fopen(leaf, "r");
  if(!stream)
    disorder_fatal(errno, "%s", leaf);
  return stream;
}
| 43 | |
| 44 | /** @brief Run breaking tests for utf32_grapheme_boundary() etc */ |
| 45 | static void breaktest(const char *path, |
| 46 | int (*breakfn)(const uint32_t *, size_t, size_t)) { |
| 47 | FILE *fp = open_unicode_test(path); |
| 48 | int lineno = 0; |
| 49 | char *l, *lp; |
| 50 | size_t bn, n; |
| 51 | char break_allowed[1024]; |
| 52 | uint32_t buffer[1024]; |
| 53 | |
| 54 | while(!inputline(path, fp, &l, '\n')) { |
| 55 | ++lineno; |
| 56 | if(l[0] == '#') continue; |
| 57 | bn = 0; |
| 58 | lp = l; |
| 59 | while(*lp) { |
| 60 | if(*lp == ' ' || *lp == '\t') { |
| 61 | ++lp; |
| 62 | continue; |
| 63 | } |
| 64 | if(*lp == '#') |
| 65 | break; |
| 66 | if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0xB7) { |
| 67 | /* 00F7 DIVISION SIGN */ |
| 68 | break_allowed[bn] = 1; |
| 69 | lp += 2; |
| 70 | continue; |
| 71 | } |
| 72 | if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0x97) { |
| 73 | /* 00D7 MULTIPLICATION SIGN */ |
| 74 | break_allowed[bn] = 0; |
| 75 | lp += 2; |
| 76 | continue; |
| 77 | } |
| 78 | if(isxdigit((unsigned char)*lp)) { |
| 79 | buffer[bn++] = strtoul(lp, &lp, 16); |
| 80 | continue; |
| 81 | } |
| 82 | disorder_fatal(0, "%s:%d: evil line: %s", path, lineno, l); |
| 83 | } |
| 84 | for(n = 0; n <= bn; ++n) { |
| 85 | if(breakfn(buffer, bn, n) != break_allowed[n]) { |
| 86 | fprintf(stderr, |
| 87 | "%s:%d: offset %zu: mismatch\n" |
| 88 | "%s\n" |
| 89 | "\n", |
| 90 | path, lineno, n, l); |
| 91 | count_error(); |
| 92 | } |
| 93 | ++tests; |
| 94 | } |
| 95 | xfree(l); |
| 96 | } |
| 97 | fclose(fp); |
| 98 | } |
| 99 | |
| 100 | /** @brief Tests for @ref lib/unicode.h */ |
| 101 | static void test_unicode(void) { |
| 102 | FILE *fp; |
| 103 | int lineno = 0; |
| 104 | char *l, *lp; |
| 105 | uint32_t buffer[1024]; |
| 106 | uint32_t *c[6], *NFD_c[6], *NFKD_c[6], *NFC_c[6], *NFKC_c[6]; /* 1-indexed */ |
| 107 | int cn, bn; |
| 108 | |
| 109 | fp = open_unicode_test("NormalizationTest.txt"); |
| 110 | while(!inputline("NormalizationTest.txt", fp, &l, '\n')) { |
| 111 | ++lineno; |
| 112 | if(*l == '#' || *l == '@') |
| 113 | continue; |
| 114 | bn = 0; |
| 115 | cn = 1; |
| 116 | lp = l; |
| 117 | c[cn++] = &buffer[bn]; |
| 118 | while(*lp && *lp != '#') { |
| 119 | if(*lp == ' ') { |
| 120 | ++lp; |
| 121 | continue; |
| 122 | } |
| 123 | if(*lp == ';') { |
| 124 | buffer[bn++] = 0; |
| 125 | if(cn == 6) |
| 126 | break; |
| 127 | c[cn++] = &buffer[bn]; |
| 128 | ++lp; |
| 129 | continue; |
| 130 | } |
| 131 | buffer[bn++] = strtoul(lp, &lp, 16); |
| 132 | } |
| 133 | buffer[bn] = 0; |
| 134 | assert(cn == 6); |
| 135 | for(cn = 1; cn <= 5; ++cn) { |
| 136 | NFD_c[cn] = utf32_decompose_canon(c[cn], utf32_len(c[cn]), 0); |
| 137 | NFKD_c[cn] = utf32_decompose_compat(c[cn], utf32_len(c[cn]), 0); |
| 138 | NFC_c[cn] = utf32_compose_canon(c[cn], utf32_len(c[cn]), 0); |
| 139 | NFKC_c[cn] = utf32_compose_compat(c[cn], utf32_len(c[cn]), 0); |
| 140 | } |
| 141 | #define unt_check(T, A, B) do { \ |
| 142 | ++tests; \ |
| 143 | if(utf32_cmp(c[A], T##_c[B])) { \ |
| 144 | fprintf(stderr, \ |
| 145 | "NormalizationTest.txt:%d: c%d != "#T"(c%d)\n", \ |
| 146 | lineno, A, B); \ |
| 147 | fprintf(stderr, " c%d:%s\n", \ |
| 148 | A, format_utf32(c[A])); \ |
| 149 | fprintf(stderr, " c%d:%s\n", \ |
| 150 | B, format_utf32(c[B])); \ |
| 151 | fprintf(stderr, "%4s(c%d):%s\n", \ |
| 152 | #T, B, format_utf32(T##_c[B])); \ |
| 153 | count_error(); \ |
| 154 | } \ |
| 155 | } while(0) |
| 156 | unt_check(NFD, 3, 1); |
| 157 | unt_check(NFD, 3, 2); |
| 158 | unt_check(NFD, 3, 3); |
| 159 | unt_check(NFD, 5, 4); |
| 160 | unt_check(NFD, 5, 5); |
| 161 | unt_check(NFKD, 5, 1); |
| 162 | unt_check(NFKD, 5, 2); |
| 163 | unt_check(NFKD, 5, 3); |
| 164 | unt_check(NFKD, 5, 4); |
| 165 | unt_check(NFKD, 5, 5); |
| 166 | unt_check(NFC, 2, 1); |
| 167 | unt_check(NFC, 2, 2); |
| 168 | unt_check(NFC, 2, 3); |
| 169 | unt_check(NFC, 4, 4); |
| 170 | unt_check(NFC, 4, 5); |
| 171 | unt_check(NFKC, 4, 1); |
| 172 | unt_check(NFKC, 4, 2); |
| 173 | unt_check(NFKC, 4, 3); |
| 174 | unt_check(NFKC, 4, 4); |
| 175 | unt_check(NFKC, 4, 5); |
| 176 | for(cn = 1; cn <= 5; ++cn) { |
| 177 | xfree(NFD_c[cn]); |
| 178 | xfree(NFKD_c[cn]); |
| 179 | } |
| 180 | xfree(l); |
| 181 | } |
| 182 | fclose(fp); |
| 183 | breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_grapheme_boundary); |
| 184 | breaktest("auxiliary/WordBreakTest.txt", utf32_is_word_boundary); |
| 185 | insist(utf32_combining_class(0x40000) == 0); |
| 186 | insist(utf32_combining_class(0xE0000) == 0); |
| 187 | } |
| 188 | |
/* NOTE(review): TEST() is not defined in this file (presumably it comes from
 * test.h); it presumably hooks test_unicode() into the test harness --
 * confirm against its definition. */
TEST(unicode);
| 190 | |
| 191 | /* |
| 192 | Local Variables: |
| 193 | c-basic-offset:2 |
| 194 | comment-column:40 |
| 195 | fill-column:79 |
| 196 | indent-tabs-mode:nil |
| 197 | End: |
| 198 | */ |