dynstr_init(&d);
while((c = *s++)) {
- if(c >= 32 && c <= 127)
- dynstr_append(&d, c);
- else {
- sprintf(buf, "\\x%04lX", (unsigned long)c);
- dynstr_append_string(&d, buf);
- }
+ sprintf(buf, " %04lX", (long)c);
+ dynstr_append_string(&d, buf);
}
dynstr_terminate(&d);
return d.vec;
char *u8; \
\
insist(validutf8(CHARS)); \
- ucs = utf82ucs4(CHARS); \
+ ucs = utf8_to_utf32(CHARS, strlen(CHARS), 0); \
insist(ucs != 0); \
- insist(!ucs4cmp(w, ucs)); \
- u8 = ucs42utf8(ucs); \
+ insist(!utf32_cmp(w, ucs)); \
+ u8 = utf32_to_utf8(ucs, utf32_len(ucs), 0); \
insist(u8 != 0); \
insist(!strcmp(u8, CHARS)); \
} while(0)
fprintf(stderr, "test_utf8\n");
+#define validutf8(S) utf8_valid((S), strlen(S))
/* empty string */
break;
}
if(l) {
+ uint32_t *d;
/* Case-folded data is now normalized */
- canon_expected = ucs42utf8(utf32_decompose_canon(&l, 1, 0));
+ d = utf32_decompose_canon(&l, 1, 0);
+ canon_expected = utf32_to_utf8(d, utf32_len(d), 0);
if(strcmp(canon_folded, canon_expected)) {
fprintf(stderr, "%s:%d: canon-casefolding %#lx got '%s', expected '%s'\n",
__FILE__, __LINE__, (unsigned long)c,
count_error();
}
++tests;
- compat_expected = ucs42utf8(utf32_decompose_compat(&l, 1, 0));
+ d = utf32_decompose_compat(&l, 1, 0);
+ compat_expected = utf32_to_utf8(d, utf32_len(d), 0);
if(strcmp(compat_folded, compat_expected)) {
fprintf(stderr, "%s:%d: compat-casefolding %#lx got '%s', expected '%s'\n",
__FILE__, __LINE__, (unsigned long)c,
return fp;
}
-/** @brief Run breaking tests for utf32_is_gcb() etc */
+/** @brief Run breaking tests for utf32_is_grapheme_boundary() etc */
static void breaktest(const char *path,
int (*breakfn)(const uint32_t *, size_t, size_t)) {
FILE *fp = open_unicode_test(path);
for(n = 0; n <= bn; ++n) {
if(breakfn(buffer, bn, n) != break_allowed[n]) {
fprintf(stderr,
- "%s:%d: offset %zu: mismatch\n",
- path, lineno, n);
+ "%s:%d: offset %zu: mismatch\n"
+ "%s\n"
+ "\n",
+ path, lineno, n, l);
count_error();
}
++tests;
int lineno = 0;
char *l, *lp;
uint32_t buffer[1024];
- uint32_t *c[6], *NFD_c[6], *NFKD_c[6]; /* 1-indexed */
+ uint32_t *c[6], *NFD_c[6], *NFKD_c[6], *NFC_c[6], *NFKC_c[6]; /* 1-indexed */
int cn, bn;
fprintf(stderr, "test_unicode\n");
for(cn = 1; cn <= 5; ++cn) {
NFD_c[cn] = utf32_decompose_canon(c[cn], utf32_len(c[cn]), 0);
NFKD_c[cn] = utf32_decompose_compat(c[cn], utf32_len(c[cn]), 0);
+ NFC_c[cn] = utf32_compose_canon(c[cn], utf32_len(c[cn]), 0);
+ NFKC_c[cn] = utf32_compose_compat(c[cn], utf32_len(c[cn]), 0);
}
#define unt_check(T, A, B) do { \
++tests; \
fprintf(stderr, \
"NormalizationTest.txt:%d: c%d != "#T"(c%d)\n", \
lineno, A, B); \
- fprintf(stderr, " c%d: %s\n", \
+ fprintf(stderr, " c%d:%s\n", \
A, format_utf32(c[A])); \
- fprintf(stderr, "%4s(c%d): %s\n", \
+ fprintf(stderr, " c%d:%s\n", \
+ B, format_utf32(c[B])); \
+ fprintf(stderr, "%4s(c%d):%s\n", \
#T, B, format_utf32(T##_c[B])); \
- count_error(); \
+ count_error(); \
} \
} while(0)
unt_check(NFD, 3, 1);
unt_check(NFKD, 5, 3);
unt_check(NFKD, 5, 4);
unt_check(NFKD, 5, 5);
+ unt_check(NFC, 2, 1);
+ unt_check(NFC, 2, 2);
+ unt_check(NFC, 2, 3);
+ unt_check(NFC, 4, 4);
+ unt_check(NFC, 4, 5);
+ unt_check(NFKC, 4, 1);
+ unt_check(NFKC, 4, 2);
+ unt_check(NFKC, 4, 3);
+ unt_check(NFKC, 4, 4);
+ unt_check(NFKC, 4, 5);
for(cn = 1; cn <= 5; ++cn) {
xfree(NFD_c[cn]);
xfree(NFKD_c[cn]);
xfree(l);
}
fclose(fp);
- breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_gcb);
+ breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_grapheme_boundary);
breaktest("auxiliary/WordBreakTest.txt", utf32_is_word_boundary);
}