chiark - git - mdw - disorder/blame_incremental

... / ...

Commit	Line	Data
/*
 * This file is part of DisOrder.
 * Copyright (C) 2005, 2007 Richard Kettlewell
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */
/** @file lib/test.c @brief Library tests */
	21
#include <config.h>
#include "types.h"

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <ctype.h>
#include <limits.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>

#include "utf8.h"
#include "mem.h"
#include "log.h"
#include "vector.h"
#include "charset.h"
#include "mime.h"
#include "hex.h"
#include "heap.h"
#include "unicode.h"
#include "inputline.h"
#include "wstat.h"
	45
	46	static int tests, errors;
	47	static int fail_first;
	48
	49	static void count_error() {
	50	++errors;
	51	if(fail_first)
	52	abort();
	53	}
	54
	55	/** @brief Checks that @p expr is nonzero */
	56	#define insist(expr) do { \
	57	if(!(expr)) { \
	58	count_error(); \
	59	fprintf(stderr, "%s:%d: error checking %s\n", \
	60	__FILE__, __LINE__, #expr); \
	61	} \
	62	++tests; \
	63	} while(0)
	64
	65	static const char format(const char s) {
	66	struct dynstr d;
	67	int c;
	68	char buf[10];
	69
	70	dynstr_init(&d);
	71	while((c = (unsigned char)*s++)) {
	72	if(c >= ' ' && c <= '~')
	73	dynstr_append(&d, c);
	74	else {
	75	sprintf(buf, "\\x%02X", (unsigned)c);
	76	dynstr_append_string(&d, buf);
	77	}
	78	}
	79	dynstr_terminate(&d);
	80	return d.vec;
	81	}
	82
	83	static const char format_utf32(const uint32_t s) {
	84	struct dynstr d;
	85	uint32_t c;
	86	char buf[64];
	87
	88	dynstr_init(&d);
	89	while((c = *s++)) {
	90	sprintf(buf, " %04lX", (long)c);
	91	dynstr_append_string(&d, buf);
	92	}
	93	dynstr_terminate(&d);
	94	return d.vec;
	95	}
	96
	97	#define check_string(GOT, WANT) do { \
	98	const char *g = GOT; \
	99	const char *w = WANT; \
	100	\
	101	if(w == 0) { \
	102	fprintf(stderr, "%s:%d: %s returned 0\n", \
	103	__FILE__, __LINE__, #GOT); \
	104	count_error(); \
	105	} else if(strcmp(w, g)) { \
	106	fprintf(stderr, "%s:%d: %s returned:\n%s\nexpected:\n%s\n", \
	107	__FILE__, __LINE__, #GOT, format(g), format(w)); \
	108	count_error(); \
	109	} \
	110	++tests; \
	111	} while(0)
	112
	113	static uint32_t ucs4parse(const char s) {
	114	struct dynstr_ucs4 d;
	115	char *e;
	116
	117	dynstr_ucs4_init(&d);
	118	while(*s) {
	119	errno = 0;
	120	dynstr_ucs4_append(&d, strtoul(s, &e, 0));
	121	if(errno) fatal(errno, "strtoul (%s)", s);
	122	s = e;
	123	}
	124	dynstr_ucs4_terminate(&d);
	125	return d.vec;
	126	}
	127
	128	static void test_utf8(void) {
	129	/* Test validutf8, convert to UCS-4, check the answer is right,
	130	* convert back to UTF-8, check we got to where we started */
	131	#define U8(CHARS, WORDS) do { \
	132	uint32_t *w = ucs4parse(WORDS); \
	133	uint32_t *ucs; \
	134	char *u8; \
	135	\
	136	insist(validutf8(CHARS)); \
	137	ucs = utf8_to_utf32(CHARS, strlen(CHARS), 0); \
	138	insist(ucs != 0); \
	139	insist(!utf32_cmp(w, ucs)); \
	140	u8 = utf32_to_utf8(ucs, utf32_len(ucs), 0); \
	141	insist(u8 != 0); \
	142	insist(!strcmp(u8, CHARS)); \
	143	} while(0)
	144
	145	fprintf(stderr, "test_utf8\n");
	146	#define validutf8(S) utf8_valid((S), strlen(S))
	147
	148	/* empty string */
	149
	150	U8("", "");
	151
	152	/* ASCII characters */
	153
	154	U8(" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{\|}~",
	155	"0x20 0x21 0x22 0x23 0x24 0x25 0x26 0x27 0x28 0x29 0x2a 0x2b 0x2c 0x2d "
	156	"0x2e 0x2f 0x30 0x31 0x32 0x33 0x34 0x35 0x36 0x37 0x38 0x39 0x3a "
	157	"0x3b 0x3c 0x3d 0x3e 0x3f 0x40 0x41 0x42 0x43 0x44 0x45 0x46 0x47 "
	158	"0x48 0x49 0x4a 0x4b 0x4c 0x4d 0x4e 0x4f 0x50 0x51 0x52 0x53 0x54 "
	159	"0x55 0x56 0x57 0x58 0x59 0x5a 0x5b 0x5c 0x5d 0x5e 0x5f 0x60 0x61 "
	160	"0x62 0x63 0x64 0x65 0x66 0x67 0x68 0x69 0x6a 0x6b 0x6c 0x6d 0x6e "
	161	"0x6f 0x70 0x71 0x72 0x73 0x74 0x75 0x76 0x77 0x78 0x79 0x7a 0x7b "
	162	"0x7c 0x7d 0x7e");
	163	U8("\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\177",
	164	"0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xa 0xb 0xc 0xd 0xe 0xf 0x10 "
	165	"0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d "
	166	"0x1e 0x1f 0x7f");
	167
	168	/* from RFC3629 */
	169
	170	/* UTF8-2 = %xC2-DF UTF8-tail */
	171	insist(!validutf8("\xC0\x80"));
	172	insist(!validutf8("\xC1\x80"));
	173	insist(!validutf8("\xC2\x7F"));
	174	U8("\xC2\x80", "0x80");
	175	U8("\xDF\xBF", "0x7FF");
	176	insist(!validutf8("\xDF\xC0"));
	177
	178	/* UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
	179	* %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
	180	*/
	181	insist(!validutf8("\xE0\x9F\x80"));
	182	U8("\xE0\xA0\x80", "0x800");
	183	U8("\xE0\xBF\xBF", "0xFFF");
	184	insist(!validutf8("\xE0\xC0\xBF"));
	185
	186	insist(!validutf8("\xE1\x80\x7F"));
	187	U8("\xE1\x80\x80", "0x1000");
	188	U8("\xEC\xBF\xBF", "0xCFFF");
	189	insist(!validutf8("\xEC\xC0\xBF"));
	190
	191	U8("\xED\x80\x80", "0xD000");
	192	U8("\xED\x9F\xBF", "0xD7FF");
	193	insist(!validutf8("\xED\xA0\xBF"));
	194
	195	insist(!validutf8("\xEE\x7f\x80"));
	196	U8("\xEE\x80\x80", "0xE000");
	197	U8("\xEF\xBF\xBF", "0xFFFF");
	198	insist(!validutf8("\xEF\xC0\xBF"));
	199
	200	/* UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
	201	* %xF4 %x80-8F 2( UTF8-tail )
	202	*/
	203	insist(!validutf8("\xF0\x8F\x80\x80"));
	204	U8("\xF0\x90\x80\x80", "0x10000");
	205	U8("\xF0\xBF\xBF\xBF", "0x3FFFF");
	206	insist(!validutf8("\xF0\xC0\x80\x80"));
	207
	208	insist(!validutf8("\xF1\x80\x80\x7F"));
	209	U8("\xF1\x80\x80\x80", "0x40000");
	210	U8("\xF3\xBF\xBF\xBF", "0xFFFFF");
	211	insist(!validutf8("\xF3\xC0\x80\x80"));
	212
	213	insist(!validutf8("\xF4\x80\x80\x7F"));
	214	U8("\xF4\x80\x80\x80", "0x100000");
	215	U8("\xF4\x8F\xBF\xBF", "0x10FFFF");
	216	insist(!validutf8("\xF4\x90\x80\x80"));
	217
	218	/* miscellaneous non-UTF-8 rubbish */
	219	insist(!validutf8("\x80"));
	220	insist(!validutf8("\xBF"));
	221	insist(!validutf8("\xC0"));
	222	insist(!validutf8("\xC0\x7F"));
	223	insist(!validutf8("\xC0\xC0"));
	224	insist(!validutf8("\xE0"));
	225	insist(!validutf8("\xE0\x7F"));
	226	insist(!validutf8("\xE0\xC0"));
	227	insist(!validutf8("\xE0\x80"));
	228	insist(!validutf8("\xE0\x80\x7f"));
	229	insist(!validutf8("\xE0\x80\xC0"));
	230	insist(!validutf8("\xF0"));
	231	insist(!validutf8("\xF0\x7F"));
	232	insist(!validutf8("\xF0\xC0"));
	233	insist(!validutf8("\xF0\x80"));
	234	insist(!validutf8("\xF0\x80\x7f"));
	235	insist(!validutf8("\xF0\x80\xC0"));
	236	insist(!validutf8("\xF0\x80\x80\x7f"));
	237	insist(!validutf8("\xF0\x80\x80\xC0"));
	238	insist(!validutf8("\xF5\x80\x80\x80"));
	239	insist(!validutf8("\xF8"));
	240	}
	241
	242	static void test_mime(void) {
	243	char t, n, *v;
	244
	245	fprintf(stderr, "test_mime\n");
	246
	247	t = n = v = 0;
	248	insist(!mime_content_type("text/plain", &t, &n, &v));
	249	insist(!strcmp(t, "text/plain"));
	250	insist(n == 0);
	251	insist(v == 0);
	252
	253	t = n = v = 0;
	254	insist(!mime_content_type("TEXT ((nested) comment) /plain", &t, &n, &v));
	255	insist(!strcmp(t, "text/plain"));
	256	insist(n == 0);
	257	insist(v == 0);
	258
	259	t = n = v = 0;
	260	insist(!mime_content_type(" text/plain ; Charset=utf-8", &t, &n, &v));
	261	insist(!strcmp(t, "text/plain"));
	262	insist(!strcmp(n, "charset"));
	263	insist(!strcmp(v, "utf-8"));
	264
	265	t = n = v = 0;
	266	insist(!mime_content_type("text/plain;charset = ISO-8859-1 ", &t, &n, &v));
	267	insist(!strcmp(t, "text/plain"));
	268	insist(!strcmp(n, "charset"));
	269	insist(!strcmp(v, "ISO-8859-1"));
	270
	271	/* XXX mime_parse */
	272	/* XXX mime_multipart */
	273	/* XXX mime_rfc2388_content_disposition */
	274
	275	check_string(mime_qp(""), "");
	276	check_string(mime_qp("foobar"), "foobar");
	277	check_string(mime_qp("foo=20bar"), "foo bar");
	278	check_string(mime_qp("x \r\ny"), "x\r\ny");
	279	check_string(mime_qp("x=\r\ny"), "xy");
	280	check_string(mime_qp("x= \r\ny"), "xy");
	281	check_string(mime_qp("x =\r\ny"), "x y");
	282	check_string(mime_qp("x = \r\ny"), "x y");
	283
	284	/* from RFC2045 */
	285	check_string(mime_qp("Now's the time =\r\n"
	286	"for all folk to come=\r\n"
	287	" to the aid of their country."),
	288	"Now's the time for all folk to come to the aid of their country.");
	289
	290	check_string(mime_base64(""), "");
	291	check_string(mime_base64("BBBB"), "\x04\x10\x41");
	292	check_string(mime_base64("////"), "\xFF\xFF\xFF");
	293	check_string(mime_base64("//BB"), "\xFF\xF0\x41");
	294	check_string(mime_base64("BBBB//BB////"),
	295	"\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	296	check_string(mime_base64("B B B B / / B B / / / /"),
	297	"\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	298	check_string(mime_base64("B\r\nBBB.// B-B//~//"),
	299	"\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	300	check_string(mime_base64("BBBB="),
	301	"\x04\x10\x41");
	302	check_string(mime_base64("BBBBx="), /* not actually valid base64 */
	303	"\x04\x10\x41");
	304	check_string(mime_base64("BBBB BB=="),
	305	"\x04\x10\x41" "\x04");
	306	check_string(mime_base64("BBBB BBB="),
	307	"\x04\x10\x41" "\x04\x10");
	308	}
	309
	310	static void test_hex(void) {
	311	unsigned n;
	312	static const unsigned char h[] = { 0x00, 0xFF, 0x80, 0x7F };
	313	uint8_t *u;
	314	size_t ul;
	315
	316	fprintf(stderr, "test_hex\n");
	317
	318	for(n = 0; n <= UCHAR_MAX; ++n) {
	319	if(!isxdigit(n))
	320	insist(unhexdigitq(n) == -1);
	321	}
	322	insist(unhexdigitq('0') == 0);
	323	insist(unhexdigitq('1') == 1);
	324	insist(unhexdigitq('2') == 2);
	325	insist(unhexdigitq('3') == 3);
	326	insist(unhexdigitq('4') == 4);
	327	insist(unhexdigitq('5') == 5);
	328	insist(unhexdigitq('6') == 6);
	329	insist(unhexdigitq('7') == 7);
	330	insist(unhexdigitq('8') == 8);
	331	insist(unhexdigitq('9') == 9);
	332	insist(unhexdigitq('a') == 10);
	333	insist(unhexdigitq('b') == 11);
	334	insist(unhexdigitq('c') == 12);
	335	insist(unhexdigitq('d') == 13);
	336	insist(unhexdigitq('e') == 14);
	337	insist(unhexdigitq('f') == 15);
	338	insist(unhexdigitq('A') == 10);
	339	insist(unhexdigitq('B') == 11);
	340	insist(unhexdigitq('C') == 12);
	341	insist(unhexdigitq('D') == 13);
	342	insist(unhexdigitq('E') == 14);
	343	insist(unhexdigitq('F') == 15);
	344	check_string(hex(h, sizeof h), "00ff807f");
	345	check_string(hex(0, 0), "");
	346	u = unhex("00ff807f", &ul);
	347	insist(ul == 4);
	348	insist(memcmp(u, h, 4) == 0);
	349	u = unhex("00FF807F", &ul);
	350	insist(ul == 4);
	351	insist(memcmp(u, h, 4) == 0);
	352	u = unhex("", &ul);
	353	insist(ul == 0);
	354	fprintf(stderr, "2 ERROR reports expected {\n");
	355	insist(unhex("F", 0) == 0);
	356	insist(unhex("az", 0) == 0);
	357	fprintf(stderr, "}\n");
	358	}
	359
	360	static void test_casefold(void) {
	361	uint32_t c, l;
	362	const char input, canon_folded, compat_folded, canon_expected, *compat_expected;
	363
	364	fprintf(stderr, "test_casefold\n");
	365
	366	/* This isn't a very exhaustive test. Unlike for normalization, there don't
	367	* seem to be any public test vectors for these algorithms. */
	368
	369	for(c = 1; c < 256; ++c) {
	370	input = utf32_to_utf8(&c, 1, 0);
	371	canon_folded = utf8_casefold_canon(input, strlen(input), 0);
	372	compat_folded = utf8_casefold_compat(input, strlen(input), 0);
	373	switch(c) {
	374	default:
	375	if((c >= 'A' && c <= 'Z')
	376	\|\| (c >= 0xC0 && c <= 0xDE && c != 0xD7))
	377	l = c ^ 0x20;
	378	else
	379	l = c;
	380	break;
	381	case 0xB5: /* MICRO SIGN */
	382	l = 0x3BC; /* GREEK SMALL LETTER MU */
	383	break;
	384	case 0xDF: /* LATIN SMALL LETTER SHARP S */
	385	insist(!strcmp(canon_folded, "ss"));
	386	insist(!strcmp(compat_folded, "ss"));
	387	l = 0;
	388	break;
	389	}
	390	if(l) {
	391	uint32_t *d;
	392	/* Case-folded data is now normalized */
	393	d = utf32_decompose_canon(&l, 1, 0);
	394	canon_expected = utf32_to_utf8(d, utf32_len(d), 0);
	395	if(strcmp(canon_folded, canon_expected)) {
	396	fprintf(stderr, "%s:%d: canon-casefolding %#lx got '%s', expected '%s'\n",
	397	__FILE__, __LINE__, (unsigned long)c,
	398	format(canon_folded), format(canon_expected));
	399	count_error();
	400	}
	401	++tests;
	402	d = utf32_decompose_compat(&l, 1, 0);
	403	compat_expected = utf32_to_utf8(d, utf32_len(d), 0);
	404	if(strcmp(compat_folded, compat_expected)) {
	405	fprintf(stderr, "%s:%d: compat-casefolding %#lx got '%s', expected '%s'\n",
	406	__FILE__, __LINE__, (unsigned long)c,
	407	format(compat_folded), format(compat_expected));
	408	count_error();
	409	}
	410	++tests;
	411	}
	412	}
	413	check_string(utf8_casefold_canon("", 0, 0), "");
	414	}
	415
	416	struct {
	417	const char *in;
	418	const char *expect[10];
	419	} wtest[] = {
	420	/* Empty string */
	421	{ "", { 0 } },
	422	/* Only whitespace and punctuation */
	423	{ " ", { 0 } },
	424	{ " ' ", { 0 } },
	425	{ " ! ", { 0 } },
	426	{ " \"\" ", { 0 } },
	427	{ " @ ", { 0 } },
	428	/* Basics */
	429	{ "wibble", { "wibble", 0 } },
	430	{ " wibble", { "wibble", 0 } },
	431	{ " wibble ", { "wibble", 0 } },
	432	{ "wibble ", { "wibble", 0 } },
	433	{ "wibble spong", { "wibble", "spong", 0 } },
	434	{ " wibble spong", { "wibble", "spong", 0 } },
	435	{ " wibble spong ", { "wibble", "spong", 0 } },
	436	{ "wibble spong ", { "wibble", "spong", 0 } },
	437	{ "wibble spong splat foo zot ", { "wibble", "spong", "splat", "foo", "zot", 0 } },
	438	/* Apostrophes */
	439	{ "wibble 'spong", { "wibble", "spong", 0 } },
	440	{ " wibble's", { "wibble's", 0 } },
	441	{ " wibblespong' ", { "wibblespong", 0 } },
	442	{ "wibble sp''ong ", { "wibble", "sp", "ong", 0 } },
	443	};
	444	#define NWTEST (sizeof wtest / sizeof *wtest)
	445
	446	static void test_words(void) {
	447	size_t t, nexpect, ngot, i;
	448	int right;
	449
	450	fprintf(stderr, "test_words\n");
	451	for(t = 0; t < NWTEST; ++t) {
	452	char **got = utf8_word_split(wtest[t].in, strlen(wtest[t].in), &ngot, 0);
	453
	454	for(nexpect = 0; wtest[t].expect[nexpect]; ++nexpect)
	455	;
	456	if(nexpect == ngot) {
	457	for(i = 0; i < ngot; ++i)
	458	if(strcmp(wtest[t].expect[i], got[i]))
	459	break;
	460	right = i == ngot;
	461	} else
	462	right = 0;
	463	if(!right) {
	464	fprintf(stderr, "word split %zu failed\n", t);
	465	fprintf(stderr, "input: %s\n", wtest[t].in);
	466	fprintf(stderr, " \| %-30s \| %-30s\n",
	467	"expected", "got");
	468	for(i = 0; i < nexpect \|\| i < ngot; ++i) {
	469	const char *e = i < nexpect ? wtest[t].expect[i] : "<none>";
	470	const char *g = i < ngot ? got[i] : "<none>";
	471	fprintf(stderr, " %2zu \| %-30s \| %-30s\n", i, e, g);
	472	}
	473	count_error();
	474	}
	475	++tests;
	476	}
	477	}
	478
	479	/** @brief Less-than comparison function for integer heap */
	480	static inline int int_lt(int a, int b) { return a < b; }
	481
	482	/** @struct iheap
	483	* @brief A heap with @c int elements */
	484	HEAP_TYPE(iheap, int, int_lt);
	485	HEAP_DEFINE(iheap, int, int_lt);
	486
	487	/** @brief Tests for @ref heap.h */
	488	static void test_heap(void) {
	489	struct iheap h[1];
	490	int n;
	491	int last = -1;
	492
	493	fprintf(stderr, "test_heap\n");
	494
	495	iheap_init(h);
	496	for(n = 0; n < 1000; ++n)
	497	iheap_insert(h, random() % 100);
	498	for(n = 0; n < 1000; ++n) {
	499	const int latest = iheap_remove(h);
	500	if(last > latest)
	501	fprintf(stderr, "should have %d <= %d\n", last, latest);
	502	insist(last <= latest);
	503	last = latest;
	504	}
	505	putchar('\n');
	506	}
	507
	508	/** @brief Open a Unicode test file */
	509	static FILE open_unicode_test(const char path) {
	510	const char *base;
	511	FILE *fp;
	512	char buffer[1024];
	513	int w;
	514
	515	if((base = strrchr(path, '/')))
	516	++base;
	517	else
	518	base = path;
	519	if(!(fp = fopen(base, "r"))) {
	520	snprintf(buffer, sizeof buffer,
	521	"wget http://www.unicode.org/Public/5.0.0/ucd/%s", path);
	522	if((w = system(buffer)))
	523	fatal(0, "%s: %s", buffer, wstat(w));
	524	if(chmod(base, 0444) < 0)
	525	fatal(errno, "chmod %s", base);
	526	if(!(fp = fopen(base, "r")))
	527	fatal(errno, "%s", base);
	528	}
	529	return fp;
	530	}
	531
	532	/** @brief Run breaking tests for utf32_grapheme_boundary() etc */
	533	static void breaktest(const char *path,
	534	int (breakfn)(const uint32_t , size_t, size_t)) {
	535	FILE *fp = open_unicode_test(path);
	536	int lineno = 0;
	537	char l, lp;
	538	size_t bn, n;
	539	char break_allowed[1024];
	540	uint32_t buffer[1024];
	541
	542	while(!inputline(path, fp, &l, '\n')) {
	543	++lineno;
	544	if(l[0] == '#') continue;
	545	bn = 0;
	546	lp = l;
	547	while(*lp) {
	548	if(lp == ' ' \|\| lp == '\t') {
	549	++lp;
	550	continue;
	551	}
	552	if(*lp == '#')
	553	break;
	554	if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0xB7) {
	555	/* 00F7 DIVISION SIGN */
	556	break_allowed[bn] = 1;
	557	lp += 2;
	558	continue;
	559	}
	560	if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0x97) {
	561	/* 00D7 MULTIPLICATION SIGN */
	562	break_allowed[bn] = 0;
	563	lp += 2;
	564	continue;
	565	}
	566	if(isxdigit((unsigned char)*lp)) {
	567	buffer[bn++] = strtoul(lp, &lp, 16);
	568	continue;
	569	}
	570	fatal(0, "%s:%d: evil line: %s", path, lineno, l);
	571	}
	572	for(n = 0; n <= bn; ++n) {
	573	if(breakfn(buffer, bn, n) != break_allowed[n]) {
	574	fprintf(stderr,
	575	"%s:%d: offset %zu: mismatch\n"
	576	"%s\n"
	577	"\n",
	578	path, lineno, n, l);
	579	count_error();
	580	}
	581	++tests;
	582	}
	583	xfree(l);
	584	}
	585	fclose(fp);
	586	}
	587
	588	/** @brief Tests for @ref lib/unicode.h */
	589	static void test_unicode(void) {
	590	FILE *fp;
	591	int lineno = 0;
	592	char l, lp;
	593	uint32_t buffer[1024];
	594	uint32_t c[6], NFD_c[6], NFKD_c[6], NFC_c[6], NFKC_c[6]; / 1-indexed */
	595	int cn, bn;
	596
	597	fprintf(stderr, "test_unicode\n");
	598	fp = open_unicode_test("NormalizationTest.txt");
	599	while(!inputline("NormalizationTest.txt", fp, &l, '\n')) {
	600	++lineno;
	601	if(l == '#' \|\| l == '@')
	602	continue;
	603	bn = 0;
	604	cn = 1;
	605	lp = l;
	606	c[cn++] = &buffer[bn];
	607	while(lp && lp != '#') {
	608	if(*lp == ' ') {
	609	++lp;
	610	continue;
	611	}
	612	if(*lp == ';') {
	613	buffer[bn++] = 0;
	614	if(cn == 6)
	615	break;
	616	c[cn++] = &buffer[bn];
	617	++lp;
	618	continue;
	619	}
	620	buffer[bn++] = strtoul(lp, &lp, 16);
	621	}
	622	buffer[bn] = 0;
	623	assert(cn == 6);
	624	for(cn = 1; cn <= 5; ++cn) {
	625	NFD_c[cn] = utf32_decompose_canon(c[cn], utf32_len(c[cn]), 0);
	626	NFKD_c[cn] = utf32_decompose_compat(c[cn], utf32_len(c[cn]), 0);
	627	NFC_c[cn] = utf32_compose_canon(c[cn], utf32_len(c[cn]), 0);
	628	NFKC_c[cn] = utf32_compose_compat(c[cn], utf32_len(c[cn]), 0);
	629	}
	630	#define unt_check(T, A, B) do { \
	631	++tests; \
	632	if(utf32_cmp(c[A], T##_c[B])) { \
	633	fprintf(stderr, \
	634	"NormalizationTest.txt:%d: c%d != "#T"(c%d)\n", \
	635	lineno, A, B); \
	636	fprintf(stderr, " c%d:%s\n", \
	637	A, format_utf32(c[A])); \
	638	fprintf(stderr, " c%d:%s\n", \
	639	B, format_utf32(c[B])); \
	640	fprintf(stderr, "%4s(c%d):%s\n", \
	641	#T, B, format_utf32(T##_c[B])); \
	642	count_error(); \
	643	} \
	644	} while(0)
	645	unt_check(NFD, 3, 1);
	646	unt_check(NFD, 3, 2);
	647	unt_check(NFD, 3, 3);
	648	unt_check(NFD, 5, 4);
	649	unt_check(NFD, 5, 5);
	650	unt_check(NFKD, 5, 1);
	651	unt_check(NFKD, 5, 2);
	652	unt_check(NFKD, 5, 3);
	653	unt_check(NFKD, 5, 4);
	654	unt_check(NFKD, 5, 5);
	655	unt_check(NFC, 2, 1);
	656	unt_check(NFC, 2, 2);
	657	unt_check(NFC, 2, 3);
	658	unt_check(NFC, 4, 4);
	659	unt_check(NFC, 4, 5);
	660	unt_check(NFKC, 4, 1);
	661	unt_check(NFKC, 4, 2);
	662	unt_check(NFKC, 4, 3);
	663	unt_check(NFKC, 4, 4);
	664	unt_check(NFKC, 4, 5);
	665	for(cn = 1; cn <= 5; ++cn) {
	666	xfree(NFD_c[cn]);
	667	xfree(NFKD_c[cn]);
	668	}
	669	xfree(l);
	670	}
	671	fclose(fp);
	672	breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_grapheme_boundary);
	673	breaktest("auxiliary/WordBreakTest.txt", utf32_is_word_boundary);
	674	}
	675
	676	int main(void) {
	677	fail_first = !!getenv("FAIL_FIRST");
	678	insist('\n' == 0x0A);
	679	insist('\r' == 0x0D);
	680	insist(' ' == 0x20);
	681	insist('0' == 0x30);
	682	insist('9' == 0x39);
	683	insist('A' == 0x41);
	684	insist('Z' == 0x5A);
	685	insist('a' == 0x61);
	686	insist('z' == 0x7A);
	687	/* addr.c */
	688	/* asprintf.c */
	689	/* authhash.c */
	690	/* basen.c */
	691	/* charset.c */
	692	/* client.c */
	693	/* configuration.c */
	694	/* event.c */
	695	/* fprintf.c */
	696	/* heap.c */
	697	test_heap();
	698	/* hex.c */
	699	test_hex();
	700	/* inputline.c */
	701	/* kvp.c */
	702	/* log.c */
	703	/* mem.c */
	704	/* mime.c */
	705	test_mime();
	706	/* mixer.c */
	707	/* plugin.c */
	708	/* printf.c */
	709	/* queue.c */
	710	/* sink.c */
	711	/* snprintf.c */
	712	/* split.c */
	713	/* syscalls.c */
	714	/* table.c */
	715	/* unicode.c */
	716	test_unicode();
	717	/* utf8.c */
	718	test_utf8();
	719	/* vector.c */
	720	/* words.c */
	721	test_casefold();
	722	test_words();
	723	/* XXX words() */
	724	/* wstat.c */
	725	fprintf(stderr, "%d errors out of %d tests\n", errors, tests);
	726	return !!errors;
	727	}
	728
/*
Local Variables:
c-basic-offset:2
comment-column:40
fill-column:79
indent-tabs-mode:nil
End:
*/