chiark - git - mdw - disorder/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* This file is part of DisOrder.
	3	* Copyright (C) 2005, 2007 Richard Kettlewell
	4	*
	5	* This program is free software; you can redistribute it and/or modify
	6	* it under the terms of the GNU General Public License as published by
	7	* the Free Software Foundation; either version 2 of the License, or
	8	* (at your option) any later version.
	9	*
	10	* This program is distributed in the hope that it will be useful, but
	11	* WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	* General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU General Public License
	16	* along with this program; if not, write to the Free Software
	17	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
	18	* USA
	19	*/
	20	/** @file lib/test.c @brief Library tests */
	21
	22	#include <config.h>
	23	#include "types.h"
	24
	25	#include <stdio.h>
	26	#include <string.h>
	27	#include <stdlib.h>
	28	#include <errno.h>
	29	#include <ctype.h>
	30	#include <assert.h>
	31	#include <sys/types.h>
	32	#include <sys/stat.h>
	33	#include <unistd.h>
	34
	35	#include "utf8.h"
	36	#include "mem.h"
	37	#include "log.h"
	38	#include "vector.h"
	39	#include "charset.h"
	40	#include "mime.h"
	41	#include "hex.h"
	42	#include "heap.h"
	43	#include "unicode.h"
	44	#include "inputline.h"
	45	#include "wstat.h"
	46	#include "signame.h"
	47	#include "cache.h"
	48	#include "filepart.h"
	49
	/* Running totals: checks performed and checks failed */
	static int tests, errors;
	/* Nonzero to abort() as soon as any check fails */
	static int fail_first;

	/** @brief Record one failed check
	 *
	 * Increments @c errors and, when @c fail_first is set, aborts immediately.
	 */
	static void count_error(void) {
	  ++errors;
	  if(fail_first)
	    abort();
	}

	/** @brief Checks that @p expr is nonzero
	 *
	 * On failure the diagnostic is written before count_error() is called, so
	 * the message is still produced when count_error() aborts.  @c tests is
	 * incremented whether or not the check passes.
	 */
	#define insist(expr) do { \
	  if(!(expr)) { \
	    fprintf(stderr, "%s:%d: error checking %s\n", \
	            __FILE__, __LINE__, #expr); \
	    count_error(); \
	  } \
	  ++tests; \
	} while(0)
	68
	69	static const char format(const char s) {
	70	struct dynstr d;
	71	int c;
	72	char buf[10];
	73
	74	dynstr_init(&d);
	75	while((c = (unsigned char)*s++)) {
	76	if(c >= ' ' && c <= '~')
	77	dynstr_append(&d, c);
	78	else {
	79	sprintf(buf, "\\x%02X", (unsigned)c);
	80	dynstr_append_string(&d, buf);
	81	}
	82	}
	83	dynstr_terminate(&d);
	84	return d.vec;
	85	}
	86
	87	static const char format_utf32(const uint32_t s) {
	88	struct dynstr d;
	89	uint32_t c;
	90	char buf[64];
	91
	92	dynstr_init(&d);
	93	while((c = *s++)) {
	94	sprintf(buf, " %04lX", (long)c);
	95	dynstr_append_string(&d, buf);
	96	}
	97	dynstr_terminate(&d);
	98	return d.vec;
	99	}
	100
	/** @brief Checks that string expression @p GOT equals @p WANT
	 *
	 * A null result from @p GOT is reported as an error rather than being
	 * passed to strcmp().  On a mismatch both strings are printed via format().
	 * @c tests is incremented whether or not the check passes.
	 */
	#define check_string(GOT, WANT) do { \
	  const char *g = GOT; \
	  const char *w = WANT; \
	  \
	  if(g == 0) { \
	    fprintf(stderr, "%s:%d: %s returned 0\n", \
	            __FILE__, __LINE__, #GOT); \
	    count_error(); \
	  } else if(strcmp(w, g)) { \
	    fprintf(stderr, "%s:%d: %s returned:\n%s\nexpected:\n%s\n", \
	            __FILE__, __LINE__, #GOT, format(g), format(w)); \
	    count_error(); \
	  } \
	  ++tests; \
	} while(0)
	116
	117	static uint32_t ucs4parse(const char s) {
	118	struct dynstr_ucs4 d;
	119	char *e;
	120
	121	dynstr_ucs4_init(&d);
	122	while(*s) {
	123	errno = 0;
	124	dynstr_ucs4_append(&d, strtoul(s, &e, 0));
	125	if(errno) fatal(errno, "strtoul (%s)", s);
	126	s = e;
	127	}
	128	dynstr_ucs4_terminate(&d);
	129	return d.vec;
	130	}
	131
	/** @brief Tests for UTF-8 validation and UTF-8 <-> UTF-32 conversion
	 *
	 * Valid sequences are round-tripped through UTF-32 with U8(); invalid
	 * sequences are checked to be rejected by utf8_valid().
	 */
	static void test_utf8(void) {
	  /* Validate a NUL-terminated string as UTF-8 */
	#define validutf8(S) utf8_valid((S), strlen(S))

	  /* Test validutf8, convert to UCS-4, check the answer is right,
	   * convert back to UTF-8, check we got to where we started */
	#define U8(CHARS, WORDS) do { \
	  uint32_t *w = ucs4parse(WORDS); \
	  uint32_t *ucs; \
	  char *u8; \
	  \
	  insist(validutf8(CHARS)); \
	  ucs = utf8_to_utf32(CHARS, strlen(CHARS), 0); \
	  insist(ucs != 0); \
	  insist(!utf32_cmp(w, ucs)); \
	  u8 = utf32_to_utf8(ucs, utf32_len(ucs), 0); \
	  insist(u8 != 0); \
	  insist(!strcmp(u8, CHARS)); \
	} while(0)

	  fprintf(stderr, "test_utf8\n");

	  /* empty string */

	  U8("", "");

	  /* ASCII characters */

	  U8(" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
	     "0x20 0x21 0x22 0x23 0x24 0x25 0x26 0x27 0x28 0x29 0x2a 0x2b 0x2c 0x2d "
	     "0x2e 0x2f 0x30 0x31 0x32 0x33 0x34 0x35 0x36 0x37 0x38 0x39 0x3a "
	     "0x3b 0x3c 0x3d 0x3e 0x3f 0x40 0x41 0x42 0x43 0x44 0x45 0x46 0x47 "
	     "0x48 0x49 0x4a 0x4b 0x4c 0x4d 0x4e 0x4f 0x50 0x51 0x52 0x53 0x54 "
	     "0x55 0x56 0x57 0x58 0x59 0x5a 0x5b 0x5c 0x5d 0x5e 0x5f 0x60 0x61 "
	     "0x62 0x63 0x64 0x65 0x66 0x67 0x68 0x69 0x6a 0x6b 0x6c 0x6d 0x6e "
	     "0x6f 0x70 0x71 0x72 0x73 0x74 0x75 0x76 0x77 0x78 0x79 0x7a 0x7b "
	     "0x7c 0x7d 0x7e");
	  U8("\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\177",
	     "0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xa 0xb 0xc 0xd 0xe 0xf 0x10 "
	     "0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d "
	     "0x1e 0x1f 0x7f");

	  /* from RFC3629 */

	  /* UTF8-2 = %xC2-DF UTF8-tail */
	  insist(!validutf8("\xC0\x80"));
	  insist(!validutf8("\xC1\x80"));
	  insist(!validutf8("\xC2\x7F"));
	  U8("\xC2\x80", "0x80");
	  U8("\xDF\xBF", "0x7FF");
	  insist(!validutf8("\xDF\xC0"));

	  /* UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
	   * %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
	   */
	  insist(!validutf8("\xE0\x9F\x80"));
	  U8("\xE0\xA0\x80", "0x800");
	  U8("\xE0\xBF\xBF", "0xFFF");
	  insist(!validutf8("\xE0\xC0\xBF"));

	  insist(!validutf8("\xE1\x80\x7F"));
	  U8("\xE1\x80\x80", "0x1000");
	  U8("\xEC\xBF\xBF", "0xCFFF");
	  insist(!validutf8("\xEC\xC0\xBF"));

	  U8("\xED\x80\x80", "0xD000");
	  U8("\xED\x9F\xBF", "0xD7FF");
	  insist(!validutf8("\xED\xA0\xBF"));

	  insist(!validutf8("\xEE\x7f\x80"));
	  U8("\xEE\x80\x80", "0xE000");
	  U8("\xEF\xBF\xBF", "0xFFFF");
	  insist(!validutf8("\xEF\xC0\xBF"));

	  /* UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
	   * %xF4 %x80-8F 2( UTF8-tail )
	   */
	  insist(!validutf8("\xF0\x8F\x80\x80"));
	  U8("\xF0\x90\x80\x80", "0x10000");
	  U8("\xF0\xBF\xBF\xBF", "0x3FFFF");
	  insist(!validutf8("\xF0\xC0\x80\x80"));

	  insist(!validutf8("\xF1\x80\x80\x7F"));
	  U8("\xF1\x80\x80\x80", "0x40000");
	  U8("\xF3\xBF\xBF\xBF", "0xFFFFF");
	  insist(!validutf8("\xF3\xC0\x80\x80"));

	  insist(!validutf8("\xF4\x80\x80\x7F"));
	  U8("\xF4\x80\x80\x80", "0x100000");
	  U8("\xF4\x8F\xBF\xBF", "0x10FFFF");
	  insist(!validutf8("\xF4\x90\x80\x80"));
	  insist(!validutf8("\xF4\x80\xFF\x80"));

	  /* miscellaneous non-UTF-8 rubbish */
	  insist(!validutf8("\x80"));
	  insist(!validutf8("\xBF"));
	  insist(!validutf8("\xC0"));
	  insist(!validutf8("\xC0\x7F"));
	  insist(!validutf8("\xC0\xC0"));
	  insist(!validutf8("\xE0"));
	  insist(!validutf8("\xE0\x7F"));
	  insist(!validutf8("\xE0\xC0"));
	  insist(!validutf8("\xE0\x80"));
	  insist(!validutf8("\xE0\x80\x7f"));
	  insist(!validutf8("\xE0\x80\xC0"));
	  insist(!validutf8("\xF0"));
	  insist(!validutf8("\xF0\x7F"));
	  insist(!validutf8("\xF0\xC0"));
	  insist(!validutf8("\xF0\x80"));
	  insist(!validutf8("\xF0\x80\x7f"));
	  insist(!validutf8("\xF0\x80\xC0"));
	  insist(!validutf8("\xF0\x80\x80\x7f"));
	  insist(!validutf8("\xF0\x80\x80\xC0"));
	  insist(!validutf8("\xF5\x80\x80\x80"));
	  insist(!validutf8("\xF8"));
	}
	246
	/** @brief Tests for the MIME helpers
	 *
	 * Covers mime_content_type() (type plus one optional parameter),
	 * mime_qp() (quoted-printable decoding) and mime_base64().
	 */
	static void test_mime(void) {
	  /* Out-parameters of mime_content_type(): type, parameter name, value */
	  char *t, *n, *v;

	  fprintf(stderr, "test_mime\n");

	  t = n = v = 0;
	  insist(!mime_content_type("text/plain", &t, &n, &v));
	  insist(!strcmp(t, "text/plain"));
	  insist(n == 0);
	  insist(v == 0);

	  t = n = v = 0;
	  insist(!mime_content_type("TEXT ((nested) comment) /plain", &t, &n, &v));
	  insist(!strcmp(t, "text/plain"));
	  insist(n == 0);
	  insist(v == 0);

	  t = n = v = 0;
	  insist(!mime_content_type(" text/plain ; Charset=utf-8", &t, &n, &v));
	  insist(!strcmp(t, "text/plain"));
	  insist(!strcmp(n, "charset"));
	  insist(!strcmp(v, "utf-8"));

	  t = n = v = 0;
	  insist(!mime_content_type("text/plain;charset = ISO-8859-1 ", &t, &n, &v));
	  insist(!strcmp(t, "text/plain"));
	  insist(!strcmp(n, "charset"));
	  insist(!strcmp(v, "ISO-8859-1"));

	  /* XXX mime_parse */
	  /* XXX mime_multipart */
	  /* XXX mime_rfc2388_content_disposition */

	  check_string(mime_qp(""), "");
	  check_string(mime_qp("foobar"), "foobar");
	  check_string(mime_qp("foo=20bar"), "foo bar");
	  check_string(mime_qp("x \r\ny"), "x\r\ny");
	  check_string(mime_qp("x=\r\ny"), "xy");
	  check_string(mime_qp("x= \r\ny"), "xy");
	  check_string(mime_qp("x =\r\ny"), "x y");
	  check_string(mime_qp("x = \r\ny"), "x y");

	  /* from RFC2045 */
	  check_string(mime_qp("Now's the time =\r\n"
	                       "for all folk to come=\r\n"
	                       " to the aid of their country."),
	               "Now's the time for all folk to come to the aid of their country.");

	  check_string(mime_base64(""), "");
	  check_string(mime_base64("BBBB"), "\x04\x10\x41");
	  check_string(mime_base64("////"), "\xFF\xFF\xFF");
	  check_string(mime_base64("//BB"), "\xFF\xF0\x41");
	  check_string(mime_base64("BBBB//BB////"),
	               "\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	  check_string(mime_base64("B B B B / / B B / / / /"),
	               "\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	  check_string(mime_base64("B\r\nBBB.// B-B//~//"),
	               "\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	  check_string(mime_base64("BBBB="),
	               "\x04\x10\x41");
	  check_string(mime_base64("BBBBx="), /* not actually valid base64 */
	               "\x04\x10\x41");
	  check_string(mime_base64("BBBB BB=="),
	               "\x04\x10\x41" "\x04");
	  check_string(mime_base64("BBBB BBB="),
	               "\x04\x10\x41" "\x04\x10");
	}
	314
	/** @brief Tests for the hex encoding/decoding helpers
	 *
	 * Checks unhexdigitq() for every unsigned char value, hex() on a sample
	 * buffer and on empty input, and unhex() on valid and invalid strings.
	 */
	static void test_hex(void) {
	  unsigned n;
	  static const unsigned char h[] = { 0x00, 0xFF, 0x80, 0x7F };
	  uint8_t *u;
	  size_t ul;

	  fprintf(stderr, "test_hex\n");

	  /* Everything that is not a hex digit must be rejected */
	  for(n = 0; n <= UCHAR_MAX; ++n) {
	    if(!isxdigit(n))
	      insist(unhexdigitq(n) == -1);
	  }
	  insist(unhexdigitq('0') == 0);
	  insist(unhexdigitq('1') == 1);
	  insist(unhexdigitq('2') == 2);
	  insist(unhexdigitq('3') == 3);
	  insist(unhexdigitq('4') == 4);
	  insist(unhexdigitq('5') == 5);
	  insist(unhexdigitq('6') == 6);
	  insist(unhexdigitq('7') == 7);
	  insist(unhexdigitq('8') == 8);
	  insist(unhexdigitq('9') == 9);
	  insist(unhexdigitq('a') == 10);
	  insist(unhexdigitq('b') == 11);
	  insist(unhexdigitq('c') == 12);
	  insist(unhexdigitq('d') == 13);
	  insist(unhexdigitq('e') == 14);
	  insist(unhexdigitq('f') == 15);
	  insist(unhexdigitq('A') == 10);
	  insist(unhexdigitq('B') == 11);
	  insist(unhexdigitq('C') == 12);
	  insist(unhexdigitq('D') == 13);
	  insist(unhexdigitq('E') == 14);
	  insist(unhexdigitq('F') == 15);
	  check_string(hex(h, sizeof h), "00ff807f");
	  check_string(hex(0, 0), "");
	  /* unhex() returns 0 on bad input (see the checks at the end), so do not
	   * read ul or hand the result to memcmp() unless decoding succeeded */
	  u = unhex("00ff807f", &ul);
	  insist(u != 0);
	  if(u) {
	    insist(ul == 4);
	    insist(memcmp(u, h, 4) == 0);
	  }
	  u = unhex("00FF807F", &ul);
	  insist(u != 0);
	  if(u) {
	    insist(ul == 4);
	    insist(memcmp(u, h, 4) == 0);
	  }
	  u = unhex("", &ul);
	  insist(u != 0);
	  if(u)
	    insist(ul == 0);
	  fprintf(stderr, "2 ERROR reports expected {\n");
	  insist(unhex("F", 0) == 0);
	  insist(unhex("az", 0) == 0);
	  fprintf(stderr, "}\n");
	}
	364
	365	static void test_casefold(void) {
	366	uint32_t c, l;
	367	const char input, canon_folded, compat_folded, canon_expected, *compat_expected;
	368
	369	fprintf(stderr, "test_casefold\n");
	370
	371	/* This isn't a very exhaustive test. Unlike for normalization, there don't
	372	* seem to be any public test vectors for these algorithms. */
	373
	374	for(c = 1; c < 256; ++c) {
	375	input = utf32_to_utf8(&c, 1, 0);
	376	canon_folded = utf8_casefold_canon(input, strlen(input), 0);
	377	compat_folded = utf8_casefold_compat(input, strlen(input), 0);
	378	switch(c) {
	379	default:
	380	if((c >= 'A' && c <= 'Z')
	381	\|\| (c >= 0xC0 && c <= 0xDE && c != 0xD7))
	382	l = c ^ 0x20;
	383	else
	384	l = c;
	385	break;
	386	case 0xB5: /* MICRO SIGN */
	387	l = 0x3BC; /* GREEK SMALL LETTER MU */
	388	break;
	389	case 0xDF: /* LATIN SMALL LETTER SHARP S */
	390	insist(!strcmp(canon_folded, "ss"));
	391	insist(!strcmp(compat_folded, "ss"));
	392	l = 0;
	393	break;
	394	}
	395	if(l) {
	396	uint32_t *d;
	397	/* Case-folded data is now normalized */
	398	d = utf32_decompose_canon(&l, 1, 0);
	399	canon_expected = utf32_to_utf8(d, utf32_len(d), 0);
	400	if(strcmp(canon_folded, canon_expected)) {
	401	fprintf(stderr, "%s:%d: canon-casefolding %#lx got '%s', expected '%s'\n",
	402	__FILE__, __LINE__, (unsigned long)c,
	403	format(canon_folded), format(canon_expected));
	404	count_error();
	405	}
	406	++tests;
	407	d = utf32_decompose_compat(&l, 1, 0);
	408	compat_expected = utf32_to_utf8(d, utf32_len(d), 0);
	409	if(strcmp(compat_folded, compat_expected)) {
	410	fprintf(stderr, "%s:%d: compat-casefolding %#lx got '%s', expected '%s'\n",
	411	__FILE__, __LINE__, (unsigned long)c,
	412	format(compat_folded), format(compat_expected));
	413	count_error();
	414	}
	415	++tests;
	416	}
	417	}
	418	check_string(utf8_casefold_canon("", 0, 0), "");
	419	}
	420
	/** @brief Word-splitting test vectors
	 *
	 * Each entry pairs an input string with the words expected from it, as a
	 * 0-terminated list (at most 9 words plus the terminator).  The table is
	 * read-only and private to this file.
	 */
	static const struct {
	  const char *in;
	  const char *expect[10];
	} wtest[] = {
	  /* Empty string */
	  { "", { 0 } },
	  /* Only whitespace and punctuation */
	  { " ", { 0 } },
	  { " ' ", { 0 } },
	  { " ! ", { 0 } },
	  { " \"\" ", { 0 } },
	  { " @ ", { 0 } },
	  /* Basics */
	  { "wibble", { "wibble", 0 } },
	  { " wibble", { "wibble", 0 } },
	  { " wibble ", { "wibble", 0 } },
	  { "wibble ", { "wibble", 0 } },
	  { "wibble spong", { "wibble", "spong", 0 } },
	  { " wibble spong", { "wibble", "spong", 0 } },
	  { " wibble spong ", { "wibble", "spong", 0 } },
	  { "wibble spong ", { "wibble", "spong", 0 } },
	  { "wibble spong splat foo zot ", { "wibble", "spong", "splat", "foo", "zot", 0 } },
	  /* Apostrophes */
	  { "wibble 'spong", { "wibble", "spong", 0 } },
	  { " wibble's", { "wibble's", 0 } },
	  { " wibblespong' ", { "wibblespong", 0 } },
	  { "wibble sp''ong ", { "wibble", "sp", "ong", 0 } },
	};
	/** @brief Number of entries in @ref wtest */
	#define NWTEST (sizeof wtest / sizeof *wtest)
	450
	451	static void test_words(void) {
	452	size_t t, nexpect, ngot, i;
	453	int right;
	454
	455	fprintf(stderr, "test_words\n");
	456	for(t = 0; t < NWTEST; ++t) {
	457	char **got = utf8_word_split(wtest[t].in, strlen(wtest[t].in), &ngot, 0);
	458
	459	for(nexpect = 0; wtest[t].expect[nexpect]; ++nexpect)
	460	;
	461	if(nexpect == ngot) {
	462	for(i = 0; i < ngot; ++i)
	463	if(strcmp(wtest[t].expect[i], got[i]))
	464	break;
	465	right = i == ngot;
	466	} else
	467	right = 0;
	468	if(!right) {
	469	fprintf(stderr, "word split %zu failed\n", t);
	470	fprintf(stderr, "input: %s\n", wtest[t].in);
	471	fprintf(stderr, " \| %-30s \| %-30s\n",
	472	"expected", "got");
	473	for(i = 0; i < nexpect \|\| i < ngot; ++i) {
	474	const char *e = i < nexpect ? wtest[t].expect[i] : "<none>";
	475	const char *g = i < ngot ? got[i] : "<none>";
	476	fprintf(stderr, " %2zu \| %-30s \| %-30s\n", i, e, g);
	477	}
	478	count_error();
	479	}
	480	++tests;
	481	}
	482	}
	483
	/** @brief Ordering predicate for the integer heap
	 * @param a Left-hand value
	 * @param b Right-hand value
	 * @return Nonzero if @p a is strictly less than @p b, otherwise 0
	 */
	static inline int int_lt(int a, int b) {
	  return b > a;
	}
	486
	487	/** @struct iheap
	488	* @brief A heap with @c int elements */
	489	HEAP_TYPE(iheap, int, int_lt);
	490	HEAP_DEFINE(iheap, int, int_lt);
	491
	492	/** @brief Tests for @ref heap.h */
	493	static void test_heap(void) {
	494	struct iheap h[1];
	495	int n;
	496	int last = -1;
	497
	498	fprintf(stderr, "test_heap\n");
	499
	500	iheap_init(h);
	501	for(n = 0; n < 1000; ++n)
	502	iheap_insert(h, random() % 100);
	503	for(n = 0; n < 1000; ++n) {
	504	const int latest = iheap_remove(h);
	505	if(last > latest)
	506	fprintf(stderr, "should have %d <= %d\n", last, latest);
	507	insist(last <= latest);
	508	last = latest;
	509	}
	510	putchar('\n');
	511	}
	512
	/** @brief Open a Unicode test file
	 * @param path Path of the file relative to the Unicode 5.0.0 UCD directory
	 * @return Stream open for reading
	 *
	 * The file is looked up in the current directory under the basename of
	 * @p path.  If it cannot be opened it is fetched with wget, made read-only
	 * and opened again.  Any failure is reported through fatal().
	 */
	static FILE *open_unicode_test(const char *path) {
	  const char *base;
	  FILE *fp;
	  char buffer[1024];
	  int w, n;

	  if((base = strrchr(path, '/')))
	    ++base;
	  else
	    base = path;
	  if(!(fp = fopen(base, "r"))) {
	    n = snprintf(buffer, sizeof buffer,
	                 "wget http://www.unicode.org/Public/5.0.0/ucd/%s", path);
	    /* Never hand a truncated command line to system() */
	    if(n < 0 || (size_t)n >= sizeof buffer)
	      fatal(0, "%s: path too long", path);
	    if((w = system(buffer)))
	      fatal(0, "%s: %s", buffer, wstat(w));
	    if(chmod(base, 0444) < 0)
	      fatal(errno, "chmod %s", base);
	    if(!(fp = fopen(base, "r")))
	      fatal(errno, "%s", base);
	  }
	  return fp;
	}
	536
	537	/** @brief Run breaking tests for utf32_grapheme_boundary() etc */
	538	static void breaktest(const char *path,
	539	int (breakfn)(const uint32_t , size_t, size_t)) {
	540	FILE *fp = open_unicode_test(path);
	541	int lineno = 0;
	542	char l, lp;
	543	size_t bn, n;
	544	char break_allowed[1024];
	545	uint32_t buffer[1024];
	546
	547	while(!inputline(path, fp, &l, '\n')) {
	548	++lineno;
	549	if(l[0] == '#') continue;
	550	bn = 0;
	551	lp = l;
	552	while(*lp) {
	553	if(lp == ' ' \|\| lp == '\t') {
	554	++lp;
	555	continue;
	556	}
	557	if(*lp == '#')
	558	break;
	559	if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0xB7) {
	560	/* 00F7 DIVISION SIGN */
	561	break_allowed[bn] = 1;
	562	lp += 2;
	563	continue;
	564	}
	565	if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0x97) {
	566	/* 00D7 MULTIPLICATION SIGN */
	567	break_allowed[bn] = 0;
	568	lp += 2;
	569	continue;
	570	}
	571	if(isxdigit((unsigned char)*lp)) {
	572	buffer[bn++] = strtoul(lp, &lp, 16);
	573	continue;
	574	}
	575	fatal(0, "%s:%d: evil line: %s", path, lineno, l);
	576	}
	577	for(n = 0; n <= bn; ++n) {
	578	if(breakfn(buffer, bn, n) != break_allowed[n]) {
	579	fprintf(stderr,
	580	"%s:%d: offset %zu: mismatch\n"
	581	"%s\n"
	582	"\n",
	583	path, lineno, n, l);
	584	count_error();
	585	}
	586	++tests;
	587	}
	588	xfree(l);
	589	}
	590	fclose(fp);
	591	}
	592
	593	/** @brief Tests for @ref lib/unicode.h */
	594	static void test_unicode(void) {
	595	FILE *fp;
	596	int lineno = 0;
	597	char l, lp;
	598	uint32_t buffer[1024];
	599	uint32_t c[6], NFD_c[6], NFKD_c[6], NFC_c[6], NFKC_c[6]; / 1-indexed */
	600	int cn, bn;
	601
	602	fprintf(stderr, "test_unicode\n");
	603	fp = open_unicode_test("NormalizationTest.txt");
	604	while(!inputline("NormalizationTest.txt", fp, &l, '\n')) {
	605	++lineno;
	606	if(l == '#' \|\| l == '@')
	607	continue;
	608	bn = 0;
	609	cn = 1;
	610	lp = l;
	611	c[cn++] = &buffer[bn];
	612	while(lp && lp != '#') {
	613	if(*lp == ' ') {
	614	++lp;
	615	continue;
	616	}
	617	if(*lp == ';') {
	618	buffer[bn++] = 0;
	619	if(cn == 6)
	620	break;
	621	c[cn++] = &buffer[bn];
	622	++lp;
	623	continue;
	624	}
	625	buffer[bn++] = strtoul(lp, &lp, 16);
	626	}
	627	buffer[bn] = 0;
	628	assert(cn == 6);
	629	for(cn = 1; cn <= 5; ++cn) {
	630	NFD_c[cn] = utf32_decompose_canon(c[cn], utf32_len(c[cn]), 0);
	631	NFKD_c[cn] = utf32_decompose_compat(c[cn], utf32_len(c[cn]), 0);
	632	NFC_c[cn] = utf32_compose_canon(c[cn], utf32_len(c[cn]), 0);
	633	NFKC_c[cn] = utf32_compose_compat(c[cn], utf32_len(c[cn]), 0);
	634	}
	635	#define unt_check(T, A, B) do { \
	636	++tests; \
	637	if(utf32_cmp(c[A], T##_c[B])) { \
	638	fprintf(stderr, \
	639	"NormalizationTest.txt:%d: c%d != "#T"(c%d)\n", \
	640	lineno, A, B); \
	641	fprintf(stderr, " c%d:%s\n", \
	642	A, format_utf32(c[A])); \
	643	fprintf(stderr, " c%d:%s\n", \
	644	B, format_utf32(c[B])); \
	645	fprintf(stderr, "%4s(c%d):%s\n", \
	646	#T, B, format_utf32(T##_c[B])); \
	647	count_error(); \
	648	} \
	649	} while(0)
	650	unt_check(NFD, 3, 1);
	651	unt_check(NFD, 3, 2);
	652	unt_check(NFD, 3, 3);
	653	unt_check(NFD, 5, 4);
	654	unt_check(NFD, 5, 5);
	655	unt_check(NFKD, 5, 1);
	656	unt_check(NFKD, 5, 2);
	657	unt_check(NFKD, 5, 3);
	658	unt_check(NFKD, 5, 4);
	659	unt_check(NFKD, 5, 5);
	660	unt_check(NFC, 2, 1);
	661	unt_check(NFC, 2, 2);
	662	unt_check(NFC, 2, 3);
	663	unt_check(NFC, 4, 4);
	664	unt_check(NFC, 4, 5);
	665	unt_check(NFKC, 4, 1);
	666	unt_check(NFKC, 4, 2);
	667	unt_check(NFKC, 4, 3);
	668	unt_check(NFKC, 4, 4);
	669	unt_check(NFKC, 4, 5);
	670	for(cn = 1; cn <= 5; ++cn) {
	671	xfree(NFD_c[cn]);
	672	xfree(NFKD_c[cn]);
	673	}
	674	xfree(l);
	675	}
	676	fclose(fp);
	677	breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_grapheme_boundary);
	678	breaktest("auxiliary/WordBreakTest.txt", utf32_is_word_boundary);
	679	insist(utf32_combining_class(0x40000) == 0);
	680	insist(utf32_combining_class(0xE0000) == 0);
	681	}
	682
	/** @brief Tests for find_signal()
	 *
	 * Looks up five well-known signal names and one unknown name, which is
	 * expected to yield -1.
	 */
	static void test_signame(void) {
	  fprintf(stderr, "test_signame\n");

	  /* Known names map to the platform's signal numbers */
	  insist(find_signal("SIGTERM") == SIGTERM);
	  insist(find_signal("SIGHUP") == SIGHUP);
	  insist(find_signal("SIGINT") == SIGINT);
	  insist(find_signal("SIGQUIT") == SIGQUIT);
	  insist(find_signal("SIGKILL") == SIGKILL);

	  /* Unknown names are rejected */
	  insist(find_signal("SIGYOURMUM") == -1);
	}
	692
	693	static void test_cache(void) {
	694	const struct cache_type t1 = { 1 }, t2 = { 10 };
	695	const char v11[] = "spong", v12[] = "wibble", v2[] = "blat";
	696	fprintf(stderr, "test_cache\n");
	697	cache_put(&t1, "1_1", v11);
	698	cache_put(&t1, "1_2", v12);
	699	cache_put(&t2, "2", v2);
	700	insist(cache_count() == 3);
	701	insist(cache_get(&t2, "2") == v2);
	702	insist(cache_get(&t1, "1_1") == v11);
	703	insist(cache_get(&t1, "1_2") == v12);
	704	insist(cache_get(&t1, "2") == 0);
	705	insist(cache_get(&t2, "1_1") == 0);
	706	insist(cache_get(&t2, "1_2") == 0);
	707	insist(cache_get(&t1, "2") == 0);
	708	insist(cache_get(&t2, "1_1") == 0);
	709	insist(cache_get(&t2, "1_2") == 0);
	710	sleep(2);
	711	cache_expire();
	712	insist(cache_count() == 1);
	713	insist(cache_get(&t1, "1_1") == 0);
	714	insist(cache_get(&t1, "1_2") == 0);
	715	insist(cache_get(&t2, "2") == v2);
	716	cache_clean(0);
	717	insist(cache_count() == 0);
	718	insist(cache_get(&t2, "2") == 0);
	719	}
	720
	/** @brief Tests for the filename helpers d_dirname() and extension() */
	static void test_filepart(void) {
	  fprintf(stderr, "test_filepart\n");

	  /* Directory part */
	  check_string(d_dirname("/"), "/");
	  check_string(d_dirname("/spong"), "/");
	  check_string(d_dirname("/foo/bar"), "/foo");
	  check_string(d_dirname("./bar"), ".");
	  check_string(d_dirname("."), ".");
	  check_string(d_dirname(".."), ".");
	  check_string(d_dirname("../blat"), "..");
	  check_string(d_dirname("wibble"), ".");

	  /* Extension, including the leading dot; empty if there is none */
	  check_string(extension("foo.c"), ".c");
	  check_string(extension(".c"), ".c");
	  check_string(extension("."), ".");
	  check_string(extension("foo"), "");
	  check_string(extension("./foo"), "");
	  check_string(extension("./foo.c"), ".c");
	}
	738
	739	int main(void) {
	740	fail_first = !!getenv("FAIL_FIRST");
	741	insist('\n' == 0x0A);
	742	insist('\r' == 0x0D);
	743	insist(' ' == 0x20);
	744	insist('0' == 0x30);
	745	insist('9' == 0x39);
	746	insist('A' == 0x41);
	747	insist('Z' == 0x5A);
	748	insist('a' == 0x61);
	749	insist('z' == 0x7A);
	750	/* addr.c */
	751	/* asprintf.c */
	752	/* authhash.c */
	753	/* basen.c */
	754	/* charset.c */
	755	/* client.c */
	756	/* configuration.c */
	757	/* event.c */
	758	/* filepart.c */
	759	test_filepart();
	760	/* fprintf.c */
	761	/* heap.c */
	762	test_heap();
	763	/* hex.c */
	764	test_hex();
	765	/* inputline.c */
	766	/* kvp.c */
	767	/* log.c */
	768	/* mem.c */
	769	/* mime.c */
	770	test_mime();
	771	/* mixer.c */
	772	/* plugin.c */
	773	/* printf.c */
	774	/* queue.c */
	775	/* sink.c */
	776	/* snprintf.c */
	777	/* split.c */
	778	/* syscalls.c */
	779	/* table.c */
	780	/* unicode.c */
	781	test_unicode();
	782	/* utf8.c */
	783	test_utf8();
	784	/* vector.c */
	785	/* words.c */
	786	test_casefold();
	787	test_words();
	788	/* wstat.c */
	789	/* signame.c */
	790	test_signame();
	791	/* cache.c */
	792	test_cache();
	793	fprintf(stderr, "%d errors out of %d tests\n", errors, tests);
	794	return !!errors;
	795	}
	796
	797	/*
	798	Local Variables:
	799	c-basic-offset:2
	800	comment-column:40
	801	fill-column:79
	802	indent-tabs-mode:nil
	803	End:
	804	*/