chiark - git - mdw - disorder/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* This file is part of DisOrder.
	3	* Copyright (C) 2005, 2007 Richard Kettlewell
	4	*
	5	* This program is free software; you can redistribute it and/or modify
	6	* it under the terms of the GNU General Public License as published by
	7	* the Free Software Foundation; either version 2 of the License, or
	8	* (at your option) any later version.
	9	*
	10	* This program is distributed in the hope that it will be useful, but
	11	* WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	* General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU General Public License
	16	* along with this program; if not, write to the Free Software
	17	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
	18	* USA
	19	*/
	20	/** @file lib/test.c @brief Library tests */
	21
	22	#include <config.h>
	23	#include "types.h"
	24
	25	#include <stdio.h>
	26	#include <string.h>
	27	#include <stdlib.h>
	28	#include <errno.h>
	29	#include <ctype.h>
	30	#include <assert.h>
	31	#include <sys/types.h>
	32	#include <sys/stat.h>
	33	#include <unistd.h>
	34	#include <signal.h>
	35	#include <sys/wait.h>
	36
	37	#include "utf8.h"
	38	#include "mem.h"
	39	#include "log.h"
	40	#include "vector.h"
	41	#include "charset.h"
	42	#include "mime.h"
	43	#include "hex.h"
	44	#include "heap.h"
	45	#include "unicode.h"
	46	#include "inputline.h"
	47	#include "wstat.h"
	48	#include "signame.h"
	49	#include "cache.h"
	50	#include "filepart.h"
	51	#include "hash.h"
	52	#include "selection.h"
	53	#include "syscalls.h"
	54	#include "kvp.h"
	55	#include "sink.h"
	56
	/** @brief Number of checks performed so far, and number that failed */
	static int tests, errors;

	/** @brief If nonzero, abort() as soon as the first failure is recorded */
	static int fail_first;

	/** @brief Record one failed check
	 *
	 * Increments the failure counter.  When @c fail_first is set the process is
	 * aborted immediately.
	 */
	static void count_error(void) {
	  ++errors;
	  if(fail_first)
	    abort();
	}
	65
	/** @brief Checks that @p expr is nonzero
	 *
	 * On failure a "file:line: error checking <expr>" message is written to
	 * stderr and the failure is counted.  The message is written before
	 * count_error() is called so that it is still emitted when count_error()
	 * aborts the process.  The test counter is incremented either way.
	 */
	#define insist(expr) do { \
	  if(!(expr)) { \
	    fprintf(stderr, "%s:%d: error checking %s\n", \
	            __FILE__, __LINE__, #expr); \
	    count_error(); \
	  } \
	  ++tests; \
	} while(0)
	75
	76	static const char format(const char s) {
	77	struct dynstr d;
	78	int c;
	79	char buf[10];
	80
	81	dynstr_init(&d);
	82	while((c = (unsigned char)*s++)) {
	83	if(c >= ' ' && c <= '~')
	84	dynstr_append(&d, c);
	85	else {
	86	sprintf(buf, "\\x%02X", (unsigned)c);
	87	dynstr_append_string(&d, buf);
	88	}
	89	}
	90	dynstr_terminate(&d);
	91	return d.vec;
	92	}
	93
	94	static const char format_utf32(const uint32_t s) {
	95	struct dynstr d;
	96	uint32_t c;
	97	char buf[64];
	98
	99	dynstr_init(&d);
	100	while((c = *s++)) {
	101	sprintf(buf, " %04lX", (long)c);
	102	dynstr_append_string(&d, buf);
	103	}
	104	dynstr_terminate(&d);
	105	return d.vec;
	106	}
	107
	/** @brief Check that @p GOT evaluates to a string equal to @p WANT
	 *
	 * @p GOT and @p WANT are each evaluated exactly once.  If @p GOT yields a
	 * null pointer that is reported as a failure instead of being handed to
	 * strcmp().  On a mismatch both strings are printed via format().  The test
	 * counter is incremented in every case.
	 */
	#define check_string(GOT, WANT) do { \
	  const char *got = GOT; \
	  const char *want = WANT; \
	  \
	  if(got == 0) { \
	    fprintf(stderr, "%s:%d: %s returned 0\n", \
	            __FILE__, __LINE__, #GOT); \
	    count_error(); \
	  } else if(strcmp(want, got)) { \
	    fprintf(stderr, "%s:%d: %s returned:\n%s\nexpected:\n%s\n", \
	            __FILE__, __LINE__, #GOT, format(got), format(want)); \
	    count_error(); \
	  } \
	  ++tests; \
	} while(0)
	123
	/** @brief Check that the string @p GOT evaluates to starts with @p WANT
	 *
	 * @p GOT and @p WANT are each evaluated exactly once.  If @p GOT yields a
	 * null pointer that is reported as a failure instead of being handed to
	 * strncmp().  Only the first strlen(WANT) bytes are compared.  The test
	 * counter is incremented in every case.
	 */
	#define check_string_prefix(GOT, WANT) do { \
	  const char *got = GOT; \
	  const char *want = WANT; \
	  \
	  if(got == 0) { \
	    fprintf(stderr, "%s:%d: %s returned 0\n", \
	            __FILE__, __LINE__, #GOT); \
	    count_error(); \
	  } else if(strncmp(want, got, strlen(want))) { \
	    fprintf(stderr, "%s:%d: %s returned:\n%s\nexpected:\n%s...\n", \
	            __FILE__, __LINE__, #GOT, format(got), format(want)); \
	    count_error(); \
	  } \
	  ++tests; \
	} while(0)
	139
	/** @brief Check that the integer @p GOT evaluates to equals @p WANT
	 *
	 * Both arguments are evaluated once and converted to @c intmax_t before
	 * being compared.  A mismatch is reported on stderr and counted; the test
	 * counter is incremented in every case.
	 */
	#define check_integer(GOT, WANT) do { \
	  const intmax_t got = GOT; \
	  const intmax_t want = WANT; \
	  \
	  if(!(got == want)) { \
	    fprintf(stderr, "%s:%d: %s returned: %jd expected: %jd\n", \
	            __FILE__, __LINE__, #GOT, got, want); \
	    count_error(); \
	  } \
	  ++tests; \
	} while(0)
	149
	150	static uint32_t ucs4parse(const char s) {
	151	struct dynstr_ucs4 d;
	152	char *e;
	153
	154	dynstr_ucs4_init(&d);
	155	while(*s) {
	156	errno = 0;
	157	dynstr_ucs4_append(&d, strtoul(s, &e, 0));
	158	if(errno) fatal(errno, "strtoul (%s)", s);
	159	s = e;
	160	}
	161	dynstr_ucs4_terminate(&d);
	162	return d.vec;
	163	}
	164
	/** @brief Tests for UTF-8 validation and UTF-8/UTF-32 conversion
	 *
	 * Valid sequences are checked with U8(), which validates the bytes, converts
	 * them to UTF-32, compares against the expected code points and converts
	 * back.  Invalid sequences are checked to be rejected by utf8_valid().
	 */
	static void test_utf8(void) {
	  /* Test validutf8, convert to UCS-4, check the answer is right,
	   * convert back to UTF-8, check we got to where we started */
	#define U8(CHARS, WORDS) do { \
	  uint32_t *w = ucs4parse(WORDS); \
	  uint32_t *ucs; \
	  char *u8; \
	  \
	  insist(validutf8(CHARS)); \
	  ucs = utf8_to_utf32(CHARS, strlen(CHARS), 0); \
	  insist(ucs != 0); \
	  insist(!utf32_cmp(w, ucs)); \
	  u8 = utf32_to_utf8(ucs, utf32_len(ucs), 0); \
	  insist(u8 != 0); \
	  check_string(u8, CHARS); \
	} while(0)

	  fprintf(stderr, "test_utf8\n");
	#define validutf8(S) utf8_valid((S), strlen(S))

	  /* empty string */

	  U8("", "");

	  /* ASCII characters */

	  U8(" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
	     "0x20 0x21 0x22 0x23 0x24 0x25 0x26 0x27 0x28 0x29 0x2a 0x2b 0x2c 0x2d "
	     "0x2e 0x2f 0x30 0x31 0x32 0x33 0x34 0x35 0x36 0x37 0x38 0x39 0x3a "
	     "0x3b 0x3c 0x3d 0x3e 0x3f 0x40 0x41 0x42 0x43 0x44 0x45 0x46 0x47 "
	     "0x48 0x49 0x4a 0x4b 0x4c 0x4d 0x4e 0x4f 0x50 0x51 0x52 0x53 0x54 "
	     "0x55 0x56 0x57 0x58 0x59 0x5a 0x5b 0x5c 0x5d 0x5e 0x5f 0x60 0x61 "
	     "0x62 0x63 0x64 0x65 0x66 0x67 0x68 0x69 0x6a 0x6b 0x6c 0x6d 0x6e "
	     "0x6f 0x70 0x71 0x72 0x73 0x74 0x75 0x76 0x77 0x78 0x79 0x7a 0x7b "
	     "0x7c 0x7d 0x7e");
	  U8("\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\177",
	     "0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xa 0xb 0xc 0xd 0xe 0xf 0x10 "
	     "0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d "
	     "0x1e 0x1f 0x7f");

	  /* from RFC3629 */

	  /* UTF8-2 = %xC2-DF UTF8-tail */
	  insist(!validutf8("\xC0\x80"));
	  insist(!validutf8("\xC1\x80"));
	  insist(!validutf8("\xC2\x7F"));
	  U8("\xC2\x80", "0x80");
	  U8("\xDF\xBF", "0x7FF");
	  insist(!validutf8("\xDF\xC0"));

	  /* UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
	   * %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
	   */
	  insist(!validutf8("\xE0\x9F\x80"));
	  U8("\xE0\xA0\x80", "0x800");
	  U8("\xE0\xBF\xBF", "0xFFF");
	  insist(!validutf8("\xE0\xC0\xBF"));

	  insist(!validutf8("\xE1\x80\x7F"));
	  U8("\xE1\x80\x80", "0x1000");
	  U8("\xEC\xBF\xBF", "0xCFFF");
	  insist(!validutf8("\xEC\xC0\xBF"));

	  U8("\xED\x80\x80", "0xD000");
	  U8("\xED\x9F\xBF", "0xD7FF");
	  insist(!validutf8("\xED\xA0\xBF"));

	  insist(!validutf8("\xEE\x7f\x80"));
	  U8("\xEE\x80\x80", "0xE000");
	  U8("\xEF\xBF\xBF", "0xFFFF");
	  insist(!validutf8("\xEF\xC0\xBF"));

	  /* UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
	   * %xF4 %x80-8F 2( UTF8-tail )
	   */
	  insist(!validutf8("\xF0\x8F\x80\x80"));
	  U8("\xF0\x90\x80\x80", "0x10000");
	  U8("\xF0\xBF\xBF\xBF", "0x3FFFF");
	  insist(!validutf8("\xF0\xC0\x80\x80"));

	  insist(!validutf8("\xF1\x80\x80\x7F"));
	  U8("\xF1\x80\x80\x80", "0x40000");
	  U8("\xF3\xBF\xBF\xBF", "0xFFFFF");
	  insist(!validutf8("\xF3\xC0\x80\x80"));

	  insist(!validutf8("\xF4\x80\x80\x7F"));
	  U8("\xF4\x80\x80\x80", "0x100000");
	  U8("\xF4\x8F\xBF\xBF", "0x10FFFF");
	  insist(!validutf8("\xF4\x90\x80\x80"));
	  insist(!validutf8("\xF4\x80\xFF\x80"));

	  /* miscellaneous non-UTF-8 rubbish */
	  insist(!validutf8("\x80"));
	  insist(!validutf8("\xBF"));
	  insist(!validutf8("\xC0"));
	  insist(!validutf8("\xC0\x7F"));
	  insist(!validutf8("\xC0\xC0"));
	  insist(!validutf8("\xE0"));
	  insist(!validutf8("\xE0\x7F"));
	  insist(!validutf8("\xE0\xC0"));
	  insist(!validutf8("\xE0\x80"));
	  insist(!validutf8("\xE0\x80\x7f"));
	  insist(!validutf8("\xE0\x80\xC0"));
	  insist(!validutf8("\xF0"));
	  insist(!validutf8("\xF0\x7F"));
	  insist(!validutf8("\xF0\xC0"));
	  insist(!validutf8("\xF0\x80"));
	  insist(!validutf8("\xF0\x80\x7f"));
	  insist(!validutf8("\xF0\x80\xC0"));
	  insist(!validutf8("\xF0\x80\x80\x7f"));
	  insist(!validutf8("\xF0\x80\x80\xC0"));
	  insist(!validutf8("\xF5\x80\x80\x80"));
	  insist(!validutf8("\xF8"));
	}
	279
	/** @brief Callback for mime_multipart() that collects each part
	 * @param s Part text
	 * @param u Pointer to the struct vector that receives the parts
	 * @return Always 0
	 */
	static int test_multipart_callback(const char *s, void *u) {
	  struct vector *parts = u;

	  /* the vector stores char *, so the const qualifier has to be cast away */
	  vector_append(parts, (char *)s);
	  return 0;
	}
	286
	287	static void test_mime(void) {
	288	char t, n, *v;
	289	struct vector parts[1];
	290
	291	fprintf(stderr, "test_mime\n");
	292
	293	t = n = v = 0;
	294	insist(!mime_content_type("text/plain", &t, &n, &v));
	295	check_string(t, "text/plain");
	296	insist(n == 0);
	297	insist(v == 0);
	298
	299	insist(mime_content_type("TEXT ((broken) comment", &t, &n, &v) < 0);
	300	insist(mime_content_type("TEXT ((broken) comment\\", &t, &n, &v) < 0);
	301
	302	t = n = v = 0;
	303	insist(!mime_content_type("TEXT ((nested)\\ comment) /plain", &t, &n, &v));
	304	check_string(t, "text/plain");
	305	insist(n == 0);
	306	insist(v == 0);
	307
	308	t = n = v = 0;
	309	insist(!mime_content_type(" text/plain ; Charset=\"utf-\\8\"", &t, &n, &v));
	310	check_string(t, "text/plain");
	311	check_string(n, "charset");
	312	check_string(v, "utf-8");
	313
	314	t = n = v = 0;
	315	insist(!mime_content_type("text/plain;charset = ISO-8859-1 ", &t, &n, &v));
	316	check_string(t, "text/plain");
	317	check_string(n, "charset");
	318	check_string(v, "ISO-8859-1");
	319
	320	t = n = v = 0;
	321	insist(!mime_rfc2388_content_disposition("form-data; name=\"field1\"", &t, &n, &v));
	322	check_string(t, "form-data");
	323	check_string(n, "name");
	324	check_string(v, "field1");
	325
	326	insist(!mime_rfc2388_content_disposition("inline", &t, &n, &v));
	327	check_string(t, "inline");
	328	insist(n == 0);
	329	insist(v == 0);
	330
	331	/* Current versions of the code only understand a single arg to these
	332	* headers. This is a bug at the level they work at but suffices for
	333	* DisOrder's current purposes. */
	334
	335	insist(!mime_rfc2388_content_disposition(
	336	"attachment; filename=genome.jpeg;\n"
	337	"modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\"",
	338	&t, &n, &v));
	339	check_string(t, "attachment");
	340	check_string(n, "filename");
	341	check_string(v, "genome.jpeg");
	342
	343	vector_init(parts);
	344	insist(mime_multipart("--outer\r\n"
	345	"Content-Type: text/plain\r\n"
	346	"Content-Disposition: inline\r\n"
	347	"Content-Description: text-part-1\r\n"
	348	"\r\n"
	349	"Some text goes here\r\n"
	350	"\r\n"
	351	"--outer\r\n"
	352	"Content-Type: multipart/mixed; boundary=inner\r\n"
	353	"Content-Disposition: attachment\r\n"
	354	"Content-Description: multipart-2\r\n"
	355	"\r\n"
	356	"--inner\r\n"
	357	"Content-Type: text/plain\r\n"
	358	"Content-Disposition: inline\r\n"
	359	"Content-Description: text-part-2\r\n"
	360	"\r\n"
	361	"Some more text here.\r\n"
	362	"\r\n"
	363	"--inner\r\n"
	364	"Content-Type: image/jpeg\r\n"
	365	"Content-Disposition: attachment\r\n"
	366	"Content-Description: jpeg-1\r\n"
	367	"\r\n"
	368	"<jpeg data>\r\n"
	369	"--inner--\r\n"
	370	"--outer--\r\n",
	371	test_multipart_callback,
	372	"outer",
	373	parts) == 0);
	374	check_integer(parts->nvec, 2);
	375	check_string(parts->vec[0],
	376	"Content-Type: text/plain\r\n"
	377	"Content-Disposition: inline\r\n"
	378	"Content-Description: text-part-1\r\n"
	379	"\r\n"
	380	"Some text goes here\r\n");
	381	check_string(parts->vec[1],
	382	"Content-Type: multipart/mixed; boundary=inner\r\n"
	383	"Content-Disposition: attachment\r\n"
	384	"Content-Description: multipart-2\r\n"
	385	"\r\n"
	386	"--inner\r\n"
	387	"Content-Type: text/plain\r\n"
	388	"Content-Disposition: inline\r\n"
	389	"Content-Description: text-part-2\r\n"
	390	"\r\n"
	391	"Some more text here.\r\n"
	392	"\r\n"
	393	"--inner\r\n"
	394	"Content-Type: image/jpeg\r\n"
	395	"Content-Disposition: attachment\r\n"
	396	"Content-Description: jpeg-1\r\n"
	397	"\r\n"
	398	"<jpeg data>\r\n"
	399	"--inner--");
	400	/* No trailing CRLF is _correct_ - see RFC2046 5.1.1 note regarding CRLF
	401	* preceding the boundary delimiter line. An implication of this is that we
	402	* must cope with partial lines at the end of the input when recursively
	403	* decomposing a multipart message. */
	404	vector_init(parts);
	405	insist(mime_multipart("--inner\r\n"
	406	"Content-Type: text/plain\r\n"
	407	"Content-Disposition: inline\r\n"
	408	"Content-Description: text-part-2\r\n"
	409	"\r\n"
	410	"Some more text here.\r\n"
	411	"\r\n"
	412	"--inner\r\n"
	413	"Content-Type: image/jpeg\r\n"
	414	"Content-Disposition: attachment\r\n"
	415	"Content-Description: jpeg-1\r\n"
	416	"\r\n"
	417	"<jpeg data>\r\n"
	418	"--inner--",
	419	test_multipart_callback,
	420	"inner",
	421	parts) == 0);
	422	check_integer(parts->nvec, 2);
	423	check_string(parts->vec[0],
	424	"Content-Type: text/plain\r\n"
	425	"Content-Disposition: inline\r\n"
	426	"Content-Description: text-part-2\r\n"
	427	"\r\n"
	428	"Some more text here.\r\n");
	429	check_string(parts->vec[1],
	430	"Content-Type: image/jpeg\r\n"
	431	"Content-Disposition: attachment\r\n"
	432	"Content-Description: jpeg-1\r\n"
	433	"\r\n"
	434	"<jpeg data>");
	435
	436	/* XXX mime_parse */
	437
	438	check_string(mime_qp(""), "");
	439	check_string(mime_qp("foobar"), "foobar");
	440	check_string(mime_qp("foo=20bar"), "foo bar");
	441	check_string(mime_qp("x \r\ny"), "x\r\ny");
	442	check_string(mime_qp("x=\r\ny"), "xy");
	443	check_string(mime_qp("x= \r\ny"), "xy");
	444	check_string(mime_qp("x =\r\ny"), "x y");
	445	check_string(mime_qp("x = \r\ny"), "x y");
	446
	447	/* from RFC2045 */
	448	check_string(mime_qp("Now's the time =\r\n"
	449	"for all folk to come=\r\n"
	450	" to the aid of their country."),
	451	"Now's the time for all folk to come to the aid of their country.");
	452
	453	check_string(mime_base64(""), "");
	454	check_string(mime_base64("BBBB"), "\x04\x10\x41");
	455	check_string(mime_base64("////"), "\xFF\xFF\xFF");
	456	check_string(mime_base64("//BB"), "\xFF\xF0\x41");
	457	check_string(mime_base64("BBBB//BB////"),
	458	"\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	459	check_string(mime_base64("B B B B / / B B / / / /"),
	460	"\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	461	check_string(mime_base64("B\r\nBBB.// B-B//~//"),
	462	"\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	463	check_string(mime_base64("BBBB="),
	464	"\x04\x10\x41");
	465	check_string(mime_base64("BBBBx="), /* not actually valid base64 */
	466	"\x04\x10\x41");
	467	check_string(mime_base64("BBBB BB=="),
	468	"\x04\x10\x41" "\x04");
	469	check_string(mime_base64("BBBB BBB="),
	470	"\x04\x10\x41" "\x04\x10");
	471	}
	472
	/** @brief Tests for hex encoding and decoding
	 *
	 * Covers unhexdigitq() for every unsigned char value, hex() and unhex()
	 * round trips in both letter cases, and two malformed inputs that unhex()
	 * must reject.
	 */
	static void test_hex(void) {
	  unsigned n;
	  static const unsigned char h[] = { 0x00, 0xFF, 0x80, 0x7F };
	  uint8_t *u;
	  size_t ul;

	  fprintf(stderr, "test_hex\n");

	  /* every non-hex-digit character must be rejected */
	  for(n = 0; n <= UCHAR_MAX; ++n) {
	    if(isxdigit(n))
	      continue;
	    insist(unhexdigitq(n) == -1);
	  }

	  /* decimal digits */
	  insist(unhexdigitq('0') == 0);
	  insist(unhexdigitq('1') == 1);
	  insist(unhexdigitq('2') == 2);
	  insist(unhexdigitq('3') == 3);
	  insist(unhexdigitq('4') == 4);
	  insist(unhexdigitq('5') == 5);
	  insist(unhexdigitq('6') == 6);
	  insist(unhexdigitq('7') == 7);
	  insist(unhexdigitq('8') == 8);
	  insist(unhexdigitq('9') == 9);

	  /* lower-case letters */
	  insist(unhexdigitq('a') == 10);
	  insist(unhexdigitq('b') == 11);
	  insist(unhexdigitq('c') == 12);
	  insist(unhexdigitq('d') == 13);
	  insist(unhexdigitq('e') == 14);
	  insist(unhexdigitq('f') == 15);

	  /* upper-case letters */
	  insist(unhexdigitq('A') == 10);
	  insist(unhexdigitq('B') == 11);
	  insist(unhexdigitq('C') == 12);
	  insist(unhexdigitq('D') == 13);
	  insist(unhexdigitq('E') == 14);
	  insist(unhexdigitq('F') == 15);

	  /* encoding */
	  check_string(hex(h, sizeof h), "00ff807f");
	  check_string(hex(0, 0), "");

	  /* decoding, both letter cases and the empty string */
	  u = unhex("00ff807f", &ul);
	  insist(ul == 4);
	  insist(memcmp(u, h, 4) == 0);
	  u = unhex("00FF807F", &ul);
	  insist(ul == 4);
	  insist(memcmp(u, h, 4) == 0);
	  u = unhex("", &ul);
	  insist(ul == 0);

	  /* malformed input */
	  fprintf(stderr, "2 ERROR reports expected {\n");
	  insist(unhex("F", 0) == 0);
	  insist(unhex("az", 0) == 0);
	  fprintf(stderr, "}\n");
	}
	522
	523	static void test_casefold(void) {
	524	uint32_t c, l;
	525	const char input, canon_folded, compat_folded, canon_expected, *compat_expected;
	526
	527	fprintf(stderr, "test_casefold\n");
	528
	529	/* This isn't a very exhaustive test. Unlike for normalization, there don't
	530	* seem to be any public test vectors for these algorithms. */
	531
	532	for(c = 1; c < 256; ++c) {
	533	input = utf32_to_utf8(&c, 1, 0);
	534	canon_folded = utf8_casefold_canon(input, strlen(input), 0);
	535	compat_folded = utf8_casefold_compat(input, strlen(input), 0);
	536	switch(c) {
	537	default:
	538	if((c >= 'A' && c <= 'Z')
	539	\|\| (c >= 0xC0 && c <= 0xDE && c != 0xD7))
	540	l = c ^ 0x20;
	541	else
	542	l = c;
	543	break;
	544	case 0xB5: /* MICRO SIGN */
	545	l = 0x3BC; /* GREEK SMALL LETTER MU */
	546	break;
	547	case 0xDF: /* LATIN SMALL LETTER SHARP S */
	548	check_string(canon_folded, "ss");
	549	check_string(compat_folded, "ss");
	550	l = 0;
	551	break;
	552	}
	553	if(l) {
	554	uint32_t *d;
	555	/* Case-folded data is now normalized */
	556	d = utf32_decompose_canon(&l, 1, 0);
	557	canon_expected = utf32_to_utf8(d, utf32_len(d), 0);
	558	if(strcmp(canon_folded, canon_expected)) {
	559	fprintf(stderr, "%s:%d: canon-casefolding %#lx got '%s', expected '%s'\n",
	560	__FILE__, __LINE__, (unsigned long)c,
	561	format(canon_folded), format(canon_expected));
	562	count_error();
	563	}
	564	++tests;
	565	d = utf32_decompose_compat(&l, 1, 0);
	566	compat_expected = utf32_to_utf8(d, utf32_len(d), 0);
	567	if(strcmp(compat_folded, compat_expected)) {
	568	fprintf(stderr, "%s:%d: compat-casefolding %#lx got '%s', expected '%s'\n",
	569	__FILE__, __LINE__, (unsigned long)c,
	570	format(compat_folded), format(compat_expected));
	571	count_error();
	572	}
	573	++tests;
	574	}
	575	}
	576	check_string(utf8_casefold_canon("", 0, 0), "");
	577	}
	578
	/** @brief Word-splitting test vectors
	 *
	 * Each entry pairs an input string with the 0-terminated list of words
	 * expected from splitting it.  The table is only used inside this file, so
	 * it has internal linkage and is read-only.
	 */
	static const struct {
	  const char *in;                       /* input text */
	  const char *expect[10];               /* expected words, 0-terminated */
	} wtest[] = {
	  /* Empty string */
	  { "", { 0 } },
	  /* Only whitespace and punctuation */
	  { " ", { 0 } },
	  { " ' ", { 0 } },
	  { " ! ", { 0 } },
	  { " \"\" ", { 0 } },
	  { " @ ", { 0 } },
	  /* Basics */
	  { "wibble", { "wibble", 0 } },
	  { " wibble", { "wibble", 0 } },
	  { " wibble ", { "wibble", 0 } },
	  { "wibble ", { "wibble", 0 } },
	  { "wibble spong", { "wibble", "spong", 0 } },
	  { " wibble spong", { "wibble", "spong", 0 } },
	  { " wibble spong ", { "wibble", "spong", 0 } },
	  { "wibble spong ", { "wibble", "spong", 0 } },
	  { "wibble spong splat foo zot ", { "wibble", "spong", "splat", "foo", "zot", 0 } },
	  /* Apostrophes */
	  { "wibble 'spong", { "wibble", "spong", 0 } },
	  { " wibble's", { "wibble's", 0 } },
	  { " wibblespong' ", { "wibblespong", 0 } },
	  { "wibble sp''ong ", { "wibble", "sp", "ong", 0 } },
	};
	/** @brief Number of entries in @c wtest */
	#define NWTEST (sizeof wtest / sizeof *wtest)
	608
	609	static void test_words(void) {
	610	size_t t, nexpect, ngot, i;
	611	int right;
	612
	613	fprintf(stderr, "test_words\n");
	614	for(t = 0; t < NWTEST; ++t) {
	615	char **got = utf8_word_split(wtest[t].in, strlen(wtest[t].in), &ngot, 0);
	616
	617	for(nexpect = 0; wtest[t].expect[nexpect]; ++nexpect)
	618	;
	619	if(nexpect == ngot) {
	620	for(i = 0; i < ngot; ++i)
	621	if(strcmp(wtest[t].expect[i], got[i]))
	622	break;
	623	right = i == ngot;
	624	} else
	625	right = 0;
	626	if(!right) {
	627	fprintf(stderr, "word split %zu failed\n", t);
	628	fprintf(stderr, "input: %s\n", wtest[t].in);
	629	fprintf(stderr, " \| %-30s \| %-30s\n",
	630	"expected", "got");
	631	for(i = 0; i < nexpect \|\| i < ngot; ++i) {
	632	const char *e = i < nexpect ? wtest[t].expect[i] : "<none>";
	633	const char *g = i < ngot ? got[i] : "<none>";
	634	fprintf(stderr, " %2zu \| %-30s \| %-30s\n", i, e, g);
	635	}
	636	count_error();
	637	}
	638	++tests;
	639	}
	640	}
	641
	/** @brief Ordering predicate for the integer heap
	 * @param a Left-hand value
	 * @param b Right-hand value
	 * @return 1 if @p a is strictly less than @p b, otherwise 0
	 */
	static inline int int_lt(int a, int b) {
	  return b > a;
	}
	644
	645	/** @struct iheap
	646	* @brief A heap with @c int elements */
	647	HEAP_TYPE(iheap, int, int_lt);
	648	HEAP_DEFINE(iheap, int, int_lt);
	649
	650	/** @brief Tests for @ref heap.h */
	651	static void test_heap(void) {
	652	struct iheap h[1];
	653	int n;
	654	int last = -1;
	655
	656	fprintf(stderr, "test_heap\n");
	657
	658	iheap_init(h);
	659	for(n = 0; n < 1000; ++n)
	660	iheap_insert(h, random() % 100);
	661	for(n = 0; n < 1000; ++n) {
	662	const int latest = iheap_remove(h);
	663	if(last > latest)
	664	fprintf(stderr, "should have %d <= %d\n", last, latest);
	665	insist(last <= latest);
	666	last = latest;
	667	}
	668	putchar('\n');
	669	}
	670
	/** @brief Open a Unicode test file
	 * @param path Path of the file relative to the Unicode 5.0.0 UCD directory
	 * @return Stream open for reading; never null
	 *
	 * The file is looked for in the current directory under the final component
	 * of @p path.  If it cannot be opened it is fetched with wget, made
	 * read-only and opened again.  Any failure along the way is fatal.
	 *
	 * NOTE(review): @p path is pasted into a shell command unquoted, so it must
	 * only ever be a trusted literal.
	 */
	static FILE *open_unicode_test(const char *path) {
	  const char *base;
	  FILE *fp;
	  char buffer[1024];
	  int w;

	  /* base = final path component */
	  if((base = strrchr(path, '/')))
	    ++base;
	  else
	    base = path;
	  if(!(fp = fopen(base, "r"))) {
	    snprintf(buffer, sizeof buffer,
	             "wget http://www.unicode.org/Public/5.0.0/ucd/%s", path);
	    if((w = system(buffer)))
	      fatal(0, "%s: %s", buffer, wstat(w));
	    if(chmod(base, 0444) < 0)
	      fatal(errno, "chmod %s", base);
	    if(!(fp = fopen(base, "r")))
	      fatal(errno, "%s", base);
	  }
	  return fp;
	}
	694
	695	/** @brief Run breaking tests for utf32_grapheme_boundary() etc */
	696	static void breaktest(const char *path,
	697	int (breakfn)(const uint32_t , size_t, size_t)) {
	698	FILE *fp = open_unicode_test(path);
	699	int lineno = 0;
	700	char l, lp;
	701	size_t bn, n;
	702	char break_allowed[1024];
	703	uint32_t buffer[1024];
	704
	705	while(!inputline(path, fp, &l, '\n')) {
	706	++lineno;
	707	if(l[0] == '#') continue;
	708	bn = 0;
	709	lp = l;
	710	while(*lp) {
	711	if(lp == ' ' \|\| lp == '\t') {
	712	++lp;
	713	continue;
	714	}
	715	if(*lp == '#')
	716	break;
	717	if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0xB7) {
	718	/* 00F7 DIVISION SIGN */
	719	break_allowed[bn] = 1;
	720	lp += 2;
	721	continue;
	722	}
	723	if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0x97) {
	724	/* 00D7 MULTIPLICATION SIGN */
	725	break_allowed[bn] = 0;
	726	lp += 2;
	727	continue;
	728	}
	729	if(isxdigit((unsigned char)*lp)) {
	730	buffer[bn++] = strtoul(lp, &lp, 16);
	731	continue;
	732	}
	733	fatal(0, "%s:%d: evil line: %s", path, lineno, l);
	734	}
	735	for(n = 0; n <= bn; ++n) {
	736	if(breakfn(buffer, bn, n) != break_allowed[n]) {
	737	fprintf(stderr,
	738	"%s:%d: offset %zu: mismatch\n"
	739	"%s\n"
	740	"\n",
	741	path, lineno, n, l);
	742	count_error();
	743	}
	744	++tests;
	745	}
	746	xfree(l);
	747	}
	748	fclose(fp);
	749	}
	750
	751	/** @brief Tests for @ref lib/unicode.h */
	752	static void test_unicode(void) {
	753	FILE *fp;
	754	int lineno = 0;
	755	char l, lp;
	756	uint32_t buffer[1024];
	757	uint32_t c[6], NFD_c[6], NFKD_c[6], NFC_c[6], NFKC_c[6]; / 1-indexed */
	758	int cn, bn;
	759
	760	fprintf(stderr, "test_unicode\n");
	761	fp = open_unicode_test("NormalizationTest.txt");
	762	while(!inputline("NormalizationTest.txt", fp, &l, '\n')) {
	763	++lineno;
	764	if(l == '#' \|\| l == '@')
	765	continue;
	766	bn = 0;
	767	cn = 1;
	768	lp = l;
	769	c[cn++] = &buffer[bn];
	770	while(lp && lp != '#') {
	771	if(*lp == ' ') {
	772	++lp;
	773	continue;
	774	}
	775	if(*lp == ';') {
	776	buffer[bn++] = 0;
	777	if(cn == 6)
	778	break;
	779	c[cn++] = &buffer[bn];
	780	++lp;
	781	continue;
	782	}
	783	buffer[bn++] = strtoul(lp, &lp, 16);
	784	}
	785	buffer[bn] = 0;
	786	assert(cn == 6);
	787	for(cn = 1; cn <= 5; ++cn) {
	788	NFD_c[cn] = utf32_decompose_canon(c[cn], utf32_len(c[cn]), 0);
	789	NFKD_c[cn] = utf32_decompose_compat(c[cn], utf32_len(c[cn]), 0);
	790	NFC_c[cn] = utf32_compose_canon(c[cn], utf32_len(c[cn]), 0);
	791	NFKC_c[cn] = utf32_compose_compat(c[cn], utf32_len(c[cn]), 0);
	792	}
	793	#define unt_check(T, A, B) do { \
	794	++tests; \
	795	if(utf32_cmp(c[A], T##_c[B])) { \
	796	fprintf(stderr, \
	797	"NormalizationTest.txt:%d: c%d != "#T"(c%d)\n", \
	798	lineno, A, B); \
	799	fprintf(stderr, " c%d:%s\n", \
	800	A, format_utf32(c[A])); \
	801	fprintf(stderr, " c%d:%s\n", \
	802	B, format_utf32(c[B])); \
	803	fprintf(stderr, "%4s(c%d):%s\n", \
	804	#T, B, format_utf32(T##_c[B])); \
	805	count_error(); \
	806	} \
	807	} while(0)
	808	unt_check(NFD, 3, 1);
	809	unt_check(NFD, 3, 2);
	810	unt_check(NFD, 3, 3);
	811	unt_check(NFD, 5, 4);
	812	unt_check(NFD, 5, 5);
	813	unt_check(NFKD, 5, 1);
	814	unt_check(NFKD, 5, 2);
	815	unt_check(NFKD, 5, 3);
	816	unt_check(NFKD, 5, 4);
	817	unt_check(NFKD, 5, 5);
	818	unt_check(NFC, 2, 1);
	819	unt_check(NFC, 2, 2);
	820	unt_check(NFC, 2, 3);
	821	unt_check(NFC, 4, 4);
	822	unt_check(NFC, 4, 5);
	823	unt_check(NFKC, 4, 1);
	824	unt_check(NFKC, 4, 2);
	825	unt_check(NFKC, 4, 3);
	826	unt_check(NFKC, 4, 4);
	827	unt_check(NFKC, 4, 5);
	828	for(cn = 1; cn <= 5; ++cn) {
	829	xfree(NFD_c[cn]);
	830	xfree(NFKD_c[cn]);
	831	}
	832	xfree(l);
	833	}
	834	fclose(fp);
	835	breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_grapheme_boundary);
	836	breaktest("auxiliary/WordBreakTest.txt", utf32_is_word_boundary);
	837	insist(utf32_combining_class(0x40000) == 0);
	838	insist(utf32_combining_class(0xE0000) == 0);
	839	}
	840
	/** @brief Tests for find_signal()
	 *
	 * Five well-known signal names must map to their numbers and an unknown
	 * name must yield -1.
	 */
	static void test_signame(void) {
	  fprintf(stderr, "test_signame\n");

	  /* known names */
	  insist(find_signal("SIGTERM") == SIGTERM);
	  insist(find_signal("SIGHUP") == SIGHUP);
	  insist(find_signal("SIGINT") == SIGINT);
	  insist(find_signal("SIGQUIT") == SIGQUIT);
	  insist(find_signal("SIGKILL") == SIGKILL);

	  /* unknown name */
	  insist(find_signal("SIGYOURMUM") == -1);
	}
	850
	851	static void test_cache(void) {
	852	const struct cache_type t1 = { 1 }, t2 = { 10 };
	853	const char v11[] = "spong", v12[] = "wibble", v2[] = "blat";
	854	fprintf(stderr, "test_cache\n");
	855	cache_put(&t1, "1_1", v11);
	856	cache_put(&t1, "1_2", v12);
	857	cache_put(&t2, "2", v2);
	858	insist(cache_count() == 3);
	859	insist(cache_get(&t2, "2") == v2);
	860	insist(cache_get(&t1, "1_1") == v11);
	861	insist(cache_get(&t1, "1_2") == v12);
	862	insist(cache_get(&t1, "2") == 0);
	863	insist(cache_get(&t2, "1_1") == 0);
	864	insist(cache_get(&t2, "1_2") == 0);
	865	insist(cache_get(&t1, "2") == 0);
	866	insist(cache_get(&t2, "1_1") == 0);
	867	insist(cache_get(&t2, "1_2") == 0);
	868	sleep(2);
	869	cache_expire();
	870	insist(cache_count() == 1);
	871	insist(cache_get(&t1, "1_1") == 0);
	872	insist(cache_get(&t1, "1_2") == 0);
	873	insist(cache_get(&t2, "2") == v2);
	874	cache_clean(0);
	875	insist(cache_count() == 0);
	876	insist(cache_get(&t2, "2") == 0);
	877	}
	878
	/** @brief Tests for d_dirname() and extension() */
	static void test_filepart(void) {
	  fprintf(stderr, "test_filepart\n");

	  /* directory part */
	  check_string(d_dirname("/"), "/");
	  check_string(d_dirname("/spong"), "/");
	  check_string(d_dirname("/foo/bar"), "/foo");
	  check_string(d_dirname("./bar"), ".");
	  check_string(d_dirname("."), ".");
	  check_string(d_dirname(".."), ".");
	  check_string(d_dirname("../blat"), "..");
	  check_string(d_dirname("wibble"), ".");

	  /* filename extension */
	  check_string(extension("foo.c"), ".c");
	  check_string(extension(".c"), ".c");
	  check_string(extension("."), ".");
	  check_string(extension("foo"), "");
	  check_string(extension("./foo"), "");
	  check_string(extension("./foo.c"), ".c");
	}
	896
	897	static void test_selection(void) {
	898	hash *h;
	899	fprintf(stderr, "test_selection\n");
	900	insist((h = selection_new()) != 0);
	901	selection_set(h, "one", 1);
	902	selection_set(h, "two", 1);
	903	selection_set(h, "three", 0);
	904	selection_set(h, "four", 1);
	905	insist(selection_selected(h, "one") == 1);
	906	insist(selection_selected(h, "two") == 1);
	907	insist(selection_selected(h, "three") == 0);
	908	insist(selection_selected(h, "four") == 1);
	909	insist(selection_selected(h, "five") == 0);
	910	insist(hash_count(h) == 3);
	911	selection_flip(h, "one");
	912	selection_flip(h, "three");
	913	insist(selection_selected(h, "one") == 0);
	914	insist(selection_selected(h, "three") == 1);
	915	insist(hash_count(h) == 3);
	916	selection_live(h, "one");
	917	selection_live(h, "two");
	918	selection_live(h, "three");
	919	selection_cleanup(h);
	920	insist(selection_selected(h, "one") == 0);
	921	insist(selection_selected(h, "two") == 1);
	922	insist(selection_selected(h, "three") == 1);
	923	insist(selection_selected(h, "four") == 0);
	924	insist(selection_selected(h, "five") == 0);
	925	insist(hash_count(h) == 2);
	926	selection_empty(h);
	927	insist(selection_selected(h, "one") == 0);
	928	insist(selection_selected(h, "two") == 0);
	929	insist(selection_selected(h, "three") == 0);
	930	insist(selection_selected(h, "four") == 0);
	931	insist(selection_selected(h, "five") == 0);
	932	insist(hash_count(h) == 0);
	933	}
	934
	/** @brief Tests for wstat()
	 *
	 * Forks one child that exits with status 1 and one that sends itself
	 * SIGTERM, and checks the description wstat() gives of each wait status.
	 */
	static void test_wstat(void) {
	  pid_t pid;
	  int w;

	  fprintf(stderr, "test_wstat\n");

	  /* child that exits normally */
	  pid = xfork();
	  if(pid == 0)
	    _exit(1);
	  while(waitpid(pid, &w, 0) < 0 && errno == EINTR)
	    ;
	  check_string(wstat(w), "exited with status 1");

	  /* child that is killed by a signal; the _exit() is only a fallback */
	  pid = xfork();
	  if(pid == 0) {
	    kill(getpid(), SIGTERM);
	    _exit(-1);
	  }
	  while(waitpid(pid, &w, 0) < 0 && errno == EINTR)
	    ;
	  check_string_prefix(wstat(w), "terminated by signal 15");
	}
	954
	955	static void test_kvp(void) {
	956	struct kvp *k;
	957	size_t n;
	958
	959	fprintf(stderr, "test_kvp\n");
	960	/* decoding */
	961	#define KVP_URLDECODE(S) kvp_urldecode((S), strlen(S))
	962	insist(KVP_URLDECODE("=%zz") == 0);
	963	insist(KVP_URLDECODE("=%0") == 0);
	964	insist(KVP_URLDECODE("=%0z") == 0);
	965	insist(KVP_URLDECODE("=%%") == 0);
	966	insist(KVP_URLDECODE("==%") == 0);
	967	insist(KVP_URLDECODE("wibble") == 0);
	968	insist(KVP_URLDECODE("") == 0);
	969	insist(KVP_URLDECODE("wibble&") == 0);
	970	insist((k = KVP_URLDECODE("one=bl%61t+foo")) != 0);
	971	check_string(kvp_get(k, "one"), "blat foo");
	972	insist(kvp_get(k, "ONE") == 0);
	973	insist(k->next == 0);
	974	insist((k = KVP_URLDECODE("wibble=splat&bar=spong")) != 0);
	975	check_string(kvp_get(k, "wibble"), "splat");
	976	check_string(kvp_get(k, "bar"), "spong");
	977	insist(kvp_get(k, "ONE") == 0);
	978	insist(k->next->next == 0);
	979	/* encoding */
	980	insist(kvp_set(&k, "bar", "spong") == 0);
	981	insist(kvp_set(&k, "bar", "foo") == 1);
	982	insist(kvp_set(&k, "zog", "%") == 1);
	983	insist(kvp_set(&k, "wibble", 0) == 1);
	984	insist(kvp_set(&k, "wibble", 0) == 0);
	985	check_string(kvp_urlencode(k, 0),
	986	"bar=foo&zog=%25");
	987	check_string(kvp_urlencode(k, &n),
	988	"bar=foo&zog=%25");
	989	insist(n == strlen("bar=foo&zog=%25"));
	990	check_string(urlencodestring("abc% +\n"),
	991	"abc%25%20%2b%0a");
	992	}
	993
	994	static void test_sink(void) {
	995	struct sink *s;
	996	struct dynstr d[1];
	997	FILE *fp;
	998	char *l;
	999
	1000	fprintf(stderr, "test_sink\n");
	1001
	1002	fp = tmpfile();
	1003	assert(fp != 0);
	1004	s = sink_stdio("tmpfile", fp);
	1005	insist(sink_printf(s, "test: %d\n", 999) == 10);
	1006	insist(sink_printf(s, "wibble: %s\n", "foobar") == 15);
	1007	rewind(fp);
	1008	insist(inputline("tmpfile", fp, &l, '\n') == 0);
	1009	check_string(l, "test: 999");
	1010	insist(inputline("tmpfile", fp, &l, '\n') == 0);
	1011	check_string(l, "wibble: foobar");
	1012	insist(inputline("tmpfile", fp, &l, '\n') == -1);
	1013
	1014	dynstr_init(d);
	1015	s = sink_dynstr(d);
	1016	insist(sink_printf(s, "test: %d\n", 999) == 10);
	1017	insist(sink_printf(s, "wibble: %s\n", "foobar") == 15);
	1018	dynstr_terminate(d);
	1019	check_string(d->vec, "test: 999\nwibble: foobar\n");
	1020	}
	1021
	1022	int main(void) {
	1023	fail_first = !!getenv("FAIL_FIRST");
	1024	insist('\n' == 0x0A);
	1025	insist('\r' == 0x0D);
	1026	insist(' ' == 0x20);
	1027	insist('0' == 0x30);
	1028	insist('9' == 0x39);
	1029	insist('A' == 0x41);
	1030	insist('Z' == 0x5A);
	1031	insist('a' == 0x61);
	1032	insist('z' == 0x7A);
	1033	/* addr.c */
	1034	/* asprintf.c */
	1035	/* authhash.c */
	1036	/* basen.c */
	1037	/* charset.c */
	1038	/* client.c */
	1039	/* configuration.c */
	1040	/* event.c */
	1041	/* filepart.c */
	1042	test_filepart();
	1043	/* fprintf.c */
	1044	/* heap.c */
	1045	test_heap();
	1046	/* hex.c */
	1047	test_hex();
	1048	/* inputline.c */
	1049	/* kvp.c */
	1050	test_kvp();
	1051	/* log.c */
	1052	/* mem.c */
	1053	/* mime.c */
	1054	test_mime();
	1055	/* mixer.c */
	1056	/* plugin.c */
	1057	/* printf.c */
	1058	/* queue.c */
	1059	/* sink.c */
	1060	test_sink();
	1061	/* snprintf.c */
	1062	/* split.c */
	1063	/* syscalls.c */
	1064	/* table.c */
	1065	/* unicode.c */
	1066	test_unicode();
	1067	/* utf8.c */
	1068	test_utf8();
	1069	/* vector.c */
	1070	/* words.c */
	1071	test_casefold();
	1072	test_words();
	1073	/* wstat.c */
	1074	test_wstat();
	1075	/* signame.c */
	1076	test_signame();
	1077	/* cache.c */
	1078	test_cache();
	1079	/* selection.c */
	1080	test_selection();
	1081	fprintf(stderr, "%d errors out of %d tests\n", errors, tests);
	1082	return !!errors;
	1083	}
	1084
	1085	/*
	1086	Local Variables:
	1087	c-basic-offset:2
	1088	comment-column:40
	1089	fill-column:79
	1090	indent-tabs-mode:nil
	1091	End:
	1092	*/