chiark - git - mdw - disorder/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* This file is part of DisOrder.
	3	* Copyright (C) 2005, 2007 Richard Kettlewell
	4	*
	5	* This program is free software; you can redistribute it and/or modify
	6	* it under the terms of the GNU General Public License as published by
	7	* the Free Software Foundation; either version 2 of the License, or
	8	* (at your option) any later version.
	9	*
	10	* This program is distributed in the hope that it will be useful, but
	11	* WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	* General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU General Public License
	16	* along with this program; if not, write to the Free Software
	17	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
	18	* USA
	19	*/
	20	/** @file lib/test.c @brief Library tests */
	21
	22	#include <config.h>
	23	#include "types.h"
	24
	25	#include <stdio.h>
	26	#include <string.h>
	27	#include <stdlib.h>
	28	#include <errno.h>
	29	#include <ctype.h>
	30	#include <assert.h>
	31	#include <sys/types.h>
	32	#include <sys/stat.h>
	33
	34	#include "utf8.h"
	35	#include "mem.h"
	36	#include "log.h"
	37	#include "vector.h"
	38	#include "charset.h"
	39	#include "mime.h"
	40	#include "hex.h"
	41	#include "words.h"
	42	#include "heap.h"
	43	#include "unicode.h"
	44	#include "inputline.h"
	45
	46	static int tests, errors;
	47
	/** @brief Record one test and report a failure if @p expr is zero
	 *
	 * Every use increments the @c tests counter.  If @p expr evaluates to zero
	 * the @c errors counter is incremented as well and a diagnostic naming the
	 * source location and the text of the expression is written to stderr.
	 */
	#define insist(expr) do { \
	  if(!(expr)) { \
	    ++errors; \
	    fprintf(stderr, "%s:%d: error checking %s\n", \
	            __FILE__, __LINE__, #expr); \
	  } \
	  ++tests; \
	} while(0)
	57
	58	static const char format(const char s) {
	59	struct dynstr d;
	60	int c;
	61	char buf[10];
	62
	63	dynstr_init(&d);
	64	while((c = (unsigned char)*s++)) {
	65	if(c >= ' ' && c <= '~')
	66	dynstr_append(&d, c);
	67	else {
	68	sprintf(buf, "\\x%02X", (unsigned)c);
	69	dynstr_append_string(&d, buf);
	70	}
	71	}
	72	dynstr_terminate(&d);
	73	return d.vec;
	74	}
	75
	76	static const char format_utf32(const uint32_t s) {
	77	struct dynstr d;
	78	uint32_t c;
	79	char buf[64];
	80
	81	dynstr_init(&d);
	82	while((c = *s++)) {
	83	if(c >= 32 && c <= 127)
	84	dynstr_append(&d, c);
	85	else {
	86	sprintf(buf, "\\x%04lX", (unsigned long)c);
	87	dynstr_append_string(&d, buf);
	88	}
	89	}
	90	dynstr_terminate(&d);
	91	return d.vec;
	92	}
	93
	/** @brief Check that the string @p GOT equals the string @p WANT
	 *
	 * Every use increments the @c tests counter.  The @c errors counter is
	 * incremented, with a diagnostic on stderr, if @p GOT evaluates to a null
	 * pointer or if the two strings differ.  In the latter case both strings
	 * are printed via format().
	 *
	 * @p GOT and @p WANT are each evaluated exactly once.
	 */
	#define check_string(GOT, WANT) do { \
	  const char *check_got = GOT; \
	  const char *check_want = WANT; \
	  \
	  /* The null test must be on the result, not the expectation: it is */ \
	  /* the result that the message describes and that strcmp() reads. */ \
	  if(check_got == 0) { \
	    fprintf(stderr, "%s:%d: %s returned 0\n", \
	            __FILE__, __LINE__, #GOT); \
	    ++errors; \
	  } else if(strcmp(check_want, check_got)) { \
	    fprintf(stderr, "%s:%d: %s returned:\n%s\nexpected:\n%s\n", \
	            __FILE__, __LINE__, #GOT, \
	            format(check_got), format(check_want)); \
	    ++errors; \
	  } \
	  ++tests; \
	} while(0)
	109
	110	static uint32_t ucs4parse(const char s) {
	111	struct dynstr_ucs4 d;
	112	char *e;
	113
	114	dynstr_ucs4_init(&d);
	115	while(*s) {
	116	errno = 0;
	117	dynstr_ucs4_append(&d, strtoul(s, &e, 0));
	118	if(errno) fatal(errno, "strtoul (%s)", s);
	119	s = e;
	120	}
	121	dynstr_ucs4_terminate(&d);
	122	return d.vec;
	123	}
	124
	/** @brief Tests for validutf8(), utf82ucs4() and ucs42utf8()
	 *
	 * Valid sequences are round-tripped through UCS-4 and compared with the
	 * expected code points.  Invalid sequences, chosen around the boundaries of
	 * the RFC3629 grammar, must be rejected by validutf8().
	 */
	static void test_utf8(void) {
	  /* Test validutf8, convert to UCS-4, check the answer is right,
	   * convert back to UTF-8, check we got to where we started.
	   *
	   * A failed conversion is reported as a failure of the dependent checks
	   * rather than being passed on as a null pointer. */
	#define U8(CHARS, WORDS) do { \
	  uint32_t *w = ucs4parse(WORDS); \
	  uint32_t *ucs; \
	  char *u8; \
	  \
	  insist(validutf8(CHARS)); \
	  ucs = utf82ucs4(CHARS); \
	  insist(ucs != 0); \
	  insist(ucs != 0 && !ucs4cmp(w, ucs)); \
	  u8 = ucs ? ucs42utf8(ucs) : 0; \
	  insist(u8 != 0); \
	  insist(u8 != 0 && !strcmp(u8, CHARS)); \
	} while(0)

	  fprintf(stderr, "test_utf8\n");

	  /* empty string */

	  U8("", "");

	  /* ASCII characters */

	  U8(" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
	     "0x20 0x21 0x22 0x23 0x24 0x25 0x26 0x27 0x28 0x29 0x2a 0x2b 0x2c 0x2d "
	     "0x2e 0x2f 0x30 0x31 0x32 0x33 0x34 0x35 0x36 0x37 0x38 0x39 0x3a "
	     "0x3b 0x3c 0x3d 0x3e 0x3f 0x40 0x41 0x42 0x43 0x44 0x45 0x46 0x47 "
	     "0x48 0x49 0x4a 0x4b 0x4c 0x4d 0x4e 0x4f 0x50 0x51 0x52 0x53 0x54 "
	     "0x55 0x56 0x57 0x58 0x59 0x5a 0x5b 0x5c 0x5d 0x5e 0x5f 0x60 0x61 "
	     "0x62 0x63 0x64 0x65 0x66 0x67 0x68 0x69 0x6a 0x6b 0x6c 0x6d 0x6e "
	     "0x6f 0x70 0x71 0x72 0x73 0x74 0x75 0x76 0x77 0x78 0x79 0x7a 0x7b "
	     "0x7c 0x7d 0x7e");
	  U8("\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\177",
	     "0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xa 0xb 0xc 0xd 0xe 0xf 0x10 "
	     "0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d "
	     "0x1e 0x1f 0x7f");

	  /* from RFC3629 */

	  /* UTF8-2 = %xC2-DF UTF8-tail */
	  insist(!validutf8("\xC0\x80"));
	  insist(!validutf8("\xC1\x80"));
	  insist(!validutf8("\xC2\x7F"));
	  U8("\xC2\x80", "0x80");
	  U8("\xDF\xBF", "0x7FF");
	  insist(!validutf8("\xDF\xC0"));

	  /* UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
	   * %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
	   */
	  insist(!validutf8("\xE0\x9F\x80"));
	  U8("\xE0\xA0\x80", "0x800");
	  U8("\xE0\xBF\xBF", "0xFFF");
	  insist(!validutf8("\xE0\xC0\xBF"));

	  insist(!validutf8("\xE1\x80\x7F"));
	  U8("\xE1\x80\x80", "0x1000");
	  U8("\xEC\xBF\xBF", "0xCFFF");
	  insist(!validutf8("\xEC\xC0\xBF"));

	  U8("\xED\x80\x80", "0xD000");
	  U8("\xED\x9F\xBF", "0xD7FF");
	  insist(!validutf8("\xED\xA0\xBF"));

	  insist(!validutf8("\xEE\x7f\x80"));
	  U8("\xEE\x80\x80", "0xE000");
	  U8("\xEF\xBF\xBF", "0xFFFF");
	  insist(!validutf8("\xEF\xC0\xBF"));

	  /* UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
	   * %xF4 %x80-8F 2( UTF8-tail )
	   */
	  insist(!validutf8("\xF0\x8F\x80\x80"));
	  U8("\xF0\x90\x80\x80", "0x10000");
	  U8("\xF0\xBF\xBF\xBF", "0x3FFFF");
	  insist(!validutf8("\xF0\xC0\x80\x80"));

	  insist(!validutf8("\xF1\x80\x80\x7F"));
	  U8("\xF1\x80\x80\x80", "0x40000");
	  U8("\xF3\xBF\xBF\xBF", "0xFFFFF");
	  insist(!validutf8("\xF3\xC0\x80\x80"));

	  insist(!validutf8("\xF4\x80\x80\x7F"));
	  U8("\xF4\x80\x80\x80", "0x100000");
	  U8("\xF4\x8F\xBF\xBF", "0x10FFFF");
	  insist(!validutf8("\xF4\x90\x80\x80"));

	  /* miscellaneous non-UTF-8 rubbish */
	  insist(!validutf8("\x80"));
	  insist(!validutf8("\xBF"));
	  insist(!validutf8("\xC0"));
	  insist(!validutf8("\xC0\x7F"));
	  insist(!validutf8("\xC0\xC0"));
	  insist(!validutf8("\xE0"));
	  insist(!validutf8("\xE0\x7F"));
	  insist(!validutf8("\xE0\xC0"));
	  insist(!validutf8("\xE0\x80"));
	  insist(!validutf8("\xE0\x80\x7f"));
	  insist(!validutf8("\xE0\x80\xC0"));
	  insist(!validutf8("\xF0"));
	  insist(!validutf8("\xF0\x7F"));
	  insist(!validutf8("\xF0\xC0"));
	  insist(!validutf8("\xF0\x80"));
	  insist(!validutf8("\xF0\x80\x7f"));
	  insist(!validutf8("\xF0\x80\xC0"));
	  insist(!validutf8("\xF0\x80\x80\x7f"));
	  insist(!validutf8("\xF0\x80\x80\xC0"));
	  insist(!validutf8("\xF5\x80\x80\x80"));
	  insist(!validutf8("\xF8"));
	#undef U8
	}
	237
	/** @brief Tests for mime_content_type(), mime_qp() and mime_base64()
	 *
	 * mime_parse(), mime_multipart() and mime_rfc2388_content_disposition() are
	 * not yet covered.
	 */
	static void test_mime(void) {
	  char *t, *n, *v;

	  fprintf(stderr, "test_mime\n");

	  /* Each output is tested for null before being compared, so that a parser
	   * failure is counted as an error instead of crashing in strcmp(). */
	  t = n = v = 0;
	  insist(!mime_content_type("text/plain", &t, &n, &v));
	  insist(t != 0 && !strcmp(t, "text/plain"));
	  insist(n == 0);
	  insist(v == 0);

	  t = n = v = 0;
	  insist(!mime_content_type("TEXT ((nested) comment) /plain", &t, &n, &v));
	  insist(t != 0 && !strcmp(t, "text/plain"));
	  insist(n == 0);
	  insist(v == 0);

	  t = n = v = 0;
	  insist(!mime_content_type(" text/plain ; Charset=utf-8", &t, &n, &v));
	  insist(t != 0 && !strcmp(t, "text/plain"));
	  insist(n != 0 && !strcmp(n, "charset"));
	  insist(v != 0 && !strcmp(v, "utf-8"));

	  t = n = v = 0;
	  insist(!mime_content_type("text/plain;charset = ISO-8859-1 ", &t, &n, &v));
	  insist(t != 0 && !strcmp(t, "text/plain"));
	  insist(n != 0 && !strcmp(n, "charset"));
	  insist(v != 0 && !strcmp(v, "ISO-8859-1"));

	  /* XXX mime_parse */
	  /* XXX mime_multipart */
	  /* XXX mime_rfc2388_content_disposition */

	  check_string(mime_qp(""), "");
	  check_string(mime_qp("foobar"), "foobar");
	  check_string(mime_qp("foo=20bar"), "foo bar");
	  check_string(mime_qp("x \r\ny"), "x\r\ny");
	  check_string(mime_qp("x=\r\ny"), "xy");
	  check_string(mime_qp("x= \r\ny"), "xy");
	  check_string(mime_qp("x =\r\ny"), "x y");
	  check_string(mime_qp("x = \r\ny"), "x y");

	  /* from RFC2045 */
	  check_string(mime_qp("Now's the time =\r\n"
	                       "for all folk to come=\r\n"
	                       " to the aid of their country."),
	               "Now's the time for all folk to come to the aid of their country.");

	  check_string(mime_base64(""), "");
	  check_string(mime_base64("BBBB"), "\x04\x10\x41");
	  check_string(mime_base64("////"), "\xFF\xFF\xFF");
	  check_string(mime_base64("//BB"), "\xFF\xF0\x41");
	  check_string(mime_base64("BBBB//BB////"),
	               "\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	  check_string(mime_base64("B B B B / / B B / / / /"),
	               "\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	  check_string(mime_base64("B\r\nBBB.// B-B//~//"),
	               "\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	  check_string(mime_base64("BBBB="),
	               "\x04\x10\x41");
	  check_string(mime_base64("BBBBx="), /* not actually valid base64 */
	               "\x04\x10\x41");
	  check_string(mime_base64("BBBB BB=="),
	               "\x04\x10\x41" "\x04");
	  check_string(mime_base64("BBBB BBB="),
	               "\x04\x10\x41" "\x04\x10");
	}
	305
	/** @brief Tests for unhexdigitq(), hex() and unhex()
	 *
	 * The last two unhex() calls are given malformed input on purpose, so two
	 * error reports are announced on stderr before they are made.
	 */
	static void test_hex(void) {
	  unsigned n;
	  static const unsigned char h[] = { 0x00, 0xFF, 0x80, 0x7F };
	  uint8_t *u;
	  size_t ul;

	  fprintf(stderr, "test_hex\n");

	  /* Everything that is not a hex digit must be rejected */
	  for(n = 0; n <= UCHAR_MAX; ++n) {
	    if(!isxdigit(n))
	      insist(unhexdigitq(n) == -1);
	  }
	  insist(unhexdigitq('0') == 0);
	  insist(unhexdigitq('1') == 1);
	  insist(unhexdigitq('2') == 2);
	  insist(unhexdigitq('3') == 3);
	  insist(unhexdigitq('4') == 4);
	  insist(unhexdigitq('5') == 5);
	  insist(unhexdigitq('6') == 6);
	  insist(unhexdigitq('7') == 7);
	  insist(unhexdigitq('8') == 8);
	  insist(unhexdigitq('9') == 9);
	  insist(unhexdigitq('a') == 10);
	  insist(unhexdigitq('b') == 11);
	  insist(unhexdigitq('c') == 12);
	  insist(unhexdigitq('d') == 13);
	  insist(unhexdigitq('e') == 14);
	  insist(unhexdigitq('f') == 15);
	  insist(unhexdigitq('A') == 10);
	  insist(unhexdigitq('B') == 11);
	  insist(unhexdigitq('C') == 12);
	  insist(unhexdigitq('D') == 13);
	  insist(unhexdigitq('E') == 14);
	  insist(unhexdigitq('F') == 15);
	  check_string(hex(h, sizeof h), "00ff807f");
	  check_string(hex(0, 0), "");
	  /* Test the result pointer first: a failed unhex() must show up as a test
	   * failure, not as memcmp() on a null pointer or a read of an unset ul. */
	  u = unhex("00ff807f", &ul);
	  insist(u != 0 && ul == 4);
	  insist(u != 0 && memcmp(u, h, 4) == 0);
	  u = unhex("00FF807F", &ul);
	  insist(u != 0 && ul == 4);
	  insist(u != 0 && memcmp(u, h, 4) == 0);
	  u = unhex("", &ul);
	  insist(ul == 0);
	  fprintf(stderr, "2 ERROR reports expected {\n");
	  insist(unhex("F", 0) == 0);
	  insist(unhex("az", 0) == 0);
	  fprintf(stderr, "}\n");
	}
	355
	356	static void test_casefold(void) {
	357	uint32_t c, l;
	358	const char input, canon_folded, compat_folded, canon_expected, *compat_expected;
	359
	360	fprintf(stderr, "test_casefold\n");
	361
	362	/* This isn't a very exhaustive test. Unlike for normalization, there don't
	363	* seem to be any public test vectors for these algorithms. */
	364
	365	for(c = 1; c < 256; ++c) {
	366	input = utf32_to_utf8(&c, 1, 0);
	367	canon_folded = utf8_casefold_canon(input, strlen(input), 0);
	368	compat_folded = utf8_casefold_compat(input, strlen(input), 0);
	369	switch(c) {
	370	default:
	371	if((c >= 'A' && c <= 'Z')
	372	\|\| (c >= 0xC0 && c <= 0xDE && c != 0xD7))
	373	l = c ^ 0x20;
	374	else
	375	l = c;
	376	break;
	377	case 0xB5: /* MICRO SIGN */
	378	l = 0x3BC; /* GREEK SMALL LETTER MU */
	379	break;
	380	case 0xDF: /* LATIN SMALL LETTER SHARP S */
	381	insist(!strcmp(canon_folded, "ss"));
	382	insist(!strcmp(compat_folded, "ss"));
	383	l = 0;
	384	break;
	385	}
	386	if(l) {
	387	/* Case-folded data is now normalized */
	388	canon_expected = ucs42utf8(utf32_decompose_canon(&l, 1, 0));
	389	if(strcmp(canon_folded, canon_expected)) {
	390	fprintf(stderr, "%s:%d: canon-casefolding %#lx got '%s', expected '%s'\n",
	391	__FILE__, __LINE__, (unsigned long)c,
	392	format(canon_folded), format(canon_expected));
	393	++errors;
	394	}
	395	++tests;
	396	compat_expected = ucs42utf8(utf32_decompose_compat(&l, 1, 0));
	397	if(strcmp(compat_folded, compat_expected)) {
	398	fprintf(stderr, "%s:%d: compat-casefolding %#lx got '%s', expected '%s'\n",
	399	__FILE__, __LINE__, (unsigned long)c,
	400	format(compat_folded), format(compat_expected));
	401	++errors;
	402	}
	403	++tests;
	404	}
	405	}
	406	check_string(casefold(""), "");
	407	}
	408
	/** @brief Less-than comparison function for integer heap
	 * @param a Left-hand operand
	 * @param b Right-hand operand
	 * @return Nonzero if @p a is strictly less than @p b, otherwise 0
	 */
	static inline int int_lt(int a, int b) {
	  return a < b;
	}
	411
	412	/** @struct iheap
	413	* @brief A heap with @c int elements */
	414	HEAP_TYPE(iheap, int, int_lt);
	415	HEAP_DEFINE(iheap, int, int_lt);
	416
	417	/** @brief Tests for @ref heap.h */
	418	static void test_heap(void) {
	419	struct iheap h[1];
	420	int n;
	421	int last = -1;
	422
	423	fprintf(stderr, "test_heap\n");
	424
	425	iheap_init(h);
	426	for(n = 0; n < 1000; ++n)
	427	iheap_insert(h, random() % 100);
	428	for(n = 0; n < 1000; ++n) {
	429	const int latest = iheap_remove(h);
	430	if(last > latest)
	431	fprintf(stderr, "should have %d <= %d\n", last, latest);
	432	insist(last <= latest);
	433	last = latest;
	434	}
	435	putchar('\n');
	436	}
	437
	438	/** @brief Tests for @ref lib/unicode.h */
	439	static void test_unicode(void) {
	440	FILE *fp;
	441	int lineno = 0;
	442	char l, lp;
	443	uint32_t buffer[1024];
	444	uint32_t c[6], NFD_c[6], NFKD_c[6]; / 1-indexed */
	445	int cn, bn;
	446
	447	fprintf(stderr, "test_unicode\n");
	448	if(!(fp = fopen("NormalizationTest.txt", "r"))) {
	449	system("wget http://www.unicode.org/Public/5.0.0/ucd/NormalizationTest.txt");
	450	chmod("NormalizationTest.txt", 0444);
	451	if(!(fp = fopen("NormalizationTest.txt", "r"))) {
	452	perror("NormalizationTest.txt");
	453	++tests; /* don't know how many... */
	454	++errors;
	455	return;
	456	}
	457	}
	458	while(!inputline("NormalizationTest.txt", fp, &l, '\n')) {
	459	++lineno;
	460	if(l == '#' \|\| l == '@')
	461	continue;
	462	bn = 0;
	463	cn = 1;
	464	lp = l;
	465	c[cn++] = &buffer[bn];
	466	while(lp && lp != '#') {
	467	if(*lp == ' ') {
	468	++lp;
	469	continue;
	470	}
	471	if(*lp == ';') {
	472	buffer[bn++] = 0;
	473	if(cn == 6)
	474	break;
	475	c[cn++] = &buffer[bn];
	476	++lp;
	477	continue;
	478	}
	479	buffer[bn++] = strtoul(lp, &lp, 16);
	480	}
	481	buffer[bn] = 0;
	482	assert(cn == 6);
	483	for(cn = 1; cn <= 5; ++cn) {
	484	NFD_c[cn] = utf32_decompose_canon(c[cn], utf32_len(c[cn]), 0);
	485	NFKD_c[cn] = utf32_decompose_compat(c[cn], utf32_len(c[cn]), 0);
	486	}
	487	#define unt_check(T, A, B) do { \
	488	++tests; \
	489	if(utf32_cmp(c[A], T##_c[B])) { \
	490	fprintf(stderr, "L%d: c%d != "#T"(c%d)\n", lineno, A, B); \
	491	fprintf(stderr, " c%d: %s\n", \
	492	A, format_utf32(c[A])); \
	493	fprintf(stderr, "%4s(c%d): %s\n", \
	494	#T, B, format_utf32(T##_c[B])); \
	495	++errors; \
	496	} \
	497	} while(0)
	498	unt_check(NFD, 3, 1);
	499	unt_check(NFD, 3, 2);
	500	unt_check(NFD, 3, 3);
	501	unt_check(NFD, 5, 4);
	502	unt_check(NFD, 5, 5);
	503	unt_check(NFKD, 5, 1);
	504	unt_check(NFKD, 5, 2);
	505	unt_check(NFKD, 5, 3);
	506	unt_check(NFKD, 5, 4);
	507	unt_check(NFKD, 5, 5);
	508	for(cn = 1; cn <= 5; ++cn) {
	509	xfree(NFD_c[cn]);
	510	xfree(NFKD_c[cn]);
	511	}
	512	xfree(l);
	513	}
	514	}
	515
	516	int main(void) {
	517	insist('\n' == 0x0A);
	518	insist('\r' == 0x0D);
	519	insist(' ' == 0x20);
	520	insist('0' == 0x30);
	521	insist('9' == 0x39);
	522	insist('A' == 0x41);
	523	insist('Z' == 0x5A);
	524	insist('a' == 0x61);
	525	insist('z' == 0x7A);
	526	/* addr.c */
	527	/* asprintf.c */
	528	/* authhash.c */
	529	/* basen.c */
	530	/* charset.c */
	531	/* client.c */
	532	/* configuration.c */
	533	/* event.c */
	534	/* fprintf.c */
	535	/* heap.c */
	536	test_heap();
	537	/* hex.c */
	538	test_hex();
	539	/* inputline.c */
	540	/* kvp.c */
	541	/* log.c */
	542	/* mem.c */
	543	/* mime.c */
	544	test_mime();
	545	/* mixer.c */
	546	/* plugin.c */
	547	/* printf.c */
	548	/* queue.c */
	549	/* sink.c */
	550	/* snprintf.c */
	551	/* split.c */
	552	/* syscalls.c */
	553	/* table.c */
	554	/* unicode.c */
	555	test_unicode();
	556	/* utf8.c */
	557	test_utf8();
	558	/* vector.c */
	559	/* words.c */
	560	test_casefold();
	561	/* XXX words() */
	562	/* wstat.c */
	563	fprintf(stderr, "%d errors out of %d tests\n", errors, tests);
	564	return !!errors;
	565	}
	566
	567	/*
	568	Local Variables:
	569	c-basic-offset:2
	570	comment-column:40
	571	fill-column:79
	572	indent-tabs-mode:nil
	573	End:
	574	*/