X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~mdw/git/disorder/blobdiff_plain/22b9fa74de8e80471a5033ea067d3b360930b91d..5e49fa7f5c838835560a9ee8252174138da28230:/lib/test.c

diff --git a/lib/test.c b/lib/test.c
index ed90183..bef8d79 100644
--- a/lib/test.c
+++ b/lib/test.c
@@ -28,6 +28,12 @@
 #include <errno.h>
 #include <ctype.h>
 #include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <stddef.h>
 
 #include "utf8.h"
 #include "mem.h"
@@ -36,15 +42,35 @@
 #include "charset.h"
 #include "mime.h"
 #include "hex.h"
-#include "words.h"
 #include "heap.h"
+#include "unicode.h"
+#include "inputline.h"
+#include "wstat.h"
+#include "signame.h"
+#include "cache.h"
+#include "filepart.h"
+#include "hash.h"
+#include "selection.h"
+#include "syscalls.h"
+#include "kvp.h"
+#include "sink.h"
+#include "printf.h"
+#include "basen.h"
+#include "split.h"
 
 static int tests, errors;
+static int fail_first;
+
+static void count_error(void) { /* record one test failure */
+  ++errors;
+  if(fail_first)                /* set in main() from $FAIL_FIRST */
+    abort();                    /* stop at the very first failure */
+}
 
 /** @brief Checks that @p expr is nonzero */
 #define insist(expr) do {				\
   if(!(expr)) {						\
-    ++errors;						\
+    count_error();						\
     fprintf(stderr, "%s:%d: error checking %s\n",	\
             __FILE__, __LINE__, #expr);			\
   }							\
@@ -69,22 +95,62 @@ static const char *format(const char *s) {
   return d.vec;
 }
 
-#define check_string(GOT, WANT) do {				\
-  const char *g = GOT;						\
-  const char *w = WANT;						\
-								\
-  if(w == 0) {							\
-    fprintf(stderr, "%s:%d: %s returned 0\n",			\
-            __FILE__, __LINE__, #GOT);				\
-    ++errors;							\
-  } else if(strcmp(w, g)) {					\
-    fprintf(stderr, "%s:%d: %s returned:\n%s\nexpected:\n%s\n",	\
-	    __FILE__, __LINE__, #GOT, format(g), format(w));	\
-    ++errors;							\
-  }								\
-  ++tests;							\
+static const char *format_utf32(const uint32_t *s) {
+  struct dynstr d;
+  uint32_t c;
+  char buf[64];
+  /* Render each code point of 0-terminated @p s as " XXXX" (hex) */
+  dynstr_init(&d);
+  while((c = *s++)) {
+    /* %lX takes an unsigned long; bound the write to buf */
+    snprintf(buf, sizeof buf, " %04lX", (unsigned long)c);
+    dynstr_append_string(&d, buf);
+  }
+  dynstr_terminate(&d);
+  return d.vec;
+
+#define check_string(GOT, WANT) do {                                    \
+  const char *got = GOT;                                                \
+  const char *want = WANT;                                              \
+  /* Checks GOT equals WANT; a null pointer is reported, not compared */ \
+  if(got == 0 || want == 0) {                                           \
+    fprintf(stderr, "%s:%d: %s returned 0\n",                           \
+            __FILE__, __LINE__, #GOT);                                  \
+    count_error();                                                      \
+  } else if(strcmp(want, got)) {                                        \
+    fprintf(stderr, "%s:%d: %s returned:\n%s\nexpected:\n%s\n",         \
+            __FILE__, __LINE__, #GOT, format(got), format(want));       \
+    count_error();                                                      \
+  }                                                                     \
+  ++tests;                                                              \
 } while(0)
 
+#define check_string_prefix(GOT, WANT) do {                             \
+  const char *got = GOT;                                                \
+  const char *want = WANT;                                              \
+  /* Checks GOT starts with WANT; a null pointer is reported instead */ \
+  if(got == 0 || want == 0) {                                           \
+    fprintf(stderr, "%s:%d: %s returned 0\n",                           \
+            __FILE__, __LINE__, #GOT);                                  \
+    count_error();                                                      \
+  } else if(strncmp(want, got, strlen(want))) {                         \
+    fprintf(stderr, "%s:%d: %s returned:\n%s\nexpected:\n%s...\n",      \
+            __FILE__, __LINE__, #GOT, format(got), format(want));       \
+    count_error();                                                      \
+  }                                                                     \
+  ++tests;                                                              \
+ } while(0)
+
+#define check_integer(GOT, WANT) do { /* checks GOT == WANT */  \
+  const intmax_t got = GOT, want = WANT; /* compared as intmax_t */ \
+  if(got != want) {                                             \
+    fprintf(stderr, "%s:%d: %s returned: %jd  expected: %jd\n", \
+            __FILE__, __LINE__, #GOT, got, want);               \
+    count_error();                                              \
+  }                                                             \
+  ++tests; /* counted whether or not the check passed */        \
+} while(0)
+
 static uint32_t *ucs4parse(const char *s) {
   struct dynstr_ucs4 d;
   char *e;
@@ -109,15 +175,16 @@ static void test_utf8(void) {
   char *u8;					\
 						\
   insist(validutf8(CHARS));			\
-  ucs = utf82ucs4(CHARS);			\
+  ucs = utf8_to_utf32(CHARS, strlen(CHARS), 0); \
   insist(ucs != 0);				\
-  insist(!ucs4cmp(w, ucs));			\
-  u8 = ucs42utf8(ucs);				\
+  insist(!utf32_cmp(w, ucs));			\
+  u8 = utf32_to_utf8(ucs, utf32_len(ucs), 0);   \
   insist(u8 != 0);				\
-  insist(!strcmp(u8, CHARS));			\
+  check_string(u8, CHARS);			\
 } while(0)
 
   fprintf(stderr, "test_utf8\n");
+#define validutf8(S) utf8_valid((S), strlen(S))
 
   /* empty string */
 
@@ -188,6 +255,7 @@ static void test_utf8(void) {
   U8("\xF4\x80\x80\x80", "0x100000");
   U8("\xF4\x8F\xBF\xBF", "0x10FFFF");
   insist(!validutf8("\xF4\x90\x80\x80"));
+  insist(!validutf8("\xF4\x80\xFF\x80"));
 
   /* miscellaneous non-UTF-8 rubbish */
   insist(!validutf8("\x80"));
@@ -213,38 +281,163 @@ static void test_utf8(void) {
   insist(!validutf8("\xF8"));
 }
 
+static int test_multipart_callback(const char *s, void *u) {
+  struct vector *const collected = u;   /* vector supplied by the caller */
+
+  vector_append(collected, (char *)s);  /* record this part unmodified */
+  return 0;                             /* always reports success */
+}
+
 static void test_mime(void) {
   char *t, *n, *v;
+  struct vector parts[1];
 
   fprintf(stderr, "test_mime\n");
 
   t = n = v = 0;
   insist(!mime_content_type("text/plain", &t, &n, &v));
-  insist(!strcmp(t, "text/plain"));
+  check_string(t, "text/plain");
   insist(n == 0);
   insist(v == 0);
 
+  insist(mime_content_type("TEXT ((broken) comment", &t, &n, &v) < 0);
+  insist(mime_content_type("TEXT ((broken) comment\\", &t, &n, &v) < 0);
+  
   t = n = v = 0;
-  insist(!mime_content_type("TEXT ((nested) comment) /plain", &t, &n, &v));
-  insist(!strcmp(t, "text/plain"));
+  insist(!mime_content_type("TEXT ((nested)\\ comment) /plain", &t, &n, &v));
+  check_string(t, "text/plain");
   insist(n == 0);
   insist(v == 0);
 
   t = n = v = 0;
-  insist(!mime_content_type(" text/plain ; Charset=utf-8", &t, &n, &v));
-  insist(!strcmp(t, "text/plain"));
-  insist(!strcmp(n, "charset"));
-  insist(!strcmp(v, "utf-8"));
+  insist(!mime_content_type(" text/plain ; Charset=\"utf-\\8\"", &t, &n, &v));
+  check_string(t, "text/plain");
+  check_string(n, "charset");
+  check_string(v, "utf-8");
 
   t = n = v = 0;
   insist(!mime_content_type("text/plain;charset = ISO-8859-1 ", &t, &n, &v));
-  insist(!strcmp(t, "text/plain"));
-  insist(!strcmp(n, "charset"));
-  insist(!strcmp(v, "ISO-8859-1"));
+  check_string(t, "text/plain");
+  check_string(n, "charset");
+  check_string(v, "ISO-8859-1");
+
+  t = n = v = 0;
+  insist(!mime_rfc2388_content_disposition("form-data; name=\"field1\"", &t, &n, &v));
+  check_string(t, "form-data");
+  check_string(n, "name");
+  check_string(v, "field1");
+
+  insist(!mime_rfc2388_content_disposition("inline", &t, &n, &v));
+  check_string(t, "inline");
+  insist(n == 0);
+  insist(v == 0);
+
+  /* Current versions of the code only understand a single arg to these
+   * headers.  This is a bug at the level they work at but suffices for
+   * DisOrder's current purposes. */
+
+  insist(!mime_rfc2388_content_disposition(
+              "attachment; filename=genome.jpeg;\n"
+              "modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\"",
+         &t, &n, &v));
+  check_string(t, "attachment");
+  check_string(n, "filename");
+  check_string(v, "genome.jpeg");
 
+  vector_init(parts);
+  insist(mime_multipart("--outer\r\n"
+                        "Content-Type: text/plain\r\n"
+                        "Content-Disposition: inline\r\n"
+                        "Content-Description: text-part-1\r\n"
+                        "\r\n"
+                        "Some text goes here\r\n"
+                        "\r\n"
+                        "--outer\r\n"
+                        "Content-Type: multipart/mixed; boundary=inner\r\n"
+                        "Content-Disposition: attachment\r\n"
+                        "Content-Description: multipart-2\r\n"
+                        "\r\n"
+                        "--inner\r\n"
+                        "Content-Type: text/plain\r\n"
+                        "Content-Disposition: inline\r\n"
+                        "Content-Description: text-part-2\r\n"
+                        "\r\n"
+                        "Some more text here.\r\n"
+                        "\r\n"
+                        "--inner\r\n"
+                        "Content-Type: image/jpeg\r\n"
+                        "Content-Disposition: attachment\r\n"
+                        "Content-Description: jpeg-1\r\n"
+                        "\r\n"
+                        "<jpeg data>\r\n"
+                        "--inner--\r\n"
+                        "--outer--\r\n",
+                        test_multipart_callback,
+                        "outer",
+                        parts) == 0);
+  check_integer(parts->nvec, 2);
+  check_string(parts->vec[0],
+               "Content-Type: text/plain\r\n"
+               "Content-Disposition: inline\r\n"
+               "Content-Description: text-part-1\r\n"
+               "\r\n"
+               "Some text goes here\r\n");
+  check_string(parts->vec[1],
+               "Content-Type: multipart/mixed; boundary=inner\r\n"
+               "Content-Disposition: attachment\r\n"
+               "Content-Description: multipart-2\r\n"
+               "\r\n"
+               "--inner\r\n"
+               "Content-Type: text/plain\r\n"
+               "Content-Disposition: inline\r\n"
+               "Content-Description: text-part-2\r\n"
+               "\r\n"
+               "Some more text here.\r\n"
+               "\r\n"
+               "--inner\r\n"
+               "Content-Type: image/jpeg\r\n"
+               "Content-Disposition: attachment\r\n"
+               "Content-Description: jpeg-1\r\n"
+               "\r\n"
+               "<jpeg data>\r\n"
+               "--inner--");
+  /* No trailing CRLF is _correct_ - see RFC2046 5.1.1 note regarding CRLF
+   * preceding the boundary delimiter line.  An implication of this is that we
+   * must cope with partial lines at the end of the input when recursively
+   * decomposing a multipart message. */
+  vector_init(parts);
+  insist(mime_multipart("--inner\r\n"
+                        "Content-Type: text/plain\r\n"
+                        "Content-Disposition: inline\r\n"
+                        "Content-Description: text-part-2\r\n"
+                        "\r\n"
+                        "Some more text here.\r\n"
+                        "\r\n"
+                        "--inner\r\n"
+                        "Content-Type: image/jpeg\r\n"
+                        "Content-Disposition: attachment\r\n"
+                        "Content-Description: jpeg-1\r\n"
+                        "\r\n"
+                        "<jpeg data>\r\n"
+                        "--inner--",
+                        test_multipart_callback,
+                        "inner",
+                        parts) == 0);
+  check_integer(parts->nvec, 2);
+  check_string(parts->vec[0],
+               "Content-Type: text/plain\r\n"
+               "Content-Disposition: inline\r\n"
+               "Content-Description: text-part-2\r\n"
+               "\r\n"
+               "Some more text here.\r\n");
+  check_string(parts->vec[1],
+               "Content-Type: image/jpeg\r\n"
+               "Content-Disposition: attachment\r\n"
+               "Content-Description: jpeg-1\r\n"
+               "\r\n"
+               "<jpeg data>");
+ 
   /* XXX mime_parse */
-  /* XXX mime_multipart */
-  /* XXX mime_rfc2388_content_disposition */
 
   check_string(mime_qp(""), "");
   check_string(mime_qp("foobar"), "foobar");
@@ -332,16 +525,18 @@ static void test_hex(void) {
 }
 
 static void test_casefold(void) {
-  uint32_t c, l, u[2];
-  const char *s, *ls;
+  uint32_t c, l;
+  const char *input, *canon_folded, *compat_folded, *canon_expected, *compat_expected;
 
   fprintf(stderr, "test_casefold\n");
 
+  /* This isn't a very exhaustive test.  Unlike for normalization, there don't
+   * seem to be any public test vectors for these algorithms. */
+  
   for(c = 1; c < 256; ++c) {
-    u[0] = c;
-    u[1] = 0;
-    s = ucs42utf8(u);
-    ls = casefold(s);
+    input = utf32_to_utf8(&c, 1, 0);
+    canon_folded = utf8_casefold_canon(input, strlen(input), 0);
+    compat_folded = utf8_casefold_compat(input, strlen(input), 0);
     switch(c) {
     default:
       if((c >= 'A' && c <= 'Z')
@@ -354,24 +549,98 @@ static void test_casefold(void) {
       l = 0x3BC;			/* GREEK SMALL LETTER MU */
       break;
     case 0xDF:				/* LATIN SMALL LETTER SHARP S */
-      insist(!strcmp(ls, "ss"));
+      check_string(canon_folded, "ss");
+      check_string(compat_folded, "ss");
       l = 0;
       break;
     }
     if(l) {
-      u[0] = l;
-      u[1] = 0;
-      s = ucs42utf8(u);
-      if(strcmp(s, ls)) {
-	fprintf(stderr, "%s:%d: casefolding %#lx got '%s', expected '%s'\n",
+      uint32_t *d;
+      /* Case-folded data is now normalized */
+      d = utf32_decompose_canon(&l, 1, 0);
+      canon_expected = utf32_to_utf8(d, utf32_len(d), 0);
+      if(strcmp(canon_folded, canon_expected)) {
+	fprintf(stderr, "%s:%d: canon-casefolding %#lx got '%s', expected '%s'\n",
 		__FILE__, __LINE__, (unsigned long)c,
-		format(ls), format(s));
-	++errors;
+		format(canon_folded), format(canon_expected));
+	count_error();
       }
       ++tests;
+      d = utf32_decompose_compat(&l, 1, 0);
+      compat_expected = utf32_to_utf8(d, utf32_len(d), 0);
+      if(strcmp(compat_folded, compat_expected)) {
+	fprintf(stderr, "%s:%d: compat-casefolding %#lx got '%s', expected '%s'\n",
+		__FILE__, __LINE__, (unsigned long)c,
+		format(compat_folded), format(compat_expected));
+	count_error();
+      }
+      ++tests;
+    }
+  }
+  check_string(utf8_casefold_canon("", 0, 0), "");
+}
+
+struct {
+  const char *in;
+  const char *expect[10];
+} wtest[] = {
+  /* Empty string */
+  { "", { 0 } },
+  /* Only whitespace and punctuation */
+  { "    ", { 0 } },
+  { " '   ", { 0 } },
+  { " !  ", { 0 } },
+  { " \"\"  ", { 0 } },
+  { " @  ", { 0 } },
+  /* Basics */
+  { "wibble", { "wibble", 0 } },
+  { " wibble", { "wibble", 0 } },
+  { " wibble ", { "wibble", 0 } },
+  { "wibble ", { "wibble", 0 } },
+  { "wibble spong", { "wibble", "spong", 0 } },
+  { " wibble  spong", { "wibble", "spong", 0 } },
+  { " wibble  spong   ", { "wibble", "spong", 0 } },
+  { "wibble   spong  ", { "wibble", "spong", 0 } },
+  { "wibble   spong splat foo zot  ", { "wibble", "spong", "splat", "foo", "zot", 0 } },
+  /* Apostrophes */
+  { "wibble 'spong", { "wibble", "spong", 0 } },
+  { " wibble's", { "wibble's", 0 } },
+  { " wibblespong'   ", { "wibblespong", 0 } },
+  { "wibble   sp''ong  ", { "wibble", "sp", "ong", 0 } },
+};
+#define NWTEST (sizeof wtest / sizeof *wtest)
+
+static void test_words(void) {
+  size_t t, nexpect, ngot, i;
+  int right;
+  
+  fprintf(stderr, "test_words\n");
+  for(t = 0; t < NWTEST; ++t) {
+    char **got = utf8_word_split(wtest[t].in, strlen(wtest[t].in), &ngot, 0);
+
+    for(nexpect = 0; wtest[t].expect[nexpect]; ++nexpect)
+      ;
+    if(nexpect == ngot) {
+      for(i = 0; i < ngot; ++i)
+        if(strcmp(wtest[t].expect[i], got[i]))
+          break;
+      right = i == ngot;
+    } else
+      right = 0;
+    if(!right) {
+      fprintf(stderr, "word split %zu failed\n", t);
+      fprintf(stderr, "input: %s\n", wtest[t].in);
+      fprintf(stderr, "    | %-30s | %-30s\n",
+              "expected", "got");
+      for(i = 0; i < nexpect || i < ngot; ++i) {
+        const char *e = i < nexpect ? wtest[t].expect[i] : "<none>";
+        const char *g = i < ngot ? got[i] : "<none>";
+        fprintf(stderr, " %2zu | %-30s | %-30s\n", i, e, g);
+      }
+      count_error();
     }
+    ++tests;
   }
-  check_string(casefold(""), "");
 }
 
 /** @brief Less-than comparison function for integer heap */
@@ -380,6 +649,7 @@ static inline int int_lt(int a, int b) { return a < b; }
 /** @struct iheap
  * @brief A heap with @c int elements */
 HEAP_TYPE(iheap, int, int_lt);
+HEAP_DEFINE(iheap, int, int_lt);
 
 /** @brief Tests for @ref heap.h */
 static void test_heap(void) {
@@ -402,7 +672,563 @@ static void test_heap(void) {
   putchar('\n');
 }
 
+/** @brief Open a Unicode test file, fetching it with wget if not present */
+static FILE *open_unicode_test(const char *path) {
+  const char *base;
+  FILE *fp;
+  char buffer[1024];
+  int w;
+
+  if((base = strrchr(path, '/')))       /* local copy is named by basename */
+    ++base;
+  else
+    base = path;
+  if(!(fp = fopen(base, "r"))) {        /* no local copy: download one */
+    snprintf(buffer, sizeof buffer,
+             "wget http://www.unicode.org/Public/5.0.0/ucd/%s", path);
+    if((w = system(buffer)))            /* nonzero wait status: give up */
+      fatal(0, "%s: %s", buffer, wstat(w));
+    if(chmod(base, 0444) < 0)           /* make the fetched file read-only */
+      fatal(errno, "chmod %s", base);
+    if(!(fp = fopen(base, "r")))
+      fatal(errno, "%s", base);
+  }
+  return fp;
+}
+
+/** @brief Run the break tests in @p path against @p breakfn */
+static void breaktest(const char *path,
+                      int (*breakfn)(const uint32_t *, size_t, size_t)) {
+  FILE *fp = open_unicode_test(path);
+  int lineno = 0;
+  char *l, *lp;
+  size_t bn, n;
+  char break_allowed[1024];     /* [i]: expected breakfn() result at offset i */
+  uint32_t buffer[1024];        /* code points parsed from the current line */
+
+  while(!inputline(path, fp, &l, '\n')) {
+    ++lineno;
+    if(l[0] == '#') { xfree(l); continue; } /* comment line: free, then skip */
+    bn = 0;
+    lp = l;
+    while(*lp) {
+      if(*lp == ' ' || *lp == '\t') {
+        ++lp;
+        continue;
+      }
+      if(*lp == '#')
+        break;
+      if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0xB7) {
+        /* 00F7 DIVISION SIGN */
+        break_allowed[bn] = 1;
+        lp += 2;
+        continue;
+      }
+      if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0x97) {
+        /* 00D7 MULTIPLICATION SIGN */
+        break_allowed[bn] = 0;
+        lp += 2;
+        continue;
+      }
+      if(isxdigit((unsigned char)*lp)) {
+        /* bound bn so buffer[] and break_allowed[bn] both stay in range */
+        if(bn + 1 >= sizeof break_allowed)
+          fatal(0, "%s:%d: too many code points", path, lineno);
+        buffer[bn++] = strtoul(lp, &lp, 16);
+        continue;
+      }
+      fatal(0, "%s:%d: evil line: %s", path, lineno, l);
+    }
+    for(n = 0; n <= bn; ++n) {
+      if(breakfn(buffer, bn, n) != break_allowed[n]) {
+        fprintf(stderr, "%s:%d: offset %zu: mismatch\n%s\n\n",
+                path, lineno, n, l);
+        count_error();
+      }
+      ++tests;
+    }
+    xfree(l);
+  }
+  fclose(fp);
+}
+
+/** @brief Tests for @ref lib/unicode.h */
+static void test_unicode(void) {
+  FILE *fp;
+  int lineno = 0;
+  char *l, *lp;
+  uint32_t buffer[1024];
+  uint32_t *c[6], *NFD_c[6], *NFKD_c[6], *NFC_c[6], *NFKC_c[6]; /* 1-indexed */
+  int cn, bn;
+
+  fprintf(stderr, "test_unicode\n");
+  fp = open_unicode_test("NormalizationTest.txt");
+  while(!inputline("NormalizationTest.txt", fp, &l, '\n')) {
+    ++lineno;
+    if(*l == '#' || *l == '@') {
+      xfree(l);                 /* skipped lines must be released too */
+      continue;
+    }
+    bn = 0;
+    cn = 1;
+    lp = l;
+    c[cn++] = &buffer[bn];
+    while(*lp && *lp != '#') {
+      if(*lp == ' ') {
+        ++lp;
+        continue;
+      }
+      if(*lp == ';') {
+        buffer[bn++] = 0;       /* terminate the column just read */
+        if(cn == 6)
+          break;
+        c[cn++] = &buffer[bn];  /* next column starts here */
+        ++lp;
+        continue;
+      }
+      buffer[bn++] = strtoul(lp, &lp, 16);
+    }
+    buffer[bn] = 0;
+    assert(cn == 6);
+    for(cn = 1; cn <= 5; ++cn) {
+      NFD_c[cn] = utf32_decompose_canon(c[cn], utf32_len(c[cn]), 0);
+      NFKD_c[cn] = utf32_decompose_compat(c[cn], utf32_len(c[cn]), 0);
+      NFC_c[cn] = utf32_compose_canon(c[cn], utf32_len(c[cn]), 0);
+      NFKC_c[cn] = utf32_compose_compat(c[cn], utf32_len(c[cn]), 0);
+    }
+#define unt_check(T, A, B) do {                                         \
+    ++tests;                                                            \
+    if(utf32_cmp(c[A], T##_c[B])) {                                     \
+      fprintf(stderr, "NormalizationTest.txt:%d: c%d != "#T"(c%d)\n",   \
+              lineno, A, B);                                            \
+      fprintf(stderr, "      c%d:%s\n", A, format_utf32(c[A]));         \
+      fprintf(stderr, "      c%d:%s\n", B, format_utf32(c[B]));         \
+      fprintf(stderr, "%4s(c%d):%s\n", #T, B, format_utf32(T##_c[B])); \
+      count_error();                                                    \
+    }                                                                   \
+  } while(0)
+    unt_check(NFD, 3, 1);
+    unt_check(NFD, 3, 2);
+    unt_check(NFD, 3, 3);
+    unt_check(NFD, 5, 4);
+    unt_check(NFD, 5, 5);
+    unt_check(NFKD, 5, 1);
+    unt_check(NFKD, 5, 2);
+    unt_check(NFKD, 5, 3);
+    unt_check(NFKD, 5, 4);
+    unt_check(NFKD, 5, 5);
+    unt_check(NFC, 2, 1);
+    unt_check(NFC, 2, 2);
+    unt_check(NFC, 2, 3);
+    unt_check(NFC, 4, 4);
+    unt_check(NFC, 4, 5);
+    unt_check(NFKC, 4, 1);
+    unt_check(NFKC, 4, 2);
+    unt_check(NFKC, 4, 3);
+    unt_check(NFKC, 4, 4);
+    unt_check(NFKC, 4, 5);
+    for(cn = 1; cn <= 5; ++cn) {
+      xfree(NFD_c[cn]);
+      xfree(NFKD_c[cn]);
+      xfree(NFC_c[cn]);         /* composed forms were leaked before; */
+      xfree(NFKC_c[cn]);        /* presumably allocated like the NFD ones */
+    }
+    xfree(l);
+  }
+  fclose(fp);
+  breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_grapheme_boundary);
+  breaktest("auxiliary/WordBreakTest.txt", utf32_is_word_boundary);
+  insist(utf32_combining_class(0x40000) == 0);
+  insist(utf32_combining_class(0xE0000) == 0);
+}
+
+static void test_signame(void) { /* checks find_signal() name lookups */
+  fprintf(stderr, "test_signame\n");
+  insist(find_signal("SIGTERM") == SIGTERM);
+  insist(find_signal("SIGHUP") == SIGHUP);
+  insist(find_signal("SIGINT") == SIGINT);
+  insist(find_signal("SIGQUIT") == SIGQUIT);
+  insist(find_signal("SIGKILL") == SIGKILL);
+  insist(find_signal("SIGYOURMUM") == -1); /* unknown name: -1 expected */
+}
+
+static void test_cache(void) { /* checks cache put/get/expire/clean */
+  const struct cache_type t1 = { 1 }, t2 = { 10 }; /* NOTE(review): presumably lifetimes in seconds - confirm */
+  const char v11[] = "spong", v12[] = "wibble", v2[] = "blat";
+  fprintf(stderr, "test_cache\n");
+  cache_put(&t1, "1_1", v11);
+  cache_put(&t1, "1_2", v12);
+  cache_put(&t2, "2", v2);
+  insist(cache_count() == 3);
+  insist(cache_get(&t2, "2") == v2);    /* the stored pointer itself comes back */
+  insist(cache_get(&t1, "1_1") == v11);
+  insist(cache_get(&t1, "1_2") == v12);
+  insist(cache_get(&t1, "2") == 0);     /* key exists only under the other type */
+  insist(cache_get(&t2, "1_1") == 0);
+  insist(cache_get(&t2, "1_2") == 0);
+  insist(cache_get(&t1, "2") == 0);     /* (repeats the three checks above) */
+  insist(cache_get(&t2, "1_1") == 0);
+  insist(cache_get(&t2, "1_2") == 0);
+  sleep(2);
+  cache_expire();
+  insist(cache_count() == 1);           /* only the t2 entry is expected to survive */
+  insist(cache_get(&t1, "1_1") == 0);
+  insist(cache_get(&t1, "1_2") == 0);
+  insist(cache_get(&t2, "2") == v2);
+  cache_clean(0);
+  insist(cache_count() == 0);
+  insist(cache_get(&t2, "2") == 0); 
+}
+
+static void test_filepart(void) { /* checks d_dirname() and extension() */
+  fprintf(stderr, "test_filepart\n");
+  check_string(d_dirname("/"), "/");
+  check_string(d_dirname("/spong"), "/");
+  check_string(d_dirname("/foo/bar"), "/foo");
+  check_string(d_dirname("./bar"), ".");
+  check_string(d_dirname("."), ".");
+  check_string(d_dirname(".."), ".");
+  check_string(d_dirname("../blat"), "..");
+  check_string(d_dirname("wibble"), ".");
+  check_string(extension("foo.c"), ".c"); /* expected result includes the dot */
+  check_string(extension(".c"), ".c");
+  check_string(extension("."), ".");
+  check_string(extension("foo"), "");     /* no dot: empty string expected */
+  check_string(extension("./foo"), "");
+  check_string(extension("./foo.c"), ".c");
+}
+
+static void test_selection(void) { /* checks the selection_*() helpers */
+  hash *h;
+  fprintf(stderr, "test_selection\n");
+  insist((h = selection_new()) != 0);
+  selection_set(h, "one", 1);
+  selection_set(h, "two", 1);
+  selection_set(h, "three", 0);
+  selection_set(h, "four", 1);
+  insist(selection_selected(h, "one") == 1);
+  insist(selection_selected(h, "two") == 1);
+  insist(selection_selected(h, "three") == 0);
+  insist(selection_selected(h, "four") == 1);
+  insist(selection_selected(h, "five") == 0);
+  insist(hash_count(h) == 3);           /* "three" was set to 0 and is not counted */
+  selection_flip(h, "one"); 
+  selection_flip(h, "three"); 
+  insist(selection_selected(h, "one") == 0);
+  insist(selection_selected(h, "three") == 1);
+  insist(hash_count(h) == 3);
+  selection_live(h, "one");
+  selection_live(h, "two");
+  selection_live(h, "three");
+  selection_cleanup(h);                 /* "four" was not marked live above */
+  insist(selection_selected(h, "one") == 0);
+  insist(selection_selected(h, "two") == 1);
+  insist(selection_selected(h, "three") == 1);
+  insist(selection_selected(h, "four") == 0);
+  insist(selection_selected(h, "five") == 0);
+  insist(hash_count(h) == 2);
+  selection_empty(h);
+  insist(selection_selected(h, "one") == 0);
+  insist(selection_selected(h, "two") == 0);
+  insist(selection_selected(h, "three") == 0);
+  insist(selection_selected(h, "four") == 0);
+  insist(selection_selected(h, "five") == 0);
+  insist(hash_count(h) == 0);
+}
+
+static void test_wstat(void) { /* checks wstat() on real wait statuses */
+  pid_t pid;
+  int w;
+  
+  fprintf(stderr, "test_wstat\n");
+  if(!(pid = xfork())) {
+    _exit(1);                           /* child: exit with status 1 */
+  }
+  while(waitpid(pid, &w, 0) < 0 && errno == EINTR) /* retry when interrupted */
+    ;
+  check_string(wstat(w), "exited with status 1");
+  if(!(pid = xfork())) {
+    kill(getpid(), SIGTERM);            /* child: signal itself */
+    _exit(-1);                          /* reached only if still running */
+  }
+  while(waitpid(pid, &w, 0) < 0 && errno == EINTR)
+    ;
+  check_string_prefix(wstat(w), "terminated by signal 15"); /* prefix only */
+}
+
+static void test_kvp(void) { /* checks kvp URL decoding and encoding */
+  struct kvp *k;
+  size_t n;
+  
+  fprintf(stderr, "test_kvp\n");
+  /* decoding */
+#define KVP_URLDECODE(S) kvp_urldecode((S), strlen(S))
+  insist(KVP_URLDECODE("=%zz") == 0);   /* each of these inputs must yield 0 */
+  insist(KVP_URLDECODE("=%0") == 0);
+  insist(KVP_URLDECODE("=%0z") == 0);
+  insist(KVP_URLDECODE("=%%") == 0);
+  insist(KVP_URLDECODE("==%") == 0);
+  insist(KVP_URLDECODE("wibble") == 0);
+  insist(KVP_URLDECODE("") == 0);
+  insist(KVP_URLDECODE("wibble&") == 0);
+  insist((k = KVP_URLDECODE("one=bl%61t+foo")) != 0);
+  check_string(kvp_get(k, "one"), "blat foo");
+  insist(kvp_get(k, "ONE") == 0);       /* differently-cased key is not found */
+  insist(k->next == 0);                 /* exactly one pair */
+  insist((k = KVP_URLDECODE("wibble=splat&bar=spong")) != 0);
+  check_string(kvp_get(k, "wibble"), "splat");
+  check_string(kvp_get(k, "bar"), "spong");
+  insist(kvp_get(k, "ONE") == 0);
+  insist(k->next->next == 0);           /* exactly two pairs */
+  /* encoding */
+  insist(kvp_set(&k, "bar", "spong") == 0); /* same value again: 0 expected */
+  insist(kvp_set(&k, "bar", "foo") == 1);
+  insist(kvp_set(&k, "zog", "%") == 1);
+  insist(kvp_set(&k, "wibble", 0) == 1);
+  insist(kvp_set(&k, "wibble", 0) == 0);
+  check_string(kvp_urlencode(k, 0),
+               "bar=foo&zog=%25");
+  check_string(kvp_urlencode(k, &n),
+               "bar=foo&zog=%25");
+  insist(n == strlen("bar=foo&zog=%25")); /* n must equal the encoded length */
+  check_string(urlencodestring("abc% +\n"),
+               "abc%25%20%2b%0a");
+}
+
+static void test_sink(void) { /* checks stdio and dynstr sinks */
+  struct sink *s;
+  struct dynstr d[1];
+  FILE *fp;
+  char *l;
+  
+  fprintf(stderr, "test_sink\n");
+
+  fp = tmpfile();
+  assert(fp != 0);
+  s = sink_stdio("tmpfile", fp);
+  insist(sink_printf(s, "test: %d\n", 999) == 10); /* expected return is the byte count */
+  insist(sink_printf(s, "wibble: %s\n", "foobar") == 15);
+  rewind(fp);                           /* read back what was written */
+  insist(inputline("tmpfile", fp, &l, '\n') == 0);
+  check_string(l, "test: 999");
+  insist(inputline("tmpfile", fp, &l, '\n') == 0);
+  check_string(l, "wibble: foobar");
+  insist(inputline("tmpfile", fp, &l, '\n') == -1); /* no third line */
+  
+  dynstr_init(d);
+  s = sink_dynstr(d);
+  insist(sink_printf(s, "test: %d\n", 999) == 10);
+  insist(sink_printf(s, "wibble: %s\n", "foobar") == 15);
+  dynstr_terminate(d);
+  check_string(d->vec, "test: 999\nwibble: foobar\n");
+}
+
+static const char *do_printf(const char *fmt, ...) { /* byte_vasprintf() wrapper */
+  va_list ap;
+  char *s;
+  int rc;
+
+  va_start(ap, fmt);
+  rc = byte_vasprintf(&s, fmt, ap);
+  va_end(ap);
+  if(rc < 0)                            /* formatting failed */
+    return 0;
+  return s;                             /* the formatted string */
+}
+
+static void test_printf(void) { /* checks byte_vasprintf() via do_printf() */
+  char c;
+  short s;
+  int i;
+  long l;
+  long long ll;
+  intmax_t m;
+  ssize_t ssz;
+  ptrdiff_t p;
+  
+  fprintf(stderr, "test_printf\n");
+  check_string(do_printf("%d", 999), "999");
+  check_string(do_printf("%d", -999), "-999");
+  check_string(do_printf("%i", 999), "999");
+  check_string(do_printf("%i", -999), "-999");
+  check_string(do_printf("%u", 999), "999");
+  check_string(do_printf("%2u", 999), "999");
+  check_string(do_printf("%10u", 999), "       999");
+  check_string(do_printf("%-10u", 999), "999       ");
+  check_string(do_printf("%010u", 999), "0000000999");
+  check_string(do_printf("%-10d", -999), "-999      ");
+  check_string(do_printf("%-010d", -999), "-999      "); /* "-" beats "0" */
+  check_string(do_printf("%66u", 999), "                                                               999");
+  check_string(do_printf("%o", 999), "1747");
+  check_string(do_printf("%#o", 999), "01747");
+  check_string(do_printf("%#o", 0), "0");
+  check_string(do_printf("%x", 999), "3e7");
+  check_string(do_printf("%#x", 999), "0x3e7");
+  check_string(do_printf("%#X", 999), "0X3E7");
+  check_string(do_printf("%#x", 0), "0");
+  check_string(do_printf("%hd", (short)999), "999");
+  check_string(do_printf("%hhd", (short)99), "99");
+  check_string(do_printf("%ld", 100000L), "100000");
+  check_string(do_printf("%lld", 10000000000LL), "10000000000");
+  check_string(do_printf("%qd", 10000000000LL), "10000000000");
+  check_string(do_printf("%jd", (intmax_t)10000000000LL), "10000000000");
+  check_string(do_printf("%zd", (ssize_t)2000000000), "2000000000");
+  check_string(do_printf("%td", (ptrdiff_t)2000000000), "2000000000");
+  check_string(do_printf("%hu", (short)999), "999");
+  check_string(do_printf("%hhu", (short)99), "99");
+  check_string(do_printf("%lu", 100000L), "100000");
+  check_string(do_printf("%llu", 10000000000LL), "10000000000");
+  check_string(do_printf("%ju", (uintmax_t)10000000000LL), "10000000000");
+  check_string(do_printf("%zu", (size_t)2000000000), "2000000000");
+  check_string(do_printf("%tu", (ptrdiff_t)2000000000), "2000000000");
+  check_string(do_printf("%p", (void *)0x100), "0x100");
+  check_string(do_printf("%s", "wibble"), "wibble");
+  check_string(do_printf("%s-%s", "wibble", "wobble"), "wibble-wobble");
+  check_string(do_printf("%10s", "wibble"), "    wibble");
+  check_string(do_printf("%010s", "wibble"), "    wibble"); /* 0 ignored for %s */
+  check_string(do_printf("%-10s", "wibble"), "wibble    ");
+  check_string(do_printf("%2s", "wibble"), "wibble");
+  check_string(do_printf("%.2s", "wibble"), "wi");
+  check_string(do_printf("%.2s", "w"), "w");
+  check_string(do_printf("%4.2s", "wibble"), "  wi");
+  check_string(do_printf("%c", 'a'), "a");
+  check_string(do_printf("%4c", 'a'), "   a");
+  check_string(do_printf("%-4c", 'a'), "a   ");
+  check_string(do_printf("%*c", 0, 'a'), "a");
+  check_string(do_printf("x%hhny", &c), "xy"); /* %n stores the count so far */
+  insist(c == 1);
+  check_string(do_printf("xx%hnyy", &s), "xxyy");
+  insist(s == 2);
+  check_string(do_printf("xxx%nyyy", &i), "xxxyyy");
+  insist(i == 3);
+  check_string(do_printf("xxxx%lnyyyy", &l), "xxxxyyyy");
+  insist(l == 4);
+  check_string(do_printf("xxxxx%llnyyyyy", &ll), "xxxxxyyyyy");
+  insist(ll == 5);
+  check_string(do_printf("xxxxxx%jnyyyyyy", &m), "xxxxxxyyyyyy");
+  insist(m == 6);
+  check_string(do_printf("xxxxxxx%znyyyyyyy", &ssz), "xxxxxxxyyyyyyy");
+  insist(ssz == 7);
+  check_string(do_printf("xxxxxxxx%tnyyyyyyyy", &p), "xxxxxxxxyyyyyyyy");
+  insist(p == 8);
+  check_string(do_printf("%*d", 5, 99), "   99");
+  check_string(do_printf("%*d", -5, 99), "99   "); /* negative width: left-justified */
+  check_string(do_printf("%.*d", 5, 99), "00099");
+  check_string(do_printf("%.*d", -5, 99), "99");
+  check_string(do_printf("%.0d", 0), "");
+  check_string(do_printf("%.d", 0), "");
+  check_string(do_printf("%.d", 0), ""); /* (repeats the previous check) */
+  check_string(do_printf("%%"), "%");
+  check_string(do_printf("wibble"), "wibble");
+  insist(do_printf("%") == 0);          /* malformed formats must fail */
+  insist(do_printf("%=") == 0);
+}
+
+static void test_basen(void) { /* checks basen() radix conversion */
+  unsigned long v[64];
+  char buffer[1024];
+
+  fprintf(stderr, "test_basen\n");
+  v[0] = 999;
+  insist(basen(v, 1, buffer, sizeof buffer, 10) == 0);
+  check_string(buffer, "999");
+
+  v[0] = 1+2*7+3*7*7+4*7*7*7;           /* digits 4,3,2,1 in base 7 */
+  insist(basen(v, 1, buffer, sizeof buffer, 7) == 0);
+  check_string(buffer, "4321");
+
+  v[0] = 0x00010203;
+  v[1] = 0x04050607;
+  v[2] = 0x08090A0B;
+  v[3] = 0x0C0D0E0F;
+  insist(basen(v, 4, buffer, sizeof buffer, 256) == 0);
+  check_string(buffer, "123456789abcdef");
+
+  v[0] = 0x00010203;
+  v[1] = 0x04050607;
+  v[2] = 0x08090A0B;
+  v[3] = 0x0C0D0E0F;
+  insist(basen(v, 4, buffer, sizeof buffer, 16) == 0);
+  check_string(buffer, "102030405060708090a0b0c0d0e0f");
+
+  v[0] = 0x00010203;
+  v[1] = 0x04050607;
+  v[2] = 0x08090A0B;
+  v[3] = 0x0C0D0E0F;
+  insist(basen(v, 4, buffer, 10, 16) == -1); /* 10 bytes cannot hold the result above */
+}
+
+static void test_split(void) { /* checks split() and quoteutf8() */
+  char **v;
+  int nv;
+
+  fprintf(stderr, "test_split\n");
+  insist(split("\"misquoted", &nv, SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0) == 0); /* bad quoting must fail */
+  insist(split("\'misquoted", &nv, SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0) == 0);
+  insist(split("\'misquoted\\", &nv, SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0) == 0);
+  insist(split("\'misquoted\\\"", &nv, SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0) == 0);
+  insist(split("\'mis\\escaped\'", &nv, SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0) == 0);
+
+  insist((v = split("", &nv, SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0)));
+  check_integer(nv, 0);
+  insist(*v == 0);                      /* result vector is null-terminated */
+
+  insist((v = split("wibble", &nv, SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0)));
+  check_integer(nv, 1);
+  check_string(v[0], "wibble");
+  insist(v[1] == 0);
+
+  insist((v = split("   wibble \t\r\n wobble   ", &nv,
+                    SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0)));
+  check_integer(nv, 2);
+  check_string(v[0], "wibble");
+  check_string(v[1], "wobble");
+  insist(v[2] == 0);
+
+  insist((v = split("wibble wobble #splat", &nv,
+                    SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0)));
+  check_integer(nv, 2);                 /* the #comment is dropped */
+  check_string(v[0], "wibble");
+  check_string(v[1], "wobble");
+  insist(v[2] == 0);
+
+  insist((v = split("\"wibble wobble\" #splat", &nv,
+                    SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0)));
+  check_integer(nv, 1);
+  check_string(v[0], "wibble wobble");
+  insist(v[1] == 0);
+
+  insist((v = split("\"wibble \\\"\\nwobble\"", &nv,
+                    SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0)));
+  check_integer(nv, 1);
+  check_string(v[0], "wibble \"\nwobble");
+  insist(v[1] == 0);
+
+  insist((v = split("\"wibble wobble\" #splat", &nv,
+                    SPLIT_QUOTES, 0, 0)));
+  check_integer(nv, 2);                 /* without SPLIT_COMMENTS, #splat is a word */
+  check_string(v[0], "wibble wobble");
+  check_string(v[1], "#splat");
+  insist(v[2] == 0);
+
+  insist((v = split("\"wibble wobble\" #splat", &nv,
+                    SPLIT_COMMENTS, 0, 0)));
+  check_integer(nv, 2);                 /* without SPLIT_QUOTES, quotes are literal */
+  check_string(v[0], "\"wibble");
+  check_string(v[1], "wobble\"");
+  insist(v[2] == 0);
+
+  check_string(quoteutf8("wibble"), "wibble");
+  check_string(quoteutf8("  wibble  "), "\"  wibble  \"");
+  check_string(quoteutf8("wibble wobble"), "\"wibble wobble\"");
+  check_string(quoteutf8("wibble\"wobble"), "\"wibble\\\"wobble\"");
+  check_string(quoteutf8("wibble\nwobble"), "\"wibble\\nwobble\"");
+  check_string(quoteutf8("wibble\\wobble"), "\"wibble\\\\wobble\"");
+  check_string(quoteutf8("wibble'wobble"), "\"wibble'wobble\"");
+}
+
 int main(void) {
+  fail_first = !!getenv("FAIL_FIRST");
   insist('\n' == 0x0A);
   insist('\r' == 0x0D);
   insist(' ' == 0x20);
@@ -416,10 +1242,13 @@ int main(void) {
   /* asprintf.c */
   /* authhash.c */
   /* basen.c */
+  test_basen();
   /* charset.c */
   /* client.c */
   /* configuration.c */
   /* event.c */
+  /* filepart.c */
+  test_filepart();
   /* fprintf.c */
   /* heap.c */
   test_heap();
@@ -427,6 +1256,7 @@ int main(void) {
   test_hex();
   /* inputline.c */
   /* kvp.c */
+  test_kvp();
   /* log.c */
   /* mem.c */
   /* mime.c */
@@ -434,19 +1264,31 @@ int main(void) {
   /* mixer.c */
   /* plugin.c */
   /* printf.c */
+  test_printf();
   /* queue.c */
   /* sink.c */
+  test_sink();
   /* snprintf.c */
   /* split.c */
+  test_split();
   /* syscalls.c */
   /* table.c */
+  /* unicode.c */
+  test_unicode();
   /* utf8.c */
   test_utf8();
   /* vector.c */
   /* words.c */
   test_casefold();
-  /* XXX words() */
+  test_words();
   /* wstat.c */
+  test_wstat();
+  /* signame.c */
+  test_signame();
+  /* cache.c */
+  test_cache();
+  /* selection.c */
+  test_selection();
   fprintf(stderr,  "%d errors out of %d tests\n", errors, tests);
   return !!errors;
 }
@@ -455,6 +1297,7 @@ int main(void) {
 Local Variables:
 c-basic-offset:2
 comment-column:40
+fill-column:79
+indent-tabs-mode:nil
 End:
 */
-