[disorder] / lib / test.c

/*
 * This file is part of DisOrder.
 * Copyright (C) 2005, 2007 Richard Kettlewell
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */
/** @file lib/test.c @brief Library tests */

#include <config.h>
#include "types.h"

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <ctype.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>

#include "utf8.h"
#include "mem.h"
#include "log.h"
#include "vector.h"
#include "charset.h"
#include "mime.h"
#include "hex.h"
#include "words.h"
#include "heap.h"
#include "unicode.h"
#include "inputline.h"
#include "wstat.h"

/** @brief Number of checks performed so far and number that failed */
static int tests, errors;
/** @brief If nonzero, count_error() calls abort() on the first failure
 *
 * Set in main() from the presence of the FAIL_FIRST environment variable.
 */
static int fail_first;

/** @brief Record one test failure
 *
 * Increments the file-scope @c errors counter.  If @c fail_first is nonzero
 * the process is then terminated with abort(), so the first failure stops the
 * run.
 */
static void count_error(void) {
  ++errors;
  if(fail_first)
    abort();
}

/** @brief Checks that @p expr is nonzero
 *
 * If @p expr is zero a diagnostic naming the file, line and the text of the
 * expression is written to stderr and count_error() is called.  The
 * diagnostic is written first so that it is still produced when
 * count_error() aborts the process.  The @c tests counter is incremented
 * whether or not the check passes.
 */
#define insist(expr) do {				\
  if(!(expr)) {						\
    fprintf(stderr, "%s:%d: error checking %s\n",	\
            __FILE__, __LINE__, #expr);			\
    count_error();					\
  }							\
  ++tests;						\
} while(0)

static const char *format(const char *s) {
  struct dynstr d;
  int c;
  char buf[10];
  
  dynstr_init(&d);
  while((c = (unsigned char)*s++)) {
    if(c >= ' ' && c <= '~')
      dynstr_append(&d, c);
    else {
      sprintf(buf, "\\x%02X", (unsigned)c);
      dynstr_append_string(&d, buf);
    }
  }
  dynstr_terminate(&d);
  return d.vec;
}

/** @brief Make a UTF-32 string safe to print in a diagnostic
 * @param s 0-terminated array of code points
 * @return The terminated contents of a dynstr holding the rendering
 *
 * Code points 0x20 to 0x7E inclusive are copied as single characters; every
 * other code point is written as a backslash, 'x' and at least four
 * upper-case hex digits.  The upper bound matches format(), so DEL (0x7F) is
 * escaped rather than written raw to the terminal.
 */
static const char *format_utf32(const uint32_t *s) {
  struct dynstr d;
  uint32_t c;
  char buf[64];

  dynstr_init(&d);
  while((c = *s++)) {
    if(c >= 0x20 && c <= 0x7E)
      dynstr_append(&d, c);
    else {
      sprintf(buf, "\\x%04lX", (unsigned long)c);
      dynstr_append_string(&d, buf);
    }
  }
  dynstr_terminate(&d);
  return d.vec;
}

/** @brief Checks that string expression @p GOT equals @p WANT
 *
 * @p GOT and @p WANT are each evaluated exactly once.  If @p GOT yields a
 * null pointer that is reported as a failure instead of being passed to
 * strcmp().  On a mismatch both strings are printed via format().  The
 * @c tests counter is incremented whether or not the check passes.
 */
#define check_string(GOT, WANT) do {				\
  const char *g = GOT;						\
  const char *w = WANT;						\
								\
  if(g == 0) {							\
    fprintf(stderr, "%s:%d: %s returned 0\n",			\
            __FILE__, __LINE__, #GOT);				\
    count_error();						\
  } else if(strcmp(w, g)) {					\
    fprintf(stderr, "%s:%d: %s returned:\n%s\nexpected:\n%s\n",	\
            __FILE__, __LINE__, #GOT, format(g), format(w));	\
    count_error();						\
  }								\
  ++tests;							\
 } while(0)

/** @brief Parse a list of numbers into a 0-terminated code point array
 * @param s Numbers in any base accepted by strtoul() with base 0, separated
 *          by whitespace
 * @return The terminated contents of a dynstr_ucs4 holding the values
 *
 * A conversion error, or text that strtoul() cannot consume at all (for
 * instance trailing whitespace or a stray letter), is reported through
 * fatal().  Without the second check the loop would never advance.
 * NOTE(review): this relies on fatal() not returning - confirm against log.h.
 */
static uint32_t *ucs4parse(const char *s) {
  struct dynstr_ucs4 d;
  char *e;
  unsigned long value;

  dynstr_ucs4_init(&d);
  while(*s) {
    errno = 0;
    value = strtoul(s, &e, 0);
    /* Check errno before calling anything else that might change it */
    if(errno) fatal(errno, "strtoul (%s)", s);
    if(e == s) fatal(0, "strtoul (%s): no number found", s);
    dynstr_ucs4_append(&d, value);
    s = e;
  }
  dynstr_ucs4_terminate(&d);
  return d.vec;
}

/** @brief Tests for UTF-8 validation and UTF-8 / UCS-4 conversion
 *
 * Valid sequences are pushed through the U8() macro defined below; invalid
 * sequences are passed straight to validutf8(), which is required to reject
 * them.  Note that insist() prints the text of its argument, so the
 * expressions here double as the failure messages.
 */
static void test_utf8(void) {
  /* Test validutf8, convert to UCS-4, check the answer is right,
   * convert back to UTF-8, check we got to where we started */
#define U8(CHARS, WORDS) do {			\
  uint32_t *w = ucs4parse(WORDS);		\
  uint32_t *ucs;				\
  char *u8;					\
						\
  insist(validutf8(CHARS));			\
  ucs = utf82ucs4(CHARS);			\
  insist(ucs != 0);				\
  insist(!ucs4cmp(w, ucs));			\
  u8 = ucs42utf8(ucs);				\
  insist(u8 != 0);				\
  insist(!strcmp(u8, CHARS));			\
} while(0)

  fprintf(stderr, "test_utf8\n");

  /* empty string */

  U8("", "");
  
  /* ASCII characters */

  /* printable characters, 0x20 to 0x7e */
  U8(" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~",
     "0x20 0x21 0x22 0x23 0x24 0x25 0x26 0x27 0x28 0x29 0x2a 0x2b 0x2c 0x2d "
     "0x2e 0x2f 0x30 0x31 0x32 0x33 0x34 0x35 0x36 0x37 0x38 0x39 0x3a "
     "0x3b 0x3c 0x3d 0x3e 0x3f 0x40 0x41 0x42 0x43 0x44 0x45 0x46 0x47 "
     "0x48 0x49 0x4a 0x4b 0x4c 0x4d 0x4e 0x4f 0x50 0x51 0x52 0x53 0x54 "
     "0x55 0x56 0x57 0x58 0x59 0x5a 0x5b 0x5c 0x5d 0x5e 0x5f 0x60 0x61 "
     "0x62 0x63 0x64 0x65 0x66 0x67 0x68 0x69 0x6a 0x6b 0x6c 0x6d 0x6e "
     "0x6f 0x70 0x71 0x72 0x73 0x74 0x75 0x76 0x77 0x78 0x79 0x7a 0x7b "
     "0x7c 0x7d 0x7e");
  /* control characters, 0x01 to 0x1f, and 0x7f */
  U8("\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\177",
     "0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xa 0xb 0xc 0xd 0xe 0xf 0x10 "
     "0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d "
     "0x1e 0x1f 0x7f");

  /* from RFC3629 */

  /* Each group below checks the first and last valid sequence of a range
   * and an invalid sequence on either side of it */

  /* UTF8-2      = %xC2-DF UTF8-tail */
  insist(!validutf8("\xC0\x80"));
  insist(!validutf8("\xC1\x80"));
  insist(!validutf8("\xC2\x7F"));
  U8("\xC2\x80", "0x80");
  U8("\xDF\xBF", "0x7FF");
  insist(!validutf8("\xDF\xC0"));

  /*  UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
   *                %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
   */
  insist(!validutf8("\xE0\x9F\x80"));
  U8("\xE0\xA0\x80", "0x800");
  U8("\xE0\xBF\xBF", "0xFFF");
  insist(!validutf8("\xE0\xC0\xBF"));

  insist(!validutf8("\xE1\x80\x7F"));
  U8("\xE1\x80\x80", "0x1000");
  U8("\xEC\xBF\xBF", "0xCFFF");
  insist(!validutf8("\xEC\xC0\xBF"));
  
  U8("\xED\x80\x80", "0xD000");
  U8("\xED\x9F\xBF", "0xD7FF");
  insist(!validutf8("\xED\xA0\xBF"));

  insist(!validutf8("\xEE\x7f\x80"));
  U8("\xEE\x80\x80", "0xE000");
  U8("\xEF\xBF\xBF", "0xFFFF");
  insist(!validutf8("\xEF\xC0\xBF"));

  /*  UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
   *                %xF4 %x80-8F 2( UTF8-tail )
   */
  insist(!validutf8("\xF0\x8F\x80\x80"));
  U8("\xF0\x90\x80\x80", "0x10000");
  U8("\xF0\xBF\xBF\xBF", "0x3FFFF");
  insist(!validutf8("\xF0\xC0\x80\x80"));

  insist(!validutf8("\xF1\x80\x80\x7F"));
  U8("\xF1\x80\x80\x80", "0x40000");
  U8("\xF3\xBF\xBF\xBF", "0xFFFFF");
  insist(!validutf8("\xF3\xC0\x80\x80"));

  insist(!validutf8("\xF4\x80\x80\x7F"));
  U8("\xF4\x80\x80\x80", "0x100000");
  U8("\xF4\x8F\xBF\xBF", "0x10FFFF");
  insist(!validutf8("\xF4\x90\x80\x80"));

  /* miscellaneous non-UTF-8 rubbish */
  insist(!validutf8("\x80"));
  insist(!validutf8("\xBF"));
  insist(!validutf8("\xC0"));
  insist(!validutf8("\xC0\x7F"));
  insist(!validutf8("\xC0\xC0"));
  insist(!validutf8("\xE0"));
  insist(!validutf8("\xE0\x7F"));
  insist(!validutf8("\xE0\xC0"));
  insist(!validutf8("\xE0\x80"));
  insist(!validutf8("\xE0\x80\x7f"));
  insist(!validutf8("\xE0\x80\xC0"));
  insist(!validutf8("\xF0"));
  insist(!validutf8("\xF0\x7F"));
  insist(!validutf8("\xF0\xC0"));
  insist(!validutf8("\xF0\x80"));
  insist(!validutf8("\xF0\x80\x7f"));
  insist(!validutf8("\xF0\x80\xC0"));
  insist(!validutf8("\xF0\x80\x80\x7f"));
  insist(!validutf8("\xF0\x80\x80\xC0"));
  insist(!validutf8("\xF5\x80\x80\x80"));
  insist(!validutf8("\xF8"));
}

/** @brief Tests for mime_content_type(), mime_qp() and mime_base64()
 *
 * mime_parse(), mime_multipart() and mime_rfc2388_content_disposition() are
 * not covered yet (see the XXX markers below).
 */
static void test_mime(void) {
  char *t, *n, *v;

  fprintf(stderr, "test_mime\n");

  /* t, n and v receive the type, parameter name and parameter value; they
   * are reset before each call so stale results cannot mask a failure */

  /* No parameter: name and value are expected to stay null */
  t = n = v = 0;
  insist(!mime_content_type("text/plain", &t, &n, &v));
  insist(!strcmp(t, "text/plain"));
  insist(n == 0);
  insist(v == 0);

  /* Upper-case type with an embedded nested comment and extra spaces */
  t = n = v = 0;
  insist(!mime_content_type("TEXT ((nested) comment) /plain", &t, &n, &v));
  insist(!strcmp(t, "text/plain"));
  insist(n == 0);
  insist(v == 0);

  /* Parameter name is expected in lower case, value unchanged */
  t = n = v = 0;
  insist(!mime_content_type(" text/plain ; Charset=utf-8", &t, &n, &v));
  insist(!strcmp(t, "text/plain"));
  insist(!strcmp(n, "charset"));
  insist(!strcmp(v, "utf-8"));

  /* Spaces around '=' and trailing space */
  t = n = v = 0;
  insist(!mime_content_type("text/plain;charset = ISO-8859-1 ", &t, &n, &v));
  insist(!strcmp(t, "text/plain"));
  insist(!strcmp(n, "charset"));
  insist(!strcmp(v, "ISO-8859-1"));

  /* XXX mime_parse */
  /* XXX mime_multipart */
  /* XXX mime_rfc2388_content_disposition */

  /* Quoted-printable: =XX escapes, trailing space before a line break, and
   * soft line breaks ('=' at end of line, optionally followed by spaces) */
  check_string(mime_qp(""), "");
  check_string(mime_qp("foobar"), "foobar");
  check_string(mime_qp("foo=20bar"), "foo bar");
  check_string(mime_qp("x \r\ny"), "x\r\ny");
  check_string(mime_qp("x=\r\ny"), "xy");
  check_string(mime_qp("x= \r\ny"), "xy");
  check_string(mime_qp("x =\r\ny"), "x y");
  check_string(mime_qp("x = \r\ny"), "x y");

  /* from RFC2045 */
  check_string(mime_qp("Now's the time =\r\n"
"for all folk to come=\r\n"
" to the aid of their country."),
	       "Now's the time for all folk to come to the aid of their country.");

  /* Base64: plain groups, characters outside the alphabet interleaved with
   * the data, and '=' padding */
  check_string(mime_base64(""),  "");
  check_string(mime_base64("BBBB"), "\x04\x10\x41");
  check_string(mime_base64("////"), "\xFF\xFF\xFF");
  check_string(mime_base64("//BB"), "\xFF\xF0\x41");
  check_string(mime_base64("BBBB//BB////"),
	       "\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
  check_string(mime_base64("B B B B  / / B B / / / /"),
	       "\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
  check_string(mime_base64("B\r\nBBB.// B-B//~//"),
	       "\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
  check_string(mime_base64("BBBB="),
	       "\x04\x10\x41");
  check_string(mime_base64("BBBBx="),	/* not actually valid base64 */
	       "\x04\x10\x41");
  check_string(mime_base64("BBBB BB=="),
	       "\x04\x10\x41" "\x04");
  check_string(mime_base64("BBBB BBB="),
	       "\x04\x10\x41" "\x04\x10");
}

/** @brief Tests for unhexdigitq(), hex() and unhex() */
static void test_hex(void) {
  unsigned n;
  /* Reference bytes whose hex form is "00ff807f" */
  static const unsigned char h[] = { 0x00, 0xFF, 0x80, 0x7F };
  uint8_t *u;
  size_t ul;

  fprintf(stderr, "test_hex\n");

  /* Every character value that is not a hex digit must give -1 */
  for(n = 0; n <= UCHAR_MAX; ++n) {
    if(!isxdigit(n))
      insist(unhexdigitq(n) == -1);
  }
  /* Every hex digit, in both cases, must give its value */
  insist(unhexdigitq('0') == 0);
  insist(unhexdigitq('1') == 1);
  insist(unhexdigitq('2') == 2);
  insist(unhexdigitq('3') == 3);
  insist(unhexdigitq('4') == 4);
  insist(unhexdigitq('5') == 5);
  insist(unhexdigitq('6') == 6);
  insist(unhexdigitq('7') == 7);
  insist(unhexdigitq('8') == 8);
  insist(unhexdigitq('9') == 9);
  insist(unhexdigitq('a') == 10);
  insist(unhexdigitq('b') == 11);
  insist(unhexdigitq('c') == 12);
  insist(unhexdigitq('d') == 13);
  insist(unhexdigitq('e') == 14);
  insist(unhexdigitq('f') == 15);
  insist(unhexdigitq('A') == 10);
  insist(unhexdigitq('B') == 11);
  insist(unhexdigitq('C') == 12);
  insist(unhexdigitq('D') == 13);
  insist(unhexdigitq('E') == 14);
  insist(unhexdigitq('F') == 15);
  /* hex() is expected to produce lower-case digits */
  check_string(hex(h, sizeof h), "00ff807f");
  check_string(hex(0, 0), "");
  /* unhex() must accept either case and report the byte count in ul */
  u = unhex("00ff807f", &ul);
  insist(ul == 4);
  insist(memcmp(u, h, 4) == 0);
  u = unhex("00FF807F", &ul);
  insist(ul == 4);
  insist(memcmp(u, h, 4) == 0);
  u = unhex("", &ul);
  insist(ul == 0);
  /* Odd-length and non-hex input must return null; the bracketing messages
   * tell the reader that the error output in between is expected
   * (presumably unhex() logs an error for each - confirm in hex.c) */
  fprintf(stderr, "2 ERROR reports expected {\n");
  insist(unhex("F", 0) == 0);
  insist(unhex("az", 0) == 0);
  fprintf(stderr, "}\n");
}

/** @brief Tests for utf8_casefold_canon(), utf8_casefold_compat() and casefold()
 *
 * Each code point from 1 to 255 is folded and compared with the
 * decomposition of the code point it is expected to fold to.
 */
static void test_casefold(void) {
  uint32_t c, l;
  const char *input, *canon_folded, *compat_folded, *canon_expected, *compat_expected;

  fprintf(stderr, "test_casefold\n");

  /* This isn't a very exhaustive test.  Unlike for normalization, there don't
   * seem to be any public test vectors for these algorithms. */
  
  for(c = 1; c < 256; ++c) {
    input = utf32_to_utf8(&c, 1, 0);
    canon_folded = utf8_casefold_canon(input, strlen(input), 0);
    compat_folded = utf8_casefold_compat(input, strlen(input), 0);
    /* Work out l, the single code point c should fold to, or 0 if the
     * result has already been checked in the switch */
    switch(c) {
    default:
      /* A-Z and 0xC0-0xDE (except 0xD7, MULTIPLICATION SIGN) are expected to
       * fold to the code point with bit 0x20 flipped; anything else is
       * expected to fold to itself */
      if((c >= 'A' && c <= 'Z')
	 || (c >= 0xC0 && c <= 0xDE && c != 0xD7))
	l = c ^ 0x20;
      else
	l = c;
      break;
    case 0xB5:				/* MICRO SIGN */
      l = 0x3BC;			/* GREEK SMALL LETTER MU */
      break;
    case 0xDF:				/* LATIN SMALL LETTER SHARP S */
      /* Folds to two characters, so it cannot go through the l path */
      insist(!strcmp(canon_folded, "ss"));
      insist(!strcmp(compat_folded, "ss"));
      l = 0;
      break;
    }
    if(l) {
      /* Case-folded data is now normalized */
      canon_expected = ucs42utf8(utf32_decompose_canon(&l, 1, 0));
      if(strcmp(canon_folded, canon_expected)) {
	fprintf(stderr, "%s:%d: canon-casefolding %#lx got '%s', expected '%s'\n",
		__FILE__, __LINE__, (unsigned long)c,
		format(canon_folded), format(canon_expected));
	count_error();
      }
      ++tests;
      compat_expected = ucs42utf8(utf32_decompose_compat(&l, 1, 0));
      if(strcmp(compat_folded, compat_expected)) {
	fprintf(stderr, "%s:%d: compat-casefolding %#lx got '%s', expected '%s'\n",
		__FILE__, __LINE__, (unsigned long)c,
		format(compat_folded), format(compat_expected));
	count_error();
      }
      ++tests;
    }
  }
  check_string(casefold(""), "");
}

/** @brief Ordering predicate for the integer heap
 * @param a Left-hand value
 * @param b Right-hand value
 * @return 1 if @p a is strictly less than @p b, otherwise 0
 */
static inline int int_lt(int a, int b) {
  return b > a;
}

/** @struct iheap
 * @brief A heap with @c int elements
 *
 * Instantiated from the heap.h macros with int_lt() as the ordering.
 * test_heap() uses the resulting iheap_init(), iheap_insert() and
 * iheap_remove(); presumably HEAP_TYPE declares the type and prototypes and
 * HEAP_DEFINE emits the function bodies - confirm in heap.h.
 */
HEAP_TYPE(iheap, int, int_lt);
HEAP_DEFINE(iheap, int, int_lt);

/** @brief Tests for @ref heap.h
 *
 * Inserts 1000 pseudo-random values in the range 0-99 into an iheap and
 * checks that removing them yields a non-decreasing sequence.
 */
static void test_heap(void) {
  struct iheap h[1];
  int n;
  /* Previously removed value; -1 is below anything that was inserted */
  int last = -1;

  fprintf(stderr, "test_heap\n");

  iheap_init(h);
  for(n = 0; n < 1000; ++n)
    iheap_insert(h, random() % 100);
  for(n = 0; n < 1000; ++n) {
    const int latest = iheap_remove(h);
    /* Show the offending pair; insist() below only prints the expression */
    if(last > latest)
      fprintf(stderr, "should have %d <= %d\n", last, latest);
    insist(last <= latest);
    last = latest;
  }
  /* NOTE(review): this newline goes to stdout although all other output in
   * this file goes to stderr - possibly left over from debugging */
  putchar('\n');
}

/** @brief Open a Unicode test file
 * @param path Path of the file relative to the Unicode 5.0.0 UCD directory
 * @return Stream open for reading
 *
 * The file is looked for in the current directory under the last component
 * of @p path.  If it cannot be opened there, it is downloaded with wget, made
 * read-only and opened again.  Any failure is reported through fatal().
 */
static FILE *open_unicode_test(const char *path) {
  const char *base;
  FILE *fp;
  char buffer[1024];
  int w, len;

  /* base is the part of path after the final '/', or all of it */
  if((base = strrchr(path, '/')))
    ++base;
  else
    base = path;
  if(!(fp = fopen(base, "r"))) {
    len = snprintf(buffer, sizeof buffer,
                   "wget http://www.unicode.org/Public/5.0.0/ucd/%s", path);
    /* Never hand a truncated command line to the shell */
    if(len < 0 || (size_t)len >= sizeof buffer)
      fatal(0, "%s: path too long", path);
    if((w = system(buffer)))
      fatal(0, "%s: %s", buffer, wstat(w));
    if(chmod(base, 0444) < 0)
      fatal(errno, "chmod %s", base);
    if(!(fp = fopen(base, "r")))
      fatal(errno, "%s", base);
  }
  return fp;
}

/** @brief Run breaking tests for utf32_grapheme_boundary() etc
 * @param path Test file, as passed to open_unicode_test()
 * @param breakfn Function under test; called as breakfn(string, length,
 *                offset) and compared with the expected 0 or 1
 *
 * Each data line is a sequence of hexadecimal code points separated by
 * U+00F7 DIVISION SIGN (a break is allowed here) or U+00D7 MULTIPLICATION
 * SIGN (no break here), both encoded as UTF-8.  Text after '#' is ignored.
 * Lines that start with '#', and lines with no markers or code points at
 * all, are skipped.  One test is counted per offset, including both ends of
 * the string.
 */
static void breaktest(const char *path,
                      int (*breakfn)(const uint32_t *, size_t, size_t)) {
  FILE *fp = open_unicode_test(path);
  int lineno = 0;
  char *l, *lp;
  size_t bn, n;
  int seen;                             /* any marker or code point on line? */
  char break_allowed[1024];
  uint32_t buffer[1024];

  while(!inputline(path, fp, &l, '\n')) {
    ++lineno;
    if(l[0] == '#') {
      /* Comment line: release it rather than leaking it */
      xfree(l);
      continue;
    }
    bn = 0;
    seen = 0;
    lp = l;
    while(*lp) {
      if(*lp == ' ' || *lp == '\t') {
        ++lp;
        continue;
      }
      if(*lp == '#')
        break;
      if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0xB7) {
        /* 00F7 DIVISION SIGN */
        break_allowed[bn] = 1;
        seen = 1;
        lp += 2;
        continue;
      }
      if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0x97) {
        /* 00D7 MULTIPLICATION SIGN */
        break_allowed[bn] = 0;
        seen = 1;
        lp += 2;
        continue;
      }
      if(isxdigit((unsigned char)*lp)) {
        /* Keep bn small enough that break_allowed[bn] stays in range after
         * the increment */
        if(bn >= sizeof break_allowed - 1)
          fatal(0, "%s:%d: too many code points", path, lineno);
        buffer[bn++] = strtoul(lp, &lp, 16);
        seen = 1;
        continue;
      }
      fatal(0, "%s:%d: evil line: %s", path, lineno, l);
    }
    /* A line with nothing on it has no expectations to compare against */
    if(seen) {
      for(n = 0; n <= bn; ++n) {
        if(breakfn(buffer, bn, n) != break_allowed[n]) {
          fprintf(stderr,
                  "%s:%d: offset %zu: mismatch\n",
                  path, lineno, n);
          count_error();
        }
        ++tests;
      }
    }
    xfree(l);
  }
  fclose(fp);
}

/** @brief Tests for @ref lib/unicode.h
 *
 * Reads NormalizationTest.txt.  Each data line holds five ';'-terminated
 * columns of hexadecimal code points, c1 to c5; lines starting with '#' or
 * '@' are skipped.  Every column is decomposed with
 * utf32_decompose_canon() (NFD) and utf32_decompose_compat() (NFKD) and the
 * results are compared with the columns that should match them.  The
 * grapheme and word break test files are then run through breaktest().
 */
static void test_unicode(void) {
  FILE *fp;
  int lineno = 0;
  char *l, *lp, *start;
  uint32_t buffer[1024];
  uint32_t *c[6], *NFD_c[6],  *NFKD_c[6]; /* 1-indexed */
  int cn, bn;

  fprintf(stderr, "test_unicode\n");
  fp = open_unicode_test("NormalizationTest.txt");
  while(!inputline("NormalizationTest.txt", fp, &l, '\n')) {
    ++lineno;
    if(*l == '#' || *l == '@') {
      /* Comment or part header: release it rather than leaking it */
      xfree(l);
      continue;
    }
    bn = 0;
    cn = 1;
    lp = l;
    c[cn++] = &buffer[bn];
    while(*lp && *lp != '#') {
      if(*lp == ' ') {
	++lp;
	continue;
      }
      /* Leave room for this store and for the terminator written after the
       * loop */
      if((size_t)bn >= sizeof buffer / sizeof buffer[0] - 1)
	fatal(0, "NormalizationTest.txt:%d: line too long", lineno);
      if(*lp == ';') {
	/* End of a column: terminate it and start the next, if any */
	buffer[bn++] = 0;
	if(cn == 6)
	  break;
	c[cn++] = &buffer[bn];
	++lp;
	continue;
      }
      start = lp;
      buffer[bn++] = strtoul(lp, &lp, 16);
      /* If nothing was consumed the loop would never advance */
      if(lp == start)
	fatal(0, "NormalizationTest.txt:%d: evil line: %s", lineno, l);
    }
    buffer[bn] = 0;
    assert(cn == 6);
    for(cn = 1; cn <= 5; ++cn) {
      NFD_c[cn] = utf32_decompose_canon(c[cn], utf32_len(c[cn]), 0);
      NFKD_c[cn] = utf32_decompose_compat(c[cn], utf32_len(c[cn]), 0);
    }
    /* unt_check(T, A, B) checks that column A equals T applied to column B */
#define unt_check(T, A, B) do {					\
    ++tests;							\
    if(utf32_cmp(c[A], T##_c[B])) {				\
      fprintf(stderr,                                           \
              "NormalizationTest.txt:%d: c%d != "#T"(c%d)\n",   \
              lineno, A, B);                                    \
      fprintf(stderr, "      c%d: %s\n",                         \
              A, format_utf32(c[A]));				\
      fprintf(stderr, "%4s(c%d): %s\n",				\
              #T, B, format_utf32(T##_c[B]));			\
      count_error();						\
    }								\
  } while(0)
    unt_check(NFD, 3, 1);
    unt_check(NFD, 3, 2);
    unt_check(NFD, 3, 3);
    unt_check(NFD, 5, 4);
    unt_check(NFD, 5, 5);
    unt_check(NFKD, 5, 1);
    unt_check(NFKD, 5, 2);
    unt_check(NFKD, 5, 3);
    unt_check(NFKD, 5, 4);
    unt_check(NFKD, 5, 5);
    for(cn = 1; cn <= 5; ++cn) {
      xfree(NFD_c[cn]);
      xfree(NFKD_c[cn]);
    }
    xfree(l);
  }
  fclose(fp);
  breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_grapheme_boundary);
  breaktest("auxiliary/WordBreakTest.txt", utf32_is_word_boundary);
}

/** @brief Run all the library tests
 * @return 0 if every check passed, 1 if any failed
 *
 * If the FAIL_FIRST environment variable is set (to anything), the first
 * failing check aborts the process.  A summary line is written to stderr at
 * the end.
 */
int main(void) {
  fail_first = !!getenv("FAIL_FIRST");
  /* Check that the execution character set has the ASCII values the tests
   * rely on */
  insist('\n' == 0x0A);
  insist('\r' == 0x0D);
  insist(' ' == 0x20);
  insist('0' == 0x30);
  insist('9' == 0x39);
  insist('A' == 0x41);
  insist('Z' == 0x5A);
  insist('a' == 0x61);
  insist('z' == 0x7A);
  /* The comments below name source files (presumably those of the library);
   * only the ones followed by a call have tests here */
  /* addr.c */
  /* asprintf.c */
  /* authhash.c */
  /* basen.c */
  /* charset.c */
  /* client.c */
  /* configuration.c */
  /* event.c */
  /* fprintf.c */
  /* heap.c */
  test_heap();
  /* hex.c */
  test_hex();
  /* inputline.c */
  /* kvp.c */
  /* log.c */
  /* mem.c */
  /* mime.c */
  test_mime();
  /* mixer.c */
  /* plugin.c */
  /* printf.c */
  /* queue.c */
  /* sink.c */
  /* snprintf.c */
  /* split.c */
  /* syscalls.c */
  /* table.c */
  /* unicode.c */
  test_unicode();
  /* utf8.c */
  test_utf8();
  /* vector.c */
  /* words.c */
  test_casefold();
  /* XXX words() */
  /* wstat.c */
  fprintf(stderr,  "%d errors out of %d tests\n", errors, tests);
  return !!errors;
}
  
/*
Local Variables:
c-basic-offset:2
comment-column:40
fill-column:79
indent-tabs-mode:nil
End:
*/
Commit	Line	Data
460b9539	1	/*
460b9539	2	* This file is part of DisOrder.
033fd4e3	3	* Copyright (C) 2005, 2007 Richard Kettlewell
460b9539	4	*
	5	* This program is free software; you can redistribute it and/or modify
	6	* it under the terms of the GNU General Public License as published by
	7	* the Free Software Foundation; either version 2 of the License, or
	8	* (at your option) any later version.
	9	*
	10	* This program is distributed in the hope that it will be useful, but
	11	* WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	* General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU General Public License
	16	* along with this program; if not, write to the Free Software
	17	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
	18	* USA
	19	*/
033fd4e3	20	/** @file lib/test.c @brief Library tests */
460b9539	21
	22	#include <config.h>
	23	#include "types.h"
	24
	25	#include <stdio.h>
	26	#include <string.h>
	27	#include <stdlib.h>
	28	#include <errno.h>
	29	#include <ctype.h>
033fd4e3	30	#include <assert.h>
e5a5a138 RK	31	#include <sys/types.h>
e5a5a138 RK	32	#include <sys/stat.h>
460b9539	33
	34	#include "utf8.h"
	35	#include "mem.h"
	36	#include "log.h"
	37	#include "vector.h"
	38	#include "charset.h"
	39	#include "mime.h"
	40	#include "hex.h"
	41	#include "words.h"
033fd4e3	42	#include "heap.h"
e5a5a138 RK	43	#include "unicode.h"
e5a5a138 RK	44	#include "inputline.h"
e2452add	45	#include "wstat.h"
460b9539	46
460b9539	47	static int tests, errors;
bb48024f RK	48	static int fail_first;
	49
	50	static void count_error() {
	51	++errors;
	52	if(fail_first)
	53	abort();
	54	}
460b9539	55
033fd4e3	56	/** @brief Checks that @p expr is nonzero */
460b9539	57	#define insist(expr) do { \
033fd4e3	58	if(!(expr)) { \
bb48024f	59	count_error(); \
460b9539	60	fprintf(stderr, "%s:%d: error checking %s\n", \
	61	__FILE__, __LINE__, #expr); \
	62	} \
	63	++tests; \
	64	} while(0)
	65
	66	static const char format(const char s) {
	67	struct dynstr d;
	68	int c;
	69	char buf[10];
	70
	71	dynstr_init(&d);
	72	while((c = (unsigned char)*s++)) {
	73	if(c >= ' ' && c <= '~')
	74	dynstr_append(&d, c);
	75	else {
	76	sprintf(buf, "\\x%02X", (unsigned)c);
	77	dynstr_append_string(&d, buf);
	78	}
	79	}
	80	dynstr_terminate(&d);
	81	return d.vec;
	82	}
	83
e5a5a138 RK	84	static const char format_utf32(const uint32_t s) {
	85	struct dynstr d;
	86	uint32_t c;
	87	char buf[64];
	88
	89	dynstr_init(&d);
	90	while((c = *s++)) {
	91	if(c >= 32 && c <= 127)
	92	dynstr_append(&d, c);
	93	else {
	94	sprintf(buf, "\\x%04lX", (unsigned long)c);
	95	dynstr_append_string(&d, buf);
	96	}
	97	}
	98	dynstr_terminate(&d);
	99	return d.vec;
	100	}
	101
460b9539	102	#define check_string(GOT, WANT) do { \
	103	const char *g = GOT; \
	104	const char *w = WANT; \
	105	\
	106	if(w == 0) { \
	107	fprintf(stderr, "%s:%d: %s returned 0\n", \
	108	__FILE__, __LINE__, #GOT); \
bb48024f	109	count_error(); \
460b9539	110	} else if(strcmp(w, g)) { \
	111	fprintf(stderr, "%s:%d: %s returned:\n%s\nexpected:\n%s\n", \
	112	__FILE__, __LINE__, #GOT, format(g), format(w)); \
bb48024f	113	count_error(); \
460b9539	114	} \
	115	++tests; \
	116	} while(0)
	117
	118	static uint32_t ucs4parse(const char s) {
	119	struct dynstr_ucs4 d;
	120	char *e;
	121
	122	dynstr_ucs4_init(&d);
	123	while(*s) {
	124	errno = 0;
	125	dynstr_ucs4_append(&d, strtoul(s, &e, 0));
	126	if(errno) fatal(errno, "strtoul (%s)", s);
	127	s = e;
	128	}
	129	dynstr_ucs4_terminate(&d);
	130	return d.vec;
	131	}
	132
	133	static void test_utf8(void) {
	134	/* Test validutf8, convert to UCS-4, check the answer is right,
	135	* convert back to UTF-8, check we got to where we started */
	136	#define U8(CHARS, WORDS) do { \
	137	uint32_t *w = ucs4parse(WORDS); \
	138	uint32_t *ucs; \
	139	char *u8; \
	140	\
	141	insist(validutf8(CHARS)); \
	142	ucs = utf82ucs4(CHARS); \
	143	insist(ucs != 0); \
	144	insist(!ucs4cmp(w, ucs)); \
	145	u8 = ucs42utf8(ucs); \
	146	insist(u8 != 0); \
	147	insist(!strcmp(u8, CHARS)); \
	148	} while(0)
	149
033fd4e3 RK	150	fprintf(stderr, "test_utf8\n");
033fd4e3 RK	151
460b9539	152	/* empty string */
	153
	154	U8("", "");
	155
	156	/* ASCII characters */
	157
	158	U8(" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{\|}~",
	159	"0x20 0x21 0x22 0x23 0x24 0x25 0x26 0x27 0x28 0x29 0x2a 0x2b 0x2c 0x2d "
	160	"0x2e 0x2f 0x30 0x31 0x32 0x33 0x34 0x35 0x36 0x37 0x38 0x39 0x3a "
	161	"0x3b 0x3c 0x3d 0x3e 0x3f 0x40 0x41 0x42 0x43 0x44 0x45 0x46 0x47 "
	162	"0x48 0x49 0x4a 0x4b 0x4c 0x4d 0x4e 0x4f 0x50 0x51 0x52 0x53 0x54 "
	163	"0x55 0x56 0x57 0x58 0x59 0x5a 0x5b 0x5c 0x5d 0x5e 0x5f 0x60 0x61 "
	164	"0x62 0x63 0x64 0x65 0x66 0x67 0x68 0x69 0x6a 0x6b 0x6c 0x6d 0x6e "
	165	"0x6f 0x70 0x71 0x72 0x73 0x74 0x75 0x76 0x77 0x78 0x79 0x7a 0x7b "
	166	"0x7c 0x7d 0x7e");
	167	U8("\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\177",
	168	"0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xa 0xb 0xc 0xd 0xe 0xf 0x10 "
	169	"0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d "
	170	"0x1e 0x1f 0x7f");
	171
	172	/* from RFC3629 */
	173
	174	/* UTF8-2 = %xC2-DF UTF8-tail */
	175	insist(!validutf8("\xC0\x80"));
	176	insist(!validutf8("\xC1\x80"));
	177	insist(!validutf8("\xC2\x7F"));
	178	U8("\xC2\x80", "0x80");
	179	U8("\xDF\xBF", "0x7FF");
	180	insist(!validutf8("\xDF\xC0"));
	181
	182	/* UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
	183	* %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
	184	*/
	185	insist(!validutf8("\xE0\x9F\x80"));
	186	U8("\xE0\xA0\x80", "0x800");
	187	U8("\xE0\xBF\xBF", "0xFFF");
	188	insist(!validutf8("\xE0\xC0\xBF"));
	189
	190	insist(!validutf8("\xE1\x80\x7F"));
	191	U8("\xE1\x80\x80", "0x1000");
	192	U8("\xEC\xBF\xBF", "0xCFFF");
	193	insist(!validutf8("\xEC\xC0\xBF"));
	194
	195	U8("\xED\x80\x80", "0xD000");
	196	U8("\xED\x9F\xBF", "0xD7FF");
	197	insist(!validutf8("\xED\xA0\xBF"));
	198
	199	insist(!validutf8("\xEE\x7f\x80"));
	200	U8("\xEE\x80\x80", "0xE000");
	201	U8("\xEF\xBF\xBF", "0xFFFF");
	202	insist(!validutf8("\xEF\xC0\xBF"));
	203
	204	/* UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
	205	* %xF4 %x80-8F 2( UTF8-tail )
	206	*/
	207	insist(!validutf8("\xF0\x8F\x80\x80"));
	208	U8("\xF0\x90\x80\x80", "0x10000");
	209	U8("\xF0\xBF\xBF\xBF", "0x3FFFF");
	210	insist(!validutf8("\xF0\xC0\x80\x80"));
	211
	212	insist(!validutf8("\xF1\x80\x80\x7F"));
	213	U8("\xF1\x80\x80\x80", "0x40000");
	214	U8("\xF3\xBF\xBF\xBF", "0xFFFFF");
	215	insist(!validutf8("\xF3\xC0\x80\x80"));
216
217	insist(!validutf8("\xF4\x80\x80\x7F"));
218	U8("\xF4\x80\x80\x80", "0x100000");
219	U8("\xF4\x8F\xBF\xBF", "0x10FFFF");
220	insist(!validutf8("\xF4\x90\x80\x80"));
221
222	/* miscellaneous non-UTF-8 rubbish */
223	insist(!validutf8("\x80"));
224	insist(!validutf8("\xBF"));
225	insist(!validutf8("\xC0"));
226	insist(!validutf8("\xC0\x7F"));
227	insist(!validutf8("\xC0\xC0"));
228	insist(!validutf8("\xE0"));
229	insist(!validutf8("\xE0\x7F"));
230	insist(!validutf8("\xE0\xC0"));
231	insist(!validutf8("\xE0\x80"));
232	insist(!validutf8("\xE0\x80\x7f"));
233	insist(!validutf8("\xE0\x80\xC0"));
234	insist(!validutf8("\xF0"));
235	insist(!validutf8("\xF0\x7F"));
236	insist(!validutf8("\xF0\xC0"));
237	insist(!validutf8("\xF0\x80"));
238	insist(!validutf8("\xF0\x80\x7f"));
239	insist(!validutf8("\xF0\x80\xC0"));
240	insist(!validutf8("\xF0\x80\x80\x7f"));
241	insist(!validutf8("\xF0\x80\x80\xC0"));
242	insist(!validutf8("\xF5\x80\x80\x80"));
243	insist(!validutf8("\xF8"));
244	}
245
246	static void test_mime(void) {
247	char t, n, *v;
248
033fd4e3 RK	249	fprintf(stderr, "test_mime\n");
033fd4e3 RK	250
460b9539	251	t = n = v = 0;
	252	insist(!mime_content_type("text/plain", &t, &n, &v));
	253	insist(!strcmp(t, "text/plain"));
	254	insist(n == 0);
	255	insist(v == 0);
	256
	257	t = n = v = 0;
	258	insist(!mime_content_type("TEXT ((nested) comment) /plain", &t, &n, &v));
	259	insist(!strcmp(t, "text/plain"));
	260	insist(n == 0);
	261	insist(v == 0);
	262
	263	t = n = v = 0;
	264	insist(!mime_content_type(" text/plain ; Charset=utf-8", &t, &n, &v));
	265	insist(!strcmp(t, "text/plain"));
	266	insist(!strcmp(n, "charset"));
	267	insist(!strcmp(v, "utf-8"));
	268
	269	t = n = v = 0;
	270	insist(!mime_content_type("text/plain;charset = ISO-8859-1 ", &t, &n, &v));
	271	insist(!strcmp(t, "text/plain"));
	272	insist(!strcmp(n, "charset"));
	273	insist(!strcmp(v, "ISO-8859-1"));
	274
	275	/* XXX mime_parse */
	276	/* XXX mime_multipart */
	277	/* XXX mime_rfc2388_content_disposition */
	278
	279	check_string(mime_qp(""), "");
	280	check_string(mime_qp("foobar"), "foobar");
	281	check_string(mime_qp("foo=20bar"), "foo bar");
	282	check_string(mime_qp("x \r\ny"), "x\r\ny");
	283	check_string(mime_qp("x=\r\ny"), "xy");
	284	check_string(mime_qp("x= \r\ny"), "xy");
	285	check_string(mime_qp("x =\r\ny"), "x y");
	286	check_string(mime_qp("x = \r\ny"), "x y");
	287
	288	/* from RFC2045 */
	289	check_string(mime_qp("Now's the time =\r\n"
	290	"for all folk to come=\r\n"
	291	" to the aid of their country."),
	292	"Now's the time for all folk to come to the aid of their country.");
	293
	294	check_string(mime_base64(""), "");
	295	check_string(mime_base64("BBBB"), "\x04\x10\x41");
	296	check_string(mime_base64("////"), "\xFF\xFF\xFF");
	297	check_string(mime_base64("//BB"), "\xFF\xF0\x41");
	298	check_string(mime_base64("BBBB//BB////"),
	299	"\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	300	check_string(mime_base64("B B B B / / B B / / / /"),
	301	"\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	302	check_string(mime_base64("B\r\nBBB.// B-B//~//"),
	303	"\x04\x10\x41" "\xFF\xF0\x41" "\xFF\xFF\xFF");
	304	check_string(mime_base64("BBBB="),
	305	"\x04\x10\x41");
	306	check_string(mime_base64("BBBBx="), /* not actually valid base64 */
	307	"\x04\x10\x41");
	308	check_string(mime_base64("BBBB BB=="),
	309	"\x04\x10\x41" "\x04");
	310	check_string(mime_base64("BBBB BBB="),
	311	"\x04\x10\x41" "\x04\x10");
	312	}
	313
	314	static void test_hex(void) {
315	unsigned n;
316	static const unsigned char h[] = { 0x00, 0xFF, 0x80, 0x7F };
317	uint8_t *u;
318	size_t ul;
319
033fd4e3 RK	320	fprintf(stderr, "test_hex\n");
033fd4e3 RK	321
460b9539	322	for(n = 0; n <= UCHAR_MAX; ++n) {
	323	if(!isxdigit(n))
	324	insist(unhexdigitq(n) == -1);
	325	}
	326	insist(unhexdigitq('0') == 0);
	327	insist(unhexdigitq('1') == 1);
	328	insist(unhexdigitq('2') == 2);
	329	insist(unhexdigitq('3') == 3);
	330	insist(unhexdigitq('4') == 4);
	331	insist(unhexdigitq('5') == 5);
	332	insist(unhexdigitq('6') == 6);
	333	insist(unhexdigitq('7') == 7);
	334	insist(unhexdigitq('8') == 8);
	335	insist(unhexdigitq('9') == 9);
	336	insist(unhexdigitq('a') == 10);
	337	insist(unhexdigitq('b') == 11);
	338	insist(unhexdigitq('c') == 12);
	339	insist(unhexdigitq('d') == 13);
	340	insist(unhexdigitq('e') == 14);
	341	insist(unhexdigitq('f') == 15);
	342	insist(unhexdigitq('A') == 10);
	343	insist(unhexdigitq('B') == 11);
	344	insist(unhexdigitq('C') == 12);
	345	insist(unhexdigitq('D') == 13);
	346	insist(unhexdigitq('E') == 14);
	347	insist(unhexdigitq('F') == 15);
	348	check_string(hex(h, sizeof h), "00ff807f");
	349	check_string(hex(0, 0), "");
	350	u = unhex("00ff807f", &ul);
	351	insist(ul == 4);
	352	insist(memcmp(u, h, 4) == 0);
	353	u = unhex("00FF807F", &ul);
	354	insist(ul == 4);
	355	insist(memcmp(u, h, 4) == 0);
	356	u = unhex("", &ul);
	357	insist(ul == 0);
033fd4e3	358	fprintf(stderr, "2 ERROR reports expected {\n");
460b9539	359	insist(unhex("F", 0) == 0);
460b9539	360	insist(unhex("az", 0) == 0);
033fd4e3	361	fprintf(stderr, "}\n");
460b9539	362	}
	363
	364	static void test_casefold(void) {
e5a5a138	365	uint32_t c, l;
56fd389c	366	const char input, canon_folded, compat_folded, canon_expected, *compat_expected;
460b9539	367
033fd4e3	368	fprintf(stderr, "test_casefold\n");
56fd389c RK	369
	370	/* This isn't a very exhaustive test. Unlike for normalization, there don't
	371	* seem to be any public test vectors for these algorithms. */
e5a5a138	372
460b9539	373	for(c = 1; c < 256; ++c) {
e5a5a138	374	input = utf32_to_utf8(&c, 1, 0);
56fd389c RK	375	canon_folded = utf8_casefold_canon(input, strlen(input), 0);
56fd389c RK	376	compat_folded = utf8_casefold_compat(input, strlen(input), 0);
460b9539	377	switch(c) {
	378	default:
	379	if((c >= 'A' && c <= 'Z')
	380	\|\| (c >= 0xC0 && c <= 0xDE && c != 0xD7))
	381	l = c ^ 0x20;
	382	else
	383	l = c;
	384	break;
	385	case 0xB5: /* MICRO SIGN */
e5a5a138	386	l = 0x3BC; /* GREEK SMALL LETTER MU */
460b9539	387	break;
460b9539	388	case 0xDF: /* LATIN SMALL LETTER SHARP S */
56fd389c RK	389	insist(!strcmp(canon_folded, "ss"));
56fd389c RK	390	insist(!strcmp(compat_folded, "ss"));
460b9539	391	l = 0;
	392	break;
	393	}
	394	if(l) {
e5a5a138	395	/* Case-folded data is now normalized */
56fd389c RK	396	canon_expected = ucs42utf8(utf32_decompose_canon(&l, 1, 0));
	397	if(strcmp(canon_folded, canon_expected)) {
	398	fprintf(stderr, "%s:%d: canon-casefolding %#lx got '%s', expected '%s'\n",
	399	__FILE__, __LINE__, (unsigned long)c,
	400	format(canon_folded), format(canon_expected));
bb48024f	401	count_error();
56fd389c RK	402	}
	403	++tests;
	404	compat_expected = ucs42utf8(utf32_decompose_compat(&l, 1, 0));
	405	if(strcmp(compat_folded, compat_expected)) {
	406	fprintf(stderr, "%s:%d: compat-casefolding %#lx got '%s', expected '%s'\n",
460b9539	407	__FILE__, __LINE__, (unsigned long)c,
56fd389c	408	format(compat_folded), format(compat_expected));
bb48024f	409	count_error();
460b9539	410	}
	411	++tests;
	412	}
	413	}
	414	check_string(casefold(""), "");
	415	}
	416
/** @brief Less-than comparison function for integer heap
 * @param a First value
 * @param b Second value
 * @return Nonzero if @p a is strictly less than @p b, otherwise 0
 */
static inline int int_lt(int a, int b) {
  return a < b;
}
	419
dab22732 RK	420	/** @struct iheap
dab22732 RK	421	* @brief A heap with @c int elements */
033fd4e3	422	HEAP_TYPE(iheap, int, int_lt);
8e3fe3d8	423	HEAP_DEFINE(iheap, int, int_lt);
033fd4e3 RK	424
	425	/** @brief Tests for @ref heap.h */
	426	static void test_heap(void) {
	427	struct iheap h[1];
	428	int n;
	429	int last = -1;
	430
	431	fprintf(stderr, "test_heap\n");
	432
	433	iheap_init(h);
	434	for(n = 0; n < 1000; ++n)
	435	iheap_insert(h, random() % 100);
	436	for(n = 0; n < 1000; ++n) {
	437	const int latest = iheap_remove(h);
	438	if(last > latest)
	439	fprintf(stderr, "should have %d <= %d\n", last, latest);
	440	insist(last <= latest);
	441	last = latest;
	442	}
	443	putchar('\n');
	444	}
	445
/** @brief Open a Unicode test file
 * @param path Path of the test file below the Unicode 5.0.0 UCD directory
 * @return Stream open for reading
 *
 * The file is looked for in the current directory under its base name (the
 * part of @p path after the last '/').  If it cannot be opened it is fetched
 * with wget, made read-only and opened again.  Any failure along the way is
 * reported via fatal().
 */
static FILE *open_unicode_test(const char *path) {
  const char *base;
  FILE *fp;
  char buffer[1024];
  int w, len;

  if((base = strrchr(path, '/')))
    ++base;
  else
    base = path;
  if(!(fp = fopen(base, "r"))) {
    len = snprintf(buffer, sizeof buffer,
                   "wget http://www.unicode.org/Public/5.0.0/ucd/%s", path);
    /* A truncated command would fetch the wrong URL, so refuse to run it */
    if(len < 0 || (size_t)len >= sizeof buffer)
      fatal(0, "wget command for %s does not fit in buffer", path);
    if((w = system(buffer)))
      fatal(0, "%s: %s", buffer, wstat(w));
    /* Make the downloaded copy read-only */
    if(chmod(base, 0444) < 0)
      fatal(errno, "chmod %s", base);
    if(!(fp = fopen(base, "r")))
      fatal(errno, "%s", base);
  }
  return fp;
}
	469
1625e11a	470	/** @brief Run breaking tests for utf32_grapheme_boundary() etc */
bb48024f RK	471	static void breaktest(const char *path,
	472	int (breakfn)(const uint32_t , size_t, size_t)) {
	473	FILE *fp = open_unicode_test(path);
	474	int lineno = 0;
	475	char l, lp;
	476	size_t bn, n;
	477	char break_allowed[1024];
	478	uint32_t buffer[1024];
	479
	480	while(!inputline(path, fp, &l, '\n')) {
	481	++lineno;
	482	if(l[0] == '#') continue;
	483	bn = 0;
	484	lp = l;
	485	while(*lp) {
	486	if(lp == ' ' \|\| lp == '\t') {
	487	++lp;
	488	continue;
	489	}
	490	if(*lp == '#')
	491	break;
	492	if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0xB7) {
	493	/* 00F7 DIVISION SIGN */
	494	break_allowed[bn] = 1;
	495	lp += 2;
	496	continue;
	497	}
	498	if((unsigned char)*lp == 0xC3 && (unsigned char)lp[1] == 0x97) {
	499	/* 00D7 MULTIPLICATION SIGN */
	500	break_allowed[bn] = 0;
	501	lp += 2;
	502	continue;
	503	}
	504	if(isxdigit((unsigned char)*lp)) {
	505	buffer[bn++] = strtoul(lp, &lp, 16);
	506	continue;
	507	}
	508	fatal(0, "%s:%d: evil line: %s", path, lineno, l);
	509	}
	510	for(n = 0; n <= bn; ++n) {
	511	if(breakfn(buffer, bn, n) != break_allowed[n]) {
	512	fprintf(stderr,
	513	"%s:%d: offset %zu: mismatch\n",
	514	path, lineno, n);
	515	count_error();
	516	}
	517	++tests;
	518	}
	519	xfree(l);
	520	}
	521	fclose(fp);
	522	}
	523
e5a5a138 RK	524	/** @brief Tests for @ref lib/unicode.h */
	525	static void test_unicode(void) {
	526	FILE *fp;
	527	int lineno = 0;
	528	char l, lp;
	529	uint32_t buffer[1024];
	530	uint32_t c[6], NFD_c[6], NFKD_c[6]; / 1-indexed */
	531	int cn, bn;
	532
	533	fprintf(stderr, "test_unicode\n");
e2452add	534	fp = open_unicode_test("NormalizationTest.txt");
e5a5a138 RK	535	while(!inputline("NormalizationTest.txt", fp, &l, '\n')) {
	536	++lineno;
	537	if(l == '#' \|\| l == '@')
	538	continue;
	539	bn = 0;
	540	cn = 1;
	541	lp = l;
	542	c[cn++] = &buffer[bn];
	543	while(lp && lp != '#') {
	544	if(*lp == ' ') {
	545	++lp;
	546	continue;
	547	}
	548	if(*lp == ';') {
	549	buffer[bn++] = 0;
	550	if(cn == 6)
	551	break;
	552	c[cn++] = &buffer[bn];
	553	++lp;
	554	continue;
	555	}
	556	buffer[bn++] = strtoul(lp, &lp, 16);
	557	}
	558	buffer[bn] = 0;
	559	assert(cn == 6);
	560	for(cn = 1; cn <= 5; ++cn) {
	561	NFD_c[cn] = utf32_decompose_canon(c[cn], utf32_len(c[cn]), 0);
	562	NFKD_c[cn] = utf32_decompose_compat(c[cn], utf32_len(c[cn]), 0);
	563	}
	564	#define unt_check(T, A, B) do { \
	565	++tests; \
	566	if(utf32_cmp(c[A], T##_c[B])) { \
e2452add RK	567	fprintf(stderr, \
	568	"NormalizationTest.txt:%d: c%d != "#T"(c%d)\n", \
	569	lineno, A, B); \
bcf9ed7f	570	fprintf(stderr, " c%d: %s\n", \
e5a5a138 RK	571	A, format_utf32(c[A])); \
	572	fprintf(stderr, "%4s(c%d): %s\n", \
	573	#T, B, format_utf32(T##_c[B])); \
bcf9ed7f	574	count_error(); \
e5a5a138 RK	575	} \
	576	} while(0)
	577	unt_check(NFD, 3, 1);
	578	unt_check(NFD, 3, 2);
	579	unt_check(NFD, 3, 3);
	580	unt_check(NFD, 5, 4);
	581	unt_check(NFD, 5, 5);
	582	unt_check(NFKD, 5, 1);
	583	unt_check(NFKD, 5, 2);
	584	unt_check(NFKD, 5, 3);
	585	unt_check(NFKD, 5, 4);
	586	unt_check(NFKD, 5, 5);
	587	for(cn = 1; cn <= 5; ++cn) {
	588	xfree(NFD_c[cn]);
	589	xfree(NFKD_c[cn]);
	590	}
	591	xfree(l);
	592	}
e2452add	593	fclose(fp);
1625e11a	594	breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_grapheme_boundary);
bb48024f	595	breaktest("auxiliary/WordBreakTest.txt", utf32_is_word_boundary);
e5a5a138 RK	596	}
e5a5a138 RK	597
460b9539	598	int main(void) {
bb48024f	599	fail_first = !!getenv("FAIL_FIRST");
460b9539	600	insist('\n' == 0x0A);
	601	insist('\r' == 0x0D);
	602	insist(' ' == 0x20);
	603	insist('0' == 0x30);
	604	insist('9' == 0x39);
	605	insist('A' == 0x41);
	606	insist('Z' == 0x5A);
	607	insist('a' == 0x61);
	608	insist('z' == 0x7A);
	609	/* addr.c */
	610	/* asprintf.c */
	611	/* authhash.c */
	612	/* basen.c */
	613	/* charset.c */
	614	/* client.c */
	615	/* configuration.c */
	616	/* event.c */
	617	/* fprintf.c */
033fd4e3 RK	618	/* heap.c */
033fd4e3 RK	619	test_heap();
460b9539	620	/* hex.c */
	621	test_hex();
	622	/* inputline.c */
	623	/* kvp.c */
	624	/* log.c */
	625	/* mem.c */
	626	/* mime.c */
	627	test_mime();
	628	/* mixer.c */
	629	/* plugin.c */
	630	/* printf.c */
	631	/* queue.c */
	632	/* sink.c */
	633	/* snprintf.c */
	634	/* split.c */
	635	/* syscalls.c */
	636	/* table.c */
e5a5a138 RK	637	/* unicode.c */
e5a5a138 RK	638	test_unicode();
460b9539	639	/* utf8.c */
	640	test_utf8();
	641	/* vector.c */
	642	/* words.c */
	643	test_casefold();
	644	/* XXX words() */
	645	/* wstat.c */
	646	fprintf(stderr, "%d errors out of %d tests\n", errors, tests);
	647	return !!errors;
	648	}
	649
	650	/*
	651	Local Variables:
	652	c-basic-offset:2
	653	comment-column:40
56fd389c RK	654	fill-column:79
56fd389c RK	655	indent-tabs-mode:nil
460b9539	656	End:
460b9539	657	*/