From: Richard Kettlewell Date: Tue, 18 Dec 2007 10:40:52 +0000 (+0000) Subject: merge extra MIME parsing X-Git-Tag: 3.0~205 X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~mdw/git/disorder/commitdiff_plain/71b70599a2cd81c13cc4326499a5d0c45358cd7d?hp=-c merge extra MIME parsing --- 71b70599a2cd81c13cc4326499a5d0c45358cd7d diff --combined lib/mime.c index 03aa94b,c8ffe31..5562c7e --- a/lib/mime.c +++ b/lib/mime.c @@@ -1,6 -1,6 +1,6 @@@ /* * This file is part of DisOrder - * Copyright (C) 2005 Richard Kettlewell + * Copyright (C) 2005, 2007 Richard Kettlewell * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@@ -17,7 -17,9 +17,9 @@@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA */ - + /** @file lib/mime.c + * @brief Support for MIME and allied protocols + */ #include #include "types.h" @@@ -25,13 -27,13 +27,15 @@@ #include #include +#include + #include "mem.h" #include "mime.h" #include "vector.h" #include "hex.h" + #include "log.h" + /** @brief Match whitespace characters */ static int whitespace(int c) { switch(c) { case ' ': @@@ -44,6 -46,7 +48,7 @@@ } } + /** @brief Match RFC2045 tspecial characters */ static int tspecial(int c) { switch(c) { case '(': @@@ -67,7 -70,43 +72,43 @@@ } } - static const char *skipwhite(const char *s) { + /** @brief Mathc RFC2616 seprator characters */ + static int http_separator(int c) { + switch(c) { + case '(': + case ')': + case '<': + case '>': + case '@': + case ',': + case ';': + case ':': + case '\\': + case '"': + case '/': + case '[': + case ']': + case '?': + case '=': + case '{': + case '}': + case ' ': + case '\t': + return 1; + default: + return 0; + } + } + + /** @brief Match CRLF */ + static int iscrlf(const char *ptr) { + return ptr[0] == '\r' && ptr[1] == '\n'; + } + + /** @brief Skip whitespace + * @param rfc822_comments If true, skip RFC822 nested comments + */ + static const char 
*skipwhite(const char *s, int rfc822_comments) { int c, depth; for(;;) { @@@ -79,6 -118,8 +120,8 @@@ ++s; break; case '(': + if(!rfc822_comments) + return s; ++s; depth = 1; while(*s && depth) { @@@ -100,66 -141,103 +143,103 @@@ } } - static const char *parsestring(const char *s, char **valuep) { - struct dynstr value; + /** @brief Test for a word character + * @param c Character to test + * @param special tspecial() (MIME/RFC2405) or http_separator() (HTTP/RFC2616) + * @return 1 if @p c is a word character, else 0 + */ + static int iswordchar(int c, int (*special)(int)) { + return !(c <= ' ' || c > '~' || special(c)); + } + + /** @brief Parse an RFC1521/RFC2616 word + * @param s Pointer to start of word + * @param valuep Where to store value + * @param special tspecial() (MIME/RFC2405) or http_separator() (HTTP/RFC2616) + * @return Pointer just after end of word or NULL if there's no word + * + * A word is a token or a quoted-string. + */ + static const char *parseword(const char *s, char **valuep, + int (*special)(int)) { + struct dynstr value[1]; int c; - dynstr_init(&value); - ++s; - while((c = *s++) != '"') { - switch(c) { - case '\\': - if(!(c = *s++)) return 0; - default: - dynstr_append(&value, c); - break; + dynstr_init(value); + if(*s == '"') { + ++s; + while((c = *s++) != '"') { + switch(c) { + case '\\': + if(!(c = *s++)) return 0; + default: + dynstr_append(value, c); + break; + } } + if(!c) return 0; + } else { + if(!iswordchar((unsigned char)*s, special)) + return NULL; + dynstr_init(value); + while(iswordchar((unsigned char)*s, special)) + dynstr_append(value, *s++); } - if(!c) return 0; - dynstr_terminate(&value); - *valuep = value.vec; + dynstr_terminate(value); + *valuep = value->vec; return s; } + /** @brief Parse an RFC1521/RFC2616 token + * @param s Pointer to start of token + * @param valuep Where to store value + * @param special tspecial() (MIME/RFC2405) or http_separator() (HTTP/RFC2616) + * @return Pointer just after end of token or NULL 
if there's no token + */ + static const char *parsetoken(const char *s, char **valuep, + int (*special)(int)) { + if(*s == '"') return 0; + return parseword(s, valuep, special); + } + + /** @brief Parse a MIME content-type field + * @param s Start of field + * @param typep Where to store type + * @param parameternamep Where to store parameter name + * @param parameternvaluep Wher to store parameter value + * @return 0 on success, non-0 on error + */ int mime_content_type(const char *s, char **typep, char **parameternamep, char **parametervaluep) { - struct dynstr type, parametername, parametervalue; + struct dynstr type, parametername; dynstr_init(&type); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(!*s) return -1; while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&type, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(*s++ != '/') return -1; dynstr_append(&type, '/'); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&type, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(*s == ';') { dynstr_init(&parametername); ++s; - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(!*s) return -1; while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&parametername, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(*s++ != '=') return -1; - if(!(s = skipwhite(s))) return -1; - if(*s == '"') { - if(!(s = parsestring(s, parametervaluep))) return -1; - } else { - dynstr_init(&parametervalue); - while(*s && !tspecial(*s) && !whitespace(*s)) - dynstr_append(&parametervalue, *s++); - dynstr_terminate(&parametervalue); - *parametervaluep = parametervalue.vec; - } - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; + if(!(s = 
parseword(s, parametervaluep, tspecial))) return -1; + if(!(s = skipwhite(s, 1))) return -1; dynstr_terminate(&parametername); *parameternamep = parametername.vec; } else @@@ -169,10 -247,12 +249,12 @@@ return 0; } - static int iscrlf(const char *ptr) { - return ptr[0] == '\r' && ptr[1] == '\n'; - } - + /** @brief Parse a MIME message + * @param s Start of message + * @param callback Called for each header field + * @param u Passed to callback + * @return Pointer to decoded body (might be in original string) + */ const char *mime_parse(const char *s, int (*callback)(const char *name, const char *value, void *u), @@@ -185,7 -265,7 +267,7 @@@ dynstr_init(&value); while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&name, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return 0; + if(!(s = skipwhite(s, 1))) return 0; if(*s != ':') return 0; ++s; while(*s && !(*s == '\n' && !(s[1] == ' ' || s[1] == '\t'))) @@@ -215,7 -295,7 +297,7 @@@ static int isboundary(const char *ptr, && (iscrlf(ptr + bl + 2) || (ptr[bl + 2] == '-' && ptr[bl + 3] == '-' - && iscrlf(ptr + bl + 4)))); + && (iscrlf(ptr + bl + 4) || *(ptr + bl + 4) == 0)))); } static int isfinal(const char *ptr, const char *boundary, size_t bl) { @@@ -224,9 -304,16 +306,16 @@@ && !strncmp(ptr + 2, boundary, bl) && ptr[bl + 2] == '-' && ptr[bl + 3] == '-' - && iscrlf(ptr + bl + 4)); + && (iscrlf(ptr + bl + 4) || *(ptr + bl + 4) == 0)); } + /** @brief Parse a multipart MIME body + * @param s Start of message + * @param callback CAllback for each part + * @param boundary Boundary string + * @param u Passed to callback + * @return 0 on success, non-0 on error + */ int mime_multipart(const char *s, int (*callback)(const char *s, void *u), const char *boundary, @@@ -235,16 -322,12 +324,16 @@@ const char *start, *e; int ret; - if(!isboundary(s, boundary, bl)) return -1; + /* We must start with a boundary string */ + if(!isboundary(s, boundary, bl)) + return -1; + /* Keep going until we hit a final boundary */ 
while(!isfinal(s, boundary, bl)) { s = strstr(s, "\r\n") + 2; start = s; while(!isboundary(s, boundary, bl)) { - if(!(e = strstr(s, "\r\n"))) return -1; + if(!(e = strstr(s, "\r\n"))) + return -1; s = e + 2; } if((ret = callback(xstrndup(start, @@@ -255,39 -338,38 +344,38 @@@ return 0; } + /** @brief Parse an RFC2388-style content-disposition field + * @param s Start of field + * @param typep Where to store type + * @param parameternamep Where to store parameter name + * @param parameternvaluep Wher to store parameter value + * @return 0 on success, non-0 on error + */ int mime_rfc2388_content_disposition(const char *s, char **dispositionp, char **parameternamep, char **parametervaluep) { - struct dynstr disposition, parametername, parametervalue; + struct dynstr disposition, parametername; dynstr_init(&disposition); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(!*s) return -1; while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&disposition, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(*s == ';') { dynstr_init(&parametername); ++s; - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(!*s) return -1; while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&parametername, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(*s++ != '=') return -1; - if(!(s = skipwhite(s))) return -1; - if(*s == '"') { - if(!(s = parsestring(s, parametervaluep))) return -1; - } else { - dynstr_init(&parametervalue); - while(*s && !tspecial(*s) && !whitespace(*s)) - dynstr_append(&parametervalue, *s++); - dynstr_terminate(&parametervalue); - *parametervaluep = parametervalue.vec; - } - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; + if(!(s = parseword(s, parametervaluep, tspecial))) return -1; + if(!(s = skipwhite(s, 1))) return -1; dynstr_terminate(&parametername); *parameternamep = 
parametername.vec; } else @@@ -297,6 -379,10 +385,10 @@@ return 0; } + /** @brief Convert MIME quoted-printable + * @param s Quoted-printable data + * @return Decoded data + */ char *mime_qp(const char *s) { struct dynstr d; int c, a, b; @@@ -339,6 -425,10 +431,10 @@@ return d.vec; } + /** @brief Convert MIME base64 + * @param s base64 data + * @return Decoded data + */ char *mime_base64(const char *s) { struct dynstr d; const char *t; @@@ -370,6 -460,82 +466,82 @@@ return d.vec; } + /** @brief Parse a RFC2109 Cookie: header + * @param s Header field value + * @param cd Where to store result + * @return 0 on success, non-0 on error + */ + int parse_cookie(const char *s, + struct cookiedata *cd) { + char *n = 0, *v = 0; + + memset(cd, 0, sizeof *cd); + s = skipwhite(s, 0); + while(*s) { + /* Skip separators */ + if(*s == ';' || *s == ',') { + ++s; + s = skipwhite(s, 0); + continue; + } + if(!(s = parsetoken(s, &n, http_separator))) return -1; + s = skipwhite(s, 0); + if(*s++ != '=') return -1; + s = skipwhite(s, 0); + if(!(s = parseword(s, &v, http_separator))) return -1; + if(n[0] == '$') { + /* Some bit of meta-information */ + if(!strcmp(n, "$Version")) + cd->version = v; + else if(!strcmp(n, "$Path")) { + if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].path == 0) + cd->cookies[cd->ncookies-1].path = v; + else { + error(0, "redundant $Path in Cookie: header"); + return -1; + } + } else if(!strcmp(n, "$Domain")) { + if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].domain == 0) + cd->cookies[cd->ncookies-1].domain = v; + else { + error(0, "redundant $Domain in Cookie: header"); + return -1; + } + } + } else { + /* It's a new cookie */ + cd->cookies = xrealloc(cd->cookies, + (cd->ncookies + 1) * sizeof (struct cookie)); + cd->cookies[cd->ncookies].name = n; + cd->cookies[cd->ncookies].value = v; + cd->cookies[cd->ncookies].path = 0; + cd->cookies[cd->ncookies].domain = 0; + ++cd->ncookies; + } + s = skipwhite(s, 0); + if(*s && (*s != ',' && *s != ';')) { + 
error(0, "missing separator in Cookie: header"); + return -1; + } + } + return 0; + } + + /** @brief Find a named cookie + * @param cd Parse cookie data + * @param name Name of cookie + * @return Cookie structure or NULL if not found + */ + const struct cookie *find_cookie(const struct cookiedata *cd, + const char *name) { + int n; + + for(n = 0; n < cd->ncookies; ++n) + if(!strcmp(cd->cookies[n].name, name)) + return &cd->cookies[n]; + return 0; + } + /* Local Variables: c-basic-offset:2 diff --combined lib/test.c index 5028054,3ae97c8..399c7f1 --- a/lib/test.c +++ b/lib/test.c @@@ -30,14 -30,8 +30,14 @@@ #include #include #include +#include +#include +#include +#include +#include +#include +#include -#include "utf8.h" #include "mem.h" #include "log.h" #include "vector.h" @@@ -48,19 -42,6 +48,19 @@@ #include "unicode.h" #include "inputline.h" #include "wstat.h" +#include "signame.h" +#include "cache.h" +#include "filepart.h" +#include "hash.h" +#include "selection.h" +#include "syscalls.h" +#include "kvp.h" +#include "sink.h" +#include "printf.h" +#include "basen.h" +#include "split.h" +#include "configuration.h" +#include "addr.h" static int tests, errors; static int fail_first; @@@ -113,48 -94,22 +113,48 @@@ static const char *format_utf32(const u return d.vec; } -#define check_string(GOT, WANT) do { \ - const char *g = GOT; \ - const char *w = WANT; \ - \ - if(w == 0) { \ - fprintf(stderr, "%s:%d: %s returned 0\n", \ - __FILE__, __LINE__, #GOT); \ - count_error(); \ - } else if(strcmp(w, g)) { \ - fprintf(stderr, "%s:%d: %s returned:\n%s\nexpected:\n%s\n", \ - __FILE__, __LINE__, #GOT, format(g), format(w)); \ - count_error(); \ - } \ - ++tests; \ +#define check_string(GOT, WANT) do { \ + const char *got = GOT; \ + const char *want = WANT; \ + \ + if(want == 0) { \ + fprintf(stderr, "%s:%d: %s returned 0\n", \ + __FILE__, __LINE__, #GOT); \ + count_error(); \ + } else if(strcmp(want, got)) { \ + fprintf(stderr, "%s:%d: %s returned:\n%s\nexpected:\n%s\n", \ + 
__FILE__, __LINE__, #GOT, format(got), format(want)); \ + count_error(); \ + } \ + ++tests; \ } while(0) +#define check_string_prefix(GOT, WANT) do { \ + const char *got = GOT; \ + const char *want = WANT; \ + \ + if(want == 0) { \ + fprintf(stderr, "%s:%d: %s returned 0\n", \ + __FILE__, __LINE__, #GOT); \ + count_error(); \ + } else if(strncmp(want, got, strlen(want))) { \ + fprintf(stderr, "%s:%d: %s returned:\n%s\nexpected:\n%s...\n", \ + __FILE__, __LINE__, #GOT, format(got), format(want)); \ + count_error(); \ + } \ + ++tests; \ + } while(0) + +#define check_integer(GOT, WANT) do { \ + const intmax_t got = GOT, want = WANT; \ + if(got != want) { \ + fprintf(stderr, "%s:%d: %s returned: %jd expected: %jd\n", \ + __FILE__, __LINE__, #GOT, got, want); \ + count_error(); \ + } \ + ++tests; \ +} while(0) + static uint32_t *ucs4parse(const char *s) { struct dynstr_ucs4 d; char *e; @@@ -184,7 -139,7 +184,7 @@@ static void test_utf8(void) insist(!utf32_cmp(w, ucs)); \ u8 = utf32_to_utf8(ucs, utf32_len(ucs), 0); \ insist(u8 != 0); \ - insist(!strcmp(u8, CHARS)); \ + check_string(u8, CHARS); \ } while(0) fprintf(stderr, "test_utf8\n"); @@@ -259,7 -214,6 +259,7 @@@ U8("\xF4\x80\x80\x80", "0x100000"); U8("\xF4\x8F\xBF\xBF", "0x10FFFF"); insist(!validutf8("\xF4\x90\x80\x80")); + insist(!validutf8("\xF4\x80\xFF\x80")); /* miscellaneous non-UTF-8 rubbish */ insist(!validutf8("\x80")); @@@ -285,163 -239,38 +285,163 @@@ insist(!validutf8("\xF8")); } +static int test_multipart_callback(const char *s, void *u) { + struct vector *parts = u; + + vector_append(parts, (char *)s); + return 0; +} + static void test_mime(void) { char *t, *n, *v; + struct vector parts[1]; fprintf(stderr, "test_mime\n"); t = n = v = 0; insist(!mime_content_type("text/plain", &t, &n, &v)); - insist(!strcmp(t, "text/plain")); + check_string(t, "text/plain"); insist(n == 0); insist(v == 0); + insist(mime_content_type("TEXT ((broken) comment", &t, &n, &v) < 0); + insist(mime_content_type("TEXT ((broken) 
comment\\", &t, &n, &v) < 0); + t = n = v = 0; - insist(!mime_content_type("TEXT ((nested) comment) /plain", &t, &n, &v)); - insist(!strcmp(t, "text/plain")); + insist(!mime_content_type("TEXT ((nested)\\ comment) /plain", &t, &n, &v)); + check_string(t, "text/plain"); insist(n == 0); insist(v == 0); t = n = v = 0; - insist(!mime_content_type(" text/plain ; Charset=utf-8", &t, &n, &v)); - insist(!strcmp(t, "text/plain")); - insist(!strcmp(n, "charset")); - insist(!strcmp(v, "utf-8")); + insist(!mime_content_type(" text/plain ; Charset=\"utf-\\8\"", &t, &n, &v)); + check_string(t, "text/plain"); + check_string(n, "charset"); + check_string(v, "utf-8"); t = n = v = 0; insist(!mime_content_type("text/plain;charset = ISO-8859-1 ", &t, &n, &v)); - insist(!strcmp(t, "text/plain")); - insist(!strcmp(n, "charset")); - insist(!strcmp(v, "ISO-8859-1")); + check_string(t, "text/plain"); + check_string(n, "charset"); + check_string(v, "ISO-8859-1"); + + t = n = v = 0; + insist(!mime_rfc2388_content_disposition("form-data; name=\"field1\"", &t, &n, &v)); + check_string(t, "form-data"); + check_string(n, "name"); + check_string(v, "field1"); + insist(!mime_rfc2388_content_disposition("inline", &t, &n, &v)); + check_string(t, "inline"); + insist(n == 0); + insist(v == 0); + + /* Current versions of the code only understand a single arg to these + * headers. This is a bug at the level they work at but suffices for + * DisOrder's current purposes. 
*/ + + insist(!mime_rfc2388_content_disposition( + "attachment; filename=genome.jpeg;\n" + "modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\"", + &t, &n, &v)); + check_string(t, "attachment"); + check_string(n, "filename"); + check_string(v, "genome.jpeg"); + + vector_init(parts); + insist(mime_multipart("--outer\r\n" + "Content-Type: text/plain\r\n" + "Content-Disposition: inline\r\n" + "Content-Description: text-part-1\r\n" + "\r\n" + "Some text goes here\r\n" + "\r\n" + "--outer\r\n" + "Content-Type: multipart/mixed; boundary=inner\r\n" + "Content-Disposition: attachment\r\n" + "Content-Description: multipart-2\r\n" + "\r\n" + "--inner\r\n" + "Content-Type: text/plain\r\n" + "Content-Disposition: inline\r\n" + "Content-Description: text-part-2\r\n" + "\r\n" + "Some more text here.\r\n" + "\r\n" + "--inner\r\n" + "Content-Type: image/jpeg\r\n" + "Content-Disposition: attachment\r\n" + "Content-Description: jpeg-1\r\n" + "\r\n" + "\r\n" + "--inner--\r\n" + "--outer--\r\n", + test_multipart_callback, + "outer", + parts) == 0); + check_integer(parts->nvec, 2); + check_string(parts->vec[0], + "Content-Type: text/plain\r\n" + "Content-Disposition: inline\r\n" + "Content-Description: text-part-1\r\n" + "\r\n" + "Some text goes here\r\n"); + check_string(parts->vec[1], + "Content-Type: multipart/mixed; boundary=inner\r\n" + "Content-Disposition: attachment\r\n" + "Content-Description: multipart-2\r\n" + "\r\n" + "--inner\r\n" + "Content-Type: text/plain\r\n" + "Content-Disposition: inline\r\n" + "Content-Description: text-part-2\r\n" + "\r\n" + "Some more text here.\r\n" + "\r\n" + "--inner\r\n" + "Content-Type: image/jpeg\r\n" + "Content-Disposition: attachment\r\n" + "Content-Description: jpeg-1\r\n" + "\r\n" + "\r\n" + "--inner--"); + /* No trailing CRLF is _correct_ - see RFC2046 5.1.1 note regarding CRLF + * preceding the boundary delimiter line. 
An implication of this is that we + * must cope with partial lines at the end of the input when recursively + * decomposing a multipart message. */ + vector_init(parts); + insist(mime_multipart("--inner\r\n" + "Content-Type: text/plain\r\n" + "Content-Disposition: inline\r\n" + "Content-Description: text-part-2\r\n" + "\r\n" + "Some more text here.\r\n" + "\r\n" + "--inner\r\n" + "Content-Type: image/jpeg\r\n" + "Content-Disposition: attachment\r\n" + "Content-Description: jpeg-1\r\n" + "\r\n" + "\r\n" + "--inner--", + test_multipart_callback, + "inner", + parts) == 0); + check_integer(parts->nvec, 2); + check_string(parts->vec[0], + "Content-Type: text/plain\r\n" + "Content-Disposition: inline\r\n" + "Content-Description: text-part-2\r\n" + "\r\n" + "Some more text here.\r\n"); + check_string(parts->vec[1], + "Content-Type: image/jpeg\r\n" + "Content-Disposition: attachment\r\n" + "Content-Description: jpeg-1\r\n" + "\r\n" + ""); + /* XXX mime_parse */ - /* XXX mime_multipart */ - /* XXX mime_rfc2388_content_disposition */ check_string(mime_qp(""), ""); check_string(mime_qp("foobar"), "foobar"); @@@ -478,6 -307,52 +478,52 @@@ "\x04\x10\x41" "\x04\x10"); } + static void test_cookies(void) { + struct cookiedata cd[1]; + + fprintf(stderr, "test_cookies\n"); + + /* These are the examples from RFC2109 */ + insist(!parse_cookie("$Version=\"1\"; Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\"", cd)); + insist(!strcmp(cd->version, "1")); + insist(cd->ncookies = 1); + insist(find_cookie(cd, "Customer") == &cd->cookies[0]); + check_string(cd->cookies[0].value, "WILE_E_COYOTE"); + check_string(cd->cookies[0].path, "/acme"); + insist(cd->cookies[0].domain == 0); + insist(!parse_cookie("$Version=\"1\";\n" + "Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\";\n" + "Part_Number=\"Rocket_Launcher_0001\"; $Path=\"/acme\"", + cd)); + insist(cd->ncookies = 2); + insist(find_cookie(cd, "Customer") == &cd->cookies[0]); + insist(find_cookie(cd, "Part_Number") == &cd->cookies[1]); + 
check_string(cd->cookies[0].value, "WILE_E_COYOTE"); + check_string(cd->cookies[0].path, "/acme"); + insist(cd->cookies[0].domain == 0); + check_string(cd->cookies[1].value, "Rocket_Launcher_0001"); + check_string(cd->cookies[1].path, "/acme"); + insist(cd->cookies[1].domain == 0); + insist(!parse_cookie("$Version=\"1\";\n" + "Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\";\n" + "Part_Number=\"Rocket_Launcher_0001\"; $Path=\"/acme\";\n" + "Shipping=\"FedEx\"; $Path=\"/acme\"", + cd)); + insist(cd->ncookies = 3); + insist(find_cookie(cd, "Customer") == &cd->cookies[0]); + insist(find_cookie(cd, "Part_Number") == &cd->cookies[1]); + insist(find_cookie(cd, "Shipping") == &cd->cookies[2]); + check_string(cd->cookies[0].value, "WILE_E_COYOTE"); + check_string(cd->cookies[0].path, "/acme"); + insist(cd->cookies[0].domain == 0); + check_string(cd->cookies[1].value, "Rocket_Launcher_0001"); + check_string(cd->cookies[1].path, "/acme"); + insist(cd->cookies[1].domain == 0); + check_string(cd->cookies[2].value, "FedEx"); + check_string(cd->cookies[2].path, "/acme"); + insist(cd->cookies[2].domain == 0); + } + static void test_hex(void) { unsigned n; static const unsigned char h[] = { 0x00, 0xFF, 0x80, 0x7F }; @@@ -553,8 -428,8 +599,8 @@@ static void test_casefold(void) l = 0x3BC; /* GREEK SMALL LETTER MU */ break; case 0xDF: /* LATIN SMALL LETTER SHARP S */ - insist(!strcmp(canon_folded, "ss")); - insist(!strcmp(compat_folded, "ss")); + check_string(canon_folded, "ss"); + check_string(compat_folded, "ss"); l = 0; break; } @@@ -842,498 -717,9 +888,498 @@@ static void test_unicode(void) fclose(fp); breaktest("auxiliary/GraphemeBreakTest.txt", utf32_is_grapheme_boundary); breaktest("auxiliary/WordBreakTest.txt", utf32_is_word_boundary); + insist(utf32_combining_class(0x40000) == 0); + insist(utf32_combining_class(0xE0000) == 0); +} + +static void test_signame(void) { + fprintf(stderr, "test_signame\n"); + insist(find_signal("SIGTERM") == SIGTERM); + insist(find_signal("SIGHUP") == 
SIGHUP); + insist(find_signal("SIGINT") == SIGINT); + insist(find_signal("SIGQUIT") == SIGQUIT); + insist(find_signal("SIGKILL") == SIGKILL); + insist(find_signal("SIGYOURMUM") == -1); +} + +static void test_cache(void) { + const struct cache_type t1 = { 1 }, t2 = { 10 }; + const char v11[] = "spong", v12[] = "wibble", v2[] = "blat"; + fprintf(stderr, "test_cache\n"); + cache_put(&t1, "1_1", v11); + cache_put(&t1, "1_2", v12); + cache_put(&t2, "2", v2); + insist(cache_count() == 3); + insist(cache_get(&t2, "2") == v2); + insist(cache_get(&t1, "1_1") == v11); + insist(cache_get(&t1, "1_2") == v12); + insist(cache_get(&t1, "2") == 0); + insist(cache_get(&t2, "1_1") == 0); + insist(cache_get(&t2, "1_2") == 0); + insist(cache_get(&t1, "2") == 0); + insist(cache_get(&t2, "1_1") == 0); + insist(cache_get(&t2, "1_2") == 0); + sleep(2); + cache_expire(); + insist(cache_count() == 1); + insist(cache_get(&t1, "1_1") == 0); + insist(cache_get(&t1, "1_2") == 0); + insist(cache_get(&t2, "2") == v2); + cache_clean(0); + insist(cache_count() == 0); + insist(cache_get(&t2, "2") == 0); +} + +static void test_filepart(void) { + fprintf(stderr, "test_filepart\n"); + check_string(d_dirname("/"), "/"); + check_string(d_dirname("////"), "/"); + check_string(d_dirname("/spong"), "/"); + check_string(d_dirname("////spong"), "/"); + check_string(d_dirname("/foo/bar"), "/foo"); + check_string(d_dirname("////foo/////bar"), "////foo"); + check_string(d_dirname("./bar"), "."); + check_string(d_dirname(".//bar"), "."); + check_string(d_dirname("."), "."); + check_string(d_dirname(".."), "."); + check_string(d_dirname("../blat"), ".."); + check_string(d_dirname("..//blat"), ".."); + check_string(d_dirname("wibble"), "."); + check_string(extension("foo.c"), ".c"); + check_string(extension(".c"), ".c"); + check_string(extension("."), "."); + check_string(extension("foo"), ""); + check_string(extension("./foo"), ""); + check_string(extension("./foo.c"), ".c"); + 
check_string(strip_extension("foo.c"), "foo"); + check_string(strip_extension("foo.mp3"), "foo"); + check_string(strip_extension("foo.---"), "foo.---"); + check_string(strip_extension("foo.---xyz"), "foo.---xyz"); + check_string(strip_extension("foo.bar/wibble.spong"), "foo.bar/wibble"); +} + +static void test_selection(void) { + hash *h; + fprintf(stderr, "test_selection\n"); + insist((h = selection_new()) != 0); + selection_set(h, "one", 1); + selection_set(h, "two", 1); + selection_set(h, "three", 0); + selection_set(h, "four", 1); + insist(selection_selected(h, "one") == 1); + insist(selection_selected(h, "two") == 1); + insist(selection_selected(h, "three") == 0); + insist(selection_selected(h, "four") == 1); + insist(selection_selected(h, "five") == 0); + insist(hash_count(h) == 3); + selection_flip(h, "one"); + selection_flip(h, "three"); + insist(selection_selected(h, "one") == 0); + insist(selection_selected(h, "three") == 1); + insist(hash_count(h) == 3); + selection_live(h, "one"); + selection_live(h, "two"); + selection_live(h, "three"); + selection_cleanup(h); + insist(selection_selected(h, "one") == 0); + insist(selection_selected(h, "two") == 1); + insist(selection_selected(h, "three") == 1); + insist(selection_selected(h, "four") == 0); + insist(selection_selected(h, "five") == 0); + insist(hash_count(h) == 2); + selection_empty(h); + insist(selection_selected(h, "one") == 0); + insist(selection_selected(h, "two") == 0); + insist(selection_selected(h, "three") == 0); + insist(selection_selected(h, "four") == 0); + insist(selection_selected(h, "five") == 0); + insist(hash_count(h) == 0); +} + +static void test_wstat(void) { + pid_t pid; + int w; + + fprintf(stderr, "test_wstat\n"); + if(!(pid = xfork())) { + _exit(1); + } + while(waitpid(pid, &w, 0) < 0 && errno == EINTR) + ; + check_string(wstat(w), "exited with status 1"); + if(!(pid = xfork())) { + kill(getpid(), SIGTERM); + _exit(-1); + } + while(waitpid(pid, &w, 0) < 0 && errno == EINTR) + ; + 
check_string_prefix(wstat(w), "terminated by signal 15"); +} + +static void test_kvp(void) { + struct kvp *k; + size_t n; + + fprintf(stderr, "test_kvp\n"); + /* decoding */ +#define KVP_URLDECODE(S) kvp_urldecode((S), strlen(S)) + insist(KVP_URLDECODE("=%zz") == 0); + insist(KVP_URLDECODE("=%0") == 0); + insist(KVP_URLDECODE("=%0z") == 0); + insist(KVP_URLDECODE("=%%") == 0); + insist(KVP_URLDECODE("==%") == 0); + insist(KVP_URLDECODE("wibble") == 0); + insist(KVP_URLDECODE("") == 0); + insist(KVP_URLDECODE("wibble&") == 0); + insist((k = KVP_URLDECODE("one=bl%61t+foo")) != 0); + check_string(kvp_get(k, "one"), "blat foo"); + insist(kvp_get(k, "ONE") == 0); + insist(k->next == 0); + insist((k = KVP_URLDECODE("wibble=splat&bar=spong")) != 0); + check_string(kvp_get(k, "wibble"), "splat"); + check_string(kvp_get(k, "bar"), "spong"); + insist(kvp_get(k, "ONE") == 0); + insist(k->next->next == 0); + /* encoding */ + insist(kvp_set(&k, "bar", "spong") == 0); + insist(kvp_set(&k, "bar", "foo") == 1); + insist(kvp_set(&k, "zog", "%") == 1); + insist(kvp_set(&k, "wibble", 0) == 1); + insist(kvp_set(&k, "wibble", 0) == 0); + check_string(kvp_urlencode(k, 0), + "bar=foo&zog=%25"); + check_string(kvp_urlencode(k, &n), + "bar=foo&zog=%25"); + insist(n == strlen("bar=foo&zog=%25")); + check_string(urlencodestring("abc% +\n"), + "abc%25%20%2b%0a"); +} + +static void test_sink(void) { + struct sink *s; + struct dynstr d[1]; + FILE *fp; + char *l; + + fprintf(stderr, "test_sink\n"); + + fp = tmpfile(); + assert(fp != 0); + s = sink_stdio("tmpfile", fp); + insist(sink_printf(s, "test: %d\n", 999) == 10); + insist(sink_printf(s, "wibble: %s\n", "foobar") == 15); + rewind(fp); + insist(inputline("tmpfile", fp, &l, '\n') == 0); + check_string(l, "test: 999"); + insist(inputline("tmpfile", fp, &l, '\n') == 0); + check_string(l, "wibble: foobar"); + insist(inputline("tmpfile", fp, &l, '\n') == -1); + + dynstr_init(d); + s = sink_dynstr(d); + insist(sink_printf(s, "test: %d\n", 999) == 
10); + insist(sink_printf(s, "wibble: %s\n", "foobar") == 15); + dynstr_terminate(d); + check_string(d->vec, "test: 999\nwibble: foobar\n"); +} + +static const char *do_printf(const char *fmt, ...) { + va_list ap; + char *s; + int rc; + + va_start(ap, fmt); + rc = byte_vasprintf(&s, fmt, ap); + va_end(ap); + if(rc < 0) + return 0; + return s; +} + +static void test_printf(void) { + char c; + short s; + int i; + long l; + long long ll; + intmax_t m; + ssize_t ssz; + ptrdiff_t p; + char *cp; + char buffer[16]; + + fprintf(stderr, "test_printf\n"); + check_string(do_printf("%d", 999), "999"); + check_string(do_printf("%d", -999), "-999"); + check_string(do_printf("%i", 999), "999"); + check_string(do_printf("%i", -999), "-999"); + check_string(do_printf("%u", 999), "999"); + check_string(do_printf("%2u", 999), "999"); + check_string(do_printf("%10u", 999), " 999"); + check_string(do_printf("%-10u", 999), "999 "); + check_string(do_printf("%010u", 999), "0000000999"); + check_string(do_printf("%-10d", -999), "-999 "); + check_string(do_printf("%-010d", -999), "-999 "); /* "-" beats "0" */ + check_string(do_printf("%66u", 999), " 999"); + check_string(do_printf("%o", 999), "1747"); + check_string(do_printf("%#o", 999), "01747"); + check_string(do_printf("%#o", 0), "0"); + check_string(do_printf("%x", 999), "3e7"); + check_string(do_printf("%#x", 999), "0x3e7"); + check_string(do_printf("%#X", 999), "0X3E7"); + check_string(do_printf("%#x", 0), "0"); + check_string(do_printf("%hd", (short)999), "999"); + check_string(do_printf("%hhd", (short)99), "99"); + check_string(do_printf("%ld", 100000L), "100000"); + check_string(do_printf("%lld", 10000000000LL), "10000000000"); + check_string(do_printf("%qd", 10000000000LL), "10000000000"); + check_string(do_printf("%jd", (intmax_t)10000000000LL), "10000000000"); + check_string(do_printf("%zd", (ssize_t)2000000000), "2000000000"); + check_string(do_printf("%td", (ptrdiff_t)2000000000), "2000000000"); + 
check_string(do_printf("%hu", (short)999), "999"); + check_string(do_printf("%hhu", (short)99), "99"); + check_string(do_printf("%lu", 100000L), "100000"); + check_string(do_printf("%llu", 10000000000LL), "10000000000"); + check_string(do_printf("%ju", (uintmax_t)10000000000LL), "10000000000"); + check_string(do_printf("%zu", (size_t)2000000000), "2000000000"); + check_string(do_printf("%tu", (ptrdiff_t)2000000000), "2000000000"); + check_string(do_printf("%p", (void *)0x100), "0x100"); + check_string(do_printf("%s", "wibble"), "wibble"); + check_string(do_printf("%s-%s", "wibble", "wobble"), "wibble-wobble"); + check_string(do_printf("%10s", "wibble"), " wibble"); + check_string(do_printf("%010s", "wibble"), " wibble"); /* 0 ignored for %s */ + check_string(do_printf("%-10s", "wibble"), "wibble "); + check_string(do_printf("%2s", "wibble"), "wibble"); + check_string(do_printf("%.2s", "wibble"), "wi"); + check_string(do_printf("%.2s", "w"), "w"); + check_string(do_printf("%4.2s", "wibble"), " wi"); + check_string(do_printf("%c", 'a'), "a"); + check_string(do_printf("%4c", 'a'), " a"); + check_string(do_printf("%-4c", 'a'), "a "); + check_string(do_printf("%*c", 0, 'a'), "a"); + check_string(do_printf("x%hhny", &c), "xy"); + insist(c == 1); + check_string(do_printf("xx%hnyy", &s), "xxyy"); + insist(s == 2); + check_string(do_printf("xxx%nyyy", &i), "xxxyyy"); + insist(i == 3); + check_string(do_printf("xxxx%lnyyyy", &l), "xxxxyyyy"); + insist(l == 4); + check_string(do_printf("xxxxx%llnyyyyy", &ll), "xxxxxyyyyy"); + insist(ll == 5); + check_string(do_printf("xxxxxx%jnyyyyyy", &m), "xxxxxxyyyyyy"); + insist(m == 6); + check_string(do_printf("xxxxxxx%znyyyyyyy", &ssz), "xxxxxxxyyyyyyy"); + insist(ssz == 7); + check_string(do_printf("xxxxxxxx%tnyyyyyyyy", &p), "xxxxxxxxyyyyyyyy"); + insist(p == 8); + check_string(do_printf("%*d", 5, 99), " 99"); + check_string(do_printf("%*d", -5, 99), "99 "); + check_string(do_printf("%.*d", 5, 99), "00099"); + 
check_string(do_printf("%.*d", -5, 99), "99"); + check_string(do_printf("%.0d", 0), ""); + check_string(do_printf("%.d", 0), ""); + check_string(do_printf("%.d", 0), ""); + check_string(do_printf("%%"), "%"); + check_string(do_printf("wibble"), "wibble"); + /* invalid conversions: do_printf() is expected to hand back 0 */insist(do_printf("%") == 0); + insist(do_printf("%=") == 0); + i = byte_asprintf(&cp, "xyzzy %d", 999); + insist(i == 9); + check_string(cp, "xyzzy 999"); + i = byte_snprintf(buffer, sizeof buffer, "xyzzy %d", 999); + insist(i == 9); + check_string(buffer, "xyzzy 999"); + /* width 32 against the 16-byte buffer: the return value is still expected to be 32 */i = byte_snprintf(buffer, sizeof buffer, "%*d", 32, 99); + insist(i == 32); + check_string(buffer, " "); + { + /* bizarre workaround for compiler checking of format strings */ + char f[] = "xyzzy %"; + i = byte_asprintf(&cp, f); + insist(i == -1); + } +} + /** @brief Check basen() on one-word and four-word inputs in bases 10, 7, 256 and 16, and that it returns -1 when the size argument is only 10 */+static void test_basen(void) { + unsigned long v[64]; + char buffer[1024]; + + fprintf(stderr, "test_basen\n"); + v[0] = 999; + insist(basen(v, 1, buffer, sizeof buffer, 10) == 0); + check_string(buffer, "999"); + + v[0] = 1+2*7+3*7*7+4*7*7*7; + insist(basen(v, 1, buffer, sizeof buffer, 7) == 0); + check_string(buffer, "4321"); + + v[0] = 0x00010203; + v[1] = 0x04050607; + v[2] = 0x08090A0B; + v[3] = 0x0C0D0E0F; + insist(basen(v, 4, buffer, sizeof buffer, 256) == 0); + check_string(buffer, "123456789abcdef"); + + v[0] = 0x00010203; + v[1] = 0x04050607; + v[2] = 0x08090A0B; + v[3] = 0x0C0D0E0F; + insist(basen(v, 4, buffer, sizeof buffer, 16) == 0); + check_string(buffer, "102030405060708090a0b0c0d0e0f"); + + v[0] = 0x00010203; + v[1] = 0x04050607; + v[2] = 0x08090A0B; + v[3] = 0x0C0D0E0F; + insist(basen(v, 4, buffer, 10, 16) == -1); +} + /** @brief Check split() on malformed quoting (expected to return 0), empty input, whitespace, comments and quotes under different SPLIT_ flag combinations, then check quoteutf8() */+static void test_split(void) { + char **v; + int nv; + + fprintf(stderr, "test_split\n"); + insist(split("\"misquoted", &nv, SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0) == 0); + insist(split("\'misquoted", &nv, SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0) == 0); + insist(split("\'misquoted\\", &nv, SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0) == 0); + insist(split("\'misquoted\\\"", 
&nv, SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0) == 0); + insist(split("\'mis\\escaped\'", &nv, SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0) == 0); + + /* empty input: zero fields, and the vector's first slot is already the terminating null */insist((v = split("", &nv, SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0))); + check_integer(nv, 0); + insist(*v == 0); + + insist((v = split("wibble", &nv, SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0))); + check_integer(nv, 1); + check_string(v[0], "wibble"); + insist(v[1] == 0); + + insist((v = split(" wibble \t\r\n wobble ", &nv, + SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0))); + check_integer(nv, 2); + check_string(v[0], "wibble"); + check_string(v[1], "wobble"); + insist(v[2] == 0); + + insist((v = split("wibble wobble #splat", &nv, + SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0))); + check_integer(nv, 2); + check_string(v[0], "wibble"); + check_string(v[1], "wobble"); + insist(v[2] == 0); + + insist((v = split("\"wibble wobble\" #splat", &nv, + SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0))); + check_integer(nv, 1); + check_string(v[0], "wibble wobble"); + insist(v[1] == 0); + + insist((v = split("\"wibble \\\"\\nwobble\"", &nv, + SPLIT_COMMENTS|SPLIT_QUOTES, 0, 0))); + check_integer(nv, 1); + check_string(v[0], "wibble \"\nwobble"); + insist(v[1] == 0); + + /* SPLIT_QUOTES alone: the #splat text is expected back as an ordinary field */insist((v = split("\"wibble wobble\" #splat", &nv, + SPLIT_QUOTES, 0, 0))); + check_integer(nv, 2); + check_string(v[0], "wibble wobble"); + check_string(v[1], "#splat"); + insist(v[2] == 0); + + /* SPLIT_COMMENTS alone: the double quotes are expected to stay in the fields as literal characters */insist((v = split("\"wibble wobble\" #splat", &nv, + SPLIT_COMMENTS, 0, 0))); + check_integer(nv, 2); + check_string(v[0], "\"wibble"); + check_string(v[1], "wobble\""); + insist(v[2] == 0); + + /* quoteutf8(): a plain word is returned unchanged; inputs containing a space, quote, newline or backslash are expected back inside double quotes with escapes */check_string(quoteutf8("wibble"), "wibble"); + check_string(quoteutf8(" wibble "), "\" wibble \""); + check_string(quoteutf8("wibble wobble"), "\"wibble wobble\""); + check_string(quoteutf8("wibble\"wobble"), "\"wibble\\\"wobble\""); + check_string(quoteutf8("wibble\nwobble"), "\"wibble\\nwobble\""); + check_string(quoteutf8("wibble\\wobble"), "\"wibble\\\\wobble\""); + check_string(quoteutf8("wibble'wobble"), "\"wibble'wobble\""); +} + 
/** @brief Add 10000 entries keyed by the decimal string of i (value argument &i), then look each one up, replace it, enumerate the keys and remove them all, checking hash_count() at each stage */+static void test_hash(void) { + hash *h; + int i, *ip; + char **keys; + + fprintf(stderr, "test_hash\n"); + h = hash_new(sizeof(int)); + for(i = 0; i < 10000; ++i) + insist(hash_add(h, do_printf("%d", i), &i, HASH_INSERT) == 0); + check_integer(hash_count(h), 10000); + for(i = 0; i < 10000; ++i) { + insist((ip = hash_find(h, do_printf("%d", i))) != 0); + check_integer(*ip, i); + insist(hash_add(h, do_printf("%d", i), &i, HASH_REPLACE) == 0); + } + check_integer(hash_count(h), 10000); + keys = hash_keys(h); + for(i = 0; i < 10000; ++i) + insist(keys[i] != 0); + insist(keys[10000] == 0); + for(i = 0; i < 10000; ++i) + insist(hash_remove(h, do_printf("%d", i)) == 0); + check_integer(hash_count(h), 0); +} + /** @brief Check get_address() for a service-only lookup (smtp) and a host-plus-service lookup (localhost, nntp) using passive IPv4 stream hints. NOTE(review): pref is initialized positionally, which presumably relies on the member order of struct addrinfo on the build platform - confirm. */+static void test_addr(void) { + struct stringlist a; + const char *s[2]; + struct addrinfo *ai; + char *name; + const struct sockaddr_in *sin; + + static const struct addrinfo pref = { + AI_PASSIVE, + PF_INET, + SOCK_STREAM, + 0, + 0, + 0, + 0, + 0 + }; + + /* service only: the checks below expect address 0 and port 25 */a.n = 1; + a.s = (char **)s; + s[0] = "smtp"; + ai = get_address(&a, &pref, &name); + insist(ai != 0); + check_integer(ai->ai_family, PF_INET); + check_integer(ai->ai_socktype, SOCK_STREAM); + check_integer(ai->ai_protocol, IPPROTO_TCP); + check_integer(ai->ai_addrlen, sizeof(struct sockaddr_in)); + sin = (const struct sockaddr_in *)ai->ai_addr; + check_integer(sin->sin_family, AF_INET); + check_integer(sin->sin_addr.s_addr, 0); + check_integer(ntohs(sin->sin_port), 25); + check_string(name, "host * service smtp"); + + /* host and service: the checks below expect 0x7F000001 (127.0.0.1) and port 119 */a.n = 2; + s[0] = "localhost"; + s[1] = "nntp"; + ai = get_address(&a, &pref, &name); + insist(ai != 0); + check_integer(ai->ai_family, PF_INET); + check_integer(ai->ai_socktype, SOCK_STREAM); + check_integer(ai->ai_protocol, IPPROTO_TCP); + check_integer(ai->ai_addrlen, sizeof(struct sockaddr_in)); + sin = (const struct sockaddr_in *)ai->ai_addr; + check_integer(sin->sin_family, AF_INET); + check_integer(ntohl(sin->sin_addr.s_addr), 0x7F000001); + check_integer(ntohs(sin->sin_port), 119); + 
check_string(name, "host localhost service nntp"); } /** @brief Test driver: sets fail_first from the FAIL_FIRST environment variable, sanity-checks character codes, calls the test_ functions, prints the error and test counts and returns nonzero when errors is nonzero. NOTE(review): this body is shown as combined-diff hunks, so lines between the hunk markers are elided here. */int main(void) { + mem_init(); fail_first = !!getenv("FAIL_FIRST"); insist('\n' == 0x0A); insist('\r' == 0x0D); @@@ -1345,17 -731,13 +1391,17 @@@ insist('a' == 0x61); insist('z' == 0x7A); /* addr.c */ + test_addr(); /* asprintf.c */ /* authhash.c */ /* basen.c */ + test_basen(); /* charset.c */ /* client.c */ /* configuration.c */ /* event.c */ + /* filepart.c */ + test_filepart(); /* fprintf.c */ /* heap.c */ test_heap(); @@@ -1363,21 -745,18 +1409,22 @@@ test_hex(); /* inputline.c */ /* kvp.c */ + test_kvp(); /* log.c */ /* mem.c */ /* mime.c */ test_mime(); + test_cookies(); /* mixer.c */ /* plugin.c */ /* printf.c */ + test_printf(); /* queue.c */ /* sink.c */ + test_sink(); /* snprintf.c */ /* split.c */ + test_split(); /* syscalls.c */ /* table.c */ /* unicode.c */ @@@ -1388,15 -767,8 +1435,15 @@@ /* words.c */ test_casefold(); test_words(); - /* XXX words() */ /* wstat.c */ + test_wstat(); + /* signame.c */ + test_signame(); + /* cache.c */ + test_cache(); + /* selection.c */ + test_selection(); + test_hash(); fprintf(stderr, "%d errors out of %d tests\n", errors, tests); return !!errors; }