X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~mdw/git/disorder/blobdiff_plain/763d5e6ad88ef3ba1cd1d7742d060e4f1e54c6b8..62ef2216d2c7c1c563ea163e2a0fdacccb54e31e:/lib/mime.c diff --git a/lib/mime.c b/lib/mime.c index b188214..7ba4254 100644 --- a/lib/mime.c +++ b/lib/mime.c @@ -1,6 +1,6 @@ /* * This file is part of DisOrder - * Copyright (C) 2005 Richard Kettlewell + * Copyright (C) 2005, 2007 Richard Kettlewell * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,7 +17,9 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA */ - +/** @file lib/mime.c + * @brief Support for MIME and allied protocols + */ #include #include "types.h" @@ -25,11 +27,16 @@ #include #include +#include + #include "mem.h" #include "mime.h" #include "vector.h" #include "hex.h" +#include "log.h" +#include "base64.h" +/** @brief Match whitespace characters */ static int whitespace(int c) { switch(c) { case ' ': @@ -42,6 +49,7 @@ static int whitespace(int c) { } } +/** @brief Match RFC2045 tspecial characters */ static int tspecial(int c) { switch(c) { case '(': @@ -65,7 +73,45 @@ static int tspecial(int c) { } } -static const char *skipwhite(const char *s) { +/** @brief Match RFC2616 separator characters */ +static int http_separator(int c) { + switch(c) { + case '(': + case ')': + case '<': + case '>': + case '@': + case ',': + case ';': + case ':': + case '\\': + case '"': + case '/': + case '[': + case ']': + case '?': + case '=': + case '{': + case '}': + case ' ': + case '\t': + return 1; + default: + return 0; + } +} + +/** @brief Match CRLF */ +static int iscrlf(const char *ptr) { + return ptr[0] == '\r' && ptr[1] == '\n'; +} + +/** @brief Skip whitespace + * @param s Pointer into string + * @param rfc822_comments If true, skip RFC822 nested comments + * @return Pointer into string after whitespace + */ +static const char *skipwhite(const char *s, int 
rfc822_comments) { int c, depth; for(;;) { @@ -77,6 +123,8 @@ static const char *skipwhite(const char *s) { ++s; break; case '(': + if(!rfc822_comments) + return s; ++s; depth = 1; while(*s && depth) { @@ -98,66 +146,105 @@ static const char *skipwhite(const char *s) { } } -static const char *parsestring(const char *s, char **valuep) { - struct dynstr value; +/** @brief Test for a word character + * @param c Character to test + * @param special tspecial() (MIME/RFC2045) or http_separator() (HTTP/RFC2616) + * @return 1 if @p c is a word character, else 0 + */ +static int iswordchar(int c, int (*special)(int)) { + return !(c <= ' ' || c > '~' || special(c)); +} + +/** @brief Parse an RFC1521/RFC2616 word + * @param s Pointer to start of word + * @param valuep Where to store value + * @param special tspecial() (MIME/RFC2045) or http_separator() (HTTP/RFC2616) + * @return Pointer just after end of word or NULL if there's no word + * + * A word is a token or a quoted-string. + */ +static const char *parseword(const char *s, char **valuep, + int (*special)(int)) { + struct dynstr value[1]; int c; - dynstr_init(&value); - ++s; - while((c = *s++) != '"') { - switch(c) { - case '\\': - if(!(c = *s++)) return 0; - default: - dynstr_append(&value, c); - break; + dynstr_init(value); + if(*s == '"') { + ++s; + while((c = *s++) != '"') { + switch(c) { + case '\\': + if(!(c = *s++)) return 0; + default: + dynstr_append(value, c); + break; + } } + if(!c) return 0; + } else { + if(!iswordchar((unsigned char)*s, special)) + return NULL; + dynstr_init(value); + while(iswordchar((unsigned char)*s, special)) + dynstr_append(value, *s++); } - if(!c) return 0; - dynstr_terminate(&value); - *valuep = value.vec; + dynstr_terminate(value); + *valuep = value->vec; return s; } +/** @brief Parse an RFC1521/RFC2616 token + * @param s Pointer to start of token + * @param valuep Where to store value + * @param special tspecial() (MIME/RFC2045) or http_separator() (HTTP/RFC2616) + * @return 
Pointer just after end of token or NULL if there's no token + */ +static const char *parsetoken(const char *s, char **valuep, + int (*special)(int)) { + if(*s == '"') return 0; + return parseword(s, valuep, special); +} + +/** @brief Parse a MIME content-type field + * @param s Start of field + * @param typep Where to store type + * @param parameternamep Where to store parameter name + * @param parametervaluep Where to store parameter value + * @return 0 on success, non-0 on error + * + * See RFC 2045 s5. + */ int mime_content_type(const char *s, char **typep, char **parameternamep, char **parametervaluep) { - struct dynstr type, parametername, parametervalue; + struct dynstr type, parametername; dynstr_init(&type); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(!*s) return -1; while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&type, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(*s++ != '/') return -1; dynstr_append(&type, '/'); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&type, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(*s == ';') { dynstr_init(&parametername); ++s; - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(!*s) return -1; while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&parametername, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(*s++ != '=') return -1; - if(!(s = skipwhite(s))) return -1; - if(*s == '"') { - if(!(s = parsestring(s, parametervaluep))) return -1; - } else { - dynstr_init(&parametervalue); - while(*s && !tspecial(*s) && !whitespace(*s)) - dynstr_append(&parametervalue, *s++); - dynstr_terminate(&parametervalue); - *parametervaluep = parametervalue.vec; - } - if(!(s = skipwhite(s))) return -1; 
+ if(!(s = skipwhite(s, 1))) return -1; + if(!(s = parseword(s, parametervaluep, tspecial))) return -1; + if(!(s = skipwhite(s, 1))) return -1; dynstr_terminate(&parametername); *parameternamep = parametername.vec; } else @@ -167,10 +254,17 @@ int mime_content_type(const char *s, return 0; } -static int iscrlf(const char *ptr) { - return ptr[0] == '\r' && ptr[1] == '\n'; -} - +/** @brief Parse a MIME message + * @param s Start of message + * @param callback Called for each header field + * @param u Passed to callback + * @return Pointer to decoded body (might be in original string), or NULL on error + * + * This does an RFC 822-style parse and honors Content-Transfer-Encoding as + * described in RFC 2045 + * s6. @p callback is called for each header field encountered, in order, + * with ASCII characters in the header name forced to lower case. + */ const char *mime_parse(const char *s, int (*callback)(const char *name, const char *value, void *u), @@ -183,7 +277,7 @@ const char *mime_parse(const char *s, dynstr_init(&value); while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&name, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return 0; + if(!(s = skipwhite(s, 1))) return 0; if(*s != ':') return 0; ++s; while(*s && !(*s == '\n' && !(s[1] == ' ' || s[1] == '\t'))) @@ -200,12 +294,13 @@ const char *mime_parse(const char *s, } if(*s) s += 2; if(cte) { - if(!strcmp(cte, "base64")) return mime_base64(s); + if(!strcmp(cte, "base64")) return mime_base64(s, 0); if(!strcmp(cte, "quoted-printable")) return mime_qp(s); } return s; } +/** @brief Match the boundary string */ static int isboundary(const char *ptr, const char *boundary, size_t bl) { return (ptr[0] == '-' && ptr[1] == '-' @@ -213,18 +308,30 @@ static int isboundary(const char *ptr, const char *boundary, size_t bl) { && (iscrlf(ptr + bl + 2) || (ptr[bl + 2] == '-' && ptr[bl + 3] == '-' - && iscrlf(ptr + bl + 4)))); + && (iscrlf(ptr + bl + 4) || *(ptr + bl + 4) == 0)))); } +/** @brief Match the final 
boundary string */ static int isfinal(const char *ptr, const char *boundary, size_t bl) { return (ptr[0] == '-' && ptr[1] == '-' && !strncmp(ptr + 2, boundary, bl) && ptr[bl + 2] == '-' && ptr[bl + 3] == '-' - && iscrlf(ptr + bl + 4)); + && (iscrlf(ptr + bl + 4) || *(ptr + bl + 4) == 0)); } +/** @brief Parse a multipart MIME body + * @param s Start of message + * @param callback Callback for each part + * @param boundary Boundary string + * @param u Passed to callback + * @return 0 on success, non-0 on error + * + * See RFC 2046 + * s5.1. @p callback is called for each part (not yet decoded in any way) + * in succession; you should probably call mime_parse() for each part. + */ int mime_multipart(const char *s, int (*callback)(const char *s, void *u), const char *boundary, @@ -233,12 +340,16 @@ int mime_multipart(const char *s, const char *start, *e; int ret; - if(!isboundary(s, boundary, bl)) return -1; + /* We must start with a boundary string */ + if(!isboundary(s, boundary, bl)) + return -1; + /* Keep going until we hit a final boundary */ while(!isfinal(s, boundary, bl)) { s = strstr(s, "\r\n") + 2; start = s; while(!isboundary(s, boundary, bl)) { - if(!(e = strstr(s, "\r\n"))) return -1; + if(!(e = strstr(s, "\r\n"))) + return -1; s = e + 2; } if((ret = callback(xstrndup(start, @@ -249,39 +360,41 @@ int mime_multipart(const char *s, return 0; } +/** @brief Parse an RFC2388-style content-disposition field + * @param s Start of field + * @param dispositionp Where to store disposition + * @param parameternamep Where to store parameter name + * @param parametervaluep Where to store parameter value + * @return 0 on success, non-0 on error + * + * See RFC 2388 s3 + * and RFC 2183. 
+ */ int mime_rfc2388_content_disposition(const char *s, char **dispositionp, char **parameternamep, char **parametervaluep) { - struct dynstr disposition, parametername, parametervalue; + struct dynstr disposition, parametername; dynstr_init(&disposition); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(!*s) return -1; while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&disposition, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(*s == ';') { dynstr_init(&parametername); ++s; - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(!*s) return -1; while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&parametername, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(*s++ != '=') return -1; - if(!(s = skipwhite(s))) return -1; - if(*s == '"') { - if(!(s = parsestring(s, parametervaluep))) return -1; - } else { - dynstr_init(&parametervalue); - while(*s && !tspecial(*s) && !whitespace(*s)) - dynstr_append(&parametervalue, *s++); - dynstr_terminate(&parametervalue); - *parametervaluep = parametervalue.vec; - } - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; + if(!(s = parseword(s, parametervaluep, tspecial))) return -1; + if(!(s = skipwhite(s, 1))) return -1; dynstr_terminate(&parametername); *parameternamep = parametername.vec; } else @@ -291,6 +404,13 @@ int mime_rfc2388_content_disposition(const char *s, return 0; } +/** @brief Convert MIME quoted-printable + * @param s Quoted-printable data + * @return Decoded data + * + * See RFC 2045 + * s6.7. 
+ */ char *mime_qp(const char *s) { struct dynstr d; int c, a, b; @@ -333,35 +453,80 @@ char *mime_qp(const char *s) { return d.vec; } -char *mime_base64(const char *s) { - struct dynstr d; - const char *t; - int b[4], n, c; - static const char table[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +/** @brief Parse a RFC2109 Cookie: header + * @param s Header field value + * @param cd Where to store result + * @return 0 on success, non-0 on error + */ +int parse_cookie(const char *s, + struct cookiedata *cd) { + char *n = 0, *v = 0; - dynstr_init(&d); - n = 0; - while((c = (unsigned char)*s++)) { - if((t = strchr(table, c))) { - b[n++] = t - table; - if(n == 4) { - dynstr_append(&d, (b[0] << 2) + (b[1] >> 4)); - dynstr_append(&d, (b[1] << 4) + (b[2] >> 2)); - dynstr_append(&d, (b[2] << 6) + b[3]); - n = 0; - } - } else if(c == '=') { - if(n >= 2) { - dynstr_append(&d, (b[0] << 2) + (b[1] >> 4)); - if(n == 3) - dynstr_append(&d, (b[1] << 4) + (b[2] >> 2)); + memset(cd, 0, sizeof *cd); + s = skipwhite(s, 0); + while(*s) { + /* Skip separators */ + if(*s == ';' || *s == ',') { + ++s; + s = skipwhite(s, 0); + continue; + } + if(!(s = parsetoken(s, &n, http_separator))) return -1; + s = skipwhite(s, 0); + if(*s++ != '=') return -1; + s = skipwhite(s, 0); + if(!(s = parseword(s, &v, http_separator))) return -1; + if(n[0] == '$') { + /* Some bit of meta-information */ + if(!strcmp(n, "$Version")) + cd->version = v; + else if(!strcmp(n, "$Path")) { + if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].path == 0) + cd->cookies[cd->ncookies-1].path = v; + else { + error(0, "redundant $Path in Cookie: header"); + return -1; + } + } else if(!strcmp(n, "$Domain")) { + if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].domain == 0) + cd->cookies[cd->ncookies-1].domain = v; + else { + error(0, "redundant $Domain in Cookie: header"); + return -1; + } } - break; + } else { + /* It's a new cookie */ + cd->cookies = xrealloc(cd->cookies, + (cd->ncookies + 1) 
* sizeof (struct cookie)); + cd->cookies[cd->ncookies].name = n; + cd->cookies[cd->ncookies].value = v; + cd->cookies[cd->ncookies].path = 0; + cd->cookies[cd->ncookies].domain = 0; + ++cd->ncookies; + } + s = skipwhite(s, 0); + if(*s && (*s != ',' && *s != ';')) { + error(0, "missing separator in Cookie: header"); + return -1; } } - dynstr_terminate(&d); - return d.vec; + return 0; +} + +/** @brief Find a named cookie + * @param cd Parsed cookie data + * @param name Name of cookie + * @return Cookie structure or NULL if not found + */ +const struct cookie *find_cookie(const struct cookiedata *cd, + const char *name) { + int n; + + for(n = 0; n < cd->ncookies; ++n) + if(!strcmp(cd->cookies[n].name, name)) + return &cd->cookies[n]; + return 0; } /*