X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~mdw/git/disorder/blobdiff_plain/39d4aa6b1c612305bf06760529e2a1532b9818a5..16fb2830d52c1420afdee555a566d72a065d9616:/lib/mime.c diff --git a/lib/mime.c b/lib/mime.c index c8ffe31..6bff61d 100644 --- a/lib/mime.c +++ b/lib/mime.c @@ -1,30 +1,26 @@ /* * This file is part of DisOrder - * Copyright (C) 2005, 2007 Richard Kettlewell + * Copyright (C) 2005, 2007, 2008 Richard Kettlewell * - * This program is free software; you can redistribute it and/or modify + * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA + * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ /** @file lib/mime.c * @brief Support for MIME and allied protocols */ -#include -#include "types.h" +#include "common.h" -#include #include #include "mem.h" @@ -32,6 +28,8 @@ #include "vector.h" #include "hex.h" #include "log.h" +#include "base64.h" +#include "kvp.h" /** @brief Match whitespace characters */ static int whitespace(int c) { @@ -47,7 +45,7 @@ static int whitespace(int c) { } /** @brief Match RFC2045 tspecial characters */ -static int tspecial(int c) { +int mime_tspecial(int c) { switch(c) { case '(': case ')': @@ -70,8 +68,8 @@ static int tspecial(int c) { } } -/** @brief Mathc RFC2616 seprator characters */ -static int http_separator(int c) { +/** @brief Match RFC2616 separator characters */ +int mime_http_separator(int c) { switch(c) { case '(': case ')': @@ -104,7 +102,9 @@ static int iscrlf(const char *ptr) { } /** @brief Skip whitespace + * @param s Pointer into string * @param rfc822_comments If true, skip RFC822 nested comments + * @return Pointer into string after whitespace */ static const char *skipwhite(const char *s, int rfc822_comments) { int c, depth; @@ -128,12 +128,14 @@ static const char *skipwhite(const char *s, int rfc822_comments) { case '(': ++depth; break; case ')': --depth; break; case '\\': - if(!*s) return 0; + if(!*s) + return 0; ++s; break; } } - if(depth) return 0; + if(depth) + return 0; break; default: return s; @@ -143,7 +145,7 @@ static const char *skipwhite(const char *s, int rfc822_comments) { /** @brief Test for a word character * @param c Character to test - * @param special tspecial() (MIME/RFC2405) or http_separator() (HTTP/RFC2616) + * @param special mime_tspecial() (MIME/RFC2405) or mime_http_separator() (HTTP/RFC2616) * @return 1 if @p c is a word character, else 0 */ static int iswordchar(int c, int (*special)(int)) { @@ -153,13 +155,13 @@ static int iswordchar(int c, int (*special)(int)) { /** @brief Parse an RFC1521/RFC2616 word * @param s Pointer to start of word * @param valuep Where to store value - * 
@param special tspecial() (MIME/RFC2405) or http_separator() (HTTP/RFC2616) + * @param special mime_tspecial() (MIME/RFC2405) or mime_http_separator() (HTTP/RFC2616) * @return Pointer just after end of word or NULL if there's no word * * A word is a token or a quoted-string. */ -static const char *parseword(const char *s, char **valuep, - int (*special)(int)) { +const char *mime_parse_word(const char *s, char **valuep, + int (*special)(int)) { struct dynstr value[1]; int c; @@ -169,13 +171,15 @@ static const char *parseword(const char *s, char **valuep, while((c = *s++) != '"') { switch(c) { case '\\': - if(!(c = *s++)) return 0; + if(!(c = *s++)) + return 0; default: dynstr_append(value, c); break; } } - if(!c) return 0; + if(!c) + return 0; } else { if(!iswordchar((unsigned char)*s, special)) return NULL; @@ -191,59 +195,75 @@ static const char *parseword(const char *s, char **valuep, /** @brief Parse an RFC1521/RFC2616 token * @param s Pointer to start of token * @param valuep Where to store value - * @param special tspecial() (MIME/RFC2405) or http_separator() (HTTP/RFC2616) + * @param special mime_tspecial() (MIME/RFC2405) or mime_http_separator() (HTTP/RFC2616) * @return Pointer just after end of token or NULL if there's no token */ static const char *parsetoken(const char *s, char **valuep, int (*special)(int)) { - if(*s == '"') return 0; - return parseword(s, valuep, special); + if(*s == '"') + return 0; + return mime_parse_word(s, valuep, special); } /** @brief Parse a MIME content-type field * @param s Start of field * @param typep Where to store type - * @param parameternamep Where to store parameter name - * @param parameternvaluep Wher to store parameter value + * @param parametersp Where to store parameter list * @return 0 on success, non-0 on error + * + * See RFC 2045 s5. 
*/ int mime_content_type(const char *s, char **typep, - char **parameternamep, - char **parametervaluep) { + struct kvp **parametersp) { struct dynstr type, parametername; + struct kvp *parameters = 0; + char *parametervalue; dynstr_init(&type); - if(!(s = skipwhite(s, 1))) return -1; - if(!*s) return -1; - while(*s && !tspecial(*s) && !whitespace(*s)) + if(!(s = skipwhite(s, 1))) + return -1; + if(!*s) + return -1; + while(*s && !mime_tspecial(*s) && !whitespace(*s)) dynstr_append(&type, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s, 1))) return -1; - if(*s++ != '/') return -1; + if(!(s = skipwhite(s, 1))) + return -1; + if(*s++ != '/') + return -1; dynstr_append(&type, '/'); - if(!(s = skipwhite(s, 1))) return -1; - while(*s && !tspecial(*s) && !whitespace(*s)) + if(!(s = skipwhite(s, 1))) + return -1; + while(*s && !mime_tspecial(*s) && !whitespace(*s)) dynstr_append(&type, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s, 1))) return -1; + if(!(s = skipwhite(s, 1))) + return -1; - if(*s == ';') { + while(*s == ';') { dynstr_init(&parametername); ++s; - if(!(s = skipwhite(s, 1))) return -1; - if(!*s) return -1; - while(*s && !tspecial(*s) && !whitespace(*s)) + if(!(s = skipwhite(s, 1))) + return -1; + if(!*s) + return -1; + while(*s && !mime_tspecial(*s) && !whitespace(*s)) dynstr_append(&parametername, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s, 1))) return -1; - if(*s++ != '=') return -1; - if(!(s = skipwhite(s, 1))) return -1; - if(!(s = parseword(s, parametervaluep, tspecial))) return -1; - if(!(s = skipwhite(s, 1))) return -1; + if(!(s = skipwhite(s, 1))) + return -1; + if(*s++ != '=') + return -1; + if(!(s = skipwhite(s, 1))) + return -1; + if(!(s = mime_parse_word(s, &parametervalue, mime_tspecial))) + return -1; + if(!(s = skipwhite(s, 1))) + return -1; dynstr_terminate(&parametername); - *parameternamep = parametername.vec; - } else - *parametervaluep = *parameternamep = 0; + kvp_set(&parameters, parametername.vec, parametervalue); + }
dynstr_terminate(&type); *typep = type.vec; + *parametersp = parameters; return 0; } @@ -251,7 +271,12 @@ int mime_content_type(const char *s, * @param s Start of message * @param callback Called for each header field * @param u Passed to callback - * @return Pointer to decoded body (might be in original string) + * @return Pointer to decoded body (might be in original string), or NULL on error + * + * This does an RFC 822-style parse and honors Content-Transfer-Encoding as + * described in RFC 2045 + * s6. @p callback is called for each header field encountered, in order, + * with ASCII characters in the header name forced to lower case. */ const char *mime_parse(const char *s, int (*callback)(const char *name, const char *value, @@ -263,14 +288,27 @@ const char *mime_parse(const char *s, while(*s && !iscrlf(s)) { dynstr_init(&name); dynstr_init(&value); - while(*s && !tspecial(*s) && !whitespace(*s)) + while(*s && !mime_tspecial(*s) && !whitespace(*s)) dynstr_append(&name, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s, 1))) return 0; - if(*s != ':') return 0; + if(!(s = skipwhite(s, 1))) + return 0; + if(*s != ':') + return 0; ++s; - while(*s && !(*s == '\n' && !(s[1] == ' ' || s[1] == '\t'))) - dynstr_append(&value, *s++); - if(*s) ++s; + while(*s && !(*s == '\n' && !(s[1] == ' ' || s[1] == '\t'))) { + const int c = *s++; + /* Strip leading whitespace */ + if(value.nvec || !(c == ' ' || c == '\t' || c == '\n' || c == '\r')) + dynstr_append(&value, c); + } + /* Strip trailing whitespace */ + while(value.nvec > 0 && (value.vec[value.nvec - 1] == ' ' + || value.vec[value.nvec - 1] == '\t' + || value.vec[value.nvec - 1] == '\n' + || value.vec[value.nvec - 1] == '\r')) + --value.nvec; + if(*s) + ++s; dynstr_terminate(&name); dynstr_terminate(&value); if(!strcmp(name.vec, "content-transfer-encoding")) { @@ -278,16 +316,25 @@ const char *mime_parse(const char *s, for(p = cte; *p; p++) *p = tolower((unsigned char)*p); } - if(callback(name.vec, value.vec, u)) 
return 0; + if(callback(name.vec, value.vec, u)) + return 0; } - if(*s) s += 2; + if(*s) + s += 2; if(cte) { - if(!strcmp(cte, "base64")) return mime_base64(s); - if(!strcmp(cte, "quoted-printable")) return mime_qp(s); + if(!strcmp(cte, "base64")) + return mime_base64(s, 0); + if(!strcmp(cte, "quoted-printable")) + return mime_qp(s); + if(!strcmp(cte, "7bit") || !strcmp(cte, "8bit")) + return s; + error(0, "unknown content-transfer-encoding '%s'", cte); + return 0; } return s; } +/** @brief Match the boundary string */ static int isboundary(const char *ptr, const char *boundary, size_t bl) { return (ptr[0] == '-' && ptr[1] == '-' @@ -295,24 +342,29 @@ static int isboundary(const char *ptr, const char *boundary, size_t bl) { && (iscrlf(ptr + bl + 2) || (ptr[bl + 2] == '-' && ptr[bl + 3] == '-' - && iscrlf(ptr + bl + 4)))); + && (iscrlf(ptr + bl + 4) || *(ptr + bl + 4) == 0)))); } +/** @brief Match the final boundary string */ static int isfinal(const char *ptr, const char *boundary, size_t bl) { return (ptr[0] == '-' && ptr[1] == '-' && !strncmp(ptr + 2, boundary, bl) && ptr[bl + 2] == '-' && ptr[bl + 3] == '-' - && iscrlf(ptr + bl + 4)); + && (iscrlf(ptr + bl + 4) || *(ptr + bl + 4) == 0)); } /** @brief Parse a multipart MIME body * @param s Start of message - * @param callback CAllback for each part + * @param callback Callback for each part * @param boundary Boundary string * @param u Passed to callback * @return 0 on success, non-0 on error + * + * See RFC 2046 + * s5.1. @p callback is called for each part (not yet decoded in any way) + * in succession; you should probably call mime_parse() for each part. 
*/ int mime_multipart(const char *s, int (*callback)(const char *s, void *u), @@ -322,12 +374,20 @@ int mime_multipart(const char *s, const char *start, *e; int ret; - if(!isboundary(s, boundary, bl)) return -1; + /* We must start with a boundary string */ + if(!isboundary(s, boundary, bl)) { + error(0, "mime_multipart: first line is not the boundary string"); + return -1; + } + /* Keep going until we hit a final boundary */ while(!isfinal(s, boundary, bl)) { s = strstr(s, "\r\n") + 2; start = s; while(!isboundary(s, boundary, bl)) { - if(!(e = strstr(s, "\r\n"))) return -1; + if(!(e = strstr(s, "\r\n"))) { + error(0, "mime_multipart: line does not end CRLF"); + return -1; + } s = e + 2; } if((ret = callback(xstrndup(start, @@ -340,10 +400,13 @@ int mime_multipart(const char *s, /** @brief Parse an RFC2388-style content-disposition field * @param s Start of field - * @param typep Where to store type + * @param dispositionp Where to store disposition * @param parameternamep Where to store parameter name - * @param parameternvaluep Wher to store parameter value + * @param parametervaluep Wher to store parameter value * @return 0 on success, non-0 on error + * + * See RFC 2388 s3 + * and RFC 2183. 
*/ int mime_rfc2388_content_disposition(const char *s, char **dispositionp, @@ -352,24 +415,34 @@ int mime_rfc2388_content_disposition(const char *s, struct dynstr disposition, parametername; dynstr_init(&disposition); - if(!(s = skipwhite(s, 1))) return -1; - if(!*s) return -1; - while(*s && !tspecial(*s) && !whitespace(*s)) + if(!(s = skipwhite(s, 1))) + return -1; + if(!*s) + return -1; + while(*s && !mime_tspecial(*s) && !whitespace(*s)) dynstr_append(&disposition, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s, 1))) return -1; + if(!(s = skipwhite(s, 1))) + return -1; if(*s == ';') { dynstr_init(&parametername); ++s; - if(!(s = skipwhite(s, 1))) return -1; - if(!*s) return -1; - while(*s && !tspecial(*s) && !whitespace(*s)) + if(!(s = skipwhite(s, 1))) + return -1; + if(!*s) + return -1; + while(*s && !mime_tspecial(*s) && !whitespace(*s)) dynstr_append(&parametername, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s, 1))) return -1; - if(*s++ != '=') return -1; - if(!(s = skipwhite(s, 1))) return -1; - if(!(s = parseword(s, parametervaluep, tspecial))) return -1; - if(!(s = skipwhite(s, 1))) return -1; + if(!(s = skipwhite(s, 1))) + return -1; + if(*s++ != '=') + return -1; + if(!(s = skipwhite(s, 1))) + return -1; + if(!(s = mime_parse_word(s, parametervaluep, mime_tspecial))) + return -1; + if(!(s = skipwhite(s, 1))) + return -1; dynstr_terminate(&parametername); *parameternamep = parametername.vec; } else @@ -382,6 +455,9 @@ int mime_rfc2388_content_disposition(const char *s, /** @brief Convert MIME quoted-printable * @param s Quoted-printable data * @return Decoded data + * + * See RFC 2045 + * s6.7. */ char *mime_qp(const char *s) { struct dynstr d; @@ -425,45 +501,56 @@ char *mime_qp(const char *s) { return d.vec; } -/** @brief Convert MIME base64 - * @param s base64 data - * @return Decoded data +/** @brief Match cookie separator characters + * + * This is a subset of the RFC2616 specials, and technically is in breach of + * the specification.
However rejecting (in particular) slashes is + * unreasonably strict and has broken at least one (admittedly somewhat + * obscure) browser, so we're more forgiving. */ -char *mime_base64(const char *s) { - struct dynstr d; - const char *t; - int b[4], n, c; - static const char table[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static int cookie_separator(int c) { + switch(c) { + case '(': + case ')': + case ',': + case ';': + case '=': + case ' ': + case '"': + case '\t': + return 1; - dynstr_init(&d); - n = 0; - while((c = (unsigned char)*s++)) { - if((t = strchr(table, c))) { - b[n++] = t - table; - if(n == 4) { - dynstr_append(&d, (b[0] << 2) + (b[1] >> 4)); - dynstr_append(&d, (b[1] << 4) + (b[2] >> 2)); - dynstr_append(&d, (b[2] << 6) + b[3]); - n = 0; - } - } else if(c == '=') { - if(n >= 2) { - dynstr_append(&d, (b[0] << 2) + (b[1] >> 4)); - if(n == 3) - dynstr_append(&d, (b[1] << 4) + (b[2] >> 2)); - } - break; - } + default: + return 0; + } +} + +/** @brief Match cookie value separator characters + * + * Same as cookie_separator() but allows for @c = in cookie values. + */ +static int cookie_value_separator(int c) { + switch(c) { + case '(': + case ')': + case ',': + case ';': + case ' ': + case '"': + case '\t': + return 1; + + default: + return 0; } - dynstr_terminate(&d); - return d.vec; } /** @brief Parse a RFC2109 Cookie: header * @param s Header field value * @param cd Where to store result * @return 0 on success, non-0 on error + * + * See RFC 2109. 
*/ int parse_cookie(const char *s, struct cookiedata *cd) { @@ -478,11 +565,20 @@ int parse_cookie(const char *s, s = skipwhite(s, 0); continue; } - if(!(s = parsetoken(s, &n, http_separator))) return -1; + if(!(s = parsetoken(s, &n, cookie_separator))) { + error(0, "parse_cookie: cannot parse attribute name"); + return -1; + } s = skipwhite(s, 0); - if(*s++ != '=') return -1; + if(*s++ != '=') { + error(0, "parse_cookie: did not find expected '='"); + return -1; + } s = skipwhite(s, 0); - if(!(s = parseword(s, &v, http_separator))) return -1; + if(!(s = mime_parse_word(s, &v, cookie_value_separator))) { + error(0, "parse_cookie: cannot parse value for '%s'", n); + return -1; + } if(n[0] == '$') { /* Some bit of meta-information */ if(!strcmp(n, "$Version")) @@ -536,6 +632,132 @@ const struct cookie *find_cookie(const struct cookiedata *cd, return 0; } +/** @brief RFC822 quoting + * @param s String to quote + * @param force If non-0, always quote + * @return Possibly quoted string + */ +char *quote822(const char *s, int force) { + const char *t; + struct dynstr d[1]; + int c; + + if(!force) { + /* See if we need to quote */ + for(t = s; (c = (unsigned char)*t); ++t) { + if(mime_tspecial(c) || mime_http_separator(c) || whitespace(c)) + break; + } + if(*t) + force = 1; + } + + if(!force) + return xstrdup(s); + + dynstr_init(d); + dynstr_append(d, '"'); + for(t = s; (c = (unsigned char)*t); ++t) { + if(c == '"' || c == '\\') + dynstr_append(d, '\\'); + dynstr_append(d, c); + } + dynstr_append(d, '"'); + dynstr_terminate(d); + return d->vec; +} + +/** @brief Return true if @p ptr points at trailing space */ +static int is_trailing_space(const char *ptr) { + if(*ptr == ' ' || *ptr == '\t') { + while(*ptr == ' ' || *ptr == '\t') + ++ptr; + return *ptr == '\n' || *ptr == 0; + } else + return 0; +} + +/** @brief Encoding text as quoted-printable + * @param text String to encode + * @return Encoded string + * + * See RFC2045 + * s6.7. 
+ */ +char *mime_to_qp(const char *text) { + struct dynstr d[1]; + int linelength = 0; /* length of current line */ + char buffer[10]; + + dynstr_init(d); + /* The rules are: + * 1. Anything except newline can be replaced with =%02X + * 2. Newline, 33-60 and 62-126 stand for themselves (i.e. not '=') + * 3. Non-trailing space/tab stand for themselves. + * 4. Output lines are limited to 76 chars, with = being used + * as a soft line break + * 5. Newlines aren't counted towards the 76 char limit. + */ + while(*text) { + const int c = (unsigned char)*text; + if(c == '\n') { + /* Newline stands as itself */ + dynstr_append(d, '\n'); + linelength = 0; + } else if((c >= 33 && c <= 126 && c != '=') + || ((c == ' ' || c == '\t') + && !is_trailing_space(text))) { + /* Things that can stand for themselves */ + dynstr_append(d, c); + ++linelength; + } else { + /* Anything else that needs encoding */ + snprintf(buffer, sizeof buffer, "=%02X", c); + dynstr_append_string(d, buffer); + linelength += 3; + } + ++text; + if(linelength > 73 && *text && *text != '\n') { + /* Next character might overflow 76 character limit if encoded, so we + * insert a soft break */ + dynstr_append_string(d, "=\n"); + linelength = 0; + } + } + /* Ensure there is a final newline */ + if(linelength) + dynstr_append(d, '\n'); + /* That's all */ + dynstr_terminate(d); + return d->vec; +} + +/** @brief Encode text + * @param text Underlying UTF-8 text + * @param charsetp Where to store charset string + * @param encodingp Where to store encoding string + * @return Encoded text (might be @p text) + */ +const char *mime_encode_text(const char *text, + const char **charsetp, + const char **encodingp) { + const char *ptr; + + /* See if there are in fact any non-ASCII characters */ + for(ptr = text; *ptr; ++ptr) + if((unsigned char)*ptr >= 128) + break; + if(!*ptr) { + /* Plain old ASCII, no encoding required */ + *charsetp = "us-ascii"; + *encodingp = "7bit"; + return text; + } + *charsetp = "utf-8"; + 
*encodingp = "quoted-printable"; + return mime_to_qp(text); +} + /* Local Variables: c-basic-offset:2