From 39d4aa6b1c612305bf06760529e2a1532b9818a5 Mon Sep 17 00:00:00 2001 Message-Id: <39d4aa6b1c612305bf06760529e2a1532b9818a5.1715337865.git.mdw@distorted.org.uk> From: Mark Wooding Date: Sun, 25 Nov 2007 14:55:00 +0000 Subject: [PATCH] cookie header parser Organization: Straylight/Edgeware From: Richard Kettlewell --- lib/mime.c | 284 ++++++++++++++++++++++++++++++++++++++++++----------- lib/mime.h | 39 +++++++- lib/test.c | 47 +++++++++ 3 files changed, 313 insertions(+), 57 deletions(-) diff --git a/lib/mime.c b/lib/mime.c index b188214..c8ffe31 100644 --- a/lib/mime.c +++ b/lib/mime.c @@ -1,6 +1,6 @@ /* * This file is part of DisOrder - * Copyright (C) 2005 Richard Kettlewell + * Copyright (C) 2005, 2007 Richard Kettlewell * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,7 +17,9 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA */ - +/** @file lib/mime.c + * @brief Support for MIME and allied protocols + */ #include #include "types.h" @@ -29,7 +31,9 @@ #include "mime.h" #include "vector.h" #include "hex.h" +#include "log.h" +/** @brief Match whitespace characters */ static int whitespace(int c) { switch(c) { case ' ': @@ -42,6 +46,7 @@ static int whitespace(int c) { } } +/** @brief Match RFC2045 tspecial characters */ static int tspecial(int c) { switch(c) { case '(': @@ -65,7 +70,43 @@ static int tspecial(int c) { } } -static const char *skipwhite(const char *s) { +/** @brief Match RFC2616 separator characters */ +static int http_separator(int c) { + switch(c) { + case '(': + case ')': + case '<': + case '>': + case '@': + case ',': + case ';': + case ':': + case '\\': + case '"': + case '/': + case '[': + case ']': + case '?': + case '=': + case '{': + case '}': + case ' ': + case '\t': + return 1; + default: + return 0; + } +} + +/** @brief Match CRLF */ +static int iscrlf(const char *ptr) { + return ptr[0] == '\r' && ptr[1] ==
'\n'; +} + +/** @brief Skip whitespace + * @param rfc822_comments If true, skip RFC822 nested comments + */ +static const char *skipwhite(const char *s, int rfc822_comments) { int c, depth; for(;;) { @@ -77,6 +118,8 @@ static const char *skipwhite(const char *s) { ++s; break; case '(': + if(!rfc822_comments) + return s; ++s; depth = 1; while(*s && depth) { @@ -98,66 +141,103 @@ static const char *skipwhite(const char *s) { } } -static const char *parsestring(const char *s, char **valuep) { - struct dynstr value; +/** @brief Test for a word character + * @param c Character to test + * @param special tspecial() (MIME/RFC2045) or http_separator() (HTTP/RFC2616) + * @return 1 if @p c is a word character, else 0 + */ +static int iswordchar(int c, int (*special)(int)) { + return !(c <= ' ' || c > '~' || special(c)); +} + +/** @brief Parse an RFC1521/RFC2616 word + * @param s Pointer to start of word + * @param valuep Where to store value + * @param special tspecial() (MIME/RFC2045) or http_separator() (HTTP/RFC2616) + * @return Pointer just after end of word or NULL if there's no word + * + * A word is a token or a quoted-string.
+ */ +static const char *parseword(const char *s, char **valuep, + int (*special)(int)) { + struct dynstr value[1]; int c; - dynstr_init(&value); - ++s; - while((c = *s++) != '"') { - switch(c) { - case '\\': - if(!(c = *s++)) return 0; - default: - dynstr_append(&value, c); - break; + dynstr_init(value); + if(*s == '"') { + ++s; + while((c = *s++) != '"') { + switch(c) { + case '\\': + if(!(c = *s++)) return 0; + default: + dynstr_append(value, c); + break; + } } + if(!c) return 0; + } else { + if(!iswordchar((unsigned char)*s, special)) + return NULL; + dynstr_init(value); + while(iswordchar((unsigned char)*s, special)) + dynstr_append(value, *s++); } - if(!c) return 0; - dynstr_terminate(&value); - *valuep = value.vec; + dynstr_terminate(value); + *valuep = value->vec; return s; } +/** @brief Parse an RFC1521/RFC2616 token + * @param s Pointer to start of token + * @param valuep Where to store value + * @param special tspecial() (MIME/RFC2045) or http_separator() (HTTP/RFC2616) + * @return Pointer just after end of token or NULL if there's no token + */ +static const char *parsetoken(const char *s, char **valuep, + int (*special)(int)) { + if(*s == '"') return 0; + return parseword(s, valuep, special); +} + +/** @brief Parse a MIME content-type field + * @param s Start of field + * @param typep Where to store type + * @param parameternamep Where to store parameter name + * @param parametervaluep Where to store parameter value + * @return 0 on success, non-0 on error + */ int mime_content_type(const char *s, char **typep, char **parameternamep, char **parametervaluep) { - struct dynstr type, parametername, parametervalue; + struct dynstr type, parametername; dynstr_init(&type); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(!*s) return -1; while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&type, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(*s++ != '/')
return -1; dynstr_append(&type, '/'); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&type, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(*s == ';') { dynstr_init(&parametername); ++s; - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(!*s) return -1; while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&parametername, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(*s++ != '=') return -1; - if(!(s = skipwhite(s))) return -1; - if(*s == '"') { - if(!(s = parsestring(s, parametervaluep))) return -1; - } else { - dynstr_init(&parametervalue); - while(*s && !tspecial(*s) && !whitespace(*s)) - dynstr_append(&parametervalue, *s++); - dynstr_terminate(&parametervalue); - *parametervaluep = parametervalue.vec; - } - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; + if(!(s = parseword(s, parametervaluep, tspecial))) return -1; + if(!(s = skipwhite(s, 1))) return -1; dynstr_terminate(&parametername); *parameternamep = parametername.vec; } else @@ -167,10 +247,12 @@ int mime_content_type(const char *s, return 0; } -static int iscrlf(const char *ptr) { - return ptr[0] == '\r' && ptr[1] == '\n'; -} - +/** @brief Parse a MIME message + * @param s Start of message + * @param callback Called for each header field + * @param u Passed to callback + * @return Pointer to decoded body (might be in original string) + */ const char *mime_parse(const char *s, int (*callback)(const char *name, const char *value, void *u), @@ -183,7 +265,7 @@ const char *mime_parse(const char *s, dynstr_init(&value); while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&name, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return 0; + if(!(s = skipwhite(s, 1))) return 0; if(*s != ':') return 0; ++s; while(*s && !(*s == '\n' && !(s[1] == '
' || s[1] == '\t'))) @@ -225,6 +307,13 @@ static int isfinal(const char *ptr, const char *boundary, size_t bl) { && iscrlf(ptr + bl + 4)); } +/** @brief Parse a multipart MIME body + * @param s Start of message + * @param callback Callback for each part + * @param boundary Boundary string + * @param u Passed to callback + * @return 0 on success, non-0 on error + */ int mime_multipart(const char *s, int (*callback)(const char *s, void *u), const char *boundary, @@ -249,39 +338,38 @@ int mime_multipart(const char *s, return 0; } +/** @brief Parse an RFC2388-style content-disposition field + * @param s Start of field + * @param dispositionp Where to store disposition + * @param parameternamep Where to store parameter name + * @param parametervaluep Where to store parameter value + * @return 0 on success, non-0 on error + */ int mime_rfc2388_content_disposition(const char *s, char **dispositionp, char **parameternamep, char **parametervaluep) { - struct dynstr disposition, parametername, parametervalue; + struct dynstr disposition, parametername; dynstr_init(&disposition); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(!*s) return -1; while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&disposition, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(*s == ';') { dynstr_init(&parametername); ++s; - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(!*s) return -1; while(*s && !tspecial(*s) && !whitespace(*s)) dynstr_append(&parametername, tolower((unsigned char)*s++)); - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; if(*s++ != '=') return -1; - if(!(s = skipwhite(s))) return -1; - if(*s == '"') { - if(!(s = parsestring(s, parametervaluep))) return -1; - } else { - dynstr_init(&parametervalue); - while(*s && !tspecial(*s) && !whitespace(*s)) - dynstr_append(&parametervalue, *s++); - dynstr_terminate(&parametervalue); - *parametervaluep =
parametervalue.vec; - } - if(!(s = skipwhite(s))) return -1; + if(!(s = skipwhite(s, 1))) return -1; + if(!(s = parseword(s, parametervaluep, tspecial))) return -1; + if(!(s = skipwhite(s, 1))) return -1; dynstr_terminate(&parametername); *parameternamep = parametername.vec; } else @@ -291,6 +379,10 @@ int mime_rfc2388_content_disposition(const char *s, return 0; } +/** @brief Convert MIME quoted-printable + * @param s Quoted-printable data + * @return Decoded data + */ char *mime_qp(const char *s) { struct dynstr d; int c, a, b; @@ -333,6 +425,10 @@ char *mime_qp(const char *s) { return d.vec; } +/** @brief Convert MIME base64 + * @param s base64 data + * @return Decoded data + */ char *mime_base64(const char *s) { struct dynstr d; const char *t; @@ -364,6 +460,82 @@ char *mime_base64(const char *s) { return d.vec; } +/** @brief Parse an RFC2109 Cookie: header + * @param s Header field value + * @param cd Where to store result + * @return 0 on success, non-0 on error + */ +int parse_cookie(const char *s, + struct cookiedata *cd) { + char *n = 0, *v = 0; + + memset(cd, 0, sizeof *cd); + s = skipwhite(s, 0); + while(*s) { + /* Skip separators */ + if(*s == ';' || *s == ',') { + ++s; + s = skipwhite(s, 0); + continue; + } + if(!(s = parsetoken(s, &n, http_separator))) return -1; + s = skipwhite(s, 0); + if(*s++ != '=') return -1; + s = skipwhite(s, 0); + if(!(s = parseword(s, &v, http_separator))) return -1; + if(n[0] == '$') { + /* Some bit of meta-information */ + if(!strcmp(n, "$Version")) + cd->version = v; + else if(!strcmp(n, "$Path")) { + if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].path == 0) + cd->cookies[cd->ncookies-1].path = v; + else { + error(0, "redundant $Path in Cookie: header"); + return -1; + } + } else if(!strcmp(n, "$Domain")) { + if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].domain == 0) + cd->cookies[cd->ncookies-1].domain = v; + else { + error(0, "redundant $Domain in Cookie: header"); + return -1; + } + } + } else { + /* It's a new
cookie */ + cd->cookies = xrealloc(cd->cookies, + (cd->ncookies + 1) * sizeof (struct cookie)); + cd->cookies[cd->ncookies].name = n; + cd->cookies[cd->ncookies].value = v; + cd->cookies[cd->ncookies].path = 0; + cd->cookies[cd->ncookies].domain = 0; + ++cd->ncookies; + } + s = skipwhite(s, 0); + if(*s && (*s != ',' && *s != ';')) { + error(0, "missing separator in Cookie: header"); + return -1; + } + } + return 0; +} + +/** @brief Find a named cookie + * @param cd Parse cookie data + * @param name Name of cookie + * @return Cookie structure or NULL if not found + */ +const struct cookie *find_cookie(const struct cookiedata *cd, + const char *name) { + int n; + + for(n = 0; n < cd->ncookies; ++n) + if(!strcmp(cd->cookies[n].name, name)) + return &cd->cookies[n]; + return 0; +} + /* Local Variables: c-basic-offset:2 diff --git a/lib/mime.h b/lib/mime.h index 177d4a4..3300989 100644 --- a/lib/mime.h +++ b/lib/mime.h @@ -1,6 +1,6 @@ /* * This file is part of DisOrder - * Copyright (C) 2005 Richard Kettlewell + * Copyright (C) 2005, 2007 Richard Kettlewell * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,6 +17,9 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA */ +/** @file lib/mime.h + * @brief Support for MIME and allied protocols + */ #ifndef MIME_H #define MIME_H @@ -54,6 +57,40 @@ char *mime_qp(const char *s); char *mime_base64(const char *s); /* convert quoted-printable or base64 data */ +/** @brief Parsed form of an HTTP Cookie: header field */ +struct cookiedata { + /** @brief @c $Version or NULL if not set */ + char *version; + + /** @brief List of cookies */ + struct cookie *cookies; + + /** @brief Number of cookies */ + int ncookies; +}; + +/** @brief A parsed cookie */ +struct cookie { + /** @brief Cookie name */ + char *name; + + /** @brief Cookie value */ + char *value; + + /** @brief Cookie path */ + char *path; + + /** 
@brief Cookie domain */ + char *domain; + +}; + +int parse_cookie(const char *s, + struct cookiedata *cd); +const struct cookie *find_cookie(const struct cookiedata *cd, + const char *name); + + #endif /* MIME_H */ /* diff --git a/lib/test.c b/lib/test.c index f4043b9..3ae97c8 100644 --- a/lib/test.c +++ b/lib/test.c @@ -307,6 +307,52 @@ static void test_mime(void) { "\x04\x10\x41" "\x04\x10"); } +static void test_cookies(void) { + struct cookiedata cd[1]; + + fprintf(stderr, "test_cookies\n"); + + /* These are the examples from RFC2109 */ + insist(!parse_cookie("$Version=\"1\"; Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\"", cd)); + insist(!strcmp(cd->version, "1")); + insist(cd->ncookies = 1); + insist(find_cookie(cd, "Customer") == &cd->cookies[0]); + check_string(cd->cookies[0].value, "WILE_E_COYOTE"); + check_string(cd->cookies[0].path, "/acme"); + insist(cd->cookies[0].domain == 0); + insist(!parse_cookie("$Version=\"1\";\n" + "Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\";\n" + "Part_Number=\"Rocket_Launcher_0001\"; $Path=\"/acme\"", + cd)); + insist(cd->ncookies = 2); + insist(find_cookie(cd, "Customer") == &cd->cookies[0]); + insist(find_cookie(cd, "Part_Number") == &cd->cookies[1]); + check_string(cd->cookies[0].value, "WILE_E_COYOTE"); + check_string(cd->cookies[0].path, "/acme"); + insist(cd->cookies[0].domain == 0); + check_string(cd->cookies[1].value, "Rocket_Launcher_0001"); + check_string(cd->cookies[1].path, "/acme"); + insist(cd->cookies[1].domain == 0); + insist(!parse_cookie("$Version=\"1\";\n" + "Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\";\n" + "Part_Number=\"Rocket_Launcher_0001\"; $Path=\"/acme\";\n" + "Shipping=\"FedEx\"; $Path=\"/acme\"", + cd)); + insist(cd->ncookies = 3); + insist(find_cookie(cd, "Customer") == &cd->cookies[0]); + insist(find_cookie(cd, "Part_Number") == &cd->cookies[1]); + insist(find_cookie(cd, "Shipping") == &cd->cookies[2]); + check_string(cd->cookies[0].value, "WILE_E_COYOTE"); + check_string(cd->cookies[0].path, 
"/acme"); + insist(cd->cookies[0].domain == 0); + check_string(cd->cookies[1].value, "Rocket_Launcher_0001"); + check_string(cd->cookies[1].path, "/acme"); + insist(cd->cookies[1].domain == 0); + check_string(cd->cookies[2].value, "FedEx"); + check_string(cd->cookies[2].path, "/acme"); + insist(cd->cookies[2].domain == 0); +} + static void test_hex(void) { unsigned n; static const unsigned char h[] = { 0x00, 0xFF, 0x80, 0x7F }; @@ -703,6 +749,7 @@ int main(void) { /* mem.c */ /* mime.c */ test_mime(); + test_cookies(); /* mixer.c */ /* plugin.c */ /* printf.c */ -- [mdw]