Commit | Line | Data |
---|---|---|
460b9539 | 1 | /* |
2 | * This file is part of DisOrder | |
39d4aa6b | 3 | * Copyright (C) 2005, 2007 Richard Kettlewell |
460b9539 | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms of the GNU General Public License as published by | |
7 | * the Free Software Foundation; either version 2 of the License, or | |
8 | * (at your option) any later version. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU General Public License | |
16 | * along with this program; if not, write to the Free Software | |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | |
18 | * USA | |
19 | */ | |
39d4aa6b RK |
20 | /** @file lib/mime.c |
21 | * @brief Support for MIME and allied protocols | |
22 | */ | |
460b9539 | 23 | |
24 | #include <config.h> | |
25 | #include "types.h" | |
26 | ||
27 | #include <string.h> | |
28 | #include <ctype.h> | |
29 | ||
22896b25 RK |
30 | #include <stdio.h> |
31 | ||
460b9539 | 32 | #include "mem.h" |
33 | #include "mime.h" | |
34 | #include "vector.h" | |
35 | #include "hex.h" | |
39d4aa6b | 36 | #include "log.h" |
fce810c2 | 37 | #include "base64.h" |
460b9539 | 38 | |
/** @brief Match whitespace characters
 * @param c Character to test
 * @return 1 if @p c is a space, tab, carriage return or line feed, else 0
 */
static int whitespace(int c) {
  return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
51 | ||
/** @brief Match RFC2045 tspecial characters
 * @param c Character to test
 * @return 1 if @p c is one of <tt>( ) < > @ , ; : \ " / [ ] ? =</tt>, else 0
 */
static int tspecial(int c) {
  static const char specials[] = "()<>@,;:\\\"/[]?=";
  const char *p;

  /* The scan stops at the terminating NUL, so c == 0 never matches */
  for(p = specials; *p; ++p)
    if(c == *p)
      return 1;
  return 0;
}
75 | ||
/** @brief Match RFC2616 separator characters
 * @param c Character to test
 * @return 1 if @p c is one of <tt>( ) < > @ , ; : \ " / [ ] ? = { }</tt>,
 * a space or a tab, else 0
 */
static int http_separator(int c) {
  static const char separators[] = "()<>@,;:\\\"/[]?={} \t";
  const char *p;

  /* The scan stops at the terminating NUL, so c == 0 never matches */
  for(p = separators; *p; ++p)
    if(c == *p)
      return 1;
  return 0;
}
103 | ||
/** @brief Match CRLF
 * @param ptr Pointer into a NUL-terminated string
 * @return 1 if @p ptr points at a CR immediately followed by LF, else 0
 *
 * The second character is only inspected when the first is CR.
 */
static int iscrlf(const char *ptr) {
  if(ptr[0] != '\r')
    return 0;
  return ptr[1] == '\n';
}
108 | ||
/** @brief Skip whitespace
 * @param s Pointer into a NUL-terminated string
 * @param rfc822_comments If true, skip RFC822 nested comments
 * @return Pointer to the first character that was not skipped, or NULL if a
 * comment is not properly terminated
 *
 * Spaces, tabs, CRs and LFs are always skipped.  When @p rfc822_comments is
 * false a '(' is treated as an ordinary character and terminates the skip.
 */
static const char *skipwhite(const char *s, int rfc822_comments) {
  int nesting;
  char ch;

  for(;;) {
    if(*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n') {
      ++s;
      continue;
    }
    if(*s != '(' || !rfc822_comments)
      return s;
    /* Consume one comment, tracking nested parentheses */
    ++s;
    nesting = 1;
    while(*s && nesting) {
      ch = *s++;
      if(ch == '(')
        ++nesting;
      else if(ch == ')')
        --nesting;
      else if(ch == '\\') {
        /* Backslash quotes the next character, which must exist */
        if(!*s)
          return NULL;
        ++s;
      }
    }
    /* Ran off the end of the string inside a comment */
    if(nesting)
      return NULL;
  }
}
146 | ||
/** @brief Test for a word character
 * @param c Character to test
 * @param special tspecial() (MIME/RFC 2045) or http_separator() (HTTP/RFC2616)
 * @return 1 if @p c is a word character, else 0
 *
 * A word character is one strictly between space and DEL that @p special
 * does not match.  @p special is only consulted for characters in that range.
 */
static int iswordchar(int c, int (*special)(int)) {
  if(c <= ' ' || c > '~')
    return 0;
  return !special(c);
}
155 | ||
156 | /** @brief Parse an RFC1521/RFC2616 word | |
157 | * @param s Pointer to start of word | |
158 | * @param valuep Where to store value | |
159 | * @param special tspecial() (MIME/RFC2405) or http_separator() (HTTP/RFC2616) | |
160 | * @return Pointer just after end of word or NULL if there's no word | |
161 | * | |
162 | * A word is a token or a quoted-string. | |
163 | */ | |
164 | static const char *parseword(const char *s, char **valuep, | |
165 | int (*special)(int)) { | |
166 | struct dynstr value[1]; | |
460b9539 | 167 | int c; |
168 | ||
39d4aa6b RK |
169 | dynstr_init(value); |
170 | if(*s == '"') { | |
171 | ++s; | |
172 | while((c = *s++) != '"') { | |
173 | switch(c) { | |
174 | case '\\': | |
175 | if(!(c = *s++)) return 0; | |
176 | default: | |
177 | dynstr_append(value, c); | |
178 | break; | |
179 | } | |
460b9539 | 180 | } |
39d4aa6b RK |
181 | if(!c) return 0; |
182 | } else { | |
183 | if(!iswordchar((unsigned char)*s, special)) | |
184 | return NULL; | |
185 | dynstr_init(value); | |
186 | while(iswordchar((unsigned char)*s, special)) | |
187 | dynstr_append(value, *s++); | |
460b9539 | 188 | } |
39d4aa6b RK |
189 | dynstr_terminate(value); |
190 | *valuep = value->vec; | |
460b9539 | 191 | return s; |
192 | } | |
193 | ||
/** @brief Parse an RFC1521/RFC2616 token
 * @param s Pointer to start of token
 * @param valuep Where to store value
 * @param special tspecial() (MIME/RFC 2045) or http_separator() (HTTP/RFC2616)
 * @return Pointer just after end of token or NULL if there's no token
 *
 * Identical to parseword() except that a quoted-string is refused.
 */
static const char *parsetoken(const char *s, char **valuep,
                              int (*special)(int)) {
  return *s == '"' ? NULL : parseword(s, valuep, special);
}
205 | ||
206 | /** @brief Parse a MIME content-type field | |
207 | * @param s Start of field | |
208 | * @param typep Where to store type | |
209 | * @param parameternamep Where to store parameter name | |
210 | * @param parameternvaluep Wher to store parameter value | |
211 | * @return 0 on success, non-0 on error | |
78d8e29d RK |
212 | * |
213 | * See <a href="http://tools.ietf.org/html/rfc2045#section-5">RFC 2045 s5</a>. | |
39d4aa6b | 214 | */ |
460b9539 | 215 | int mime_content_type(const char *s, |
216 | char **typep, | |
217 | char **parameternamep, | |
218 | char **parametervaluep) { | |
39d4aa6b | 219 | struct dynstr type, parametername; |
460b9539 | 220 | |
221 | dynstr_init(&type); | |
39d4aa6b | 222 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 223 | if(!*s) return -1; |
224 | while(*s && !tspecial(*s) && !whitespace(*s)) | |
225 | dynstr_append(&type, tolower((unsigned char)*s++)); | |
39d4aa6b | 226 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 227 | if(*s++ != '/') return -1; |
228 | dynstr_append(&type, '/'); | |
39d4aa6b | 229 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 230 | while(*s && !tspecial(*s) && !whitespace(*s)) |
231 | dynstr_append(&type, tolower((unsigned char)*s++)); | |
39d4aa6b | 232 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 233 | |
234 | if(*s == ';') { | |
235 | dynstr_init(¶metername); | |
236 | ++s; | |
39d4aa6b | 237 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 238 | if(!*s) return -1; |
239 | while(*s && !tspecial(*s) && !whitespace(*s)) | |
240 | dynstr_append(¶metername, tolower((unsigned char)*s++)); | |
39d4aa6b | 241 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 242 | if(*s++ != '=') return -1; |
39d4aa6b RK |
243 | if(!(s = skipwhite(s, 1))) return -1; |
244 | if(!(s = parseword(s, parametervaluep, tspecial))) return -1; | |
245 | if(!(s = skipwhite(s, 1))) return -1; | |
460b9539 | 246 | dynstr_terminate(¶metername); |
247 | *parameternamep = parametername.vec; | |
248 | } else | |
249 | *parametervaluep = *parameternamep = 0; | |
250 | dynstr_terminate(&type); | |
251 | *typep = type.vec; | |
252 | return 0; | |
253 | } | |
254 | ||
39d4aa6b RK |
255 | /** @brief Parse a MIME message |
256 | * @param s Start of message | |
257 | * @param callback Called for each header field | |
258 | * @param u Passed to callback | |
78d8e29d RK |
259 | * @return Pointer to decoded body (might be in original string), or NULL on error |
260 | * | |
261 | * This does an RFC 822-style parse and honors Content-Transfer-Encoding as | |
262 | * described in <a href="http://tools.ietf.org/html/rfc2045#section-6">RFC 2045 | |
263 | * s6</a>. @p callback is called for each header field encountered, in order, | |
264 | * with ASCII characters in the header name forced to lower case. | |
39d4aa6b | 265 | */ |
460b9539 | 266 | const char *mime_parse(const char *s, |
267 | int (*callback)(const char *name, const char *value, | |
268 | void *u), | |
269 | void *u) { | |
270 | struct dynstr name, value; | |
271 | char *cte = 0, *p; | |
272 | ||
273 | while(*s && !iscrlf(s)) { | |
274 | dynstr_init(&name); | |
275 | dynstr_init(&value); | |
276 | while(*s && !tspecial(*s) && !whitespace(*s)) | |
277 | dynstr_append(&name, tolower((unsigned char)*s++)); | |
39d4aa6b | 278 | if(!(s = skipwhite(s, 1))) return 0; |
460b9539 | 279 | if(*s != ':') return 0; |
280 | ++s; | |
281 | while(*s && !(*s == '\n' && !(s[1] == ' ' || s[1] == '\t'))) | |
282 | dynstr_append(&value, *s++); | |
283 | if(*s) ++s; | |
284 | dynstr_terminate(&name); | |
285 | dynstr_terminate(&value); | |
286 | if(!strcmp(name.vec, "content-transfer-encoding")) { | |
287 | cte = xstrdup(value.vec); | |
288 | for(p = cte; *p; p++) | |
289 | *p = tolower((unsigned char)*p); | |
290 | } | |
291 | if(callback(name.vec, value.vec, u)) return 0; | |
292 | } | |
293 | if(*s) s += 2; | |
294 | if(cte) { | |
8a7ccdfe | 295 | if(!strcmp(cte, "base64")) return mime_base64(s, 0); |
460b9539 | 296 | if(!strcmp(cte, "quoted-printable")) return mime_qp(s); |
297 | } | |
298 | return s; | |
299 | } | |
300 | ||
/** @brief Match the boundary string
 * @param ptr Position in the message to test
 * @param boundary Boundary string
 * @param bl Length of @p boundary
 * @return 1 if @p ptr is at a boundary line (final or not), else 0
 *
 * A boundary line is "--" followed by @p boundary and then either CRLF, or
 * "--" followed by CRLF or the end of the string.
 */
static int isboundary(const char *ptr, const char *boundary, size_t bl) {
  const char *after;

  if(ptr[0] != '-' || ptr[1] != '-')
    return 0;
  if(strncmp(ptr + 2, boundary, bl))
    return 0;
  /* What follows the boundary text decides the kind of delimiter */
  after = ptr + bl + 2;
  if(iscrlf(after))
    return 1;
  if(after[0] != '-' || after[1] != '-')
    return 0;
  return iscrlf(after + 2) || after[2] == 0;
}
311 | ||
/** @brief Match the final boundary string
 * @param ptr Position in the message to test
 * @param boundary Boundary string
 * @param bl Length of @p boundary
 * @return 1 if @p ptr is at the final boundary line, else 0
 *
 * The final boundary line is "--" followed by @p boundary and "--", and then
 * either CRLF or the end of the string.
 */
static int isfinal(const char *ptr, const char *boundary, size_t bl) {
  const char *after;

  if(ptr[0] != '-' || ptr[1] != '-')
    return 0;
  if(strncmp(ptr + 2, boundary, bl))
    return 0;
  after = ptr + bl + 2;
  if(after[0] != '-' || after[1] != '-')
    return 0;
  return iscrlf(after + 2) || after[2] == 0;
}
321 | ||
/** @brief Parse a multipart MIME body
 * @param s Start of message
 * @param callback Callback for each part
 * @param boundary Boundary string
 * @param u Passed to callback
 * @return 0 on success, non-0 on error
 *
 * See <a href="http://tools.ietf.org/html/rfc2046#section-5.1">RFC 2046
 * s5.1</a>.  @p callback is called for each part (not yet decoded in any way)
 * in succession; you should probably call mime_parse() for each part.
 *
 * Each part is handed to @p callback as a copy made with xstrndup(), without
 * the CRLF that precedes the following boundary.  A non-0 return from
 * @p callback stops the parse and becomes the return value.  -1 is returned
 * if @p s does not start with a boundary or if the input ends before the
 * next boundary is found.
 */
int mime_multipart(const char *s,
                   int (*callback)(const char *s, void *u),
                   const char *boundary,
                   void *u) {
  const size_t bl = strlen(boundary);
  const char *part, *eol;
  size_t partlen;
  int rc;

  /* We must start with a boundary string */
  if(!isboundary(s, boundary, bl))
    return -1;
  for(;;) {
    /* The final boundary ends the parse */
    if(isfinal(s, boundary, bl))
      return 0;
    /* s is at a non-final boundary line; the part starts on the next line */
    part = s = strstr(s, "\r\n") + 2;
    /* Advance a line at a time until the next boundary line */
    while(!isboundary(s, boundary, bl)) {
      eol = strstr(s, "\r\n");
      if(!eol)
        return -1;
      s = eol + 2;
    }
    /* Drop the CRLF before the boundary, unless the part is empty */
    partlen = s == part ? 0 : (size_t)(s - part - 2);
    rc = callback(xstrndup(part, partlen), u);
    if(rc)
      return rc;
  }
}
360 | ||
39d4aa6b RK |
361 | /** @brief Parse an RFC2388-style content-disposition field |
362 | * @param s Start of field | |
363 | * @param typep Where to store type | |
364 | * @param parameternamep Where to store parameter name | |
365 | * @param parameternvaluep Wher to store parameter value | |
366 | * @return 0 on success, non-0 on error | |
78d8e29d RK |
367 | * |
368 | * See <a href="http://tools.ietf.org/html/rfc2388#section-3">RFC 2388 s3</a> | |
369 | * and <a href="http://tools.ietf.org/html/rfc2183">RFC 2183</a>. | |
39d4aa6b | 370 | */ |
460b9539 | 371 | int mime_rfc2388_content_disposition(const char *s, |
372 | char **dispositionp, | |
373 | char **parameternamep, | |
374 | char **parametervaluep) { | |
39d4aa6b | 375 | struct dynstr disposition, parametername; |
460b9539 | 376 | |
377 | dynstr_init(&disposition); | |
39d4aa6b | 378 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 379 | if(!*s) return -1; |
380 | while(*s && !tspecial(*s) && !whitespace(*s)) | |
381 | dynstr_append(&disposition, tolower((unsigned char)*s++)); | |
39d4aa6b | 382 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 383 | |
384 | if(*s == ';') { | |
385 | dynstr_init(¶metername); | |
386 | ++s; | |
39d4aa6b | 387 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 388 | if(!*s) return -1; |
389 | while(*s && !tspecial(*s) && !whitespace(*s)) | |
390 | dynstr_append(¶metername, tolower((unsigned char)*s++)); | |
39d4aa6b | 391 | if(!(s = skipwhite(s, 1))) return -1; |
460b9539 | 392 | if(*s++ != '=') return -1; |
39d4aa6b RK |
393 | if(!(s = skipwhite(s, 1))) return -1; |
394 | if(!(s = parseword(s, parametervaluep, tspecial))) return -1; | |
395 | if(!(s = skipwhite(s, 1))) return -1; | |
460b9539 | 396 | dynstr_terminate(¶metername); |
397 | *parameternamep = parametername.vec; | |
398 | } else | |
399 | *parametervaluep = *parameternamep = 0; | |
400 | dynstr_terminate(&disposition); | |
401 | *dispositionp = disposition.vec; | |
402 | return 0; | |
403 | } | |
404 | ||
39d4aa6b RK |
405 | /** @brief Convert MIME quoted-printable |
406 | * @param s Quoted-printable data | |
407 | * @return Decoded data | |
78d8e29d RK |
408 | * |
409 | * See <a href="http://tools.ietf.org/html/rfc2045#section-6.7">RFC 2045 | |
410 | * s6.7</a>. | |
39d4aa6b | 411 | */ |
460b9539 | 412 | char *mime_qp(const char *s) { |
413 | struct dynstr d; | |
414 | int c, a, b; | |
415 | const char *t; | |
416 | ||
417 | dynstr_init(&d); | |
418 | while((c = *s++)) { | |
419 | switch(c) { | |
420 | case '=': | |
421 | if((a = unhexdigitq(s[0])) != -1 | |
422 | && (b = unhexdigitq(s[1])) != -1) { | |
423 | dynstr_append(&d, a * 16 + b); | |
424 | s += 2; | |
425 | } else { | |
426 | t = s; | |
427 | while(*t == ' ' || *t == '\t') ++t; | |
428 | if(iscrlf(t)) { | |
429 | /* soft line break */ | |
430 | s = t + 2; | |
431 | } else | |
432 | return 0; | |
433 | } | |
434 | break; | |
435 | case ' ': | |
436 | case '\t': | |
437 | t = s; | |
438 | while(*t == ' ' || *t == '\t') ++t; | |
439 | if(iscrlf(t)) | |
440 | /* trailing space is always eliminated */ | |
441 | s = t; | |
442 | else | |
443 | dynstr_append(&d, c); | |
444 | break; | |
445 | default: | |
446 | dynstr_append(&d, c); | |
447 | break; | |
448 | } | |
449 | } | |
450 | dynstr_terminate(&d); | |
451 | return d.vec; | |
452 | } | |
453 | ||
39d4aa6b RK |
454 | /** @brief Parse a RFC2109 Cookie: header |
455 | * @param s Header field value | |
456 | * @param cd Where to store result | |
457 | * @return 0 on success, non-0 on error | |
458 | */ | |
459 | int parse_cookie(const char *s, | |
460 | struct cookiedata *cd) { | |
461 | char *n = 0, *v = 0; | |
462 | ||
463 | memset(cd, 0, sizeof *cd); | |
464 | s = skipwhite(s, 0); | |
465 | while(*s) { | |
466 | /* Skip separators */ | |
467 | if(*s == ';' || *s == ',') { | |
468 | ++s; | |
469 | s = skipwhite(s, 0); | |
470 | continue; | |
471 | } | |
472 | if(!(s = parsetoken(s, &n, http_separator))) return -1; | |
473 | s = skipwhite(s, 0); | |
474 | if(*s++ != '=') return -1; | |
475 | s = skipwhite(s, 0); | |
476 | if(!(s = parseword(s, &v, http_separator))) return -1; | |
477 | if(n[0] == '$') { | |
478 | /* Some bit of meta-information */ | |
479 | if(!strcmp(n, "$Version")) | |
480 | cd->version = v; | |
481 | else if(!strcmp(n, "$Path")) { | |
482 | if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].path == 0) | |
483 | cd->cookies[cd->ncookies-1].path = v; | |
484 | else { | |
485 | error(0, "redundant $Path in Cookie: header"); | |
486 | return -1; | |
487 | } | |
488 | } else if(!strcmp(n, "$Domain")) { | |
489 | if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].domain == 0) | |
490 | cd->cookies[cd->ncookies-1].domain = v; | |
491 | else { | |
492 | error(0, "redundant $Domain in Cookie: header"); | |
493 | return -1; | |
494 | } | |
495 | } | |
496 | } else { | |
497 | /* It's a new cookie */ | |
498 | cd->cookies = xrealloc(cd->cookies, | |
499 | (cd->ncookies + 1) * sizeof (struct cookie)); | |
500 | cd->cookies[cd->ncookies].name = n; | |
501 | cd->cookies[cd->ncookies].value = v; | |
502 | cd->cookies[cd->ncookies].path = 0; | |
503 | cd->cookies[cd->ncookies].domain = 0; | |
504 | ++cd->ncookies; | |
505 | } | |
506 | s = skipwhite(s, 0); | |
507 | if(*s && (*s != ',' && *s != ';')) { | |
508 | error(0, "missing separator in Cookie: header"); | |
509 | return -1; | |
510 | } | |
511 | } | |
512 | return 0; | |
513 | } | |
514 | ||
515 | /** @brief Find a named cookie | |
516 | * @param cd Parse cookie data | |
517 | * @param name Name of cookie | |
518 | * @return Cookie structure or NULL if not found | |
519 | */ | |
520 | const struct cookie *find_cookie(const struct cookiedata *cd, | |
521 | const char *name) { | |
522 | int n; | |
523 | ||
524 | for(n = 0; n < cd->ncookies; ++n) | |
525 | if(!strcmp(cd->cookies[n].name, name)) | |
526 | return &cd->cookies[n]; | |
527 | return 0; | |
528 | } | |
529 | ||
460b9539 | 530 | /* |
531 | Local Variables: | |
532 | c-basic-offset:2 | |
533 | comment-column:40 | |
534 | fill-column:79 | |
535 | End: | |
536 | */ |