chiark / gitweb /
more consistent language use in docs
[disorder] / lib / mime.c
CommitLineData
/*
 * This file is part of DisOrder
 * Copyright (C) 2005, 2007 Richard Kettlewell
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */
/** @file lib/mime.c
 * @brief Support for MIME and allied protocols
 */
460b9539 23
24#include <config.h>
25#include "types.h"
26
27#include <string.h>
28#include <ctype.h>
29
22896b25
RK
30#include <stdio.h>
31
460b9539 32#include "mem.h"
33#include "mime.h"
34#include "vector.h"
35#include "hex.h"
39d4aa6b 36#include "log.h"
460b9539 37
/** @brief Match whitespace characters
 * @param c Character to test
 * @return 1 if @p c is a space, tab, carriage return or line feed, else 0
 */
static int whitespace(int c) {
  switch(c) {
  case ' ':
  case '\t':
  case '\r':
  case '\n':
    return 1;
  default:
    return 0;
  }
}
50
/** @brief Match RFC2045 tspecial characters
 * @param c Character to test
 * @return 1 if @p c is one of ( ) < > @ , ; : \ " / [ ] ? = and 0 otherwise
 */
static int tspecial(int c) {
  switch(c) {
  case '(':
  case ')':
  case '<':
  case '>':
  case '@':
  case ',':
  case ';':
  case ':':
  case '\\':
  case '"':
  case '/':
  case '[':
  case ']':
  case '?':
  case '=':
    return 1;
  default:
    return 0;
  }
}
74
/** @brief Match RFC2616 separator characters
 * @param c Character to test
 * @return 1 if @p c is one of ( ) < > @ , ; : \ " / [ ] ? = { } space or
 * tab, and 0 otherwise
 */
static int http_separator(int c) {
  switch(c) {
  case '(':
  case ')':
  case '<':
  case '>':
  case '@':
  case ',':
  case ';':
  case ':':
  case '\\':
  case '"':
  case '/':
  case '[':
  case ']':
  case '?':
  case '=':
  case '{':
  case '}':
  case ' ':
  case '\t':
    return 1;
  default:
    return 0;
  }
}
102
/** @brief Match CRLF
 * @param ptr Pointer into a NUL-terminated string
 * @return Non-0 if @p ptr points at a CR immediately followed by LF, else 0
 *
 * @p ptr[1] is only inspected when @p ptr[0] is CR, so this never reads
 * beyond the terminating NUL.
 */
static int iscrlf(const char *ptr) {
  return ptr[0] == '\r' && ptr[1] == '\n';
}
107
/** @brief Skip whitespace
 * @param s Pointer into a NUL-terminated string
 * @param rfc822_comments If true, skip RFC822 nested comments
 * @return Pointer to the first character that is not skipped, or NULL if a
 * comment is not properly terminated
 *
 * Space, tab, CR and LF are always skipped.  When @p rfc822_comments is
 * false a '(' is treated as an ordinary character and scanning stops there.
 */
static const char *skipwhite(const char *s, int rfc822_comments) {
  int c, depth;

  for(;;) {
    switch(c = *s) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      ++s;
      break;
    case '(':
      if(!rfc822_comments)
        return s;
      ++s;
      depth = 1;
      /* Comments nest, so count brackets until we are back at depth 0 */
      while(*s && depth) {
        c = *s++;
        switch(c) {
        case '(': ++depth; break;
        case ')': --depth; break;
        case '\\':
          /* A backslash quotes the next character; it must exist */
          if(!*s) return 0;
          ++s;
          break;
        }
      }
      /* Ran out of input inside a comment */
      if(depth) return 0;
      break;
    default:
      return s;
    }
  }
}
145
39d4aa6b
RK
/** @brief Test for a word character
 * @param c Character to test
 * @param special tspecial() (MIME/RFC2045) or http_separator() (HTTP/RFC2616)
 * @return 1 if @p c is a word character, else 0
 *
 * A word character is anything strictly between space and DEL (i.e. in the
 * range '!' to '~') for which @p special returns 0.
 */
static int iswordchar(int c, int (*special)(int)) {
  return !(c <= ' ' || c > '~' || special(c));
}
154
155/** @brief Parse an RFC1521/RFC2616 word
156 * @param s Pointer to start of word
157 * @param valuep Where to store value
158 * @param special tspecial() (MIME/RFC2405) or http_separator() (HTTP/RFC2616)
159 * @return Pointer just after end of word or NULL if there's no word
160 *
161 * A word is a token or a quoted-string.
162 */
163static const char *parseword(const char *s, char **valuep,
164 int (*special)(int)) {
165 struct dynstr value[1];
460b9539 166 int c;
167
39d4aa6b
RK
168 dynstr_init(value);
169 if(*s == '"') {
170 ++s;
171 while((c = *s++) != '"') {
172 switch(c) {
173 case '\\':
174 if(!(c = *s++)) return 0;
175 default:
176 dynstr_append(value, c);
177 break;
178 }
460b9539 179 }
39d4aa6b
RK
180 if(!c) return 0;
181 } else {
182 if(!iswordchar((unsigned char)*s, special))
183 return NULL;
184 dynstr_init(value);
185 while(iswordchar((unsigned char)*s, special))
186 dynstr_append(value, *s++);
460b9539 187 }
39d4aa6b
RK
188 dynstr_terminate(value);
189 *valuep = value->vec;
460b9539 190 return s;
191}
192
39d4aa6b
RK
/** @brief Parse an RFC1521/RFC2616 token
 * @param s Pointer to start of token
 * @param valuep Where to store value
 * @param special tspecial() (MIME/RFC2045) or http_separator() (HTTP/RFC2616)
 * @return Pointer just after end of token or NULL if there's no token
 *
 * This is parseword() with quoted-strings rejected.
 */
static const char *parsetoken(const char *s, char **valuep,
                              int (*special)(int)) {
  if(*s == '"') return 0;
  return parseword(s, valuep, special);
}
204
205/** @brief Parse a MIME content-type field
206 * @param s Start of field
207 * @param typep Where to store type
208 * @param parameternamep Where to store parameter name
209 * @param parameternvaluep Wher to store parameter value
210 * @return 0 on success, non-0 on error
211 */
460b9539 212int mime_content_type(const char *s,
213 char **typep,
214 char **parameternamep,
215 char **parametervaluep) {
39d4aa6b 216 struct dynstr type, parametername;
460b9539 217
218 dynstr_init(&type);
39d4aa6b 219 if(!(s = skipwhite(s, 1))) return -1;
460b9539 220 if(!*s) return -1;
221 while(*s && !tspecial(*s) && !whitespace(*s))
222 dynstr_append(&type, tolower((unsigned char)*s++));
39d4aa6b 223 if(!(s = skipwhite(s, 1))) return -1;
460b9539 224 if(*s++ != '/') return -1;
225 dynstr_append(&type, '/');
39d4aa6b 226 if(!(s = skipwhite(s, 1))) return -1;
460b9539 227 while(*s && !tspecial(*s) && !whitespace(*s))
228 dynstr_append(&type, tolower((unsigned char)*s++));
39d4aa6b 229 if(!(s = skipwhite(s, 1))) return -1;
460b9539 230
231 if(*s == ';') {
232 dynstr_init(&parametername);
233 ++s;
39d4aa6b 234 if(!(s = skipwhite(s, 1))) return -1;
460b9539 235 if(!*s) return -1;
236 while(*s && !tspecial(*s) && !whitespace(*s))
237 dynstr_append(&parametername, tolower((unsigned char)*s++));
39d4aa6b 238 if(!(s = skipwhite(s, 1))) return -1;
460b9539 239 if(*s++ != '=') return -1;
39d4aa6b
RK
240 if(!(s = skipwhite(s, 1))) return -1;
241 if(!(s = parseword(s, parametervaluep, tspecial))) return -1;
242 if(!(s = skipwhite(s, 1))) return -1;
460b9539 243 dynstr_terminate(&parametername);
244 *parameternamep = parametername.vec;
245 } else
246 *parametervaluep = *parameternamep = 0;
247 dynstr_terminate(&type);
248 *typep = type.vec;
249 return 0;
250}
251
39d4aa6b
RK
252/** @brief Parse a MIME message
253 * @param s Start of message
254 * @param callback Called for each header field
255 * @param u Passed to callback
256 * @return Pointer to decoded body (might be in original string)
257 */
460b9539 258const char *mime_parse(const char *s,
259 int (*callback)(const char *name, const char *value,
260 void *u),
261 void *u) {
262 struct dynstr name, value;
263 char *cte = 0, *p;
264
265 while(*s && !iscrlf(s)) {
266 dynstr_init(&name);
267 dynstr_init(&value);
268 while(*s && !tspecial(*s) && !whitespace(*s))
269 dynstr_append(&name, tolower((unsigned char)*s++));
39d4aa6b 270 if(!(s = skipwhite(s, 1))) return 0;
460b9539 271 if(*s != ':') return 0;
272 ++s;
273 while(*s && !(*s == '\n' && !(s[1] == ' ' || s[1] == '\t')))
274 dynstr_append(&value, *s++);
275 if(*s) ++s;
276 dynstr_terminate(&name);
277 dynstr_terminate(&value);
278 if(!strcmp(name.vec, "content-transfer-encoding")) {
279 cte = xstrdup(value.vec);
280 for(p = cte; *p; p++)
281 *p = tolower((unsigned char)*p);
282 }
283 if(callback(name.vec, value.vec, u)) return 0;
284 }
285 if(*s) s += 2;
286 if(cte) {
8a7ccdfe 287 if(!strcmp(cte, "base64")) return mime_base64(s, 0);
460b9539 288 if(!strcmp(cte, "quoted-printable")) return mime_qp(s);
289 }
290 return s;
291}
292
/** @brief Test for a multipart boundary line
 * @param ptr Pointer to start of candidate line
 * @param boundary Boundary string
 * @param bl Length of @p boundary
 * @return Non-0 if @p ptr is at a boundary, else 0
 *
 * Matches "--" followed by @p boundary and then either CRLF (an ordinary
 * boundary) or "--" followed by CRLF or end of string (a final boundary).
 */
static int isboundary(const char *ptr, const char *boundary, size_t bl) {
  return (ptr[0] == '-'
          && ptr[1] == '-'
          && !strncmp(ptr + 2, boundary, bl)
          && (iscrlf(ptr + bl + 2)
              || (ptr[bl + 2] == '-'
                  && ptr[bl + 3] == '-'
                  && (iscrlf(ptr + bl + 4) || *(ptr + bl + 4) == 0))));
}
302
/** @brief Test for a final multipart boundary line
 * @param ptr Pointer to start of candidate line
 * @param boundary Boundary string
 * @param bl Length of @p boundary
 * @return Non-0 if @p ptr is at a final boundary, else 0
 *
 * Matches "--" followed by @p boundary, then "--", then CRLF or end of
 * string.
 */
static int isfinal(const char *ptr, const char *boundary, size_t bl) {
  return (ptr[0] == '-'
          && ptr[1] == '-'
          && !strncmp(ptr + 2, boundary, bl)
          && ptr[bl + 2] == '-'
          && ptr[bl + 3] == '-'
          && (iscrlf(ptr + bl + 4) || *(ptr + bl + 4) == 0));
}
311
39d4aa6b
RK
/** @brief Parse a multipart MIME body
 * @param s Start of message
 * @param callback Callback for each part
 * @param boundary Boundary string
 * @param u Passed to callback
 * @return 0 on success, non-0 on error
 *
 * @p s must start with a boundary line.  Each part is copied (without the
 * CRLF that precedes the next boundary) and passed to @p callback.  If
 * @p callback returns non-0 then parsing stops and that value is returned.
 * -1 is returned if the body does not start with a boundary or ends without
 * a final boundary.
 */
int mime_multipart(const char *s,
                   int (*callback)(const char *s, void *u),
                   const char *boundary,
                   void *u) {
  size_t bl = strlen(boundary);
  const char *start, *e;
  int ret;

  /* We must start with a boundary string */
  if(!isboundary(s, boundary, bl))
    return -1;
  /* Keep going until we hit a final boundary */
  while(!isfinal(s, boundary, bl)) {
    /* Step over the boundary line itself.  Don't do arithmetic on the
     * result of strstr() without checking it first. */
    if(!(e = strstr(s, "\r\n")))
      return -1;
    s = e + 2;
    start = s;
    /* Advance a line at a time until the next boundary */
    while(!isboundary(s, boundary, bl)) {
      if(!(e = strstr(s, "\r\n")))
        return -1;
      s = e + 2;
    }
    /* The CRLF before the boundary belongs to the boundary, not the part */
    if((ret = callback(xstrndup(start,
                                s == start ? 0 : s - start - 2),
                       u)))
      return ret;
  }
  return 0;
}
346
39d4aa6b
RK
347/** @brief Parse an RFC2388-style content-disposition field
348 * @param s Start of field
349 * @param typep Where to store type
350 * @param parameternamep Where to store parameter name
351 * @param parameternvaluep Wher to store parameter value
352 * @return 0 on success, non-0 on error
353 */
460b9539 354int mime_rfc2388_content_disposition(const char *s,
355 char **dispositionp,
356 char **parameternamep,
357 char **parametervaluep) {
39d4aa6b 358 struct dynstr disposition, parametername;
460b9539 359
360 dynstr_init(&disposition);
39d4aa6b 361 if(!(s = skipwhite(s, 1))) return -1;
460b9539 362 if(!*s) return -1;
363 while(*s && !tspecial(*s) && !whitespace(*s))
364 dynstr_append(&disposition, tolower((unsigned char)*s++));
39d4aa6b 365 if(!(s = skipwhite(s, 1))) return -1;
460b9539 366
367 if(*s == ';') {
368 dynstr_init(&parametername);
369 ++s;
39d4aa6b 370 if(!(s = skipwhite(s, 1))) return -1;
460b9539 371 if(!*s) return -1;
372 while(*s && !tspecial(*s) && !whitespace(*s))
373 dynstr_append(&parametername, tolower((unsigned char)*s++));
39d4aa6b 374 if(!(s = skipwhite(s, 1))) return -1;
460b9539 375 if(*s++ != '=') return -1;
39d4aa6b
RK
376 if(!(s = skipwhite(s, 1))) return -1;
377 if(!(s = parseword(s, parametervaluep, tspecial))) return -1;
378 if(!(s = skipwhite(s, 1))) return -1;
460b9539 379 dynstr_terminate(&parametername);
380 *parameternamep = parametername.vec;
381 } else
382 *parametervaluep = *parameternamep = 0;
383 dynstr_terminate(&disposition);
384 *dispositionp = disposition.vec;
385 return 0;
386}
387
39d4aa6b
RK
388/** @brief Convert MIME quoted-printable
389 * @param s Quoted-printable data
390 * @return Decoded data
391 */
460b9539 392char *mime_qp(const char *s) {
393 struct dynstr d;
394 int c, a, b;
395 const char *t;
396
397 dynstr_init(&d);
398 while((c = *s++)) {
399 switch(c) {
400 case '=':
401 if((a = unhexdigitq(s[0])) != -1
402 && (b = unhexdigitq(s[1])) != -1) {
403 dynstr_append(&d, a * 16 + b);
404 s += 2;
405 } else {
406 t = s;
407 while(*t == ' ' || *t == '\t') ++t;
408 if(iscrlf(t)) {
409 /* soft line break */
410 s = t + 2;
411 } else
412 return 0;
413 }
414 break;
415 case ' ':
416 case '\t':
417 t = s;
418 while(*t == ' ' || *t == '\t') ++t;
419 if(iscrlf(t))
420 /* trailing space is always eliminated */
421 s = t;
422 else
423 dynstr_append(&d, c);
424 break;
425 default:
426 dynstr_append(&d, c);
427 break;
428 }
429 }
430 dynstr_terminate(&d);
431 return d.vec;
432}
433
8a7ccdfe
RK
/** @brief The base64 alphabet, indexed by 6-bit value */
static const char mime_base64_table[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
436
39d4aa6b
RK
437/** @brief Convert MIME base64
438 * @param s base64 data
439 * @return Decoded data
440 */
8a7ccdfe 441char *mime_base64(const char *s, size_t *nsp) {
460b9539 442 struct dynstr d;
443 const char *t;
444 int b[4], n, c;
460b9539 445
446 dynstr_init(&d);
447 n = 0;
448 while((c = (unsigned char)*s++)) {
8a7ccdfe
RK
449 if((t = strchr(mime_base64_table, c))) {
450 b[n++] = t - mime_base64_table;
460b9539 451 if(n == 4) {
452 dynstr_append(&d, (b[0] << 2) + (b[1] >> 4));
453 dynstr_append(&d, (b[1] << 4) + (b[2] >> 2));
454 dynstr_append(&d, (b[2] << 6) + b[3]);
455 n = 0;
456 }
457 } else if(c == '=') {
458 if(n >= 2) {
459 dynstr_append(&d, (b[0] << 2) + (b[1] >> 4));
460 if(n == 3)
461 dynstr_append(&d, (b[1] << 4) + (b[2] >> 2));
462 }
463 break;
464 }
465 }
8a7ccdfe
RK
466 if(nsp)
467 *nsp = d.nvec;
460b9539 468 dynstr_terminate(&d);
469 return d.vec;
470}
471
8a7ccdfe
RK
472/** @brief Convert a binary string to base64
473 * @param s Bytes to convert
474 * @param ns Number of bytes to convert
475 * @return Encoded data
476 *
477 * This function does not attempt to split up lines.
478 */
479char *mime_to_base64(const uint8_t *s, size_t ns) {
480 struct dynstr d[1];
481
482 dynstr_init(d);
483 while(ns >= 3) {
484 /* Input bytes with output bits: AAAAAABB BBBBCCCC CCDDDDDD */
485 /* Output bytes with input bits: 000000 001111 111122 222222 */
486 dynstr_append(d, mime_base64_table[s[0] >> 2]);
487 dynstr_append(d, mime_base64_table[((s[0] & 3) << 4)
488 + (s[1] >> 4)]);
489 dynstr_append(d, mime_base64_table[((s[1] & 15) << 2)
490 + (s[2] >> 6)]);
491 dynstr_append(d, mime_base64_table[s[2] & 63]);
492 ns -= 3;
493 s += 3;
494 }
495 if(ns > 0) {
496 dynstr_append(d, mime_base64_table[s[0] >> 2]);
497 switch(ns) {
498 case 1:
499 dynstr_append(d, mime_base64_table[(s[0] & 3) << 4]);
500 dynstr_append(d, '=');
501 dynstr_append(d, '=');
502 break;
503 case 2:
504 dynstr_append(d, mime_base64_table[((s[0] & 3) << 4)
505 + (s[1] >> 4)]);
506 dynstr_append(d, mime_base64_table[(s[1] & 15) << 2]);
507 dynstr_append(d, '=');
508 break;
509 }
510 }
511 dynstr_terminate(d);
512 return d->vec;
513}
514
39d4aa6b
RK
515/** @brief Parse a RFC2109 Cookie: header
516 * @param s Header field value
517 * @param cd Where to store result
518 * @return 0 on success, non-0 on error
519 */
520int parse_cookie(const char *s,
521 struct cookiedata *cd) {
522 char *n = 0, *v = 0;
523
524 memset(cd, 0, sizeof *cd);
525 s = skipwhite(s, 0);
526 while(*s) {
527 /* Skip separators */
528 if(*s == ';' || *s == ',') {
529 ++s;
530 s = skipwhite(s, 0);
531 continue;
532 }
533 if(!(s = parsetoken(s, &n, http_separator))) return -1;
534 s = skipwhite(s, 0);
535 if(*s++ != '=') return -1;
536 s = skipwhite(s, 0);
537 if(!(s = parseword(s, &v, http_separator))) return -1;
538 if(n[0] == '$') {
539 /* Some bit of meta-information */
540 if(!strcmp(n, "$Version"))
541 cd->version = v;
542 else if(!strcmp(n, "$Path")) {
543 if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].path == 0)
544 cd->cookies[cd->ncookies-1].path = v;
545 else {
546 error(0, "redundant $Path in Cookie: header");
547 return -1;
548 }
549 } else if(!strcmp(n, "$Domain")) {
550 if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].domain == 0)
551 cd->cookies[cd->ncookies-1].domain = v;
552 else {
553 error(0, "redundant $Domain in Cookie: header");
554 return -1;
555 }
556 }
557 } else {
558 /* It's a new cookie */
559 cd->cookies = xrealloc(cd->cookies,
560 (cd->ncookies + 1) * sizeof (struct cookie));
561 cd->cookies[cd->ncookies].name = n;
562 cd->cookies[cd->ncookies].value = v;
563 cd->cookies[cd->ncookies].path = 0;
564 cd->cookies[cd->ncookies].domain = 0;
565 ++cd->ncookies;
566 }
567 s = skipwhite(s, 0);
568 if(*s && (*s != ',' && *s != ';')) {
569 error(0, "missing separator in Cookie: header");
570 return -1;
571 }
572 }
573 return 0;
574}
575
576/** @brief Find a named cookie
577 * @param cd Parse cookie data
578 * @param name Name of cookie
579 * @return Cookie structure or NULL if not found
580 */
581const struct cookie *find_cookie(const struct cookiedata *cd,
582 const char *name) {
583 int n;
584
585 for(n = 0; n < cd->ncookies; ++n)
586 if(!strcmp(cd->cookies[n].name, name))
587 return &cd->cookies[n];
588 return 0;
589}
590
/*
Local Variables:
c-basic-offset:2
comment-column:40
fill-column:79
End:
*/