chiark / gitweb /
merge extra MIME parsing
[disorder] / lib / mime.c
... / ...
CommitLineData
1/*
2 * This file is part of DisOrder
3 * Copyright (C) 2005, 2007 Richard Kettlewell
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
18 * USA
19 */
20/** @file lib/mime.c
21 * @brief Support for MIME and allied protocols
22 */
23
24#include <config.h>
25#include "types.h"
26
27#include <string.h>
28#include <ctype.h>
29
30#include <stdio.h>
31
32#include "mem.h"
33#include "mime.h"
34#include "vector.h"
35#include "hex.h"
36#include "log.h"
37
/** @brief Test whether a character is whitespace
 * @param c Character to test
 * @return 1 if @p c is space, tab, carriage return or newline, else 0
 */
static int whitespace(int c) {
  return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
50
/** @brief Test for an RFC2045 tspecial character
 * @param c Character to test
 * @return 1 if @p c is one of ( ) < > @ , ; : \ " / [ ] ? = and 0 otherwise
 */
static int tspecial(int c) {
  static const char specials[] = "()<>@,;:\\\"/[]?=";
  const char *p;

  /* The scan stops at the terminator, so c == 0 never matches */
  for(p = specials; *p; ++p)
    if(c == *p)
      return 1;
  return 0;
}
74
/** @brief Test for an RFC2616 separator character
 * @param c Character to test
 * @return 1 if @p c is one of ( ) < > @ , ; : \ " / [ ] ? = { } space or
 * tab, and 0 otherwise
 */
static int http_separator(int c) {
  static const char separators[] = "()<>@,;:\\\"/[]?={} \t";
  const char *p;

  /* The scan stops at the terminator, so c == 0 never matches */
  for(p = separators; *p; ++p)
    if(c == *p)
      return 1;
  return 0;
}
102
/** @brief Test whether a string starts with CRLF
 * @param ptr Pointer into a 0-terminated string
 * @return 1 if the next two characters are CR then LF, else 0
 *
 * The second character is only inspected if the first is CR.
 */
static int iscrlf(const char *ptr) {
  if(ptr[0] != '\r')
    return 0;
  return ptr[1] == '\n';
}
107
/** @brief Skip whitespace
 * @param s Pointer to start of text to skip
 * @param rfc822_comments If true, skip RFC822 nested comments
 * @return Pointer to the first character that was not skipped, or NULL if
 * a comment is not terminated
 *
 * Space, tab, CR and LF are always skipped.  If @p rfc822_comments is
 * nonzero then parenthesised comments are skipped too; they may nest, and
 * a backslash inside a comment escapes the following character.
 */
static const char *skipwhite(const char *s, int rfc822_comments) {
  int ch, nesting;

  while(*s) {
    if(*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n') {
      ++s;
      continue;
    }
    /* Anything other than a comment we are allowed to skip ends the scan */
    if(*s != '(' || !rfc822_comments)
      return s;
    /* Consume the comment, tracking nested parentheses */
    ++s;
    nesting = 1;
    while(*s && nesting) {
      ch = *s++;
      if(ch == '(')
        ++nesting;
      else if(ch == ')')
        --nesting;
      else if(ch == '\\') {
        /* A backslash must be followed by the character it escapes */
        if(!*s)
          return 0;
        ++s;
      }
    }
    /* Ran out of input inside the comment */
    if(nesting)
      return 0;
  }
  return s;
}
145
/** @brief Test for a word character
 * @param c Character to test
 * @param special tspecial() (MIME/RFC2045) or http_separator() (HTTP/RFC2616)
 * @return 1 if @p c is a word character, else 0
 *
 * A word character lies strictly between space and DEL and is not matched
 * by @p special.  @p special is only called for characters in that range.
 */
static int iswordchar(int c, int (*special)(int)) {
  if(c <= ' ' || c > '~')
    return 0;
  return !special(c);
}
154
155/** @brief Parse an RFC1521/RFC2616 word
156 * @param s Pointer to start of word
157 * @param valuep Where to store value
158 * @param special tspecial() (MIME/RFC2405) or http_separator() (HTTP/RFC2616)
159 * @return Pointer just after end of word or NULL if there's no word
160 *
161 * A word is a token or a quoted-string.
162 */
163static const char *parseword(const char *s, char **valuep,
164 int (*special)(int)) {
165 struct dynstr value[1];
166 int c;
167
168 dynstr_init(value);
169 if(*s == '"') {
170 ++s;
171 while((c = *s++) != '"') {
172 switch(c) {
173 case '\\':
174 if(!(c = *s++)) return 0;
175 default:
176 dynstr_append(value, c);
177 break;
178 }
179 }
180 if(!c) return 0;
181 } else {
182 if(!iswordchar((unsigned char)*s, special))
183 return NULL;
184 dynstr_init(value);
185 while(iswordchar((unsigned char)*s, special))
186 dynstr_append(value, *s++);
187 }
188 dynstr_terminate(value);
189 *valuep = value->vec;
190 return s;
191}
192
/** @brief Parse an RFC1521/RFC2616 token
 * @param s Pointer to start of token
 * @param valuep Where to store value
 * @param special tspecial() (MIME/RFC2045) or http_separator() (HTTP/RFC2616)
 * @return Pointer just after end of token or NULL if there's no token
 *
 * Identical to parseword() except that a quoted-string is rejected.
 */
static const char *parsetoken(const char *s, char **valuep,
                              int (*special)(int)) {
  /* parseword() would accept a quoted-string here; a token may not be one */
  return *s == '"' ? NULL : parseword(s, valuep, special);
}
204
205/** @brief Parse a MIME content-type field
206 * @param s Start of field
207 * @param typep Where to store type
208 * @param parameternamep Where to store parameter name
209 * @param parameternvaluep Wher to store parameter value
210 * @return 0 on success, non-0 on error
211 */
212int mime_content_type(const char *s,
213 char **typep,
214 char **parameternamep,
215 char **parametervaluep) {
216 struct dynstr type, parametername;
217
218 dynstr_init(&type);
219 if(!(s = skipwhite(s, 1))) return -1;
220 if(!*s) return -1;
221 while(*s && !tspecial(*s) && !whitespace(*s))
222 dynstr_append(&type, tolower((unsigned char)*s++));
223 if(!(s = skipwhite(s, 1))) return -1;
224 if(*s++ != '/') return -1;
225 dynstr_append(&type, '/');
226 if(!(s = skipwhite(s, 1))) return -1;
227 while(*s && !tspecial(*s) && !whitespace(*s))
228 dynstr_append(&type, tolower((unsigned char)*s++));
229 if(!(s = skipwhite(s, 1))) return -1;
230
231 if(*s == ';') {
232 dynstr_init(&parametername);
233 ++s;
234 if(!(s = skipwhite(s, 1))) return -1;
235 if(!*s) return -1;
236 while(*s && !tspecial(*s) && !whitespace(*s))
237 dynstr_append(&parametername, tolower((unsigned char)*s++));
238 if(!(s = skipwhite(s, 1))) return -1;
239 if(*s++ != '=') return -1;
240 if(!(s = skipwhite(s, 1))) return -1;
241 if(!(s = parseword(s, parametervaluep, tspecial))) return -1;
242 if(!(s = skipwhite(s, 1))) return -1;
243 dynstr_terminate(&parametername);
244 *parameternamep = parametername.vec;
245 } else
246 *parametervaluep = *parameternamep = 0;
247 dynstr_terminate(&type);
248 *typep = type.vec;
249 return 0;
250}
251
252/** @brief Parse a MIME message
253 * @param s Start of message
254 * @param callback Called for each header field
255 * @param u Passed to callback
256 * @return Pointer to decoded body (might be in original string)
257 */
258const char *mime_parse(const char *s,
259 int (*callback)(const char *name, const char *value,
260 void *u),
261 void *u) {
262 struct dynstr name, value;
263 char *cte = 0, *p;
264
265 while(*s && !iscrlf(s)) {
266 dynstr_init(&name);
267 dynstr_init(&value);
268 while(*s && !tspecial(*s) && !whitespace(*s))
269 dynstr_append(&name, tolower((unsigned char)*s++));
270 if(!(s = skipwhite(s, 1))) return 0;
271 if(*s != ':') return 0;
272 ++s;
273 while(*s && !(*s == '\n' && !(s[1] == ' ' || s[1] == '\t')))
274 dynstr_append(&value, *s++);
275 if(*s) ++s;
276 dynstr_terminate(&name);
277 dynstr_terminate(&value);
278 if(!strcmp(name.vec, "content-transfer-encoding")) {
279 cte = xstrdup(value.vec);
280 for(p = cte; *p; p++)
281 *p = tolower((unsigned char)*p);
282 }
283 if(callback(name.vec, value.vec, u)) return 0;
284 }
285 if(*s) s += 2;
286 if(cte) {
287 if(!strcmp(cte, "base64")) return mime_base64(s);
288 if(!strcmp(cte, "quoted-printable")) return mime_qp(s);
289 }
290 return s;
291}
292
/** @brief Test for a multipart boundary line
 * @param ptr Pointer to start of line
 * @param boundary Boundary string
 * @param bl Length of @p boundary
 * @return 1 if @p ptr is at a boundary line, else 0
 *
 * Matches "--" followed by the boundary and then either CRLF, or "--"
 * followed by CRLF or the end of the string.
 */
static int isboundary(const char *ptr, const char *boundary, size_t bl) {
  const char *after;

  if(ptr[0] != '-' || ptr[1] != '-')
    return 0;
  if(strncmp(ptr + 2, boundary, bl))
    return 0;
  after = ptr + bl + 2;
  /* Ordinary boundary */
  if(iscrlf(after))
    return 1;
  /* Final boundary */
  if(after[0] != '-' || after[1] != '-')
    return 0;
  return iscrlf(after + 2) || after[2] == 0;
}
302
/** @brief Test for a final multipart boundary line
 * @param ptr Pointer to start of line
 * @param boundary Boundary string
 * @param bl Length of @p boundary
 * @return 1 if @p ptr is at a final boundary line, else 0
 *
 * Matches "--" followed by the boundary, then "--", then CRLF or the end
 * of the string.
 */
static int isfinal(const char *ptr, const char *boundary, size_t bl) {
  const char *after;

  if(ptr[0] != '-' || ptr[1] != '-')
    return 0;
  if(strncmp(ptr + 2, boundary, bl))
    return 0;
  after = ptr + bl + 2;
  if(after[0] != '-' || after[1] != '-')
    return 0;
  return iscrlf(after + 2) || after[2] == 0;
}
311
/** @brief Parse a multipart MIME body
 * @param s Start of message
 * @param callback Callback for each part
 * @param boundary Boundary string
 * @param u Passed to callback
 * @return 0 on success, non-0 on error
 *
 * @p s must start with a boundary line.  Each part is passed to
 * @p callback as a freshly copied string that excludes the CRLF before the
 * following boundary.  If @p callback returns nonzero, parsing stops and
 * that value is returned.  -1 is returned if the input does not start with
 * a boundary or ends before a boundary is found.
 */
int mime_multipart(const char *s,
                   int (*callback)(const char *s, void *u),
                   const char *boundary,
                   void *u) {
  const size_t bl = strlen(boundary);
  const char *part, *eol;
  int rc;

  /* We must start with a boundary string */
  if(!isboundary(s, boundary, bl))
    return -1;
  /* Keep going until we hit a final boundary */
  while(!isfinal(s, boundary, bl)) {
    /* s is at a non-final boundary here, which is always followed by CRLF;
     * step over that line to reach the start of the part */
    s = strstr(s, "\r\n") + 2;
    part = s;
    /* Advance a line at a time until the next boundary */
    for(; !isboundary(s, boundary, bl); s = eol + 2) {
      eol = strstr(s, "\r\n");
      if(!eol)
        return -1;
    }
    /* The CRLF immediately before the boundary is not part of the body */
    rc = callback(xstrndup(part, s == part ? 0 : s - part - 2), u);
    if(rc)
      return rc;
  }
  return 0;
}
346
347/** @brief Parse an RFC2388-style content-disposition field
348 * @param s Start of field
349 * @param typep Where to store type
350 * @param parameternamep Where to store parameter name
351 * @param parameternvaluep Wher to store parameter value
352 * @return 0 on success, non-0 on error
353 */
354int mime_rfc2388_content_disposition(const char *s,
355 char **dispositionp,
356 char **parameternamep,
357 char **parametervaluep) {
358 struct dynstr disposition, parametername;
359
360 dynstr_init(&disposition);
361 if(!(s = skipwhite(s, 1))) return -1;
362 if(!*s) return -1;
363 while(*s && !tspecial(*s) && !whitespace(*s))
364 dynstr_append(&disposition, tolower((unsigned char)*s++));
365 if(!(s = skipwhite(s, 1))) return -1;
366
367 if(*s == ';') {
368 dynstr_init(&parametername);
369 ++s;
370 if(!(s = skipwhite(s, 1))) return -1;
371 if(!*s) return -1;
372 while(*s && !tspecial(*s) && !whitespace(*s))
373 dynstr_append(&parametername, tolower((unsigned char)*s++));
374 if(!(s = skipwhite(s, 1))) return -1;
375 if(*s++ != '=') return -1;
376 if(!(s = skipwhite(s, 1))) return -1;
377 if(!(s = parseword(s, parametervaluep, tspecial))) return -1;
378 if(!(s = skipwhite(s, 1))) return -1;
379 dynstr_terminate(&parametername);
380 *parameternamep = parametername.vec;
381 } else
382 *parametervaluep = *parameternamep = 0;
383 dynstr_terminate(&disposition);
384 *dispositionp = disposition.vec;
385 return 0;
386}
387
388/** @brief Convert MIME quoted-printable
389 * @param s Quoted-printable data
390 * @return Decoded data
391 */
392char *mime_qp(const char *s) {
393 struct dynstr d;
394 int c, a, b;
395 const char *t;
396
397 dynstr_init(&d);
398 while((c = *s++)) {
399 switch(c) {
400 case '=':
401 if((a = unhexdigitq(s[0])) != -1
402 && (b = unhexdigitq(s[1])) != -1) {
403 dynstr_append(&d, a * 16 + b);
404 s += 2;
405 } else {
406 t = s;
407 while(*t == ' ' || *t == '\t') ++t;
408 if(iscrlf(t)) {
409 /* soft line break */
410 s = t + 2;
411 } else
412 return 0;
413 }
414 break;
415 case ' ':
416 case '\t':
417 t = s;
418 while(*t == ' ' || *t == '\t') ++t;
419 if(iscrlf(t))
420 /* trailing space is always eliminated */
421 s = t;
422 else
423 dynstr_append(&d, c);
424 break;
425 default:
426 dynstr_append(&d, c);
427 break;
428 }
429 }
430 dynstr_terminate(&d);
431 return d.vec;
432}
433
434/** @brief Convert MIME base64
435 * @param s base64 data
436 * @return Decoded data
437 */
438char *mime_base64(const char *s) {
439 struct dynstr d;
440 const char *t;
441 int b[4], n, c;
442 static const char table[] =
443 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
444
445 dynstr_init(&d);
446 n = 0;
447 while((c = (unsigned char)*s++)) {
448 if((t = strchr(table, c))) {
449 b[n++] = t - table;
450 if(n == 4) {
451 dynstr_append(&d, (b[0] << 2) + (b[1] >> 4));
452 dynstr_append(&d, (b[1] << 4) + (b[2] >> 2));
453 dynstr_append(&d, (b[2] << 6) + b[3]);
454 n = 0;
455 }
456 } else if(c == '=') {
457 if(n >= 2) {
458 dynstr_append(&d, (b[0] << 2) + (b[1] >> 4));
459 if(n == 3)
460 dynstr_append(&d, (b[1] << 4) + (b[2] >> 2));
461 }
462 break;
463 }
464 }
465 dynstr_terminate(&d);
466 return d.vec;
467}
468
469/** @brief Parse a RFC2109 Cookie: header
470 * @param s Header field value
471 * @param cd Where to store result
472 * @return 0 on success, non-0 on error
473 */
474int parse_cookie(const char *s,
475 struct cookiedata *cd) {
476 char *n = 0, *v = 0;
477
478 memset(cd, 0, sizeof *cd);
479 s = skipwhite(s, 0);
480 while(*s) {
481 /* Skip separators */
482 if(*s == ';' || *s == ',') {
483 ++s;
484 s = skipwhite(s, 0);
485 continue;
486 }
487 if(!(s = parsetoken(s, &n, http_separator))) return -1;
488 s = skipwhite(s, 0);
489 if(*s++ != '=') return -1;
490 s = skipwhite(s, 0);
491 if(!(s = parseword(s, &v, http_separator))) return -1;
492 if(n[0] == '$') {
493 /* Some bit of meta-information */
494 if(!strcmp(n, "$Version"))
495 cd->version = v;
496 else if(!strcmp(n, "$Path")) {
497 if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].path == 0)
498 cd->cookies[cd->ncookies-1].path = v;
499 else {
500 error(0, "redundant $Path in Cookie: header");
501 return -1;
502 }
503 } else if(!strcmp(n, "$Domain")) {
504 if(cd->ncookies > 0 && cd->cookies[cd->ncookies-1].domain == 0)
505 cd->cookies[cd->ncookies-1].domain = v;
506 else {
507 error(0, "redundant $Domain in Cookie: header");
508 return -1;
509 }
510 }
511 } else {
512 /* It's a new cookie */
513 cd->cookies = xrealloc(cd->cookies,
514 (cd->ncookies + 1) * sizeof (struct cookie));
515 cd->cookies[cd->ncookies].name = n;
516 cd->cookies[cd->ncookies].value = v;
517 cd->cookies[cd->ncookies].path = 0;
518 cd->cookies[cd->ncookies].domain = 0;
519 ++cd->ncookies;
520 }
521 s = skipwhite(s, 0);
522 if(*s && (*s != ',' && *s != ';')) {
523 error(0, "missing separator in Cookie: header");
524 return -1;
525 }
526 }
527 return 0;
528}
529
530/** @brief Find a named cookie
531 * @param cd Parse cookie data
532 * @param name Name of cookie
533 * @return Cookie structure or NULL if not found
534 */
535const struct cookie *find_cookie(const struct cookiedata *cd,
536 const char *name) {
537 int n;
538
539 for(n = 0; n < cd->ncookies; ++n)
540 if(!strcmp(cd->cookies[n].name, name))
541 return &cd->cookies[n];
542 return 0;
543}
544
545/*
546Local Variables:
547c-basic-offset:2
548comment-column:40
549fill-column:79
550End:
551*/