1 /* mime-parser.c - Parse MIME structures (high level rfc822 parser).
2 * Copyright (C) 2016 g10 Code GmbH
4 * This file is part of GnuPG.
6 * GnuPG is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 3 of the License, or
9 * (at your option) any later version.
11 * GnuPG is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <https://www.gnu.org/licenses/>.
26 #include "rfc822parse.h"
27 #include "mime-parser.h"
33 PGPMIME_WAIT_ENCVERSION,
34 PGPMIME_IN_ENCVERSION,
38 PGPMIME_WAIT_SIGNEDDATA,
39 PGPMIME_IN_SIGNEDDATA,
40 PGPMIME_WAIT_SIGNATURE,
42 PGPMIME_GOT_SIGNATURE,
47 /* Definition of the mime parser object. It bundles the user callbacks, the PGP/MIME state and the flags steering the debug output. */
48 struct mime_parser_context_s
50 void *cookie; /* Cookie passed to all callbacks. */
52 /* The callback to announce a new part. It receives the media type and the media subtype of the part. */
53 gpg_error_t (*new_part) (void *cookie,
54 const char *mediatype,
55 const char *mediasubtype);
56 /* The callback to return data of a part. NULL data marks the end of the part. */
57 gpg_error_t (*part_data) (void *cookie,
60 /* The callback to collect encrypted data. NULL data marks the end of the encrypted data. */
61 gpg_error_t (*collect_encrypted) (void *cookie, const char *data);
62 /* The callback to collect signed data. NULL data marks the end of the signed data. */
63 gpg_error_t (*collect_signeddata) (void *cookie, const char *data);
64 /* The callback to collect a signature. NULL data marks the end of the signature. */
65 gpg_error_t (*collect_signature) (void *cookie, const char *data);
67 /* The RFC822 parser context is stored here during callbacks. */
70 /* Helper to convey error codes from user callbacks. */
73 int nesting_level; /* The current nesting level. */
74 int hashing_at_level; /* The nesting level at which we are hashing. */
75 enum pgpmime_states pgpmime; /* Current PGP/MIME state. */
76 unsigned int delay_hashing:1;/* Helper for PGPMIME_IN_SIGNEDDATA: set after a line was collected so that its CR,LF is passed on before the next line. */
77 unsigned int want_part:1; /* Return the current part. */
78 unsigned int decode_part:2; /* Decode the part. 1 = QP, 2 = Base64. */
80 unsigned int verbose:1; /* Enable verbose mode. */
81 unsigned int debug:1; /* Enable debug mode. */
83 /* Flags to help with debug output. */
85 unsigned int n_skip; /* Number of lines to skip in the debug output. */
86 unsigned int header:1; /* Show the header lines. */
87 unsigned int data:1; /* Show the data lines. */
88 unsigned int as_note:1; /* Show the next data line as a note. */
89 unsigned int boundary : 1; /* Show the next line as a boundary; set on boundary events and cleared after the line was shown. */
92 struct b64state *b64state; /* NULL or malloced Base64 decoder state. */
94 /* A buffer for reading a mail line. */
99 /* Print the event received by the parser for debugging. EVENT is
 * translated to a fixed name which is then written with log_debug;
 * values without a name are shown as [unknown event]. */
101 show_message_parser_event (rfc822parse_event_t event)
/* Pick the printable name of EVENT. */
107 case RFC822PARSE_OPEN: s= "Open"; break;
108 case RFC822PARSE_CLOSE: s= "Close"; break;
109 case RFC822PARSE_CANCEL: s= "Cancel"; break;
110 case RFC822PARSE_T2BODY: s= "T2Body"; break;
111 case RFC822PARSE_FINISH: s= "Finish"; break;
112 case RFC822PARSE_RCVD_SEEN: s= "Rcvd_Seen"; break;
113 case RFC822PARSE_LEVEL_DOWN: s= "Level_Down"; break;
114 case RFC822PARSE_LEVEL_UP: s= "Level_Up"; break;
115 case RFC822PARSE_BOUNDARY: s= "Boundary"; break;
116 case RFC822PARSE_LAST_BOUNDARY: s= "Last_Boundary"; break;
117 case RFC822PARSE_BEGIN_HEADER: s= "Begin_Header"; break;
118 case RFC822PARSE_PREAMBLE: s= "Preamble"; break;
119 case RFC822PARSE_EPILOGUE: s= "Epilogue"; break;
/* Fallback so that S is always set before it is logged. */
120 default: s= "[unknown event]"; break;
122 log_debug ("*** RFC822 event %s\n", s);
126 /* Do in-place decoding of quoted-printable data of LENGTH in BUFFER.
127 Returns the new length of the buffer and stores true at R_SLBRK if
128 the line ended with a soft line break; false is stored if not.
129 This function assumes that a complete line is passed in
132 qp_decode (char *buffer, size_t length, int *r_slbrk)
139 /* Fixme: We should remove trailing white space first. */
/* S is the read position and D the write position; both start at
 * BUFFER and LENGTH counts the bytes still to be read. */
140 for (s=d=buffer; length; length--)
/* Two hex digits follow: store the byte they encode (xtoi_2). */
144 if (length > 2 && hexdigitp (s+1) && hexdigitp (s+2))
147 *(unsigned char*)d++ = xtoi_2 (s);
151 else if (length > 2 && s[1] == '\r' && s[2] == '\n')
153 /* Soft line break. */
/* R_SLBRK may be NULL if the caller does not need the flag. */
156 if (r_slbrk && length == 1)
159 else if (length > 1 && s[1] == '\n')
161 /* Soft line break with only a Unix line terminator. */
164 if (r_slbrk && length == 1)
167 else if (length == 1)
169 /* Soft line break at the end of the line. */
185 /* This function is called by parse_mail to communicate events. This
186 * callback communicates with the caller using a structure passed in
187 * OPAQUE. Should return 0 on success or set ERRNO and return -1.
 *
 * OPAQUE is the mime parser object; the function is registered with
 * rfc822parse_open by mime_parser_parse. EVENT selects the action
 * and MSG is the RFC822 parser handle used to query header fields.
 * Error codes returned by user callbacks are stored in the err
 * member of the parser object. */
189 parse_message_cb (void *opaque, rfc822parse_event_t event, rfc822parse_t msg)
191 mime_parser_t ctx = opaque;
195 /* Make the RFC822 parser context available for callbacks. */
199 show_message_parser_event (event);
201 if (event == RFC822PARSE_BEGIN_HEADER || event == RFC822PARSE_T2BODY)
203 /* We need to check here whether to start collecting signed data
204 * because attachments might come without header lines and thus
205 * we won't see the BEGIN_HEADER event. */
206 if (ctx->pgpmime == PGPMIME_WAIT_SIGNEDDATA)
209 log_debug ("begin_hash\n");
/* Remember the level so that only a boundary at this level ends
 * the signed data. */
210 ctx->hashing_at_level = ctx->nesting_level;
211 ctx->pgpmime = PGPMIME_IN_SIGNEDDATA;
212 ctx->delay_hashing = 0;
216 if (event == RFC822PARSE_OPEN)
218 /* Initialize for a new message. */
219 ctx->show.header = 1;
221 else if (event == RFC822PARSE_T2BODY)
223 rfc822parse_field_t field;
226 ctx->decode_part = 0;
/* Classify the part from its Content-Type header field. */
227 field = rfc822parse_parse_field (msg, "Content-Type", -1);
232 s1 = rfc822parse_query_media_type (field, &s2);
236 log_debug ("h media: %*s%s %s\n",
237 ctx->nesting_level*2, "", s1, s2);
/* Inside a PGP/MIME container the state dictates which media type
 * has to come next; any other type marks the structure as invalid. */
238 if (ctx->pgpmime == PGPMIME_WAIT_ENCVERSION)
240 if (!strcmp (s1, "application")
241 && !strcmp (s2, "pgp-encrypted"))
244 log_debug ("c begin_encversion\n");
245 ctx->pgpmime = PGPMIME_IN_ENCVERSION;
249 log_error ("invalid PGP/MIME structure;"
250 " expected '%s', got '%s/%s'\n",
251 "application/pgp-encrypted", s1, s2);
252 ctx->pgpmime = PGPMIME_INVALID;
255 else if (ctx->pgpmime == PGPMIME_WAIT_ENCDATA)
257 if (!strcmp (s1, "application")
258 && !strcmp (s2, "octet-stream"))
261 log_debug ("c begin_encdata\n");
262 ctx->pgpmime = PGPMIME_IN_ENCDATA;
266 log_error ("invalid PGP/MIME structure;"
267 " expected '%s', got '%s/%s'\n",
268 "application/octet-stream", s1, s2);
269 ctx->pgpmime = PGPMIME_INVALID;
272 else if (ctx->pgpmime == PGPMIME_WAIT_SIGNATURE)
274 if (!strcmp (s1, "application")
275 && !strcmp (s2, "pgp-signature"))
278 log_debug ("c begin_signature\n");
279 ctx->pgpmime = PGPMIME_IN_SIGNATURE;
283 log_error ("invalid PGP/MIME structure;"
284 " expected '%s', got '%s/%s'\n",
285 "application/pgp-signature", s1, s2);
286 ctx->pgpmime = PGPMIME_INVALID;
/* multipart/encrypted: only the application/pgp-encrypted protocol
 * starts PGP/MIME processing. */
289 else if (!strcmp (s1, "multipart")
290 && !strcmp (s2, "encrypted"))
292 s = rfc822parse_query_parameter (field, "protocol", 0);
296 log_debug ("h encrypted.protocol: %s\n", s);
/* NOTE(review): the message logged below speaks of a nested
 * signature although this is the multipart/encrypted branch --
 * confirm the intended wording. */
297 if (!strcmp (s, "application/pgp-encrypted"))
301 "ignoring nested PGP/MIME signature\n");
303 ctx->pgpmime = PGPMIME_WAIT_ENCVERSION;
305 else if (ctx->verbose)
306 log_debug ("# this protocol is not supported\n");
/* multipart/signed: only the application/pgp-signature protocol is
 * handled. */
309 else if (!strcmp (s1, "multipart")
310 && !strcmp (s2, "signed"))
312 s = rfc822parse_query_parameter (field, "protocol", 1);
316 log_debug ("h signed.protocol: %s\n", s);
317 if (!strcmp (s, "application/pgp-signature"))
321 "ignoring nested PGP/MIME signature\n");
323 ctx->pgpmime = PGPMIME_WAIT_SIGNEDDATA;
325 else if (ctx->verbose)
326 log_debug ("# this protocol is not supported\n");
/* Any other part is announced to the caller. GPG_ERR_FALSE and
 * GPG_ERR_TRUE are answers from the callback and are told apart
 * from other error codes below. */
329 else if (ctx->new_part)
331 ctx->err = ctx->new_part (ctx->cookie, s1, s2);
334 else if (gpg_err_code (ctx->err) == GPG_ERR_FALSE)
336 else if (gpg_err_code (ctx->err) == GPG_ERR_TRUE)
/* The caller wants the part; decode_part is refined further down
 * from the Content-Transfer-Encoding. */
338 ctx->want_part = ctx->decode_part = 1;
/* Media type reported as none: announce the part with empty
 * strings. */
346 log_debug ("h media: %*s none\n", ctx->nesting_level*2, "");
349 ctx->err = ctx->new_part (ctx->cookie, "", "");
352 else if (gpg_err_code (ctx->err) == GPG_ERR_FALSE)
354 else if (gpg_err_code (ctx->err) == GPG_ERR_TRUE)
356 ctx->want_part = ctx->decode_part = 1;
362 rfc822parse_release_field (field);
/* Here text/plain is assumed and announced as such. */
367 log_debug ("h media: %*stext plain [assumed]\n",
368 ctx->nesting_level*2, "");
371 ctx->err = ctx->new_part (ctx->cookie, "text", "plain");
374 else if (gpg_err_code (ctx->err) == GPG_ERR_FALSE)
376 else if (gpg_err_code (ctx->err) == GPG_ERR_TRUE)
378 ctx->want_part = ctx->decode_part = 1;
384 /* Figure out the encoding if needed. */
385 if (ctx->decode_part)
390 ctx->decode_part = 0; /* Fallback for unknown encoding. */
391 value = rfc822parse_get_field (msg, "Content-Transfer-Encoding", -1,
/* The encoding name is compared without regard to case. */
395 if (!stricmp (value+valueoff, "quoted-printable"))
396 ctx->decode_part = 1;
397 else if (!stricmp (value+valueoff, "base64"))
399 ctx->decode_part = 2;
401 b64dec_finish (ctx->b64state); /* Reuse state. */
/* Allocate a decoder state; RC takes the error if that fails. */
404 ctx->b64state = xtrymalloc (sizeof *ctx->b64state);
406 rc = gpg_error_from_syserror ();
409 rc = b64dec_start (ctx->b64state, NULL);
411 free (value); /* Right, we need a plain free. */
415 ctx->show.header = 0;
417 ctx->show.n_skip = 1;
419 else if (event == RFC822PARSE_PREAMBLE)
420 ctx->show.as_note = 1;
421 else if (event == RFC822PARSE_LEVEL_DOWN)
424 log_debug ("b down\n");
425 ctx->nesting_level++;
427 else if (event == RFC822PARSE_LEVEL_UP)
430 log_debug ("b up\n");
/* Never let the nesting level drop below zero. */
431 if (ctx->nesting_level)
432 ctx->nesting_level--;
434 log_error ("invalid structure (bad nesting level)\n");
436 else if (event == RFC822PARSE_BOUNDARY || event == RFC822PARSE_LAST_BOUNDARY)
439 ctx->show.boundary = 1;
440 if (event == RFC822PARSE_BOUNDARY)
442 ctx->show.header = 1;
443 ctx->show.n_skip = 1;
445 log_debug ("b part\n");
448 log_debug ("b last\n");
/* A boundary ends the PGP/MIME section or the wanted part which
 * was in progress. */
450 if (ctx->pgpmime == PGPMIME_IN_ENCDATA)
453 log_debug ("c end_encdata\n");
454 ctx->pgpmime = PGPMIME_GOT_ENCDATA;
455 /* FIXME: We should assert (event == LAST_BOUNDARY). */
457 else if (ctx->pgpmime == PGPMIME_IN_SIGNEDDATA
458 && ctx->nesting_level == ctx->hashing_at_level)
461 log_debug ("c end_hash\n");
462 ctx->pgpmime = PGPMIME_WAIT_SIGNATURE;
/* NULL tells the callback that the signed data is complete. */
463 if (ctx->collect_signeddata)
464 ctx->err = ctx->collect_signeddata (ctx->cookie, NULL);
466 else if (ctx->pgpmime == PGPMIME_IN_SIGNATURE)
469 log_debug ("c end_signature\n");
470 ctx->pgpmime = PGPMIME_GOT_SIGNATURE;
471 /* FIXME: We should assert (event == LAST_BOUNDARY). */
473 else if (ctx->want_part)
477 /* FIXME: We may need to flush things. */
/* NULL data marks the end of the part for the caller. */
478 ctx->err = ctx->part_data (ctx->cookie, NULL, 0);
490 /* Create a new mime parser object. COOKIE is a value which will be
491 * used as first argument for all callbacks registered with this
494 mime_parser_new (mime_parser_t *r_parser, void *cookie)
/* Allocate the parser object with xtrycalloc; if that fails the error
 * from gpg_error_from_syserror is returned. */
500 ctx = xtrycalloc (1, sizeof *ctx);
502 return gpg_error_from_syserror ();
/* Keep COOKIE so that it can be passed to the callbacks. */
503 ctx->cookie = cookie;
510 /* Release a mime parser object. The Base64 decoder state of CTX is
 * finished with b64dec_finish and its memory is freed here. */
512 mime_parser_release (mime_parser_t ctx)
519 b64dec_finish (ctx->b64state);
520 xfree (ctx->b64state);
526 /* Set verbose and debug mode. */
528 mime_parser_set_verbose (mime_parser_t ctx, int level)
544 /* Set the callback used to announce a new part. It will be called
545 * with the media type and media subtype of the part. If no
546 * Content-type header was given both values are the empty string.
547 * The callback should return 0 on success or an error code. The
548 * error code GPG_ERR_FALSE indicates that the caller is not
549 * interested in the part and data shall not be returned via a
550 * registered part_data callback. The error code GPG_ERR_TRUE
551 * indicates that the parts shall be returned in decoded format
552 * (i.e. base64 or QP encoding is removed). */
554 mime_parser_set_new_part (mime_parser_t ctx,
555 gpg_error_t (*fnc) (void *cookie,
556 const char *mediatype,
557 const char *mediasubtype))
563 /* Set the callback used to return the data of a part to the caller.
564 * The end of the part is indicated by passing NULL for DATA. FNC is
 * stored in the parser object CTX and is called with the cookie of
 * CTX as its first argument. */
566 mime_parser_set_part_data (mime_parser_t ctx,
567 gpg_error_t (*fnc) (void *cookie,
571 ctx->part_data = fnc;
575 /* Set the callback to collect encrypted data. A NULL passed to the
576 * callback indicates the end of the encrypted data; the callback may
577 * then decrypt the collected data. FNC is stored in the parser
 * object CTX. mime_parser_parse passes each line of the encrypted
 * part to it and passes the line ending (CR,LF) in a separate
 * call. */
579 mime_parser_set_collect_encrypted (mime_parser_t ctx,
580 gpg_error_t (*fnc) (void *cookie,
583 ctx->collect_encrypted = fnc;
587 /* Set the callback to collect signed data. A NULL passed to the
588 * callback indicates the end of the signed data. FNC is stored in
 * the parser object CTX. mime_parser_parse passes the line ending
 * (CR,LF) of a line in a separate call right before the following
 * line is passed. */
590 mime_parser_set_collect_signeddata (mime_parser_t ctx,
591 gpg_error_t (*fnc) (void *cookie,
594 ctx->collect_signeddata = fnc;
598 /* Set the callback to collect the signature. A NULL passed to the
599 * callback indicates the end of the signature; the callback may then
600 * verify the signature. FNC is stored in the parser object CTX.
 * mime_parser_parse passes each line of the signature part to it,
 * followed by a separate call passing CR,LF. */
602 mime_parser_set_collect_signature (mime_parser_t ctx,
603 gpg_error_t (*fnc) (void *cookie,
606 ctx->collect_signature = fnc;
610 /* Return the RFC822 parser context. This is only available inside a
613 mime_parser_rfc822parser (mime_parser_t ctx)
619 /* Helper for mime_parser_parse. LINE holds *LENGTH bytes of part
 * data. Depending on the decode_part member of CTX the line is
 * decoded first: 1 selects quoted-printable (qp_decode, in place,
 * *LENGTH is set to the decoded length) and 2 selects Base64
 * (b64dec_proc using the b64state of CTX). The last visible step
 * returns the result of the part_data callback called with LINE and
 * *LENGTH. */
621 process_part_data (mime_parser_t ctx, char *line, size_t *length)
631 if (ctx->decode_part == 1)
633 *length = qp_decode (line, *length, NULL);
635 else if (ctx->decode_part == 2)
/* For a Base64 part the decoder state must exist by now. */
637 log_assert (ctx->b64state);
638 err = b64dec_proc (ctx->b64state, line, *length, &nbytes);
644 return ctx->part_data (ctx->cookie, line, *length);
648 /* Read and parse a message from FP and call the appropriate
651 mime_parser_parse (mime_parser_t ctx, estream_t fp)
654 rfc822parse_t msg = NULL;
655 unsigned int lineno = 0;
661 msg = rfc822parse_open (parse_message_cb, ctx);
664 err = gpg_error_from_syserror ();
665 log_error ("can't open mail parser: %s", gpg_strerror (err));
669 /* Fixme: We should not use fgets because it can't cope with
670 embedded nul characters. */
/* NOTE(review): in the PGPMIME_IN_SIGNATURE and PGPMIME_GOT_SIGNATURE
 * branches near the end of this loop the guard tests
 * ctx->collect_signeddata but the call goes through
 * ctx->collect_signature. If only the signeddata callback has been
 * registered this would call an unset callback -- presumably the
 * guard should test ctx->collect_signature; confirm. Also the
 * return values of those calls, of the collect_signeddata calls and
 * of the final collect_encrypted call are not checked. */
671 while (es_fgets (ctx->line, sizeof (ctx->line), fp))
674 if (lineno == 1 && !strncmp (line, "From ", 5))
675 continue; /* We better ignore a leading From line. */
/* Check the line terminator: a line without LF was too long for the
 * buffer or is an unterminated last line. */
677 length = strlen (line);
678 if (length && line[length - 1] == '\n')
681 log_error ("mail parser detected too long or"
682 " non terminated last line (lnr=%u)\n", lineno);
683 if (length && line[length - 1] == '\r')
/* Feed the line to the RFC822 parser, which reports events through
 * parse_message_cb. */
687 if (rfc822parse_insert (msg, line, length))
689 err = gpg_error_from_syserror ();
690 log_error ("mail parser failed: %s", gpg_strerror (err));
695 /* Error from a callback detected. */
701 /* Debug output. Note that the boundary is shown before n_skip
703 if (ctx->show.boundary)
706 log_debug ("# Boundary: %s\n", line);
707 ctx->show.boundary = 0;
709 if (ctx->show.n_skip)
711 else if (ctx->show.data)
713 if (ctx->show.as_note)
716 log_debug ("# Note: %s\n", line);
717 ctx->show.as_note = 0;
720 log_debug ("# Data: %s\n", line);
722 else if (ctx->show.header && ctx->verbose)
723 log_debug ("# Header: %s\n", line);
725 if (ctx->pgpmime == PGPMIME_IN_ENCVERSION)
727 trim_trailing_spaces (line);
729 ; /* Skip empty lines. */
/* The version line moves the state on to waiting for the encrypted
 * data part; anything else invalidates the structure. */
730 else if (!strcmp (line, "Version: 1"))
731 ctx->pgpmime = PGPMIME_WAIT_ENCDATA;
734 log_error ("invalid PGP/MIME structure;"
735 " garbage in pgp-encrypted part ('%s')\n", line);
736 ctx->pgpmime = PGPMIME_INVALID;
/* Hand each line of the encrypted part to the collector; the line
 * ending is passed as CR,LF in a separate call. */
739 else if (ctx->pgpmime == PGPMIME_IN_ENCDATA)
741 if (ctx->collect_encrypted)
743 err = ctx->collect_encrypted (ctx->cookie, line);
745 err = ctx->collect_encrypted (ctx->cookie, "\r\n");
/* The encrypted part has ended (state set by the parser callback at
 * the boundary): reset the state and signal the end with NULL. */
750 else if (ctx->pgpmime == PGPMIME_GOT_ENCDATA)
752 ctx->pgpmime = PGPMIME_NONE;
753 if (ctx->collect_encrypted)
754 ctx->collect_encrypted (ctx->cookie, NULL);
756 else if (ctx->pgpmime == PGPMIME_IN_SIGNEDDATA)
758 /* If we are processing signed data, store the signed data.
759 * We need to delay the hashing of the CR/LF because the
760 * last line ending belongs to the next boundary. This is
761 * the reason why we can't use the PGPMIME state as a
764 log_debug ("# hashing %s'%s'\n",
765 ctx->delay_hashing? "CR,LF+":"", line);
766 if (ctx->collect_signeddata)
768 if (ctx->delay_hashing)
769 ctx->collect_signeddata (ctx->cookie, "\r\n");
770 ctx->collect_signeddata (ctx->cookie, line);
772 ctx->delay_hashing = 1;
774 err = process_part_data (ctx, line, &length);
778 else if (ctx->pgpmime == PGPMIME_IN_SIGNATURE)
780 if (ctx->collect_signeddata)
782 ctx->collect_signature (ctx->cookie, line);
783 ctx->collect_signature (ctx->cookie, "\r\n");
786 else if (ctx->pgpmime == PGPMIME_GOT_SIGNATURE)
788 ctx->pgpmime = PGPMIME_NONE;
789 if (ctx->collect_signeddata)
790 ctx->collect_signature (ctx->cookie, NULL);
794 err = process_part_data (ctx, line, &length);
800 rfc822parse_close (msg);
805 rfc822parse_cancel (msg);