chiark / gitweb /
tests and bug-fixes for mime.c
author Richard Kettlewell <rjk@greenend.org.uk>
Sat, 1 Dec 2007 15:33:54 +0000 (15:33 +0000)
committer Richard Kettlewell <rjk@greenend.org.uk>
Sat, 1 Dec 2007 15:33:54 +0000 (15:33 +0000)
lib/mime.c
lib/test.c

index b188214..03aa94b 100644 (file)
@@ -25,6 +25,8 @@
 #include <string.h>
 #include <ctype.h>
 
+#include <stdio.h>
+
 #include "mem.h"
 #include "mime.h"
 #include "vector.h"
@@ -213,7 +215,7 @@ static int isboundary(const char *ptr, const char *boundary, size_t bl) {
          && (iscrlf(ptr + bl + 2)
              || (ptr[bl + 2] == '-'
                  && ptr[bl + 3] == '-'
-                 && iscrlf(ptr + bl + 4))));
+                 && (iscrlf(ptr + bl + 4) || *(ptr + bl + 4) == 0))));
 }
 
 static int isfinal(const char *ptr, const char *boundary, size_t bl) {
@@ -222,7 +224,7 @@ static int isfinal(const char *ptr, const char *boundary, size_t bl) {
          && !strncmp(ptr + 2, boundary, bl)
          && ptr[bl + 2] == '-'
          && ptr[bl + 3] == '-'
-         && iscrlf(ptr + bl + 4));
+         && (iscrlf(ptr + bl + 4) || *(ptr + bl + 4) == 0));
 }
 
 int mime_multipart(const char *s,
@@ -233,12 +235,16 @@ int mime_multipart(const char *s,
   const char *start, *e;
   int ret;
 
-  if(!isboundary(s, boundary, bl)) return -1;
+  /* We must start with a boundary string */
+  if(!isboundary(s, boundary, bl))
+    return -1;
+  /* Keep going until we hit a final boundary */
   while(!isfinal(s, boundary, bl)) {
     s = strstr(s, "\r\n") + 2;
     start = s;
     while(!isboundary(s, boundary, bl)) {
-      if(!(e = strstr(s, "\r\n"))) return -1;
+      if(!(e = strstr(s, "\r\n")))
+       return -1;
       s = e + 2;
     }
     if((ret = callback(xstrndup(start,
index 02deb02..3df6a9f 100644 (file)
@@ -137,6 +137,16 @@ static const char *format_utf32(const uint32_t *s) {
   ++tests;                                                              \
  } while(0)
 
+#define check_integer(GOT, WANT) do { /* test check: GOT must equal WANT */ \
+  const intmax_t got = GOT, want = WANT; /* each evaluated once, as intmax_t */ \
+  if(got != want) {                                             \
+    fprintf(stderr, "%s:%d: %s returned: %jd  expected: %jd\n", \
+            __FILE__, __LINE__, #GOT, got, want); /* #GOT: text of GOT */ \
+    count_error();                                              \
+  }                                                             \
+  ++tests; /* incremented whether or not the values matched */  \
+} while(0)
+
 static uint32_t *ucs4parse(const char *s) {
   struct dynstr_ucs4 d;
   char *e;
@@ -166,7 +176,7 @@ static void test_utf8(void) {
   insist(!utf32_cmp(w, ucs));                  \
   u8 = utf32_to_utf8(ucs, utf32_len(ucs), 0);   \
   insist(u8 != 0);                             \
-  insist(!strcmp(u8, CHARS));                  \
+  check_string(u8, CHARS);                     \
 } while(0)
 
   fprintf(stderr, "test_utf8\n");
@@ -267,38 +277,163 @@ static void test_utf8(void) {
   insist(!validutf8("\xF8"));
 }
 
+static int test_multipart_callback(const char *s, void *u) { /* callback handed to mime_multipart() by test_mime */
+  struct vector *parts = u;          /* u is the vector the caller passed alongside this callback */
+
+  vector_append(parts, (char *)s);   /* keep the part for later checks; the cast drops const */
+  return 0;                          /* always returns 0 */
+}
+
 static void test_mime(void) {
   char *t, *n, *v;
+  struct vector parts[1];
 
   fprintf(stderr, "test_mime\n");
 
   t = n = v = 0;
   insist(!mime_content_type("text/plain", &t, &n, &v));
-  insist(!strcmp(t, "text/plain"));
+  check_string(t, "text/plain");
   insist(n == 0);
   insist(v == 0);
 
+  insist(mime_content_type("TEXT ((broken) comment", &t, &n, &v) < 0);
+  insist(mime_content_type("TEXT ((broken) comment\\", &t, &n, &v) < 0);
+  
   t = n = v = 0;
-  insist(!mime_content_type("TEXT ((nested) comment) /plain", &t, &n, &v));
-  insist(!strcmp(t, "text/plain"));
+  insist(!mime_content_type("TEXT ((nested)\\ comment) /plain", &t, &n, &v));
+  check_string(t, "text/plain");
   insist(n == 0);
   insist(v == 0);
 
   t = n = v = 0;
-  insist(!mime_content_type(" text/plain ; Charset=utf-8", &t, &n, &v));
-  insist(!strcmp(t, "text/plain"));
-  insist(!strcmp(n, "charset"));
-  insist(!strcmp(v, "utf-8"));
+  insist(!mime_content_type(" text/plain ; Charset=\"utf-\\8\"", &t, &n, &v));
+  check_string(t, "text/plain");
+  check_string(n, "charset");
+  check_string(v, "utf-8");
 
   t = n = v = 0;
   insist(!mime_content_type("text/plain;charset = ISO-8859-1 ", &t, &n, &v));
-  insist(!strcmp(t, "text/plain"));
-  insist(!strcmp(n, "charset"));
-  insist(!strcmp(v, "ISO-8859-1"));
+  check_string(t, "text/plain");
+  check_string(n, "charset");
+  check_string(v, "ISO-8859-1");
+
+  t = n = v = 0;
+  insist(!mime_rfc2388_content_disposition("form-data; name=\"field1\"", &t, &n, &v));
+  check_string(t, "form-data");
+  check_string(n, "name");
+  check_string(v, "field1");
+
+  insist(!mime_rfc2388_content_disposition("inline", &t, &n, &v));
+  check_string(t, "inline");
+  insist(n == 0);
+  insist(v == 0);
 
+  /* Current versions of the code only understand a single arg to these
+   * headers.  This is a bug at the level they work at but suffices for
+   * DisOrder's current purposes. */
+
+  insist(!mime_rfc2388_content_disposition(
+              "attachment; filename=genome.jpeg;\n"
+              "modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\"",
+         &t, &n, &v));
+  check_string(t, "attachment");
+  check_string(n, "filename");
+  check_string(v, "genome.jpeg");
+
+  vector_init(parts);
+  insist(mime_multipart("--outer\r\n"
+                        "Content-Type: text/plain\r\n"
+                        "Content-Disposition: inline\r\n"
+                        "Content-Description: text-part-1\r\n"
+                        "\r\n"
+                        "Some text goes here\r\n"
+                        "\r\n"
+                        "--outer\r\n"
+                        "Content-Type: multipart/mixed; boundary=inner\r\n"
+                        "Content-Disposition: attachment\r\n"
+                        "Content-Description: multipart-2\r\n"
+                        "\r\n"
+                        "--inner\r\n"
+                        "Content-Type: text/plain\r\n"
+                        "Content-Disposition: inline\r\n"
+                        "Content-Description: text-part-2\r\n"
+                        "\r\n"
+                        "Some more text here.\r\n"
+                        "\r\n"
+                        "--inner\r\n"
+                        "Content-Type: image/jpeg\r\n"
+                        "Content-Disposition: attachment\r\n"
+                        "Content-Description: jpeg-1\r\n"
+                        "\r\n"
+                        "<jpeg data>\r\n"
+                        "--inner--\r\n"
+                        "--outer--\r\n",
+                        test_multipart_callback,
+                        "outer",
+                        parts) == 0);
+  check_integer(parts->nvec, 2);
+  check_string(parts->vec[0],
+               "Content-Type: text/plain\r\n"
+               "Content-Disposition: inline\r\n"
+               "Content-Description: text-part-1\r\n"
+               "\r\n"
+               "Some text goes here\r\n");
+  check_string(parts->vec[1],
+               "Content-Type: multipart/mixed; boundary=inner\r\n"
+               "Content-Disposition: attachment\r\n"
+               "Content-Description: multipart-2\r\n"
+               "\r\n"
+               "--inner\r\n"
+               "Content-Type: text/plain\r\n"
+               "Content-Disposition: inline\r\n"
+               "Content-Description: text-part-2\r\n"
+               "\r\n"
+               "Some more text here.\r\n"
+               "\r\n"
+               "--inner\r\n"
+               "Content-Type: image/jpeg\r\n"
+               "Content-Disposition: attachment\r\n"
+               "Content-Description: jpeg-1\r\n"
+               "\r\n"
+               "<jpeg data>\r\n"
+               "--inner--");
+  /* No trailing CRLF is _correct_ - see RFC2046 5.1.1 note regarding CRLF
+   * preceding the boundary delimiter line.  An implication of this is that we
+   * must cope with partial lines at the end of the input when recursively
+   * decomposing a multipart message. */
+  vector_init(parts);
+  insist(mime_multipart("--inner\r\n"
+                        "Content-Type: text/plain\r\n"
+                        "Content-Disposition: inline\r\n"
+                        "Content-Description: text-part-2\r\n"
+                        "\r\n"
+                        "Some more text here.\r\n"
+                        "\r\n"
+                        "--inner\r\n"
+                        "Content-Type: image/jpeg\r\n"
+                        "Content-Disposition: attachment\r\n"
+                        "Content-Description: jpeg-1\r\n"
+                        "\r\n"
+                        "<jpeg data>\r\n"
+                        "--inner--",
+                        test_multipart_callback,
+                        "inner",
+                        parts) == 0);
+  check_integer(parts->nvec, 2);
+  check_string(parts->vec[0],
+               "Content-Type: text/plain\r\n"
+               "Content-Disposition: inline\r\n"
+               "Content-Description: text-part-2\r\n"
+               "\r\n"
+               "Some more text here.\r\n");
+  check_string(parts->vec[1],
+               "Content-Type: image/jpeg\r\n"
+               "Content-Disposition: attachment\r\n"
+               "Content-Description: jpeg-1\r\n"
+               "\r\n"
+               "<jpeg data>");
   /* XXX mime_parse */
-  /* XXX mime_multipart */
-  /* XXX mime_rfc2388_content_disposition */
 
   check_string(mime_qp(""), "");
   check_string(mime_qp("foobar"), "foobar");
@@ -410,8 +545,8 @@ static void test_casefold(void) {
       l = 0x3BC;                       /* GREEK SMALL LETTER MU */
       break;
     case 0xDF:                         /* LATIN SMALL LETTER SHARP S */
-      insist(!strcmp(canon_folded, "ss"));
-      insist(!strcmp(compat_folded, "ss"));
+      check_string(canon_folded, "ss");
+      check_string(compat_folded, "ss");
       l = 0;
       break;
     }