tests and bug-fixes for mime.c

author    Richard Kettlewell <rjk@greenend.org.uk>
          Sat, 1 Dec 2007 15:33:54 +0000 (15:33 +0000)
committer Richard Kettlewell <rjk@greenend.org.uk>
          Sat, 1 Dec 2007 15:33:54 +0000 (15:33 +0000)
diff --git a/lib/mime.c b/lib/mime.c

index b1882140915927f08d51b2203ccae5710e263b66..03aa94b9a03b3b3b2040aca4f7a1d78ca7999286 100644 (file)
--- a/lib/mime.c
+++ b/lib/mime.c
@@ -25,6 +25,8 @@
  #include <string.h>
  #include <ctype.h>
  
+#include <stdio.h>
+
  #include "mem.h"
  #include "mime.h"
  #include "vector.h"
@@ -213,7 +215,7 @@ static int isboundary(const char *ptr, const char *boundary, size_t bl) {
           && (iscrlf(ptr + bl + 2)
               || (ptr[bl + 2] == '-'
                   && ptr[bl + 3] == '-'
-                 && iscrlf(ptr + bl + 4))));
+                 && (iscrlf(ptr + bl + 4) || *(ptr + bl + 4) == 0))));
  }
  
  static int isfinal(const char *ptr, const char *boundary, size_t bl) {
@@ -222,7 +224,7 @@ static int isfinal(const char *ptr, const char *boundary, size_t bl) {
           && !strncmp(ptr + 2, boundary, bl)
           && ptr[bl + 2] == '-'
           && ptr[bl + 3] == '-'
-         && iscrlf(ptr + bl + 4));
+         && (iscrlf(ptr + bl + 4) || *(ptr + bl + 4) == 0));
  }
  
  int mime_multipart(const char *s,
@@ -233,12 +235,16 @@ int mime_multipart(const char *s,
    const char *start, *e;
    int ret;
  
-  if(!isboundary(s, boundary, bl)) return -1;
+  /* We must start with a boundary string */
+  if(!isboundary(s, boundary, bl))
+    return -1;
+  /* Keep going until we hit a final boundary */
    while(!isfinal(s, boundary, bl)) {
      s = strstr(s, "\r\n") + 2;
      start = s;
      while(!isboundary(s, boundary, bl)) {
-      if(!(e = strstr(s, "\r\n"))) return -1;
+      if(!(e = strstr(s, "\r\n")))
+       return -1;
        s = e + 2;
      }
      if((ret = callback(xstrndup(start,
diff --git a/lib/test.c b/lib/test.c

index 02deb029a17a764820a95cdf01299c8100723b19..3df6a9f76a87e6c6cbf9cfae48160db657f11128 100644 (file)
--- a/lib/test.c
+++ b/lib/test.c
@@ -137,6 +137,16 @@ static const char *format_utf32(const uint32_t *s) {
    ++tests;                                                              \
   } while(0)
  
+#define check_integer(GOT, WANT) do {                           \
+  const intmax_t got = GOT, want = WANT;                        \
+  if(got != want) {                                             \
+    fprintf(stderr, "%s:%d: %s returned: %jd  expected: %jd\n", \
+            __FILE__, __LINE__, #GOT, got, want);               \
+    count_error();                                              \
+  }                                                             \
+  ++tests;                                                      \
+} while(0)
+
  static uint32_t *ucs4parse(const char *s) {
    struct dynstr_ucs4 d;
    char *e;
@@ -166,7 +176,7 @@ static void test_utf8(void) {
    insist(!utf32_cmp(w, ucs));                  \
    u8 = utf32_to_utf8(ucs, utf32_len(ucs), 0);   \
    insist(u8 != 0);                             \
-  insist(!strcmp(u8, CHARS));                  \
+  check_string(u8, CHARS);                     \
  } while(0)
  
    fprintf(stderr, "test_utf8\n");
@@ -267,38 +277,163 @@ static void test_utf8(void) {
    insist(!validutf8("\xF8"));
  }
  
+static int test_multipart_callback(const char *s, void *u) {
+  struct vector *parts = u;
+
+  vector_append(parts, (char *)s);
+  return 0;
+}
+
  static void test_mime(void) {
    char *t, *n, *v;
+  struct vector parts[1];
  
    fprintf(stderr, "test_mime\n");
  
    t = n = v = 0;
    insist(!mime_content_type("text/plain", &t, &n, &v));
-  insist(!strcmp(t, "text/plain"));
+  check_string(t, "text/plain");
    insist(n == 0);
    insist(v == 0);
  
+  insist(mime_content_type("TEXT ((broken) comment", &t, &n, &v) < 0);
+  insist(mime_content_type("TEXT ((broken) comment\\", &t, &n, &v) < 0);
+  
    t = n = v = 0;
-  insist(!mime_content_type("TEXT ((nested) comment) /plain", &t, &n, &v));
-  insist(!strcmp(t, "text/plain"));
+  insist(!mime_content_type("TEXT ((nested)\\ comment) /plain", &t, &n, &v));
+  check_string(t, "text/plain");
    insist(n == 0);
    insist(v == 0);
  
    t = n = v = 0;
-  insist(!mime_content_type(" text/plain ; Charset=utf-8", &t, &n, &v));
-  insist(!strcmp(t, "text/plain"));
-  insist(!strcmp(n, "charset"));
-  insist(!strcmp(v, "utf-8"));
+  insist(!mime_content_type(" text/plain ; Charset=\"utf-\\8\"", &t, &n, &v));
+  check_string(t, "text/plain");
+  check_string(n, "charset");
+  check_string(v, "utf-8");
  
    t = n = v = 0;
    insist(!mime_content_type("text/plain;charset = ISO-8859-1 ", &t, &n, &v));
-  insist(!strcmp(t, "text/plain"));
-  insist(!strcmp(n, "charset"));
-  insist(!strcmp(v, "ISO-8859-1"));
+  check_string(t, "text/plain");
+  check_string(n, "charset");
+  check_string(v, "ISO-8859-1");
+
+  t = n = v = 0;
+  insist(!mime_rfc2388_content_disposition("form-data; name=\"field1\"", &t, &n, &v));
+  check_string(t, "form-data");
+  check_string(n, "name");
+  check_string(v, "field1");
+
+  insist(!mime_rfc2388_content_disposition("inline", &t, &n, &v));
+  check_string(t, "inline");
+  insist(n == 0);
+  insist(v == 0);
  
+  /* Current versions of the code only understand a single arg to these
+   * headers.  This is a bug at the level they work at but suffices for
+   * DisOrder's current purposes. */
+
+  insist(!mime_rfc2388_content_disposition(
+              "attachment; filename=genome.jpeg;\n"
+              "modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\"",
+         &t, &n, &v));
+  check_string(t, "attachment");
+  check_string(n, "filename");
+  check_string(v, "genome.jpeg");
+
+  vector_init(parts);
+  insist(mime_multipart("--outer\r\n"
+                        "Content-Type: text/plain\r\n"
+                        "Content-Disposition: inline\r\n"
+                        "Content-Description: text-part-1\r\n"
+                        "\r\n"
+                        "Some text goes here\r\n"
+                        "\r\n"
+                        "--outer\r\n"
+                        "Content-Type: multipart/mixed; boundary=inner\r\n"
+                        "Content-Disposition: attachment\r\n"
+                        "Content-Description: multipart-2\r\n"
+                        "\r\n"
+                        "--inner\r\n"
+                        "Content-Type: text/plain\r\n"
+                        "Content-Disposition: inline\r\n"
+                        "Content-Description: text-part-2\r\n"
+                        "\r\n"
+                        "Some more text here.\r\n"
+                        "\r\n"
+                        "--inner\r\n"
+                        "Content-Type: image/jpeg\r\n"
+                        "Content-Disposition: attachment\r\n"
+                        "Content-Description: jpeg-1\r\n"
+                        "\r\n"
+                        "<jpeg data>\r\n"
+                        "--inner--\r\n"
+                        "--outer--\r\n",
+                        test_multipart_callback,
+                        "outer",
+                        parts) == 0);
+  check_integer(parts->nvec, 2);
+  check_string(parts->vec[0],
+               "Content-Type: text/plain\r\n"
+               "Content-Disposition: inline\r\n"
+               "Content-Description: text-part-1\r\n"
+               "\r\n"
+               "Some text goes here\r\n");
+  check_string(parts->vec[1],
+               "Content-Type: multipart/mixed; boundary=inner\r\n"
+               "Content-Disposition: attachment\r\n"
+               "Content-Description: multipart-2\r\n"
+               "\r\n"
+               "--inner\r\n"
+               "Content-Type: text/plain\r\n"
+               "Content-Disposition: inline\r\n"
+               "Content-Description: text-part-2\r\n"
+               "\r\n"
+               "Some more text here.\r\n"
+               "\r\n"
+               "--inner\r\n"
+               "Content-Type: image/jpeg\r\n"
+               "Content-Disposition: attachment\r\n"
+               "Content-Description: jpeg-1\r\n"
+               "\r\n"
+               "<jpeg data>\r\n"
+               "--inner--");
+  /* No trailing CRLF is _correct_ - see RFC2046 5.1.1 note regarding CRLF
+   * preceding the boundary delimiter line.  An implication of this is that we
+   * must cope with partial lines at the end of the input when recursively
+   * decomposing a multipart message. */
+  vector_init(parts);
+  insist(mime_multipart("--inner\r\n"
+                        "Content-Type: text/plain\r\n"
+                        "Content-Disposition: inline\r\n"
+                        "Content-Description: text-part-2\r\n"
+                        "\r\n"
+                        "Some more text here.\r\n"
+                        "\r\n"
+                        "--inner\r\n"
+                        "Content-Type: image/jpeg\r\n"
+                        "Content-Disposition: attachment\r\n"
+                        "Content-Description: jpeg-1\r\n"
+                        "\r\n"
+                        "<jpeg data>\r\n"
+                        "--inner--",
+                        test_multipart_callback,
+                        "inner",
+                        parts) == 0);
+  check_integer(parts->nvec, 2);
+  check_string(parts->vec[0],
+               "Content-Type: text/plain\r\n"
+               "Content-Disposition: inline\r\n"
+               "Content-Description: text-part-2\r\n"
+               "\r\n"
+               "Some more text here.\r\n");
+  check_string(parts->vec[1],
+               "Content-Type: image/jpeg\r\n"
+               "Content-Disposition: attachment\r\n"
+               "Content-Description: jpeg-1\r\n"
+               "\r\n"
+               "<jpeg data>");
+ 
    /* XXX mime_parse */
-  /* XXX mime_multipart */
-  /* XXX mime_rfc2388_content_disposition */
  
    check_string(mime_qp(""), "");
    check_string(mime_qp("foobar"), "foobar");
@@ -410,8 +545,8 @@ static void test_casefold(void) {
        l = 0x3BC;                       /* GREEK SMALL LETTER MU */
        break;
      case 0xDF:                         /* LATIN SMALL LETTER SHARP S */
-      insist(!strcmp(canon_folded, "ss"));
-      insist(!strcmp(compat_folded, "ss"));
+      check_string(canon_folded, "ss");
+      check_string(compat_folded, "ss");
        l = 0;
        break;
      }
author	Richard Kettlewell <rjk@greenend.org.uk>
	Sat, 1 Dec 2007 15:33:54 +0000 (15:33 +0000)
committer	Richard Kettlewell <rjk@greenend.org.uk>
	Sat, 1 Dec 2007 15:33:54 +0000 (15:33 +0000)
lib/mime.c		patch \| blob \| blame \| history
lib/test.c		patch \| blob \| blame \| history