From df9432761e526bfe11e0a1e76609afbd4a2f131e Mon Sep 17 00:00:00 2001
From: Wilfried Goesgens <dothebart@citadel.org>
Date: Sun, 28 Nov 2010 19:46:18 +0100
Subject: [PATCH] MIME parser rework: parse the MIME headers of the current
 container so we can estimate the size of the content to skip across; the
 binary/ASCII encoding selects the search algorithm for the next boundary.

 - the old MIME parser skipped empty containers; we now do this by hand
 - for some reason the numbers addressing the MIME parts come out differently; mixed use of the old and new parsers may cause confusion...
---
 libcitadel/lib/mime_parser.c | 236 ++++++++++++++++++++++++++---------
 1 file changed, 180 insertions(+), 56 deletions(-)

diff --git a/libcitadel/lib/mime_parser.c b/libcitadel/lib/mime_parser.c
index 1468375b0..aa029b455 100644
--- a/libcitadel/lib/mime_parser.c
+++ b/libcitadel/lib/mime_parser.c
@@ -346,23 +346,31 @@ typedef struct _interesting_mime_headers {
 	long is_multipart;
 } interesting_mime_headers;
 
-interesting_mime_headers *InitInterestingMimes(void)
+
+static void FlushInterestingMimes(interesting_mime_headers *m)
 {
 	int i;
-	interesting_mime_headers *m;
-	m = (interesting_mime_headers*) malloc( sizeof(interesting_mime_headers));
 	
 	for (i = 0; i < eMax; i++) {
 	     m->b[i].Key[0] = '\0';
 	     m->b[i].len = 0;
 	}
 	m->content_length = -1;
-	return m;
 }
+static interesting_mime_headers *InitInterestingMimes(void)
+{
+	interesting_mime_headers *m;
+	m = (interesting_mime_headers*) malloc( sizeof(interesting_mime_headers));
+
+	FlushInterestingMimes(m);
 
+	return m;
+}
 
 
-long parse_MimeHeaders(interesting_mime_headers *m, char** pcontent_start, char *content_end)
+static long parse_MimeHeaders(interesting_mime_headers *m, 
+			      char** pcontent_start, 
+			      char *content_end)
 {
 	char buf[SIZ];
 	char header[SIZ];
@@ -412,7 +420,7 @@ long parse_MimeHeaders(interesting_mime_headers *m, char** pcontent_start, char
 				m->b[content_disposition_name].len = extract_key(m->b[content_disposition_name].Key, CKEY(m->b[disposition]), HKEY("name"), '=');
 				m->b[filename].len                 = extract_key(m->b[filename].Key,                 CKEY(m->b[disposition]), HKEY("filename"), '=');
 				pch = strchr(m->b[disposition].Key, ';');
-				if (pch != NULL) *ptr = '\0';
+				if (pch != NULL) *pch = '\0';
 				m->b[disposition].len = striplt(m->b[disposition].Key);
 			}
 			else if (!strncasecmp(header, "Content-ID:", 11)) {
@@ -450,49 +458,111 @@ long parse_MimeHeaders(interesting_mime_headers *m, char** pcontent_start, char
 	return 0;
 }
 
+
+static int IsAsciiEncoding(interesting_mime_headers *m)
+{
+	if ((m->b[encoding].len != 0) &&
+	    (strcmp(m->b[encoding].Key, "binary") == 0))
+		return 0;
+	else 
+		return 1;
+}
+
+static char *FindNextContent(char *ptr,
+			     char *content_end,
+			     interesting_mime_headers *SubMimeHeaders,
+			     interesting_mime_headers *m)
+{
+	char *next_boundary;
+	char  tmp;
+
+	if (IsAsciiEncoding(SubMimeHeaders)) {
+		tmp = *content_end;
+		*content_end = '\0';
+
+		/** 
+		 * If the MIME part declares a content length, skip ahead by
+		 * roughly that much before searching for the next boundary.
+		 * We do not trust content_length to be exact and suspect it
+		 * to be short by one character per 80-character line, so we
+		 * back off by that amount plus a small margin and start the
+		 * search a little earlier.
+		 */
+				   
+		if ((SubMimeHeaders->content_length != -1) &&
+		    (SubMimeHeaders->content_length > 10))
+		{
+			char *pptr;
+			long lines;
+					
+			lines = SubMimeHeaders->content_length / 80;
+			pptr = ptr + SubMimeHeaders->content_length - lines - 10;
+			if (pptr < content_end)
+				ptr = pptr;
+		}
+			
+		next_boundary = strstr(ptr, m->b[startary].Key);
+		*content_end = tmp;
+	}
+	else {
+		char *srch;
+		/** 
+		 * If the MIME part declares a content length, skip ahead by
+		 * roughly that much before searching for the next boundary.
+		 * We do not trust content_length to be exact, so we start
+		 * the search a little earlier.
+		 */
+				   
+		if ((SubMimeHeaders->content_length != -1) &&
+		    (SubMimeHeaders->content_length > 10))
+		{
+			char *pptr;
+			pptr = ptr + SubMimeHeaders->content_length - 10;
+			if (pptr < content_end)
+				ptr = pptr;
+		}
+		next_boundary = NULL;
+		for (srch=ptr; srch<content_end; ++srch) {
+			if (!memcmp(srch, 
+				    m->b[startary].Key, 
+				    m->b[startary].len)) 
+			{
+				next_boundary = srch;
+				srch = content_end;
+			}
+		}
+
+	}
+	return next_boundary;
+}
+
 /*
  * Break out the components of a multipart message
  * (This function expects to be fed HEADERS + CONTENT)
  * Note: NULL can be supplied as content_end; in this case, the message is
  * considered to have ended when the parser encounters a 0x00 byte.
  */
-void the_mime_parser(char *partnum,
-		     char *content_start, char *content_end,
-		     MimeParserCallBackType CallBack,
-		     MimeParserCallBackType PreMultiPartCallBack,
-		     MimeParserCallBackType PostMultiPartCallBack,
-		     void *userdata,
-		     int dont_decode)
+static void recurseable_mime_parser(char *partnum,
+				    char *content_start, char *content_end,
+				    MimeParserCallBackType CallBack,
+				    MimeParserCallBackType PreMultiPartCallBack,
+				    MimeParserCallBackType PostMultiPartCallBack,
+				    void *userdata,
+				    int dont_decode, 
+				    interesting_mime_headers *m)
 {
+	interesting_mime_headers *SubMimeHeaders;
+	char     *ptr;
+	char     *part_start;
+	char     *part_end = NULL;
+	char     *evaluate_crlf_ptr = NULL;
+	char     *next_boundary;
+	char      nested_partnum[256];
+	int       crlf_in_use = 0;
+	int       part_seq = 0;
+	CBufStr  *chosen_name;
 
-	char *ptr;
-	char *part_start, *part_end = NULL;
-	char *next_boundary;
-	
-	size_t content_length;
-	int part_seq = 0;
-	size_t length;
-	char nested_partnum[256];
-	int crlf_in_use = 0;
-	char *evaluate_crlf_ptr = NULL;
-	
-	interesting_mime_headers *m;
-	CBufStr *chosen_name;
-
-	content_length = 0;
 
-	m = InitInterestingMimes();
-
-
-	/* If the caller didn't supply an endpointer, generate one by measure */
-	if (content_end == NULL) {
-		content_end = &content_start[strlen(content_start)];
-	}
-
-
-	if (parse_MimeHeaders(m, &content_start, content_end) != 0)
-		goto end_parser;
-	
 	/* If this is a multipart message, then recursively process it */
 	ptr = content_start;
 	part_start = NULL;
@@ -515,16 +585,31 @@ void the_mime_parser(char *partnum,
 
 		/* Figure out where the boundaries are */
 		m->b[startary].len = snprintf(m->b[startary].Key, SIZ, "--%s", m->b[boundary].Key);
-
+		SubMimeHeaders = InitInterestingMimes ();
+		if (*ptr == '\r')
+			ptr ++;
+		if (*ptr == '\n')
+			ptr ++;
+		if (strncmp(ptr, m->b[startary].Key, m->b[startary].len) == 0)
+			ptr += m->b[startary].len;
+		if (*ptr == '\r')
+			ptr ++;
+		if (*ptr == '\n')
+			ptr ++;
 		part_start = NULL;
 		do {
-			char tmp;
 
-			tmp = *content_end;
-			*content_end = '\0';
+			if (parse_MimeHeaders(SubMimeHeaders, &ptr, content_end) != 0)
+				break;
+			part_start = ptr;
 			
-			next_boundary = strstr(ptr, m->b[startary].Key);
-			*content_end = tmp;
+			next_boundary = FindNextContent(ptr,
+							content_end,
+							SubMimeHeaders,
+							m);
+			if ((next_boundary != NULL) && 
+			    (next_boundary - part_start < 3))
+				continue;
 
 			if ( (part_start != NULL) && (next_boundary != NULL) ) {
 				part_end = next_boundary;
@@ -544,14 +629,15 @@ void the_mime_parser(char *partnum,
 						 sizeof nested_partnum,
 						 "%d", ++part_seq);
 				}
-				the_mime_parser(nested_partnum,
-						part_start, 
-						part_end,
-						CallBack,
-						PreMultiPartCallBack,
-						PostMultiPartCallBack,
-						userdata,
-						dont_decode);
+				recurseable_mime_parser(nested_partnum,
+							part_start, 
+							part_end,
+							CallBack,
+							PreMultiPartCallBack,
+							PostMultiPartCallBack,
+							userdata,
+							dont_decode, 
+							SubMimeHeaders);
 			}
 
 			if (next_boundary != NULL) {
@@ -586,8 +672,11 @@ void the_mime_parser(char *partnum,
 				/* Invalid end of multipart.  Bail out! */
 				ptr = content_end;
 			}
+			FlushInterestingMimes(SubMimeHeaders);
 		} while ( (ptr < content_end) && (next_boundary != NULL) );
 
+		free(SubMimeHeaders);
+
 		if (PostMultiPartCallBack != NULL) {
 			PostMultiPartCallBack("", 
 					      "", 
@@ -603,6 +692,7 @@ void the_mime_parser(char *partnum,
 		}
 	} /* If it's not a multipart message, then do something with it */
 	else {
+		size_t length;
 		part_start = ptr;
 		length = content_end - part_start;
 		ptr = part_end = content_end;
@@ -705,11 +795,45 @@ void the_mime_parser(char *partnum,
 
 	}
 
-end_parser:	/* free the buffers!  end the oppression!! */
-	free(m);
 }
 
+/*
+ * Break out the components of a multipart message
+ * (This function expects to be fed HEADERS + CONTENT)
+ * Note: NULL can be supplied as content_end; in this case, the message is
+ * considered to have ended when the parser encounters a 0x00 byte.
+ */
+void the_mime_parser(char *partnum,
+		     char *content_start, char *content_end,
+		     MimeParserCallBackType CallBack,
+		     MimeParserCallBackType PreMultiPartCallBack,
+		     MimeParserCallBackType PostMultiPartCallBack,
+		     void *userdata,
+		     int dont_decode)
+{
+	interesting_mime_headers *m;
 
+	/* If the caller didn't supply an endpointer, generate one by measure */
+	if (content_end == NULL) {
+		content_end = &content_start[strlen(content_start)];
+	}
+
+	m = InitInterestingMimes();
+
+	if (!parse_MimeHeaders(m, &content_start, content_end))
+	{
+
+		recurseable_mime_parser(partnum,
+					content_start, content_end,
+					CallBack,
+					PreMultiPartCallBack,
+					PostMultiPartCallBack,
+					userdata,
+					dont_decode,
+					m);
+	}
+	free(m);
+}
 
 /*
  * Entry point for the MIME parser.
-- 
2.30.2