From: Wilfried Goesgens <dothebart@citadel.org>
Date: Fri, 26 Nov 2010 00:13:32 +0000 (+0100)
Subject: First Step: chop mimeparser into pieces.
X-Git-Tag: v8.01~562
X-Git-Url: https://code.citadel.org/?p=citadel.git;a=commitdiff_plain;h=bab826bb3fa8649d196208fffb4af8fba1a57544

First Step: chop mimeparser into pieces.

 - move the cluster of buffers into a struct compatible to the CKEY macro
 - add the interesting other evaluated flags & numbers to that struct
 - move finding & parsing of the 'interesting headers' into its own function.

by all this we hope to get informations about the mimepart before we have to search for its end
---

diff --git a/libcitadel/lib/libcitadel.h b/libcitadel/lib/libcitadel.h
index 79a16ed19..989945a54 100644
--- a/libcitadel/lib/libcitadel.h
+++ b/libcitadel/lib/libcitadel.h
@@ -131,7 +131,7 @@ void ShutDownLibCitadel(void);
  * MIME parser declarations
  */
 
-void extract_key(char *target, char *source, long sourcelen, char *key, long keylen, char KeyEnd);
+long extract_key(char *target, char *source, long sourcelen, char *key, long keylen, char KeyEnd);
 
 
 typedef void (*MimeParserCallBackType)(char *cbname,
diff --git a/libcitadel/lib/mime_parser.c b/libcitadel/lib/mime_parser.c
index 50b514971..b7249f79e 100644
--- a/libcitadel/lib/mime_parser.c
+++ b/libcitadel/lib/mime_parser.c
@@ -52,7 +52,7 @@ const unsigned char FromHexTable [256] = {
 };
 
 
-void extract_key(char *target, char *source, long sourcelen, char *key, long keylen, char KeyEnd)
+long extract_key(char *target, char *source, long sourcelen, char *key, long keylen, char KeyEnd)
 {
 	char *sptr, *ptr = NULL;
 	int double_quotes = 0;
@@ -83,7 +83,7 @@ void extract_key(char *target, char *source, long sourcelen, char *key, long key
 	}
 	if (ptr == NULL) {
 		*target = '\0';
-		return;
+		return 0;
 	}
 	strcpy(target, (ptr + RealKeyLen));
 
@@ -106,6 +106,7 @@ void extract_key(char *target, char *source, long sourcelen, char *key, long key
 		}
 	}
 	*ptr = '\0';
+	return ptr - target;
 }
 
 
@@ -319,104 +320,65 @@ int mime_decode_now (char *part_start,
 	return -1;
 }
 
-
-/*
- * Break out the components of a multipart message
- * (This function expects to be fed HEADERS + CONTENT)
- * Note: NULL can be supplied as content_end; in this case, the message is
- * considered to have ended when the parser encounters a 0x00 byte.
- */
-void the_mime_parser(char *partnum,
-		     char *content_start, char *content_end,
-		     MimeParserCallBackType CallBack,
-		     MimeParserCallBackType PreMultiPartCallBack,
-		     MimeParserCallBackType PostMultiPartCallBack,
-		     void *userdata,
-		     int dont_decode)
+typedef enum _eIntMimeHdrs {
+	boundary,
+	startary,
+	endary,
+	content_type,
+	charset,
+	encoding,
+	content_type_name,
+	content_disposition_name,
+	filename,
+	disposition,
+	id,
+	eMax /* don't move ! */
+} eIntMimeHdrs;
+
+typedef struct _CBufStr {
+	char Key[SIZ];
+	long len;
+}CBufStr;
+
+typedef struct _interesting_mime_headers {
+	CBufStr b[eMax];
+	long content_length;
+	long is_multipart;
+} interesting_mime_headers;
+
+interesting_mime_headers *InitInterestingMimes(void)
 {
-
-	char *ptr;
-	char *part_start, *part_end = NULL;
-	char buf[SIZ];
-	char *header;
-	char *boundary;
-	char *startary;
-	size_t startary_len = 0;
-	char *endary;
-	char *next_boundary;
-	char *content_type;
-	char *charset;
-	size_t content_type_len;
-	size_t content_length;
-	char *encoding;
-	char *disposition;
-	size_t disposition_len;
-	char *id;
-	char *name = NULL;
-	char *content_type_name;
-	char *content_disposition_name;
-	char *filename;
-	int is_multipart;
-	int part_seq = 0;
 	int i;
-	size_t length;
-	char nested_partnum[256];
-	int crlf_in_use = 0;
-	char *evaluate_crlf_ptr = NULL;
-	int buflen = 0;
-	int headerlen = 0;
-
-	ptr = content_start;
-	content_length = 0;
-
-	boundary = malloc(SIZ * 12);
-	*boundary = '\0';
-
-	startary = boundary + SIZ * 1;
-	*startary = '\0';
-
-	endary = boundary + SIZ * 2;
-	*endary = '\0';
-
-	header = boundary + SIZ * 3;
-	*header = '\0';
-
-	content_type = boundary + SIZ * 4;
-	*content_type = '\0';
-
-	charset = boundary + SIZ * 5;
-	*charset = '\0';
-
-	encoding = boundary + SIZ * 6;
-	*encoding = '\0';
-
-	content_type_name = boundary + SIZ * 7;
-	*content_type_name = '\0';
-
-	content_disposition_name = boundary + SIZ * 8;
-	*content_disposition_name = '\0';
-
-	filename = boundary + SIZ * 9;
-	*filename = '\0';
+	interesting_mime_headers *m;
+	m = (interesting_mime_headers*) malloc( sizeof(interesting_mime_headers));
+	
+	for (i = 0; i < eMax; i++) {
+	     m->b[i].Key[0] = '\0';
+	     m->b[i].len = 0;
+	}
+	m->content_length = -1;
+	return m;
+}
 
-	disposition = boundary + SIZ * 10;
-	*disposition = '\0';
 
-	id = boundary + SIZ * 11;
-	*id = '\0';
 
-	/* If the caller didn't supply an endpointer, generate one by measure */
-	if (content_end == NULL) {
-		content_end = &content_start[strlen(content_start)];
-	}
+long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *content_end)
+{
+	char buf[SIZ];
+	char header[SIZ];
+	long headerlen;
+	char *ptr;
+	int buflen;
+	int i;
 
 	/* Learn interesting things from the headers */
-	strcpy(header, "");
+	ptr = content_start;
+	*header = '\0';
 	headerlen = 0;
 	do {
 		ptr = memreadlinelen(ptr, buf, SIZ, &buflen);
 		if (ptr >= content_end) {
-			goto end_parser;
+			return -1;
 		}
 
 		for (i = 0; i < buflen; ++i) {
@@ -427,41 +389,43 @@ void the_mime_parser(char *partnum,
 
 		if (!isspace(buf[0])) {
 			if (!strncasecmp(header, "Content-type:", 13)) {
-				memcpy (content_type, &header[13], headerlen - 12);
-				content_type_len = striplt (content_type);
+				memcpy (m->b[content_type].Key, &header[13], headerlen - 12);
+				m->b[content_type].len = striplt (m->b[content_type].Key);
 
-				extract_key(content_type_name, content_type, content_type_len, HKEY("name"), '=');
-				extract_key(charset,           content_type, content_type_len, HKEY("charset"), '=');
-				extract_key(boundary,          header,       headerlen,        HKEY("boundary"), '=');
+				m->b[content_type_name].len = extract_key(m->b[content_type_name].Key, CKEY(m->b[content_type]), HKEY("name"), '=');
+				m->b[charset].len           = extract_key(m->b[charset].Key,           CKEY(m->b[content_type]), HKEY("charset"), '=');
+				m->b[boundary].len          = extract_key(m->b[boundary].Key,          header,       headerlen,  HKEY("boundary"), '=');
 
 				/* Deal with weird headers */
-				if (strchr(content_type, ' '))
-					*(strchr(content_type, ' ')) = '\0';
-				if (strchr(content_type, ';'))
-					*(strchr(content_type, ';')) = '\0';
+				if (strchr(m->b[content_type].Key, ' '))
+					*(strchr(m->b[content_type].Key, ' ')) = '\0';
+				if (strchr(m->b[content_type].Key, ';'))
+					*(strchr(m->b[content_type].Key, ';')) = '\0';
 			}
 			else if (!strncasecmp(header, "Content-Disposition:", 20)) {
-				memcpy (disposition, &header[20], headerlen - 19);
-				disposition_len = striplt(disposition);
-				extract_key(content_disposition_name, disposition, disposition_len,  HKEY("name"), '=');
-				extract_key(filename,                 disposition, disposition_len, HKEY("filename"), '=');
+				memcpy (m->b[disposition].Key, &header[20], headerlen - 19);
+				m->b[disposition].len = striplt(m->b[disposition].Key);
+
+				m->b[content_disposition_name].len = extract_key(m->b[content_disposition_name].Key, CKEY(m->b[disposition]), HKEY("name"), '=');
+				m->b[filename].len                 = extract_key(m->b[filename].Key,                 CKEY(m->b[disposition]), HKEY("filename"), '=');
 			}
 			else if (!strncasecmp(header, "Content-ID:", 11)) {
-				strcpy(id, &header[11]);
-				striplt(id);
-				stripallbut(id, '<', '>');
+				memcpy(m->b[id].Key, &header[11], headerlen);
+				striplt(m->b[id].Key);
+				m->b[id].len = stripallbut(m->b[id].Key, '<', '>');
 			}
 			else if (!strncasecmp(header, "Content-length: ", 15)) {
-				char clbuf[10];
-				safestrncpy(clbuf, &header[15], sizeof clbuf);
-				striplt(clbuf);
-				content_length = (size_t) atol(clbuf);
+				char *clbuf;
+				clbuf = &header[15];
+				while (isspace(clbuf))
+					clbuf ++;
+				m->content_length = (size_t) atol(clbuf);
 			}
 			else if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
-				strcpy(encoding, &header[26]);
-				striplt(encoding);
+				memcpy(m->b[encoding].Key, &header[26], headerlen - 26);
+				m->b[encoding].len = striplt(m->b[encoding].Key);
 			}
-			strcpy(header, "");
+			*header = '\0';
 			headerlen = 0;
 		}
 		if ((headerlen + buflen + 2) < SIZ) {
@@ -471,33 +435,84 @@ void the_mime_parser(char *partnum,
 		}
 	} while ((!IsEmptyStr(buf)) && (*ptr != 0));
 
-	if (strchr(disposition, ';'))
-		*(strchr(disposition, ';')) = '\0';
-	striplt(disposition);
-	if (strchr(content_type, ';'))
-		*(strchr(content_type, ';')) = '\0';
-	striplt(content_type);
-
-	if (!IsEmptyStr(boundary)) {
-		is_multipart = 1;
-	} else {
-		is_multipart = 0;
+	ptr = strchr(m->b[disposition].Key, ';');
+	if (ptr != NULL) *ptr = '\0';
+	m->b[disposition].len = striplt(m->b[disposition].Key);
+
+	ptr = strchr(m->b[content_type].Key, ';');
+	if (ptr != NULL) *ptr = '\0';
+	m->b[content_type].len = striplt(m->b[content_type].Key);
+
+	m->is_multipart = m->b[boundary].len != 0;
+
+	return 0;
+}
+
+/*
+ * Break out the components of a multipart message
+ * (This function expects to be fed HEADERS + CONTENT)
+ * Note: NULL can be supplied as content_end; in this case, the message is
+ * considered to have ended when the parser encounters a 0x00 byte.
+ */
+void the_mime_parser(char *partnum,
+		     char *content_start, char *content_end,
+		     MimeParserCallBackType CallBack,
+		     MimeParserCallBackType PreMultiPartCallBack,
+		     MimeParserCallBackType PostMultiPartCallBack,
+		     void *userdata,
+		     int dont_decode)
+{
+
+	char *ptr;
+	char *part_start, *part_end = NULL;
+	char *next_boundary;
+	
+	size_t content_length;
+	int part_seq = 0;
+	size_t length;
+	char nested_partnum[256];
+	int crlf_in_use = 0;
+	char *evaluate_crlf_ptr = NULL;
+	
+	interesting_mime_headers *m;
+	CBufStr *chosen_name;
+
+	ptr = content_start;
+	content_length = 0;
+
+	m = InitInterestingMimes();
+
+
+	/* If the caller didn't supply an endpointer, generate one by measure */
+	if (content_end == NULL) {
+		content_end = &content_start[strlen(content_start)];
 	}
 
+
+	if (parse_MimeHeaders(m, content_start, content_end) != 0)
+		goto end_parser;
+	
 	/* If this is a multipart message, then recursively process it */
 	part_start = NULL;
-	if (is_multipart) {
+	if (m->is_multipart) {
 
 		/* Tell the client about this message's multipartedness */
 		if (PreMultiPartCallBack != NULL) {
-			PreMultiPartCallBack("", "", partnum, "",
-				NULL, content_type, charset,
-				0, encoding, id, userdata);
+			PreMultiPartCallBack("", 
+					     "", 
+					     partnum, 
+					     "",
+					     NULL, 
+					     m->b[content_type].Key, 
+					     m->b[charset].Key,
+					     0, 
+					     m->b[encoding].Key, 
+					     m->b[id].Key, 
+					     userdata);
 		}
 
 		/* Figure out where the boundaries are */
-		startary_len = snprintf(startary, SIZ, "--%s", boundary);
-		snprintf(endary, SIZ, "--%s--", boundary);
+		m->b[startary].len = snprintf(m->b[startary].Key, SIZ, "--%s", m->b[boundary].Key);
 
 		part_start = NULL;
 		do {
@@ -506,7 +521,7 @@ void the_mime_parser(char *partnum,
 			tmp = *content_end;
 			*content_end = '\0';
 			
-			next_boundary = strstr(ptr, startary);
+			next_boundary = strstr(ptr, m->b[startary].Key);
 			*content_end = tmp;
 
 			if ( (part_start != NULL) && (next_boundary != NULL) ) {
@@ -528,7 +543,8 @@ void the_mime_parser(char *partnum,
 						 "%d", ++part_seq);
 				}
 				the_mime_parser(nested_partnum,
-					    part_start, part_end,
+						part_start, 
+						part_end,
 						CallBack,
 						PreMultiPartCallBack,
 						PostMultiPartCallBack,
@@ -539,7 +555,8 @@ void the_mime_parser(char *partnum,
 			if (next_boundary != NULL) {
 				/* If we pass out of scope, don't attempt to
 				 * read past the end boundary. */
-				if (!strcmp(next_boundary, endary)) {
+				if ((*(next_boundary + m->b[startary].len + 1) == '-') && 
+				    (*(next_boundary + m->b[startary].len + 2) == '-') ){
 					ptr = content_end;
 				}
 				else {
@@ -570,14 +587,23 @@ void the_mime_parser(char *partnum,
 		} while ( (ptr < content_end) && (next_boundary != NULL) );
 
 		if (PostMultiPartCallBack != NULL) {
-			PostMultiPartCallBack("", "", partnum, "", NULL,
-				content_type, charset, 0, encoding, id, userdata);
+			PostMultiPartCallBack("", 
+					      "", 
+					      partnum, 
+					      "", 
+					      NULL,
+					      m->b[content_type].Key, 
+					      m->b[charset].Key,
+					      0, 
+					      m->b[encoding].Key, 
+					      m->b[id].Key, 
+					      userdata);
 		}
 		goto end_parser;
 	}
 
 	/* If it's not a multipart message, then do something with it */
-	if (!is_multipart) {
+	if (!m->is_multipart) {
 		part_start = ptr;
 		length = 0;
 		while (ptr < content_end) {
@@ -600,32 +626,48 @@ void the_mime_parser(char *partnum,
 		 * and sometimes it's tacked on to Content-disposition.  Use
 		 * whichever one we have.
 		 */
-		if (strlen(content_disposition_name) > strlen(content_type_name)) {
-			name = content_disposition_name;
+		if (m->b[content_disposition_name].len > m->b[content_type_name].len) {
+			chosen_name = &m->b[content_disposition_name];
 		}
 		else {
-			name = content_type_name;
+			chosen_name = &m->b[content_type_name];
 		}
 	
 		/* Ok, we've got a non-multipart part here, so do something with it.
 		 */
 		mime_decode(partnum,
-			part_start, length,
-			content_type, charset, encoding, disposition, id,
-			name, filename,
-			CallBack, NULL, NULL,
-			userdata, dont_decode
-		);
+			    part_start, 
+			    length,
+			    m->b[content_type].Key, 
+			    m->b[charset].Key,
+			    m->b[encoding].Key, 
+			    m->b[disposition].Key, 
+			    m->b[id].Key, 
+			    chosen_name->Key, 
+			    m->b[filename].Key,
+			    CallBack, 
+			    NULL, NULL,
+			    userdata, 
+			    dont_decode
+			);
 
 		/*
 		 * Now if it's an encapsulated message/rfc822 then we have to recurse into it
 		 */
-		if (!strcasecmp(content_type, "message/rfc822")) {
+		if (!strcasecmp(&m->b[content_type].Key[0], "message/rfc822")) {
 
 			if (PreMultiPartCallBack != NULL) {
-				PreMultiPartCallBack("", "", partnum, "",
-					NULL, content_type, charset,
-					0, encoding, id, userdata);
+				PreMultiPartCallBack("", 
+						     "", 
+						     partnum, 
+						     "",
+						     NULL, 
+						     m->b[content_type].Key, 
+						     m->b[charset].Key,
+						     0, 
+						     m->b[encoding].Key, 
+						     m->b[id].Key, 
+						     userdata);
 			}
 			if (CallBack != NULL) {
 				if (strlen(partnum) > 0) {
@@ -640,17 +682,27 @@ void the_mime_parser(char *partnum,
 						 "%d", ++part_seq);
 				}
 				the_mime_parser(nested_partnum,
-					part_start, part_end,
-					CallBack,
-					PreMultiPartCallBack,
-					PostMultiPartCallBack,
-					userdata,
-					dont_decode
-				);
+						part_start, 
+						part_end,
+						CallBack,
+						PreMultiPartCallBack,
+						PostMultiPartCallBack,
+						userdata,
+						dont_decode
+					);
 			}
 			if (PostMultiPartCallBack != NULL) {
-				PostMultiPartCallBack("", "", partnum, "", NULL,
-					content_type, charset, 0, encoding, id, userdata);
+				PostMultiPartCallBack("", 
+						      "", 
+						      partnum, 
+						      "", 
+						      NULL,
+						      m->b[content_type].Key, 
+						      m->b[charset].Key,
+						      0, 
+						      m->b[encoding].Key, 
+						      m->b[id].Key, 
+						      userdata);
 			}
 
 
@@ -659,7 +711,7 @@ void the_mime_parser(char *partnum,
 	}
 
 end_parser:	/* free the buffers!  end the oppression!! */
-	free(boundary);
+	free(m);
 }