X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmime_parser.c;h=f7b9681a0ad9e40bf62307ea94b513dc7a335cad;hb=01cc19a4c2da27b4db0e980ccd3ca54d834319c8;hp=9e020298db3b3b664505f6850aec86425ca0bf09;hpb=4d636fc697b2632a816b4ce2dcaf00a9cbd2f4c1;p=citadel.git

diff --git a/citadel/mime_parser.c b/citadel/mime_parser.c
index 9e020298d..f7b9681a0 100644
--- a/citadel/mime_parser.c
+++ b/citadel/mime_parser.c
@@ -1,17 +1,13 @@
 /*
  * $Id$
  *
- * This is the MIME parser for Citadel.  Sometimes it actually works.
+ * This is the MIME parser for Citadel.
  *
- * Copyright (c) 1998-2005 by Art Cancro
- * This code is distributed under the terms of the GNU General Public License.
+ * Copyright (c) 1998-2006 by Art Cancro
+ * This code is distributed under the GNU General Public License v2.
  *
  */
 
-#ifdef DLL_EXPORT
-#define IN_LIBCIT
-#endif
-
 #include <stdlib.h>
 #include <unistd.h>
 #include <stdio.h>
@@ -24,7 +20,6 @@
 
 #include "citadel.h"
 #include "server.h"
-#include "serv_extensions.h"
 #include "sysdep_decls.h"
 #include "tools.h"
 
@@ -66,68 +61,41 @@ char *fixed_partnum(char *supplied_partnum) {
 
 /*
  * Convert "quoted-printable" to binary.  Returns number of bytes decoded.
+ * Decoding follows RFC 2045 section 6.7.
  */
 int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
-	char buf[SIZ];
-	int buf_length = 0;
-	int soft_line_break = 0;
 	unsigned int ch;
 	int decoded_length = 0;
-	int i;
-
-	decoded[0] = 0;
-	decoded_length = 0;
-	buf[0] = 0;
-	buf_length = 0;
-
-	for (i = 0; i < sourcelen; ++i) {
-
-		buf[buf_length++] = encoded[i];
-
-		if ( (encoded[i] == '\n')
-		   || (encoded[i] == 0)
-		   || (i == (sourcelen-1)) ) {
-			buf[buf_length++] = 0;
+	int pos = 0;
 
-			/*** begin -- process one line ***/
-
-			if (buf[strlen(buf)-1] == '\n') {
-				buf[strlen(buf)-1] = 0;
-			}
-			if (buf[strlen(buf)-1] == '\r') {
-				buf[strlen(buf)-1] = 0;
-			}
-			while (isspace(buf[strlen(buf)-1])) {
-				buf[strlen(buf)-1] = 0;
-			}
-			soft_line_break = 0;
-
-			while (strlen(buf) > 0) {
-				if (!strcmp(buf, "=")) {
-					soft_line_break = 1;
-					strcpy(buf, "");
-				} else if ((strlen(buf)>=3) && (buf[0]=='=')) {
-					sscanf(&buf[1], "%02x", &ch);
-					decoded[decoded_length++] = ch;
-					strcpy(buf, &buf[3]);
-				} else {
-					decoded[decoded_length++] = buf[0];
-					strcpy(buf, &buf[1]);
-				}
-			}
-			if (soft_line_break == 0) {
-				decoded[decoded_length++] = '\r';
-				decoded[decoded_length++] = '\n';
-			}
-			buf_length = 0;
-			/*** end -- process one line ***/
+	while (pos < sourcelen)
+	{
+		if ((pos + 2 < sourcelen) && !memcmp(&encoded[pos], "=\r\n", 3))
+		{
+			pos += 3;	/* soft line break, CRLF form: emit nothing */
+		}
+		else if ((pos + 1 < sourcelen) && !memcmp(&encoded[pos], "=\n", 2))
+		{
+			pos += 2;	/* soft line break, bare LF form: emit nothing */
+		}
+		else if ((pos + 2 < sourcelen) && (encoded[pos] == '=')
+			&& isxdigit((unsigned char) encoded[pos+1])
+			&& isxdigit((unsigned char) encoded[pos+2]))
+		{
+			sscanf(&encoded[pos+1], "%2x", &ch);	/* both digits verified above */
+			decoded[decoded_length++] = ch;
+			pos += 3;
+		}
+		else	/* ordinary byte, or a malformed '=' that is passed through literally */
+		{
+			decoded[decoded_length++] = encoded[pos++];
 		}
 	}
-
-	decoded[decoded_length++] = 0;
+	decoded[decoded_length] = 0;
 	return(decoded_length);
 }
 
+
 /*
  * Given a message or message-part body and a length, handle any necessary
  * decoding and pass the request up the stack.
@@ -196,17 +164,19 @@ void mime_decode(char *partnum,
 		return;
 	}
 	
+	/* Fail silently if we hit an unknown encoding. */
 	if ((strcasecmp(encoding, "base64"))
 	    && (strcasecmp(encoding, "quoted-printable"))) {
 		return;
 	}
+
 	/*
-	 * Allocate a buffer for the decoded data.  The output buffer is the
-	 * same size as the input buffer; this assumes that the decoded data
-	 * will never be larger than the encoded data.  This is a safe
-	 * assumption with base64, uuencode, and quoted-printable.
+	 * Allocate a buffer for the decoded data.  The output buffer is slightly
+	 * larger than the input buffer; this assumes that the decoded data
+	 * will never be significantly larger than the encoded data.  This is a
+	 * safe assumption with base64, uuencode, and quoted-printable.
 	 */
-	decoded = malloc(length+2048);
+	decoded = malloc(length + 32768);
 	if (decoded == NULL) {
 		return;
 	}
@@ -215,8 +185,7 @@ void mime_decode(char *partnum,
 		bytes_decoded = CtdlDecodeBase64(decoded, part_start, length);
 	}
 	else if (!strcasecmp(encoding, "quoted-printable")) {
-		bytes_decoded = CtdlDecodeQuotedPrintable(decoded,
-							part_start, length);
+		bytes_decoded = CtdlDecodeQuotedPrintable(decoded, part_start, length);
 	}
 
 	if (bytes_decoded > 0) if (CallBack != NULL) {
@@ -275,11 +244,13 @@ void the_mime_parser(char *partnum,
 {
 
 	char *ptr;
+	char *srch = NULL;
 	char *part_start, *part_end = NULL;
 	char buf[SIZ];
 	char *header;
 	char *boundary;
 	char *startary;
+	size_t startary_len = 0;
 	char *endary;
 	char *next_boundary;
 	char *content_type;
@@ -295,7 +266,9 @@ void the_mime_parser(char *partnum,
 	int part_seq = 0;
 	int i;
 	size_t length;
-	char nested_partnum[SIZ];
+	char nested_partnum[256];
+	int crlf_in_use = 0;
+	char *evaluate_crlf_ptr = NULL;
 
 	ptr = content_start;
 	content_length = 0;
@@ -353,8 +326,9 @@ void the_mime_parser(char *partnum,
 		}
 
 		if (!isspace(buf[0])) {
-			if (!strncasecmp(header, "Content-type: ", 14)) {
-				strcpy(content_type, &header[14]);
+			if (!strncasecmp(header, "Content-type:", 13)) {
+				strcpy(content_type, &header[13]);
+				striplt(content_type);
 				extract_key(content_type_name, content_type, "name");
 				extract_key(charset, content_type, "charset");
 				/* Deal with weird headers */
@@ -363,23 +337,29 @@ void the_mime_parser(char *partnum,
 				if (strchr(content_type, ';'))
 					*(strchr(content_type, ';')) = '\0';
 			}
-			if (!strncasecmp(header, "Content-Disposition: ", 21)) {
-				strcpy(disposition, &header[21]);
+			if (!strncasecmp(header, "Content-Disposition:", 20)) {
+				strcpy(disposition, &header[20]);
+				striplt(disposition);
 				extract_key(content_disposition_name, disposition, "name");
 				extract_key(filename, disposition, "filename");
 			}
-			if (!strncasecmp(header, "Content-length: ", 16)) {
-				content_length = (size_t) atol(&header[16]);
+			if (!strncasecmp(header, "Content-length:", 15)) {	/* 15 == length of the name plus colon */
+				char clbuf[10];
+				safestrncpy(clbuf, &header[15], sizeof clbuf);
+				striplt(clbuf);
+				content_length = (size_t) atol(clbuf);
+			}
+			if (!strncasecmp(header, "Content-transfer-encoding:", 26)) {	/* 26 == name plus colon */
+				strcpy(encoding, &header[26]);
+				striplt(encoding);
 			}
-			if (!strncasecmp(header,
-				      "Content-transfer-encoding: ", 27))
-				strcpy(encoding, &header[27]);
 			if (strlen(boundary) == 0)
 				extract_key(boundary, header, "boundary");
 			strcpy(header, "");
 		}
-		if ((strlen(header) + strlen(buf) + 2) < SIZ)
+		if ((strlen(header) + strlen(buf) + 2) < SIZ) {
 			strcat(header, buf);
+		}
 	} while ((strlen(buf) > 0) && (*ptr != 0));
 
 	if (strchr(disposition, ';'))
@@ -389,13 +369,6 @@ void the_mime_parser(char *partnum,
 		*(strchr(content_type, ';')) = '\0';
 	striplt(content_type);
 
-	if (!strlen(content_type)) {
-		strcpy(content_type, "text/plain");
-	}
-	if (!strlen(charset)) {
-		strcpy(charset, "us-ascii");
-	}
-
 	if (strlen(boundary) > 0) {
 		is_multipart = 1;
 	} else {
@@ -416,13 +389,24 @@ void the_mime_parser(char *partnum,
 		/* Figure out where the boundaries are */
 		snprintf(startary, SIZ, "--%s", boundary);
 		snprintf(endary, SIZ, "--%s--", boundary);
+		startary_len = strlen(startary);
 
 		part_start = NULL;
 		do {
-			next_boundary = bmstrstr(ptr, startary, strncmp);
+			next_boundary = NULL;	/* stays NULL when no boundary is found before content_end */
+			for (srch=ptr; (content_end - srch) >= (long) startary_len; ++srch) {	/* never compare past content_end */
+				if (!memcmp(srch, startary, startary_len)) {
+					next_boundary = srch;
+					break;
+				}
+			}
+
 			if ( (part_start != NULL) && (next_boundary != NULL) ) {
 				part_end = next_boundary;
-				--part_end;
+				--part_end;		/* omit the trailing LF */
+				if (crlf_in_use) {
+					--part_end;	/* omit the trailing CR */
+				}
 
 				if (strlen(partnum) > 0) {
 					snprintf(nested_partnum,
@@ -445,14 +429,26 @@ void the_mime_parser(char *partnum,
 			}
 
 			if (next_boundary != NULL) {
-				/* If we pass out of scope, don't attempt to read
-				 * past the end boundary. */
+				/* If we pass out of scope, don't attempt to
+				 * read past the end boundary. */
 				if (!strcmp(next_boundary, endary)) {
 					ptr = content_end;
 				}
 				else {
 					/* Set up for the next part. */
 					part_start = strstr(next_boundary, "\n");
+					
+					/* Determine whether newlines are LF or CRLF */
+					evaluate_crlf_ptr = part_start;
+					--evaluate_crlf_ptr;
+					if (!memcmp(evaluate_crlf_ptr, "\r\n", 2)) {
+						crlf_in_use = 1;
+					}
+					else {
+						crlf_in_use = 0;
+					}
+
+					/* Advance past the LF ... now we're in the next part */
 					++part_start;
 					ptr = part_start;
 				}
@@ -479,9 +475,14 @@ void the_mime_parser(char *partnum,
 			++length;
 		}
 		part_end = content_end;
-                /* fix an off-by-one error */
-                --part_end;
-                --length;
+
+		/******
+		 * I thought there was an off-by-one error here, but there isn't.
+		 * This probably means that there's an off-by-one error somewhere
+		 * else ... or maybe only in certain messages?
+		--part_end;
+		--length;
+		******/
 		
 		/* Truncate if the header told us to */
 		if ( (content_length > 0) && (length > content_length) ) {
@@ -498,13 +499,59 @@ void the_mime_parser(char *partnum,
 		else {
 			name = content_type_name;
 		}
-		
+	
+		/* lprintf(CTDL_DEBUG, "mime_decode part=%s, len=%d, type=%s, charset=%s, encoding=%s\n",
+			partnum, length, content_type, charset, encoding); */
+
+		/* Ok, we've got a non-multipart part here, so do something with it.
+		 */
 		mime_decode(partnum,
-			    part_start, length,
-			    content_type, charset, encoding, disposition,
-			    name, filename,
-			    CallBack, NULL, NULL,
-			    userdata, dont_decode);
+			part_start, length,
+			content_type, charset, encoding, disposition,
+			name, filename,
+			CallBack, NULL, NULL,
+			userdata, dont_decode
+		);
+
+		/*
+		 * Now if it's an encapsulated message/rfc822 then we have to recurse into it
+		 */
+		if (!strcasecmp(content_type, "message/rfc822")) {
+
+			if (PreMultiPartCallBack != NULL) {
+				PreMultiPartCallBack("", "", partnum, "",
+					NULL, content_type, charset,
+					0, encoding, userdata);
+			}
+			if (CallBack != NULL) {
+				if (strlen(partnum) > 0) {
+					snprintf(nested_partnum,
+						 sizeof nested_partnum,
+						 "%s.%d", partnum,
+						 ++part_seq);
+				}
+				else {
+					snprintf(nested_partnum,
+						 sizeof nested_partnum,
+						 "%d", ++part_seq);
+				}
+				the_mime_parser(nested_partnum,
+					part_start, part_end,
+					CallBack,
+					PreMultiPartCallBack,
+					PostMultiPartCallBack,
+					userdata,
+					dont_decode
+				);
+			}
+			if (PostMultiPartCallBack != NULL) {
+				PostMultiPartCallBack("", "", partnum, "", NULL,
+					content_type, charset, 0, encoding, userdata);
+			}
+
+
+		}
+
 	}
 
 end_parser:	/* free the buffers!  end the oppression!! */