X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=libcitadel%2Flib%2Fmime_parser.c;h=1fc037d43ffd77462434eb93f645001f4dbb251a;hb=8659bf61f03724755374145e6211be6bbfe74fda;hp=b7249f79ec215b941643dc1f0abde569ce282dc8;hpb=bab826bb3fa8649d196208fffb4af8fba1a57544;p=citadel.git

diff --git a/libcitadel/lib/mime_parser.c b/libcitadel/lib/mime_parser.c
index b7249f79e..1fc037d43 100644
--- a/libcitadel/lib/mime_parser.c
+++ b/libcitadel/lib/mime_parser.c
@@ -2,8 +2,20 @@
  * This is the MIME parser for Citadel.
  *
  * Copyright (c) 1998-2010 by the citadel.org development team.
- * This code is distributed under the GNU General Public License v3.
  *
+ * This program is open source software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #include <stdlib.h>
@@ -123,7 +135,7 @@ char *fixed_partnum(char *supplied_partnum) {
 
 static inline unsigned int _decode_hex(const char *Source)
 {
-	int ret = '?';
+	unsigned int ret = '?';
 	unsigned char LO_NIBBLE;
 	unsigned char HI_NIBBLE;
 
@@ -166,7 +178,6 @@ int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
 			}
 			else
 			{
-				ch = 0;
 				ch = _decode_hex(&encoded[pos]);
 				pos += 2;
 				decoded[decoded_length++] = ch;
@@ -205,11 +216,13 @@ void mime_decode(char *partnum,
 
 	/* Some encodings aren't really encodings */
 	if (!strcasecmp(encoding, "7bit"))
-		strcpy(encoding, "");
+		*encoding = '\0';
 	if (!strcasecmp(encoding, "8bit"))
-		strcpy(encoding, "");
+		*encoding = '\0';
 	if (!strcasecmp(encoding, "binary"))
-		strcpy(encoding, "");
+		*encoding = '\0';
+	if (!strcasecmp(encoding, "ISO-8859-1"))
+		*encoding = '\0';
 
 	/* If this part is not encoded, send as-is */
 	if ( (strlen(encoding) == 0) || (dont_decode)) {
@@ -254,9 +267,20 @@ void mime_decode(char *partnum,
 	}
 
 	if (bytes_decoded > 0) if (CallBack != NULL) {
-		CallBack(name, filename, fixed_partnum(partnum),
-			disposition, decoded,
-			content_type, charset, bytes_decoded, "binary", id, userdata);
+			char encoding_buf[SIZ];
+
+			strcpy(encoding_buf, "binary");
+			CallBack(name, 
+				 filename, 
+				 fixed_partnum(partnum),
+				 disposition, 
+				 decoded,
+				 content_type, 
+				 charset, 
+				 bytes_decoded, 
+				 encoding_buf, 
+				 id, 
+				 userdata);
 	}
 
 	free(decoded);
@@ -280,11 +304,11 @@ int mime_decode_now (char *part_start,
 	*decoded = NULL;
 	/* Some encodings aren't really encodings */
 	if (!strcasecmp(encoding, "7bit"))
-		strcpy(encoding, "");
+		*encoding = '\0';
 	if (!strcasecmp(encoding, "8bit"))
-		strcpy(encoding, "");
+		*encoding = '\0';
 	if (!strcasecmp(encoding, "binary"))
-		strcpy(encoding, "");
+		*encoding = '\0';
 
 	/* If this part is not encoded, send as-is */
 	if (strlen(encoding) == 0) {
@@ -346,40 +370,45 @@ typedef struct _interesting_mime_headers {
 	long is_multipart;
 } interesting_mime_headers;
 
-interesting_mime_headers *InitInterestingMimes(void)
+
+static void FlushInterestingMimes(interesting_mime_headers *m)
 {
 	int i;
-	interesting_mime_headers *m;
-	m = (interesting_mime_headers*) malloc( sizeof(interesting_mime_headers));
 	
 	for (i = 0; i < eMax; i++) {
 	     m->b[i].Key[0] = '\0';
 	     m->b[i].len = 0;
 	}
 	m->content_length = -1;
-	return m;
 }
+static interesting_mime_headers *InitInterestingMimes(void)
+{
+	interesting_mime_headers *m;
+	m = (interesting_mime_headers*) malloc( sizeof(interesting_mime_headers));
 
+	FlushInterestingMimes(m);
 
+	return m;
+}
 
-long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *content_end)
+
+static long parse_MimeHeaders(interesting_mime_headers *m, 
+			      char** pcontent_start, 
+			      char *content_end)
 {
 	char buf[SIZ];
 	char header[SIZ];
 	long headerlen;
-	char *ptr;
-	int buflen;
+	char *ptr, *pch;
+	int buflen = 0;
 	int i;
 
 	/* Learn interesting things from the headers */
-	ptr = content_start;
+	ptr = *pcontent_start;
 	*header = '\0';
 	headerlen = 0;
 	do {
 		ptr = memreadlinelen(ptr, buf, SIZ, &buflen);
-		if (ptr >= content_end) {
-			return -1;
-		}
 
 		for (i = 0; i < buflen; ++i) {
 			if (isspace(buf[i])) {
@@ -387,9 +416,10 @@ long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *c
 			}
 		}
 
-		if (!isspace(buf[0])) {
+		if (!isspace(buf[0]) && (headerlen > 0)) {
 			if (!strncasecmp(header, "Content-type:", 13)) {
 				memcpy (m->b[content_type].Key, &header[13], headerlen - 12);
+				m->b[content_type].Key[headerlen - 12] = '\0';
 				m->b[content_type].len = striplt (m->b[content_type].Key);
 
 				m->b[content_type_name].len = extract_key(m->b[content_type_name].Key, CKEY(m->b[content_type]), HKEY("name"), '=');
@@ -397,32 +427,44 @@ long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *c
 				m->b[boundary].len          = extract_key(m->b[boundary].Key,          header,       headerlen,  HKEY("boundary"), '=');
 
 				/* Deal with weird headers */
-				if (strchr(m->b[content_type].Key, ' '))
-					*(strchr(m->b[content_type].Key, ' ')) = '\0';
-				if (strchr(m->b[content_type].Key, ';'))
-					*(strchr(m->b[content_type].Key, ';')) = '\0';
+				/*
+				 * Cut the value at the first blank or ';', whichever
+				 * comes first, and keep the stored length in step
+				 * with the shortened string (end minus start).
+				 */
+				pch = strpbrk(m->b[content_type].Key, " ;");
+				if (pch != NULL) {
+					*pch = '\0';
+					m->b[content_type].len = pch - m->b[content_type].Key;
+				}
 			}
 			else if (!strncasecmp(header, "Content-Disposition:", 20)) {
 				memcpy (m->b[disposition].Key, &header[20], headerlen - 19);
+				m->b[disposition].Key[headerlen - 19] = '\0';
 				m->b[disposition].len = striplt(m->b[disposition].Key);
 
 				m->b[content_disposition_name].len = extract_key(m->b[content_disposition_name].Key, CKEY(m->b[disposition]), HKEY("name"), '=');
 				m->b[filename].len                 = extract_key(m->b[filename].Key,                 CKEY(m->b[disposition]), HKEY("filename"), '=');
+				pch = strchr(m->b[disposition].Key, ';');
+				if (pch != NULL) *pch = '\0';
+				m->b[disposition].len = striplt(m->b[disposition].Key);
 			}
 			else if (!strncasecmp(header, "Content-ID:", 11)) {
-				memcpy(m->b[id].Key, &header[11], headerlen);
+				memcpy(m->b[id].Key, &header[11], headerlen - 11);
+				m->b[id].Key[headerlen - 11] = '\0';
 				striplt(m->b[id].Key);
 				m->b[id].len = stripallbut(m->b[id].Key, '<', '>');
 			}
 			else if (!strncasecmp(header, "Content-length: ", 15)) {
 				char *clbuf;
 				clbuf = &header[15];
-				while (isspace(clbuf))
+				while (isspace(*clbuf))
 					clbuf ++;
 				m->content_length = (size_t) atol(clbuf);
 			}
 			else if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
 				memcpy(m->b[encoding].Key, &header[26], headerlen - 26);
+				m->b[encoding].Key[headerlen - 26] = '\0';
 				m->b[encoding].len = striplt(m->b[encoding].Key);
 			}
 			*header = '\0';
@@ -433,66 +475,135 @@ long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *c
 			headerlen += buflen;
 			header[headerlen] = '\0';
 		}
+		if (ptr >= content_end) {
+			return -1;
+		}
 	} while ((!IsEmptyStr(buf)) && (*ptr != 0));
 
-	ptr = strchr(m->b[disposition].Key, ';');
-	if (ptr != NULL) *ptr = '\0';
-	m->b[disposition].len = striplt(m->b[disposition].Key);
+	m->is_multipart = m->b[boundary].len != 0;
+	*pcontent_start = ptr;
 
-	ptr = strchr(m->b[content_type].Key, ';');
-	if (ptr != NULL) *ptr = '\0';
-	m->b[content_type].len = striplt(m->b[content_type].Key);
+	return 0;
+}
 
-	m->is_multipart = m->b[boundary].len != 0;
+/*
+ * Return 1 if the part's transfer encoding is base64 or quoted-printable
+ * (tokens compared case-insensitively), otherwise 0.
+ */
+static int IsAsciiEncoding(interesting_mime_headers *m)
+{
+	if (m->b[encoding].len == 0)
+		return 0;
+	if (!strcasecmp(m->b[encoding].Key, "base64") ||
+	    !strcasecmp(m->b[encoding].Key, "quoted-printable"))
+		return 1;
 	return 0;
 }
 
+/*
+ * Find the next boundary marker (m->b[startary]) in [ptr, content_end).
+ * Returns a pointer to the marker, or NULL when none is found.
+ *
+ * When the sub-part announced a Content-length of more than 10 bytes the
+ * search starts near the announced end of the part rather than at ptr.
+ * That length is not trusted to be accurate, so the search backs off by
+ * 10 bytes, and for parts IsAsciiEncoding() accepts by one more byte per
+ * 80 announced bytes.
+ */
+static char *FindNextContent(char *ptr,
+			     char *content_end,
+			     interesting_mime_headers *SubMimeHeaders,
+			     interesting_mime_headers *m)
+{
+	char *next_boundary = NULL;
+	long  remaining = content_end - ptr;
+	long  marker_len = m->b[startary].len;
+	long  skip = 0;
+
+	if ((SubMimeHeaders->content_length != -1) &&
+	    (SubMimeHeaders->content_length > 10))
+	{
+		skip = SubMimeHeaders->content_length - 10;
+		if (IsAsciiEncoding(SubMimeHeaders))
+			skip -= SubMimeHeaders->content_length / 80;
+	}
+
+	/*
+	 * Compare offsets rather than pointers: ptr + skip could lie far
+	 * outside the buffer when the announced length is bogus.
+	 */
+	if ((skip > 0) && (skip < remaining))
+		ptr += skip;
+
+	if (IsAsciiEncoding(SubMimeHeaders)) {
+		char tmp;
+
+		/*
+		 * Temporarily NUL-terminate the buffer at content_end so
+		 * that strstr() stops there; the byte is restored below.
+		 */
+		tmp = *content_end;
+		*content_end = '\0';
+		next_boundary = strstr(ptr, m->b[startary].Key);
+		*content_end = tmp;
+	}
+	else {
+		char *srch = ptr;
+
+		/*
+		 * Other encodings are scanned with memchr()/memcmp() up to
+		 * content_end.  Only positions where the whole marker still
+		 * fits are examined, so memcmp() never reads past the end.
+		 */
+		while ((marker_len > 0) &&
+		       (content_end - srch >= marker_len))
+		{
+			srch = memchr(srch, '-',
+				      content_end - srch - marker_len + 1);
+			if (srch == NULL)
+				break;
+			if (!memcmp(srch,
+				    m->b[startary].Key,
+				    marker_len))
+			{
+				next_boundary = srch;
+				break;
+			}
+			srch ++;
+		}
+	}
+	return next_boundary;
+}
+
 /*
  * Break out the components of a multipart message
  * (This function expects to be fed HEADERS + CONTENT)
  * Note: NULL can be supplied as content_end; in this case, the message is
  * considered to have ended when the parser encounters a 0x00 byte.
  */
-void the_mime_parser(char *partnum,
-		     char *content_start, char *content_end,
-		     MimeParserCallBackType CallBack,
-		     MimeParserCallBackType PreMultiPartCallBack,
-		     MimeParserCallBackType PostMultiPartCallBack,
-		     void *userdata,
-		     int dont_decode)
+static void recurseable_mime_parser(char *partnum,
+				    char *content_start, char *content_end,
+				    MimeParserCallBackType CallBack,
+				    MimeParserCallBackType PreMultiPartCallBack,
+				    MimeParserCallBackType PostMultiPartCallBack,
+				    void *userdata,
+				    int dont_decode, 
+				    interesting_mime_headers *m)
 {
-
-	char *ptr;
-	char *part_start, *part_end = NULL;
-	char *next_boundary;
-	
-	size_t content_length;
-	int part_seq = 0;
-	size_t length;
-	char nested_partnum[256];
-	int crlf_in_use = 0;
-	char *evaluate_crlf_ptr = NULL;
-	
-	interesting_mime_headers *m;
-	CBufStr *chosen_name;
-
-	ptr = content_start;
-	content_length = 0;
-
-	m = InitInterestingMimes();
-
-
-	/* If the caller didn't supply an endpointer, generate one by measure */
-	if (content_end == NULL) {
-		content_end = &content_start[strlen(content_start)];
-	}
+	interesting_mime_headers *SubMimeHeaders;
+	char     *ptr;
+	char     *part_start;
+	char     *part_end = NULL;
+	char     *evaluate_crlf_ptr = NULL;
+	char     *next_boundary;
+	char      nested_partnum[256];
+	int       crlf_in_use = 0;
+	int       part_seq = 0;
+	CBufStr  *chosen_name;
 
 
-	if (parse_MimeHeaders(m, content_start, content_end) != 0)
-		goto end_parser;
-	
 	/* If this is a multipart message, then recursively process it */
+	ptr = content_start;
 	part_start = NULL;
 	if (m->is_multipart) {
 
@@ -513,16 +624,38 @@ void the_mime_parser(char *partnum,
 
 		/* Figure out where the boundaries are */
 		m->b[startary].len = snprintf(m->b[startary].Key, SIZ, "--%s", m->b[boundary].Key);
+		SubMimeHeaders = InitInterestingMimes ();
+
+		while ((*ptr == '\r') || (*ptr == '\n')) ptr ++;
+
+		if (strncmp(ptr, m->b[startary].Key, m->b[startary].len) == 0)
+			ptr += m->b[startary].len;
+
+		while ((*ptr == '\r') || (*ptr == '\n')) ptr ++;
 
 		part_start = NULL;
 		do {
-			char tmp;
-
-			tmp = *content_end;
-			*content_end = '\0';
+			char *optr;
+
+			optr = ptr;
+			if (parse_MimeHeaders(SubMimeHeaders, &ptr, content_end) != 0)
+				break;
+			if ((ptr - optr > 2) && 
+			    (*(ptr - 2) == '\r'))
+				crlf_in_use = 1;
+			
+			part_start = ptr;
 			
-			next_boundary = strstr(ptr, m->b[startary].Key);
-			*content_end = tmp;
+			next_boundary = FindNextContent(ptr,
+							content_end,
+							SubMimeHeaders,
+							m);
+			if ((next_boundary != NULL) && 
+			    (next_boundary - part_start < 3)) {
+				FlushInterestingMimes(SubMimeHeaders);
+
+				continue;
+			}
 
 			if ( (part_start != NULL) && (next_boundary != NULL) ) {
 				part_end = next_boundary;
@@ -542,21 +675,22 @@ void the_mime_parser(char *partnum,
 						 sizeof nested_partnum,
 						 "%d", ++part_seq);
 				}
-				the_mime_parser(nested_partnum,
-						part_start, 
-						part_end,
-						CallBack,
-						PreMultiPartCallBack,
-						PostMultiPartCallBack,
-						userdata,
-						dont_decode);
+				recurseable_mime_parser(nested_partnum,
+							part_start, 
+							part_end,
+							CallBack,
+							PreMultiPartCallBack,
+							PostMultiPartCallBack,
+							userdata,
+							dont_decode, 
+							SubMimeHeaders);
 			}
 
 			if (next_boundary != NULL) {
 				/* If we pass out of scope, don't attempt to
 				 * read past the end boundary. */
-				if ((*(next_boundary + m->b[startary].len + 1) == '-') && 
-				    (*(next_boundary + m->b[startary].len + 2) == '-') ){
+				if ((*(next_boundary + m->b[startary].len) == '-') && 
+				    (*(next_boundary + m->b[startary].len + 1) == '-') ){
 					ptr = content_end;
 				}
 				else {
@@ -584,8 +718,11 @@ void the_mime_parser(char *partnum,
 				/* Invalid end of multipart.  Bail out! */
 				ptr = content_end;
 			}
+			FlushInterestingMimes(SubMimeHeaders);
 		} while ( (ptr < content_end) && (next_boundary != NULL) );
 
+		free(SubMimeHeaders);
+
 		if (PostMultiPartCallBack != NULL) {
 			PostMultiPartCallBack("", 
 					      "", 
@@ -599,18 +736,12 @@ void the_mime_parser(char *partnum,
 					      m->b[id].Key, 
 					      userdata);
 		}
-		goto end_parser;
-	}
-
-	/* If it's not a multipart message, then do something with it */
-	if (!m->is_multipart) {
+	} /* If it's not a multipart message, then do something with it */
+	else {
+		size_t length;
 		part_start = ptr;
-		length = 0;
-		while (ptr < content_end) {
-			++ptr;
-			++length;
-		}
-		part_end = content_end;
+		length = content_end - part_start;
+		ptr = part_end = content_end;
 
 
 		/* The following code will truncate the MIME part to the size
@@ -710,11 +841,45 @@ void the_mime_parser(char *partnum,
 
 	}
 
-end_parser:	/* free the buffers!  end the oppression!! */
-	free(m);
 }
 
+/*
+ * Parse the top-level headers found at content_start, then hand the
+ * remaining content to recurseable_mime_parser() together with them.
+ * (This function expects to be fed HEADERS + CONTENT)
+ * Note: NULL can be supplied as content_end; the end is then taken from
+ * strlen(content_start), i.e. the message stops at the first 0x00 byte.
+ */
+void the_mime_parser(char *partnum,
+		     char *content_start, char *content_end,
+		     MimeParserCallBackType CallBack,
+		     MimeParserCallBackType PreMultiPartCallBack,
+		     MimeParserCallBackType PostMultiPartCallBack,
+		     void *userdata,
+		     int dont_decode)
+{
+	interesting_mime_headers *m;
+
+	/* No end pointer supplied: measure the buffer as a C string */
+	if (content_end == NULL) {
+		content_end = &content_start[strlen(content_start)];
+	}
+	/* Header record for the outermost part; released with free() below. */
+	m = InitInterestingMimes();
+	/* A zero return means the headers parsed; content_start now points past them. */
+	if (!parse_MimeHeaders(m, &content_start, content_end))
+	{

+		recurseable_mime_parser(partnum,
+					content_start, content_end,
+					CallBack,
+					PreMultiPartCallBack,
+					PostMultiPartCallBack,
+					userdata,
+					dont_decode,
+					m);
+	}
+	free(m);
+}
 
 /*
  * Entry point for the MIME parser.
@@ -892,8 +1057,8 @@ int LoadIconDir(const char *DirName)
 	{
 		char *MinorPtr;
 		char *PStart;
-#ifdef _DIRENT_HAVE_D_NAMELEN
-		d_namelen = filedir_entry->d_namelen;
+#ifdef _DIRENT_HAVE_D_NAMLEN
+		d_namelen = filedir_entry->d_namlen;
 #else
 		d_namelen = strlen(filedir_entry->d_name);
 #endif