* This is the MIME parser for Citadel.
*
* Copyright (c) 1998-2010 by the citadel.org development team.
- * This code is distributed under the GNU General Public License v3.
*
+ * This program is open source software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdlib.h>
static inline unsigned int _decode_hex(const char *Source)
{
- int ret = '?';
+ unsigned int ret = '?';
unsigned char LO_NIBBLE;
unsigned char HI_NIBBLE;
}
else
{
- ch = 0;
ch = _decode_hex(&encoded[pos]);
pos += 2;
decoded[decoded_length++] = ch;
/* Some encodings aren't really encodings */
if (!strcasecmp(encoding, "7bit"))
- strcpy(encoding, "");
+ *encoding = '\0';
if (!strcasecmp(encoding, "8bit"))
- strcpy(encoding, "");
+ *encoding = '\0';
if (!strcasecmp(encoding, "binary"))
- strcpy(encoding, "");
+ *encoding = '\0';
+ if (!strcasecmp(encoding, "ISO-8859-1"))
+ *encoding = '\0';
/* If this part is not encoded, send as-is */
if ( (strlen(encoding) == 0) || (dont_decode)) {
}
if (bytes_decoded > 0) if (CallBack != NULL) {
- CallBack(name, filename, fixed_partnum(partnum),
- disposition, decoded,
- content_type, charset, bytes_decoded, "binary", id, userdata);
+ char encoding_buf[SIZ];
+
+ strcpy(encoding_buf, "binary");
+ CallBack(name,
+ filename,
+ fixed_partnum(partnum),
+ disposition,
+ decoded,
+ content_type,
+ charset,
+ bytes_decoded,
+ encoding_buf,
+ id,
+ userdata);
}
free(decoded);
*decoded = NULL;
/* Some encodings aren't really encodings */
if (!strcasecmp(encoding, "7bit"))
- strcpy(encoding, "");
+ *encoding = '\0';
if (!strcasecmp(encoding, "8bit"))
- strcpy(encoding, "");
+ *encoding = '\0';
if (!strcasecmp(encoding, "binary"))
- strcpy(encoding, "");
+ *encoding = '\0';
/* If this part is not encoded, send as-is */
if (strlen(encoding) == 0) {
long is_multipart;
} interesting_mime_headers;
-interesting_mime_headers *InitInterestingMimes(void)
+/*
+ * Reset a header-collection structure for reuse: every slot b[0..eMax-1]
+ * gets an empty Key string and a len of 0, and content_length is set to -1
+ * (the value FindNextContent tests for before trusting the length).
+ * is_multipart is not touched here.
+ */
+static void FlushInterestingMimes(interesting_mime_headers *m)
{
int i;
- interesting_mime_headers *m;
- m = (interesting_mime_headers*) malloc( sizeof(interesting_mime_headers));
for (i = 0; i < eMax; i++) {
m->b[i].Key[0] = '\0';
m->b[i].len = 0;
}
m->content_length = -1;
- return m;
}
+/*
+ * Allocate a header-collection structure and put it into the empty state
+ * via FlushInterestingMimes().
+ *
+ * Returns the new structure, which the caller releases with free(), or
+ * NULL when malloc() fails (instead of writing through a null pointer).
+ */
+static interesting_mime_headers *InitInterestingMimes(void)
+{
+	interesting_mime_headers *m = malloc(sizeof *m);
+
+	if (m != NULL) {
+		FlushInterestingMimes(m);
+	}
+	return m;
+}
-long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *content_end)
+
+/*
+ * Read header lines starting at *pcontent_start and record the ones the
+ * parser cares about in m: Content-type (plus its name= and boundary=
+ * parameters), Content-Disposition (plus name= and filename=), Content-ID
+ * and Content-transfer-encoding.
+ *
+ * m               receives the extracted values; is_multipart is set when a
+ *                 boundary parameter was found
+ * pcontent_start  in: start of the headers; out (success only): position
+ *                 reached once the header loop has finished
+ * content_end     end of the buffer being parsed
+ *
+ * Returns 0 on success, -1 when reading reaches or passes content_end.
+ */
+static long parse_MimeHeaders(interesting_mime_headers *m,
+ char** pcontent_start,
+ char *content_end)
{
char buf[SIZ];
char header[SIZ];
long headerlen;
- char *ptr;
- int buflen;
+ char *ptr, *pch;
+ int buflen = 0;
int i;
/* Learn interesting things from the headers */
- ptr = content_start;
+ ptr = *pcontent_start;
*header = '\0';
headerlen = 0;
do {
ptr = memreadlinelen(ptr, buf, SIZ, &buflen);
- if (ptr >= content_end) {
- return -1;
- }
for (i = 0; i < buflen; ++i) {
if (isspace(buf[i])) {
}
}
- if (!isspace(buf[0])) {
+ if (!isspace(buf[0]) && (headerlen > 0)) {
if (!strncasecmp(header, "Content-type:", 13)) {
memcpy (m->b[content_type].Key, &header[13], headerlen - 12);
+ m->b[content_type].Key[headerlen - 12] = '\0';
m->b[content_type].len = striplt (m->b[content_type].Key);
m->b[content_type_name].len = extract_key(m->b[content_type_name].Key, CKEY(m->b[content_type]), HKEY("name"), '=');
m->b[boundary].len = extract_key(m->b[boundary].Key, header, headerlen, HKEY("boundary"), '=');
/* Deal with weird headers */
- if (strchr(m->b[content_type].Key, ' '))
- *(strchr(m->b[content_type].Key, ' ')) = '\0';
- if (strchr(m->b[content_type].Key, ';'))
- *(strchr(m->b[content_type].Key, ';')) = '\0';
+ pch = strchr(m->b[content_type].Key, ' ');
+ if (pch != NULL) {
+ *pch = '\0';
+ /* new length = offset of the terminator from the start of Key */
+ m->b[content_type].len = pch - m->b[content_type].Key;
+ }
+ pch = strchr(m->b[content_type].Key, ';');
+ if (pch != NULL) {
+ *pch = '\0';
+ /* new length = offset of the terminator from the start of Key */
+ m->b[content_type].len = pch - m->b[content_type].Key;
+ }
}
else if (!strncasecmp(header, "Content-Disposition:", 20)) {
memcpy (m->b[disposition].Key, &header[20], headerlen - 19);
+ m->b[disposition].Key[headerlen - 19] = '\0';
m->b[disposition].len = striplt(m->b[disposition].Key);
m->b[content_disposition_name].len = extract_key(m->b[content_disposition_name].Key, CKEY(m->b[disposition]), HKEY("name"), '=');
m->b[filename].len = extract_key(m->b[filename].Key, CKEY(m->b[disposition]), HKEY("filename"), '=');
+ pch = strchr(m->b[disposition].Key, ';');
+ if (pch != NULL) *pch = '\0';
+ m->b[disposition].len = striplt(m->b[disposition].Key);
}
else if (!strncasecmp(header, "Content-ID:", 11)) {
- memcpy(m->b[id].Key, &header[11], headerlen);
+ memcpy(m->b[id].Key, &header[11], headerlen - 11);
+ m->b[id].Key[headerlen - 11] = '\0';
striplt(m->b[id].Key);
m->b[id].len = stripallbut(m->b[id].Key, '<', '>');
}
}
else if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
memcpy(m->b[encoding].Key, &header[26], headerlen - 26);
+ m->b[encoding].Key[headerlen - 26] = '\0';
m->b[encoding].len = striplt(m->b[encoding].Key);
}
*header = '\0';
headerlen += buflen;
header[headerlen] = '\0';
}
+ if (ptr >= content_end) {
+ return -1;
+ }
} while ((!IsEmptyStr(buf)) && (*ptr != 0));
- ptr = strchr(m->b[disposition].Key, ';');
- if (ptr != NULL) *ptr = '\0';
- m->b[disposition].len = striplt(m->b[disposition].Key);
+ m->is_multipart = m->b[boundary].len != 0;
+ *pcontent_start = ptr;
- ptr = strchr(m->b[content_type].Key, ';');
- if (ptr != NULL) *ptr = '\0';
- m->b[content_type].len = striplt(m->b[content_type].Key);
+ return 0;
+}
- m->is_multipart = m->b[boundary].len != 0;
+
+/*
+ * Report whether the part described by m declares a base64 or
+ * quoted-printable Content-transfer-encoding. FindNextContent uses the
+ * answer to choose a string search (strstr) over a byte-wise scan.
+ *
+ * Returns 1 for those two encodings, 0 otherwise (including when no
+ * encoding was recorded).
+ */
+static int IsAsciiEncoding(interesting_mime_headers *m)
+{
+ if (m->b[encoding].len == 0) {
+ return 0;
+ }
+ /* Transfer-encoding names are case-insensitive, so compare both alike */
+ if ((strcasecmp(m->b[encoding].Key, "base64") == 0) ||
+ (strcasecmp(m->b[encoding].Key, "quoted-printable") == 0)) {
+ return 1;
+ }
return 0;
}
+/*
+ * Find the next occurrence of the enclosing multipart's boundary marker
+ * (m->b[startary]) in the range [ptr, content_end).
+ *
+ * ptr             where the current part's content starts
+ * content_end     end of the buffer being parsed
+ * SubMimeHeaders  headers of the current part; its content_length and
+ *                 transfer encoding steer the search
+ * m               headers of the enclosing multipart (supplies the marker)
+ *
+ * Returns a pointer to the marker, or NULL when none is found.
+ */
+static char *FindNextContent(char *ptr,
+			     char *content_end,
+			     interesting_mime_headers *SubMimeHeaders,
+			     interesting_mime_headers *m)
+{
+	char *next_boundary = NULL;
+	int ascii = IsAsciiEncoding(SubMimeHeaders);
+
+	/*
+	 * If the part announced a content length, skip most of the content
+	 * before searching. The length is not trusted to be accurate, so the
+	 * search starts a little early: 10 bytes, plus one byte per 80
+	 * characters for the text encodings, whose length is suspected to
+	 * lose one count per line. The skip is compared as an offset so that
+	 * no pointer outside the buffer is ever formed.
+	 */
+	if (SubMimeHeaders->content_length > 10) {
+		long skip = SubMimeHeaders->content_length - 10;
+
+		if (ascii) {
+			skip -= SubMimeHeaders->content_length / 80;
+		}
+		if (skip < content_end - ptr) {
+			ptr += skip;
+		}
+	}
+
+	if (ascii) {
+		/* Terminate the buffer temporarily so strstr() stops at the end */
+		char saved = *content_end;
+
+		*content_end = '\0';
+		next_boundary = strstr(ptr, m->b[startary].Key);
+		*content_end = saved;
+	}
+	else {
+		/* Byte-wise scan: try every '-' as a possible marker start */
+		long marker_len = m->b[startary].len;
+		char *srch = ptr;
+
+		while (srch < content_end) {
+			srch = memchr(srch, '-', content_end - srch);
+			if (srch == NULL) {
+				break;
+			}
+			/* A full marker no longer fits; never compare past the end */
+			if (content_end - srch < marker_len) {
+				break;
+			}
+			if (memcmp(srch, m->b[startary].Key, marker_len) == 0) {
+				next_boundary = srch;
+				break;
+			}
+			srch++;
+		}
+	}
+	return next_boundary;
+}
+
/*
* Break out the components of a multipart message
* (This function expects to be fed HEADERS + CONTENT)
* Note: NULL can be supplied as content_end; in this case, the message is
* considered to have ended when the parser encounters a 0x00 byte.
*/
-void the_mime_parser(char *partnum,
- char *content_start, char *content_end,
- MimeParserCallBackType CallBack,
- MimeParserCallBackType PreMultiPartCallBack,
- MimeParserCallBackType PostMultiPartCallBack,
- void *userdata,
- int dont_decode)
+static void recurseable_mime_parser(char *partnum,
+ char *content_start, char *content_end,
+ MimeParserCallBackType CallBack,
+ MimeParserCallBackType PreMultiPartCallBack,
+ MimeParserCallBackType PostMultiPartCallBack,
+ void *userdata,
+ int dont_decode,
+ interesting_mime_headers *m)
{
-
- char *ptr;
- char *part_start, *part_end = NULL;
- char *next_boundary;
-
- size_t content_length;
- int part_seq = 0;
- size_t length;
- char nested_partnum[256];
- int crlf_in_use = 0;
- char *evaluate_crlf_ptr = NULL;
-
- interesting_mime_headers *m;
- CBufStr *chosen_name;
-
- ptr = content_start;
- content_length = 0;
-
- m = InitInterestingMimes();
-
-
- /* If the caller didn't supply an endpointer, generate one by measure */
- if (content_end == NULL) {
- content_end = &content_start[strlen(content_start)];
- }
+ interesting_mime_headers *SubMimeHeaders;
+ char *ptr;
+ char *part_start;
+ char *part_end = NULL;
+ char *evaluate_crlf_ptr = NULL;
+ char *next_boundary;
+ char nested_partnum[256];
+ int crlf_in_use = 0;
+ int part_seq = 0;
+ CBufStr *chosen_name;
- if (parse_MimeHeaders(m, content_start, content_end) != 0)
- goto end_parser;
-
/* If this is a multipart message, then recursively process it */
+ ptr = content_start;
part_start = NULL;
if (m->is_multipart) {
/* Figure out where the boundaries are */
m->b[startary].len = snprintf(m->b[startary].Key, SIZ, "--%s", m->b[boundary].Key);
+ SubMimeHeaders = InitInterestingMimes ();
+
+ while ((*ptr == '\r') || (*ptr == '\n')) ptr ++;
+
+ if (strncmp(ptr, m->b[startary].Key, m->b[startary].len) == 0)
+ ptr += m->b[startary].len;
+
+ while ((*ptr == '\r') || (*ptr == '\n')) ptr ++;
part_start = NULL;
do {
- char tmp;
-
- tmp = *content_end;
- *content_end = '\0';
+ char *optr;
+
+ optr = ptr;
+ if (parse_MimeHeaders(SubMimeHeaders, &ptr, content_end) != 0)
+ break;
+ if ((ptr - optr > 2) &&
+ (*(ptr - 2) == '\r'))
+ crlf_in_use = 1;
+
+ part_start = ptr;
- next_boundary = strstr(ptr, m->b[startary].Key);
- *content_end = tmp;
+ next_boundary = FindNextContent(ptr,
+ content_end,
+ SubMimeHeaders,
+ m);
+ if ((next_boundary != NULL) &&
+ (next_boundary - part_start < 3)) {
+ FlushInterestingMimes(SubMimeHeaders);
+
+ continue;
+ }
if ( (part_start != NULL) && (next_boundary != NULL) ) {
part_end = next_boundary;
sizeof nested_partnum,
"%d", ++part_seq);
}
- the_mime_parser(nested_partnum,
- part_start,
- part_end,
- CallBack,
- PreMultiPartCallBack,
- PostMultiPartCallBack,
- userdata,
- dont_decode);
+ recurseable_mime_parser(nested_partnum,
+ part_start,
+ part_end,
+ CallBack,
+ PreMultiPartCallBack,
+ PostMultiPartCallBack,
+ userdata,
+ dont_decode,
+ SubMimeHeaders);
}
if (next_boundary != NULL) {
/* If we pass out of scope, don't attempt to
* read past the end boundary. */
- if ((*(next_boundary + m->b[startary].len + 1) == '-') &&
- (*(next_boundary + m->b[startary].len + 2) == '-') ){
+ if ((*(next_boundary + m->b[startary].len) == '-') &&
+ (*(next_boundary + m->b[startary].len + 1) == '-') ){
ptr = content_end;
}
else {
/* Invalid end of multipart. Bail out! */
ptr = content_end;
}
+ FlushInterestingMimes(SubMimeHeaders);
} while ( (ptr < content_end) && (next_boundary != NULL) );
+ free(SubMimeHeaders);
+
if (PostMultiPartCallBack != NULL) {
PostMultiPartCallBack("",
"",
m->b[id].Key,
userdata);
}
- goto end_parser;
- }
-
- /* If it's not a multipart message, then do something with it */
- if (!m->is_multipart) {
+ } /* If it's not a multipart message, then do something with it */
+ else {
+ size_t length;
part_start = ptr;
- length = 0;
- while (ptr < content_end) {
- ++ptr;
- ++length;
- }
- part_end = content_end;
+ length = content_end - part_start;
+ ptr = part_end = content_end;
/* The following code will truncate the MIME part to the size
}
-end_parser: /* free the buffers! end the oppression!! */
- free(m);
}
+/*
+ * Break out the components of a multipart message
+ * (This function expects to be fed HEADERS + CONTENT)
+ * Note: NULL can be supplied as content_end; in this case, the message is
+ * considered to have ended when the parser encounters a 0x00 byte.
+ *
+ * The top-level headers are parsed here; if that succeeds the content is
+ * handed to recurseable_mime_parser() together with the callbacks,
+ * userdata and dont_decode flag, all passed through unchanged.
+ * Nothing is done when content_start is NULL or when the header structure
+ * cannot be allocated.
+ */
+void the_mime_parser(char *partnum,
+		     char *content_start, char *content_end,
+		     MimeParserCallBackType CallBack,
+		     MimeParserCallBackType PreMultiPartCallBack,
+		     MimeParserCallBackType PostMultiPartCallBack,
+		     void *userdata,
+		     int dont_decode)
+{
+	interesting_mime_headers *m;
+
+	/* Without a message there is nothing to measure or parse */
+	if (content_start == NULL) {
+		return;
+	}
+
+	/* If the caller didn't supply an endpointer, generate one by measure */
+	if (content_end == NULL) {
+		content_end = content_start + strlen(content_start);
+	}
+
+	m = InitInterestingMimes();
+	if (m == NULL) {
+		return;
+	}
+
+	if (parse_MimeHeaders(m, &content_start, content_end) == 0) {
+		recurseable_mime_parser(partnum,
+					content_start, content_end,
+					CallBack,
+					PreMultiPartCallBack,
+					PostMultiPartCallBack,
+					userdata,
+					dont_decode,
+					m);
+	}
+	free(m);
+}
/*
* Entry point for the MIME parser.
{
char *MinorPtr;
char *PStart;
-#ifdef _DIRENT_HAVE_D_NAMELEN
- d_namelen = filedir_entry->d_namelen;
+#ifdef _DIRENT_HAVE_D_NAMLEN
+ d_namelen = filedir_entry->d_namlen;
#else
d_namelen = strlen(filedir_entry->d_name);
#endif