X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmime_parser.c;h=f7b9681a0ad9e40bf62307ea94b513dc7a335cad;hb=01cc19a4c2da27b4db0e980ccd3ca54d834319c8;hp=9e020298db3b3b664505f6850aec86425ca0bf09;hpb=4d636fc697b2632a816b4ce2dcaf00a9cbd2f4c1;p=citadel.git diff --git a/citadel/mime_parser.c b/citadel/mime_parser.c index 9e020298d..f7b9681a0 100644 --- a/citadel/mime_parser.c +++ b/citadel/mime_parser.c @@ -1,17 +1,13 @@ /* * $Id$ * - * This is the MIME parser for Citadel. Sometimes it actually works. + * This is the MIME parser for Citadel. * - * Copyright (c) 1998-2005 by Art Cancro - * This code is distributed under the terms of the GNU General Public License. + * Copyright (c) 1998-2006 by Art Cancro + * This code is distributed under the GNU General Public License v2. * */ -#ifdef DLL_EXPORT -#define IN_LIBCIT -#endif - #include #include #include @@ -24,7 +20,6 @@ #include "citadel.h" #include "server.h" -#include "serv_extensions.h" #include "sysdep_decls.h" #include "tools.h" @@ -66,68 +61,41 @@ char *fixed_partnum(char *supplied_partnum) { /* * Convert "quoted-printable" to binary. Returns number of bytes decoded. + * according to RFC2045 section 6.7 */ int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) { - char buf[SIZ]; - int buf_length = 0; - int soft_line_break = 0; unsigned int ch; int decoded_length = 0; - int i; - - decoded[0] = 0; - decoded_length = 0; - buf[0] = 0; - buf_length = 0; - - for (i = 0; i < sourcelen; ++i) { - - buf[buf_length++] = encoded[i]; - - if ( (encoded[i] == '\n') - || (encoded[i] == 0) - || (i == (sourcelen-1)) ) { - buf[buf_length++] = 0; + int pos = 0; - /*** begin -- process one line ***/ - - if (buf[strlen(buf)-1] == '\n') { - buf[strlen(buf)-1] = 0; - } - if (buf[strlen(buf)-1] == '\r') { - buf[strlen(buf)-1] = 0; - } - while (isspace(buf[strlen(buf)-1])) { - buf[strlen(buf)-1] = 0; - } - soft_line_break = 0; - - while (strlen(buf) > 0) { - if (!strcmp(buf, "=")) { - soft_line_break = 1; - strcpy(buf, ""); - } else if ((strlen(buf)>=3) && (buf[0]=='=')) { - sscanf(&buf[1], "%02x", &ch); - decoded[decoded_length++] = ch; - strcpy(buf, &buf[3]); - } else { - decoded[decoded_length++] = buf[0]; - strcpy(buf, &buf[1]); - } - } - if (soft_line_break == 0) { - decoded[decoded_length++] = '\r'; - decoded[decoded_length++] = '\n'; - } - buf_length = 0; - /*** end -- process one line ***/ + while (pos < sourcelen) + { + if (!strncmp(&encoded[pos], "=\r\n", 3)) + { + pos += 3; + } + else if (!strncmp(&encoded[pos], "=\n", 2)) + { + pos += 2; + } + else if (encoded[pos] == '=') + { + ch = 0; + sscanf(&encoded[pos+1], "%02x", &ch); + pos += 3; + decoded[decoded_length++] = ch; + } + else + { + decoded[decoded_length++] = encoded[pos]; + pos += 1; } } - - decoded[decoded_length++] = 0; + decoded[decoded_length] = 0; return(decoded_length); } + /* * Given a message or message-part body and a length, handle any necessary * decoding and pass the request up the stack. @@ -196,17 +164,19 @@ void mime_decode(char *partnum, return; } + /* Fail silently if we hit an unknown encoding. */ if ((strcasecmp(encoding, "base64")) && (strcasecmp(encoding, "quoted-printable"))) { return; } + /* - * Allocate a buffer for the decoded data. The output buffer is the - * same size as the input buffer; this assumes that the decoded data - * will never be larger than the encoded data. This is a safe - * assumption with base64, uuencode, and quoted-printable. + * Allocate a buffer for the decoded data. The output buffer is slightly + * larger than the input buffer; this assumes that the decoded data + * will never be significantly larger than the encoded data. This is a + * safe assumption with base64, uuencode, and quoted-printable. */ - decoded = malloc(length+2048); + decoded = malloc(length + 32768); if (decoded == NULL) { return; } @@ -215,8 +185,7 @@ void mime_decode(char *partnum, bytes_decoded = CtdlDecodeBase64(decoded, part_start, length); } else if (!strcasecmp(encoding, "quoted-printable")) { - bytes_decoded = CtdlDecodeQuotedPrintable(decoded, - part_start, length); + bytes_decoded = CtdlDecodeQuotedPrintable(decoded, part_start, length); } if (bytes_decoded > 0) if (CallBack != NULL) { @@ -275,11 +244,13 @@ void the_mime_parser(char *partnum, { char *ptr; + char *srch = NULL; char *part_start, *part_end = NULL; char buf[SIZ]; char *header; char *boundary; char *startary; + size_t startary_len = 0; char *endary; char *next_boundary; char *content_type; @@ -295,7 +266,9 @@ void the_mime_parser(char *partnum, int part_seq = 0; int i; size_t length; - char nested_partnum[SIZ]; + char nested_partnum[256]; + int crlf_in_use = 0; + char *evaluate_crlf_ptr = NULL; ptr = content_start; content_length = 0; @@ -353,8 +326,9 @@ void the_mime_parser(char *partnum, } if (!isspace(buf[0])) { - if (!strncasecmp(header, "Content-type: ", 14)) { - strcpy(content_type, &header[14]); + if (!strncasecmp(header, "Content-type:", 13)) { + strcpy(content_type, &header[13]); + striplt(content_type); extract_key(content_type_name, content_type, "name"); extract_key(charset, content_type, "charset"); /* Deal with weird headers */ @@ -363,23 +337,29 @@ void the_mime_parser(char *partnum, if (strchr(content_type, ';')) *(strchr(content_type, ';')) = '\0'; } - if (!strncasecmp(header, "Content-Disposition: ", 21)) { - strcpy(disposition, &header[21]); + if (!strncasecmp(header, "Content-Disposition:", 20)) { + strcpy(disposition, &header[20]); + striplt(disposition); extract_key(content_disposition_name, disposition, "name"); extract_key(filename, disposition, "filename"); } - if (!strncasecmp(header, "Content-length: ", 16)) { - content_length = (size_t) atol(&header[16]); + if (!strncasecmp(header, "Content-length: ", 15)) { + char clbuf[10]; + safestrncpy(clbuf, &header[15], sizeof clbuf); + striplt(clbuf); + content_length = (size_t) atol(clbuf); + } + if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) { + strcpy(encoding, &header[26]); + striplt(encoding); } - if (!strncasecmp(header, - "Content-transfer-encoding: ", 27)) - strcpy(encoding, &header[27]); if (strlen(boundary) == 0) extract_key(boundary, header, "boundary"); strcpy(header, ""); } - if ((strlen(header) + strlen(buf) + 2) < SIZ) + if ((strlen(header) + strlen(buf) + 2) < SIZ) { strcat(header, buf); + } } while ((strlen(buf) > 0) && (*ptr != 0)); if (strchr(disposition, ';')) @@ -389,13 +369,6 @@ void the_mime_parser(char *partnum, *(strchr(content_type, ';')) = '\0'; striplt(content_type); - if (!strlen(content_type)) { - strcpy(content_type, "text/plain"); - } - if (!strlen(charset)) { - strcpy(charset, "us-ascii"); - } - if (strlen(boundary) > 0) { is_multipart = 1; } else { @@ -416,13 +389,24 @@ void the_mime_parser(char *partnum, /* Figure out where the boundaries are */ snprintf(startary, SIZ, "--%s", boundary); snprintf(endary, SIZ, "--%s--", boundary); + startary_len = strlen(startary); part_start = NULL; do { - next_boundary = bmstrstr(ptr, startary, strncmp); + next_boundary = NULL; + for (srch=ptr; srch 0) { snprintf(nested_partnum, @@ -445,14 +429,26 @@ void the_mime_parser(char *partnum, } if (next_boundary != NULL) { - /* If we pass out of scope, don't attempt to read - * past the end boundary. */ + /* If we pass out of scope, don't attempt to + * read past the end boundary. */ if (!strcmp(next_boundary, endary)) { ptr = content_end; } else { /* Set up for the next part. */ part_start = strstr(next_boundary, "\n"); + + /* Determine whether newlines are LF or CRLF */ + evaluate_crlf_ptr = part_start; + --evaluate_crlf_ptr; + if (!memcmp(evaluate_crlf_ptr, "\r\n", 2)) { + crlf_in_use = 1; + } + else { + crlf_in_use = 0; + } + + /* Advance past the LF ... now we're in the next part */ ++part_start; ptr = part_start; } @@ -479,9 +475,14 @@ void the_mime_parser(char *partnum, ++length; } part_end = content_end; - /* fix an off-by-one error */ - --part_end; - --length; + + /****** + * I thought there was an off-by-one error here, but there isn't. + * This probably means that there's an off-by-one error somewhere + * else ... or maybe only in certain messages? + --part_end; + --length; + ******/ /* Truncate if the header told us to */ if ( (content_length > 0) && (length > content_length) ) { @@ -498,13 +499,59 @@ void the_mime_parser(char *partnum, else { name = content_type_name; } - + + /* lprintf(CTDL_DEBUG, "mime_decode part=%s, len=%d, type=%s, charset=%s, encoding=%s\n", + partnum, length, content_type, charset, encoding); */ + + /* Ok, we've got a non-multipart part here, so do something with it. + */ mime_decode(partnum, - part_start, length, - content_type, charset, encoding, disposition, - name, filename, - CallBack, NULL, NULL, - userdata, dont_decode); + part_start, length, + content_type, charset, encoding, disposition, + name, filename, + CallBack, NULL, NULL, + userdata, dont_decode + ); + + /* + * Now if it's an encapsulated message/rfc822 then we have to recurse into it + */ + if (!strcasecmp(content_type, "message/rfc822")) { + + if (PreMultiPartCallBack != NULL) { + PreMultiPartCallBack("", "", partnum, "", + NULL, content_type, charset, + 0, encoding, userdata); + } + if (CallBack != NULL) { + if (strlen(partnum) > 0) { + snprintf(nested_partnum, + sizeof nested_partnum, + "%s.%d", partnum, + ++part_seq); + } + else { + snprintf(nested_partnum, + sizeof nested_partnum, + "%d", ++part_seq); + } + the_mime_parser(nested_partnum, + part_start, part_end, + CallBack, + PreMultiPartCallBack, + PostMultiPartCallBack, + userdata, + dont_decode + ); + } + if (PostMultiPartCallBack != NULL) { + PostMultiPartCallBack("", "", partnum, "", NULL, + content_type, charset, 0, encoding, userdata); + } + + + } + } end_parser: /* free the buffers! end the oppression!! */