X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=libcitadel%2Flib%2Fmime_parser.c;h=1fc037d43ffd77462434eb93f645001f4dbb251a;hb=8659bf61f03724755374145e6211be6bbfe74fda;hp=b7249f79ec215b941643dc1f0abde569ce282dc8;hpb=bab826bb3fa8649d196208fffb4af8fba1a57544;p=citadel.git diff --git a/libcitadel/lib/mime_parser.c b/libcitadel/lib/mime_parser.c index b7249f79e..1fc037d43 100644 --- a/libcitadel/lib/mime_parser.c +++ b/libcitadel/lib/mime_parser.c @@ -2,8 +2,20 @@ * This is the MIME parser for Citadel. * * Copyright (c) 1998-2010 by the citadel.org development team. - * This code is distributed under the GNU General Public License v3. * + * This program is open source software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include @@ -123,7 +135,7 @@ char *fixed_partnum(char *supplied_partnum) { static inline unsigned int _decode_hex(const char *Source) { - int ret = '?'; + unsigned int ret = '?'; unsigned char LO_NIBBLE; unsigned char HI_NIBBLE; @@ -166,7 +178,6 @@ int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) { } else { - ch = 0; ch = _decode_hex(&encoded[pos]); pos += 2; decoded[decoded_length++] = ch; @@ -205,11 +216,13 @@ void mime_decode(char *partnum, /* Some encodings aren't really encodings */ if (!strcasecmp(encoding, "7bit")) - strcpy(encoding, ""); + *encoding = '\0'; if (!strcasecmp(encoding, "8bit")) - strcpy(encoding, ""); + *encoding = '\0'; if (!strcasecmp(encoding, "binary")) - strcpy(encoding, ""); + *encoding = '\0'; + if (!strcasecmp(encoding, "ISO-8859-1")) + *encoding = '\0'; /* If this part is not encoded, send as-is */ if ( (strlen(encoding) == 0) || (dont_decode)) { @@ -254,9 +267,20 @@ void mime_decode(char *partnum, } if (bytes_decoded > 0) if (CallBack != NULL) { - CallBack(name, filename, fixed_partnum(partnum), - disposition, decoded, - content_type, charset, bytes_decoded, "binary", id, userdata); + char encoding_buf[SIZ]; + + strcpy(encoding_buf, "binary"); + CallBack(name, + filename, + fixed_partnum(partnum), + disposition, + decoded, + content_type, + charset, + bytes_decoded, + encoding_buf, + id, + userdata); } free(decoded); @@ -280,11 +304,11 @@ int mime_decode_now (char *part_start, *decoded = NULL; /* Some encodings aren't really encodings */ if (!strcasecmp(encoding, "7bit")) - strcpy(encoding, ""); + *encoding = '\0'; if (!strcasecmp(encoding, "8bit")) - strcpy(encoding, ""); + *encoding = '\0'; if (!strcasecmp(encoding, "binary")) - strcpy(encoding, ""); + *encoding = '\0'; /* If this part is not encoded, send as-is */ if (strlen(encoding) == 0) { @@ -346,40 +370,45 @@ typedef struct _interesting_mime_headers { long is_multipart; } interesting_mime_headers; -interesting_mime_headers *InitInterestingMimes(void) + +static void FlushInterestingMimes(interesting_mime_headers *m) { int i; - interesting_mime_headers *m; - m = (interesting_mime_headers*) malloc( sizeof(interesting_mime_headers)); for (i = 0; i < eMax; i++) { m->b[i].Key[0] = '\0'; m->b[i].len = 0; } m->content_length = -1; - return m; } +static interesting_mime_headers *InitInterestingMimes(void) +{ + interesting_mime_headers *m; + m = (interesting_mime_headers*) malloc( sizeof(interesting_mime_headers)); + FlushInterestingMimes(m); + return m; +} -long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *content_end) + +static long parse_MimeHeaders(interesting_mime_headers *m, + char** pcontent_start, + char *content_end) { char buf[SIZ]; char header[SIZ]; long headerlen; - char *ptr; - int buflen; + char *ptr, *pch; + int buflen = 0; int i; /* Learn interesting things from the headers */ - ptr = content_start; + ptr = *pcontent_start; *header = '\0'; headerlen = 0; do { ptr = memreadlinelen(ptr, buf, SIZ, &buflen); - if (ptr >= content_end) { - return -1; - } for (i = 0; i < buflen; ++i) { if (isspace(buf[i])) { @@ -387,9 +416,10 @@ long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *c } } - if (!isspace(buf[0])) { + if (!isspace(buf[0]) && (headerlen > 0)) { if (!strncasecmp(header, "Content-type:", 13)) { memcpy (m->b[content_type].Key, &header[13], headerlen - 12); + m->b[content_type].Key[headerlen - 12] = '\0'; m->b[content_type].len = striplt (m->b[content_type].Key); m->b[content_type_name].len = extract_key(m->b[content_type_name].Key, CKEY(m->b[content_type]), HKEY("name"), '='); @@ -397,32 +427,44 @@ long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *c m->b[boundary].len = extract_key(m->b[boundary].Key, header, headerlen, HKEY("boundary"), '='); /* Deal with weird headers */ - if (strchr(m->b[content_type].Key, ' ')) - *(strchr(m->b[content_type].Key, ' ')) = '\0'; - if (strchr(m->b[content_type].Key, ';')) - *(strchr(m->b[content_type].Key, ';')) = '\0'; + pch = strchr(m->b[content_type].Key, ' '); + if (pch != NULL) { + *pch = '\0'; + m->b[content_type].len = m->b[content_type].Key - pch; + } + pch = strchr(m->b[content_type].Key, ';'); + if (pch != NULL) { + *pch = '\0'; + m->b[content_type].len = m->b[content_type].Key - pch; + } } else if (!strncasecmp(header, "Content-Disposition:", 20)) { memcpy (m->b[disposition].Key, &header[20], headerlen - 19); + m->b[disposition].Key[headerlen - 19] = '\0'; m->b[disposition].len = striplt(m->b[disposition].Key); m->b[content_disposition_name].len = extract_key(m->b[content_disposition_name].Key, CKEY(m->b[disposition]), HKEY("name"), '='); m->b[filename].len = extract_key(m->b[filename].Key, CKEY(m->b[disposition]), HKEY("filename"), '='); + pch = strchr(m->b[disposition].Key, ';'); + if (pch != NULL) *pch = '\0'; + m->b[disposition].len = striplt(m->b[disposition].Key); } else if (!strncasecmp(header, "Content-ID:", 11)) { - memcpy(m->b[id].Key, &header[11], headerlen); + memcpy(m->b[id].Key, &header[11], headerlen - 11); + m->b[id].Key[headerlen - 11] = '\0'; striplt(m->b[id].Key); m->b[id].len = stripallbut(m->b[id].Key, '<', '>'); } else if (!strncasecmp(header, "Content-length: ", 15)) { char *clbuf; clbuf = &header[15]; - while (isspace(clbuf)) + while (isspace(*clbuf)) clbuf ++; m->content_length = (size_t) atol(clbuf); } else if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) { memcpy(m->b[encoding].Key, &header[26], headerlen - 26); + m->b[encoding].Key[headerlen - 26] = '\0'; m->b[encoding].len = striplt(m->b[encoding].Key); } *header = '\0'; @@ -433,66 +475,135 @@ long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *c headerlen += buflen; header[headerlen] = '\0'; } + if (ptr >= content_end) { + return -1; + } } while ((!IsEmptyStr(buf)) && (*ptr != 0)); - ptr = strchr(m->b[disposition].Key, ';'); - if (ptr != NULL) *ptr = '\0'; - m->b[disposition].len = striplt(m->b[disposition].Key); + m->is_multipart = m->b[boundary].len != 0; + *pcontent_start = ptr; - ptr = strchr(m->b[content_type].Key, ';'); - if (ptr != NULL) *ptr = '\0'; - m->b[content_type].len = striplt(m->b[content_type].Key); + return 0; +} - m->is_multipart = m->b[boundary].len != 0; + +static int IsAsciiEncoding(interesting_mime_headers *m) +{ + + if ((m->b[encoding].len != 0) && + (strcasecmp(m->b[encoding].Key, "base64") == 0)) + return 1; + if ((m->b[encoding].len != 0) && + (strcmp(m->b[encoding].Key, "quoted-printable") == 0)) + return 1; return 0; } +static char *FindNextContent(char *ptr, + char *content_end, + interesting_mime_headers *SubMimeHeaders, + interesting_mime_headers *m) +{ + char *next_boundary; + char tmp; + + if (IsAsciiEncoding(SubMimeHeaders)) { + tmp = *content_end; + *content_end = '\0'; + + /** + * ok, if we have a content length of the mime part, + * try skipping the content on the search for the next + * boundary. since we don't trust the content_length + * to be all accurate, and suspect it to lose one digit + * per line with a line length of 80 chars, we need + * to start searching a little before.. + */ + + if ((SubMimeHeaders->content_length != -1) && + (SubMimeHeaders->content_length > 10)) + { + char *pptr; + long lines; + + lines = SubMimeHeaders->content_length / 80; + pptr = ptr + SubMimeHeaders->content_length - lines - 10; + if (pptr < content_end) + ptr = pptr; + } + + next_boundary = strstr(ptr, m->b[startary].Key); + *content_end = tmp; + } + else { + char *srch; + /** + * ok, if we have a content length of the mime part, + * try skipping the content on the search for the next + * boundary. since we don't trust the content_length + * to be all accurate, start searching a little before.. + */ + + if ((SubMimeHeaders->content_length != -1) && + (SubMimeHeaders->content_length > 10)) + { + char *pptr; + pptr = ptr + SubMimeHeaders->content_length - 10; + if (pptr < content_end) + ptr = pptr; + } + + + srch = next_boundary = NULL; + for (srch = memchr(ptr, '-', content_end - ptr); + (srch != NULL) && (srch < content_end); + srch = memchr(srch, '-', content_end - srch)) + { + if (!memcmp(srch, + m->b[startary].Key, + m->b[startary].len)) + { + next_boundary = srch; + srch = content_end; + } + else srch ++; + + } + + } + return next_boundary; +} + /* * Break out the components of a multipart message * (This function expects to be fed HEADERS + CONTENT) * Note: NULL can be supplied as content_end; in this case, the message is * considered to have ended when the parser encounters a 0x00 byte. */ -void the_mime_parser(char *partnum, - char *content_start, char *content_end, - MimeParserCallBackType CallBack, - MimeParserCallBackType PreMultiPartCallBack, - MimeParserCallBackType PostMultiPartCallBack, - void *userdata, - int dont_decode) +static void recurseable_mime_parser(char *partnum, + char *content_start, char *content_end, + MimeParserCallBackType CallBack, + MimeParserCallBackType PreMultiPartCallBack, + MimeParserCallBackType PostMultiPartCallBack, + void *userdata, + int dont_decode, + interesting_mime_headers *m) { - - char *ptr; - char *part_start, *part_end = NULL; - char *next_boundary; - - size_t content_length; - int part_seq = 0; - size_t length; - char nested_partnum[256]; - int crlf_in_use = 0; - char *evaluate_crlf_ptr = NULL; - - interesting_mime_headers *m; - CBufStr *chosen_name; - - ptr = content_start; - content_length = 0; - - m = InitInterestingMimes(); - - - /* If the caller didn't supply an endpointer, generate one by measure */ - if (content_end == NULL) { - content_end = &content_start[strlen(content_start)]; - } + interesting_mime_headers *SubMimeHeaders; + char *ptr; + char *part_start; + char *part_end = NULL; + char *evaluate_crlf_ptr = NULL; + char *next_boundary; + char nested_partnum[256]; + int crlf_in_use = 0; + int part_seq = 0; + CBufStr *chosen_name; - if (parse_MimeHeaders(m, content_start, content_end) != 0) - goto end_parser; - /* If this is a multipart message, then recursively process it */ + ptr = content_start; part_start = NULL; if (m->is_multipart) { @@ -513,16 +624,38 @@ void the_mime_parser(char *partnum, /* Figure out where the boundaries are */ m->b[startary].len = snprintf(m->b[startary].Key, SIZ, "--%s", m->b[boundary].Key); + SubMimeHeaders = InitInterestingMimes (); + + while ((*ptr == '\r') || (*ptr == '\n')) ptr ++; + + if (strncmp(ptr, m->b[startary].Key, m->b[startary].len) == 0) + ptr += m->b[startary].len; + + while ((*ptr == '\r') || (*ptr == '\n')) ptr ++; part_start = NULL; do { - char tmp; - - tmp = *content_end; - *content_end = '\0'; + char *optr; + + optr = ptr; + if (parse_MimeHeaders(SubMimeHeaders, &ptr, content_end) != 0) + break; + if ((ptr - optr > 2) && + (*(ptr - 2) == '\r')) + crlf_in_use = 1; + + part_start = ptr; - next_boundary = strstr(ptr, m->b[startary].Key); - *content_end = tmp; + next_boundary = FindNextContent(ptr, + content_end, + SubMimeHeaders, + m); + if ((next_boundary != NULL) && + (next_boundary - part_start < 3)) { + FlushInterestingMimes(SubMimeHeaders); + + continue; + } if ( (part_start != NULL) && (next_boundary != NULL) ) { part_end = next_boundary; @@ -542,21 +675,22 @@ void the_mime_parser(char *partnum, sizeof nested_partnum, "%d", ++part_seq); } - the_mime_parser(nested_partnum, - part_start, - part_end, - CallBack, - PreMultiPartCallBack, - PostMultiPartCallBack, - userdata, - dont_decode); + recurseable_mime_parser(nested_partnum, + part_start, + part_end, + CallBack, + PreMultiPartCallBack, + PostMultiPartCallBack, + userdata, + dont_decode, + SubMimeHeaders); } if (next_boundary != NULL) { /* If we pass out of scope, don't attempt to * read past the end boundary. */ - if ((*(next_boundary + m->b[startary].len + 1) == '-') && - (*(next_boundary + m->b[startary].len + 2) == '-') ){ + if ((*(next_boundary + m->b[startary].len) == '-') && + (*(next_boundary + m->b[startary].len + 1) == '-') ){ ptr = content_end; } else { @@ -584,8 +718,11 @@ void the_mime_parser(char *partnum, /* Invalid end of multipart. Bail out! */ ptr = content_end; } + FlushInterestingMimes(SubMimeHeaders); } while ( (ptr < content_end) && (next_boundary != NULL) ); + free(SubMimeHeaders); + if (PostMultiPartCallBack != NULL) { PostMultiPartCallBack("", "", @@ -599,18 +736,12 @@ void the_mime_parser(char *partnum, m->b[id].Key, userdata); } - goto end_parser; - } - - /* If it's not a multipart message, then do something with it */ - if (!m->is_multipart) { + } /* If it's not a multipart message, then do something with it */ + else { + size_t length; part_start = ptr; - length = 0; - while (ptr < content_end) { - ++ptr; - ++length; - } - part_end = content_end; + length = content_end - part_start; + ptr = part_end = content_end; /* The following code will truncate the MIME part to the size @@ -710,11 +841,45 @@ void the_mime_parser(char *partnum, } -end_parser: /* free the buffers! end the oppression!! */ - free(m); } +/* + * Break out the components of a multipart message + * (This function expects to be fed HEADERS + CONTENT) + * Note: NULL can be supplied as content_end; in this case, the message is + * considered to have ended when the parser encounters a 0x00 byte. + */ +void the_mime_parser(char *partnum, + char *content_start, char *content_end, + MimeParserCallBackType CallBack, + MimeParserCallBackType PreMultiPartCallBack, + MimeParserCallBackType PostMultiPartCallBack, + void *userdata, + int dont_decode) +{ + interesting_mime_headers *m; + + /* If the caller didn't supply an endpointer, generate one by measure */ + if (content_end == NULL) { + content_end = &content_start[strlen(content_start)]; + } + + m = InitInterestingMimes(); + + if (!parse_MimeHeaders(m, &content_start, content_end)) + { + recurseable_mime_parser(partnum, + content_start, content_end, + CallBack, + PreMultiPartCallBack, + PostMultiPartCallBack, + userdata, + dont_decode, + m); + } + free(m); +} /* * Entry point for the MIME parser. @@ -892,8 +1057,8 @@ int LoadIconDir(const char *DirName) { char *MinorPtr; char *PStart; -#ifdef _DIRENT_HAVE_D_NAMELEN - d_namelen = filedir_entry->d_namelen; +#ifdef _DIRENT_HAVE_D_NAMLEN + d_namelen = filedir_entry->d_namlen; #else d_namelen = strlen(filedir_entry->d_name); #endif