more f*n places for d_nam<e>len
[citadel.git] / libcitadel / lib / mime_parser.c
index b7249f79ec215b941643dc1f0abde569ce282dc8..1fc037d43ffd77462434eb93f645001f4dbb251a 100644 (file)
@@ -2,8 +2,20 @@
  * This is the MIME parser for Citadel.
  *
  * Copyright (c) 1998-2010 by the citadel.org development team.
- * This code is distributed under the GNU General Public License v3.
  *
+ * This program is open source software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #include <stdlib.h>
@@ -123,7 +135,7 @@ char *fixed_partnum(char *supplied_partnum) {
 
 static inline unsigned int _decode_hex(const char *Source)
 {
-       int ret = '?';
+       unsigned int ret = '?';
        unsigned char LO_NIBBLE;
        unsigned char HI_NIBBLE;
 
@@ -166,7 +178,6 @@ int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
                        }
                        else
                        {
-                               ch = 0;
                                ch = _decode_hex(&encoded[pos]);
                                pos += 2;
                                decoded[decoded_length++] = ch;
@@ -205,11 +216,13 @@ void mime_decode(char *partnum,
 
        /* Some encodings aren't really encodings */
        if (!strcasecmp(encoding, "7bit"))
-               strcpy(encoding, "");
+               *encoding = '\0';
        if (!strcasecmp(encoding, "8bit"))
-               strcpy(encoding, "");
+               *encoding = '\0';
        if (!strcasecmp(encoding, "binary"))
-               strcpy(encoding, "");
+               *encoding = '\0';
+       if (!strcasecmp(encoding, "ISO-8859-1"))
+               *encoding = '\0';
 
        /* If this part is not encoded, send as-is */
        if ( (strlen(encoding) == 0) || (dont_decode)) {
@@ -254,9 +267,20 @@ void mime_decode(char *partnum,
        }
 
        if (bytes_decoded > 0) if (CallBack != NULL) {
-               CallBack(name, filename, fixed_partnum(partnum),
-                       disposition, decoded,
-                       content_type, charset, bytes_decoded, "binary", id, userdata);
+                       char encoding_buf[SIZ];
+
+                       strcpy(encoding_buf, "binary");
+                       CallBack(name, 
+                                filename, 
+                                fixed_partnum(partnum),
+                                disposition, 
+                                decoded,
+                                content_type, 
+                                charset, 
+                                bytes_decoded, 
+                                encoding_buf, 
+                                id, 
+                                userdata);
        }
 
        free(decoded);
@@ -280,11 +304,11 @@ int mime_decode_now (char *part_start,
        *decoded = NULL;
        /* Some encodings aren't really encodings */
        if (!strcasecmp(encoding, "7bit"))
-               strcpy(encoding, "");
+               *encoding = '\0';
        if (!strcasecmp(encoding, "8bit"))
-               strcpy(encoding, "");
+               *encoding = '\0';
        if (!strcasecmp(encoding, "binary"))
-               strcpy(encoding, "");
+               *encoding = '\0';
 
        /* If this part is not encoded, send as-is */
        if (strlen(encoding) == 0) {
@@ -346,40 +370,45 @@ typedef struct _interesting_mime_headers {
        long is_multipart;
 } interesting_mime_headers;
 
-interesting_mime_headers *InitInterestingMimes(void)
+/*
+ * Reset an interesting_mime_headers record so that it can be reused:
+ * every slot m->b[0 .. eMax-1] gets an empty Key and a len of 0, and
+ * content_length is set to -1 (FindNextContent() tests for -1 before it
+ * uses the length as a skip hint).  m->is_multipart is not written here.
+ * m must point to a valid record; it is dereferenced unconditionally.
+ */
+static void FlushInterestingMimes(interesting_mime_headers *m)
 {
        int i;
-       interesting_mime_headers *m;
-       m = (interesting_mime_headers*) malloc( sizeof(interesting_mime_headers));
        
        for (i = 0; i < eMax; i++) {
             m->b[i].Key[0] = '\0';
             m->b[i].len = 0;
        }
        m->content_length = -1;
-       return m;
 }
+/*
+ * Allocate a new interesting_mime_headers record and put it into the
+ * "nothing parsed yet" state via FlushInterestingMimes().
+ *
+ * Returns the new record, which the caller must release with free(),
+ * or NULL if the allocation failed.
+ */
+static interesting_mime_headers *InitInterestingMimes(void)
+{
+       interesting_mime_headers *m;
+
+       m = malloc(sizeof *m);
+       if (m == NULL)
+               return NULL;    /* never hand NULL to FlushInterestingMimes() */
 
+       FlushInterestingMimes(m);
 
+       return m;
+}
 
-long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *content_end)
+
+static long parse_MimeHeaders(interesting_mime_headers *m, 
+                             char** pcontent_start, 
+                             char *content_end)
 {
        char buf[SIZ];
        char header[SIZ];
        long headerlen;
-       char *ptr;
-       int buflen;
+       char *ptr, *pch;
+       int buflen = 0;
        int i;
 
        /* Learn interesting things from the headers */
-       ptr = content_start;
+       ptr = *pcontent_start;
        *header = '\0';
        headerlen = 0;
        do {
                ptr = memreadlinelen(ptr, buf, SIZ, &buflen);
-               if (ptr >= content_end) {
-                       return -1;
-               }
 
                for (i = 0; i < buflen; ++i) {
                        if (isspace(buf[i])) {
@@ -387,9 +416,10 @@ long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *c
                        }
                }
 
-               if (!isspace(buf[0])) {
+               if (!isspace(buf[0]) && (headerlen > 0)) {
                        if (!strncasecmp(header, "Content-type:", 13)) {
                                memcpy (m->b[content_type].Key, &header[13], headerlen - 12);
+                               m->b[content_type].Key[headerlen - 12] = '\0';
                                m->b[content_type].len = striplt (m->b[content_type].Key);
 
                                m->b[content_type_name].len = extract_key(m->b[content_type_name].Key, CKEY(m->b[content_type]), HKEY("name"), '=');
@@ -397,32 +427,44 @@ long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *c
                                m->b[boundary].len          = extract_key(m->b[boundary].Key,          header,       headerlen,  HKEY("boundary"), '=');
 
                                /* Deal with weird headers */
-                               if (strchr(m->b[content_type].Key, ' '))
-                                       *(strchr(m->b[content_type].Key, ' ')) = '\0';
-                               if (strchr(m->b[content_type].Key, ';'))
-                                       *(strchr(m->b[content_type].Key, ';')) = '\0';
+                               pch = strchr(m->b[content_type].Key, ' ');
+                               if (pch != NULL) {
+                                       *pch = '\0';
+                                       m->b[content_type].len = m->b[content_type].Key - pch;
+                               }
+                               pch = strchr(m->b[content_type].Key, ';');
+                               if (pch != NULL) {
+                                       *pch = '\0';
+                                       m->b[content_type].len = m->b[content_type].Key - pch;
+                               }
                        }
                        else if (!strncasecmp(header, "Content-Disposition:", 20)) {
                                memcpy (m->b[disposition].Key, &header[20], headerlen - 19);
+                               m->b[disposition].Key[headerlen - 19] = '\0';
                                m->b[disposition].len = striplt(m->b[disposition].Key);
 
                                m->b[content_disposition_name].len = extract_key(m->b[content_disposition_name].Key, CKEY(m->b[disposition]), HKEY("name"), '=');
                                m->b[filename].len                 = extract_key(m->b[filename].Key,                 CKEY(m->b[disposition]), HKEY("filename"), '=');
+                               pch = strchr(m->b[disposition].Key, ';');
+                               if (pch != NULL) *pch = '\0';
+                               m->b[disposition].len = striplt(m->b[disposition].Key);
                        }
                        else if (!strncasecmp(header, "Content-ID:", 11)) {
-                               memcpy(m->b[id].Key, &header[11], headerlen);
+                               memcpy(m->b[id].Key, &header[11], headerlen - 11);
+                               m->b[id].Key[headerlen - 11] = '\0';
                                striplt(m->b[id].Key);
                                m->b[id].len = stripallbut(m->b[id].Key, '<', '>');
                        }
                        else if (!strncasecmp(header, "Content-length: ", 15)) {
                                char *clbuf;
                                clbuf = &header[15];
-                               while (isspace(clbuf))
+                               while (isspace(*clbuf))
                                        clbuf ++;
                                m->content_length = (size_t) atol(clbuf);
                        }
                        else if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
                                memcpy(m->b[encoding].Key, &header[26], headerlen - 26);
+                               m->b[encoding].Key[headerlen - 26] = '\0';
                                m->b[encoding].len = striplt(m->b[encoding].Key);
                        }
                        *header = '\0';
@@ -433,66 +475,135 @@ long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *c
                        headerlen += buflen;
                        header[headerlen] = '\0';
                }
+               if (ptr >= content_end) {
+                       return -1;
+               }
        } while ((!IsEmptyStr(buf)) && (*ptr != 0));
 
-       ptr = strchr(m->b[disposition].Key, ';');
-       if (ptr != NULL) *ptr = '\0';
-       m->b[disposition].len = striplt(m->b[disposition].Key);
+       m->is_multipart = m->b[boundary].len != 0;
+       *pcontent_start = ptr;
 
-       ptr = strchr(m->b[content_type].Key, ';');
-       if (ptr != NULL) *ptr = '\0';
-       m->b[content_type].len = striplt(m->b[content_type].Key);
+       return 0;
+}
 
-       m->is_multipart = m->b[boundary].len != 0;
+
+/*
+ * Tell whether a part's Content-transfer-encoding is one of the two
+ * text encodings this parser knows about: "base64" or "quoted-printable".
+ *
+ * m   parsed headers of the part; only m->b[encoding] is inspected
+ *
+ * Returns 1 for base64 / quoted-printable, 0 for anything else,
+ * including a part without an encoding header (len == 0).
+ */
+static int IsAsciiEncoding(interesting_mime_headers *m)
+{
+       const char *enc = m->b[encoding].Key;
+
+       if (m->b[encoding].len == 0)
+               return 0;
+
+       /* Encoding names are case-insensitive (RFC 2045, section 6.1), so
+        * "Quoted-Printable" has to match the same way "BASE64" does. */
+       if (strcasecmp(enc, "base64") == 0)
+               return 1;
+       if (strcasecmp(enc, "quoted-printable") == 0)
+               return 1;
 
        return 0;
 }
 
+/*
+ * Locate the next occurrence of the multipart delimiter ("--" followed by
+ * the boundary, kept in m->b[startary]) inside [ptr, content_end).
+ *
+ * ptr             where the body of the current sub-part starts
+ * content_end     one past the last byte that may be examined; the byte
+ *                 it points at is overwritten temporarily and restored
+ * SubMimeHeaders  headers of the current sub-part: the encoding selects
+ *                 the search strategy, and content_length (if present)
+ *                 is used as a hint to skip most of the body
+ * m               headers of the enclosing multipart (delimiter source)
+ *
+ * Returns a pointer to the delimiter, or NULL if there is none in range.
+ */
+static char *FindNextContent(char *ptr,
+                            char *content_end,
+                            interesting_mime_headers *SubMimeHeaders,
+                            interesting_mime_headers *m)
+{
+       char   *next_boundary = NULL;
+       char   *srch;
+       char    tmp;
+       size_t  avail;
+       size_t  skip;
+       size_t  marklen;
+       int     is_ascii;
+
+       /* Empty window: nothing to find, and the sizes below stay sane. */
+       if (ptr >= content_end)
+               return NULL;
+       avail = (size_t) (content_end - ptr);
+       is_ascii = IsAsciiEncoding(SubMimeHeaders);
+
+       /*
+        * If the part announced a content length, skip most of the body
+        * before searching.  The header is not trusted to be exact, so
+        * the search starts 10 bytes early; for base64/quoted-printable
+        * one more byte per 80 announced bytes is given back (roughly
+        * one per line).  The skip is compared with the number of bytes
+        * really available *before* ptr is moved, so a bogus length can
+        * never produce a pointer outside the buffer.
+        */
+       if ((SubMimeHeaders->content_length != -1) &&
+           (SubMimeHeaders->content_length > 10))
+       {
+               skip = (size_t) SubMimeHeaders->content_length - 10;
+               if (is_ascii)
+                       skip -= (size_t) SubMimeHeaders->content_length / 80;
+               if (skip < avail)
+                       ptr += skip;
+       }
+
+       if (is_ascii) {
+               /* Text body: terminate the window for a moment and let
+                * strstr() do the search, then put the byte back. */
+               tmp = *content_end;
+               *content_end = '\0';
+               next_boundary = strstr(ptr, m->b[startary].Key);
+               *content_end = tmp;
+               return next_boundary;
+       }
+
+       /*
+        * Any other encoding: scan with memchr()/memcmp(), which do not
+        * stop at 0x00 bytes.  A candidate is compared only while the
+        * whole delimiter still fits in front of content_end, so the
+        * comparison never reads past the window.
+        */
+       marklen = (size_t) m->b[startary].len;
+       for (srch = memchr(ptr, '-', (size_t) (content_end - ptr));
+            srch != NULL;
+            srch = memchr(srch + 1, '-', (size_t) (content_end - (srch + 1))))
+       {
+               if ((size_t) (content_end - srch) < marklen)
+                       break;
+               if (memcmp(srch, m->b[startary].Key, marklen) == 0) {
+                       next_boundary = srch;
+                       break;
+               }
+       }
+       return next_boundary;
+}
+
 /*
  * Break out the components of a multipart message
  * (This function expects to be fed HEADERS + CONTENT)
  * Note: NULL can be supplied as content_end; in this case, the message is
  * considered to have ended when the parser encounters a 0x00 byte.
  */
-void the_mime_parser(char *partnum,
-                    char *content_start, char *content_end,
-                    MimeParserCallBackType CallBack,
-                    MimeParserCallBackType PreMultiPartCallBack,
-                    MimeParserCallBackType PostMultiPartCallBack,
-                    void *userdata,
-                    int dont_decode)
+static void recurseable_mime_parser(char *partnum,
+                                   char *content_start, char *content_end,
+                                   MimeParserCallBackType CallBack,
+                                   MimeParserCallBackType PreMultiPartCallBack,
+                                   MimeParserCallBackType PostMultiPartCallBack,
+                                   void *userdata,
+                                   int dont_decode, 
+                                   interesting_mime_headers *m)
 {
-
-       char *ptr;
-       char *part_start, *part_end = NULL;
-       char *next_boundary;
-       
-       size_t content_length;
-       int part_seq = 0;
-       size_t length;
-       char nested_partnum[256];
-       int crlf_in_use = 0;
-       char *evaluate_crlf_ptr = NULL;
-       
-       interesting_mime_headers *m;
-       CBufStr *chosen_name;
-
-       ptr = content_start;
-       content_length = 0;
-
-       m = InitInterestingMimes();
-
-
-       /* If the caller didn't supply an endpointer, generate one by measure */
-       if (content_end == NULL) {
-               content_end = &content_start[strlen(content_start)];
-       }
+       interesting_mime_headers *SubMimeHeaders;
+       char     *ptr;
+       char     *part_start;
+       char     *part_end = NULL;
+       char     *evaluate_crlf_ptr = NULL;
+       char     *next_boundary;
+       char      nested_partnum[256];
+       int       crlf_in_use = 0;
+       int       part_seq = 0;
+       CBufStr  *chosen_name;
 
 
-       if (parse_MimeHeaders(m, content_start, content_end) != 0)
-               goto end_parser;
-       
        /* If this is a multipart message, then recursively process it */
+       ptr = content_start;
        part_start = NULL;
        if (m->is_multipart) {
 
@@ -513,16 +624,38 @@ void the_mime_parser(char *partnum,
 
                /* Figure out where the boundaries are */
                m->b[startary].len = snprintf(m->b[startary].Key, SIZ, "--%s", m->b[boundary].Key);
+               SubMimeHeaders = InitInterestingMimes ();
+
+               while ((*ptr == '\r') || (*ptr == '\n')) ptr ++;
+
+               if (strncmp(ptr, m->b[startary].Key, m->b[startary].len) == 0)
+                       ptr += m->b[startary].len;
+
+               while ((*ptr == '\r') || (*ptr == '\n')) ptr ++;
 
                part_start = NULL;
                do {
-                       char tmp;
-
-                       tmp = *content_end;
-                       *content_end = '\0';
+                       char *optr;
+
+                       optr = ptr;
+                       if (parse_MimeHeaders(SubMimeHeaders, &ptr, content_end) != 0)
+                               break;
+                       if ((ptr - optr > 2) && 
+                           (*(ptr - 2) == '\r'))
+                               crlf_in_use = 1;
+                       
+                       part_start = ptr;
                        
-                       next_boundary = strstr(ptr, m->b[startary].Key);
-                       *content_end = tmp;
+                       next_boundary = FindNextContent(ptr,
+                                                       content_end,
+                                                       SubMimeHeaders,
+                                                       m);
+                       if ((next_boundary != NULL) && 
+                           (next_boundary - part_start < 3)) {
+                               FlushInterestingMimes(SubMimeHeaders);
+
+                               continue;
+                       }
 
                        if ( (part_start != NULL) && (next_boundary != NULL) ) {
                                part_end = next_boundary;
@@ -542,21 +675,22 @@ void the_mime_parser(char *partnum,
                                                 sizeof nested_partnum,
                                                 "%d", ++part_seq);
                                }
-                               the_mime_parser(nested_partnum,
-                                               part_start, 
-                                               part_end,
-                                               CallBack,
-                                               PreMultiPartCallBack,
-                                               PostMultiPartCallBack,
-                                               userdata,
-                                               dont_decode);
+                               recurseable_mime_parser(nested_partnum,
+                                                       part_start, 
+                                                       part_end,
+                                                       CallBack,
+                                                       PreMultiPartCallBack,
+                                                       PostMultiPartCallBack,
+                                                       userdata,
+                                                       dont_decode, 
+                                                       SubMimeHeaders);
                        }
 
                        if (next_boundary != NULL) {
                                /* If we pass out of scope, don't attempt to
                                 * read past the end boundary. */
-                               if ((*(next_boundary + m->b[startary].len + 1) == '-') && 
-                                   (*(next_boundary + m->b[startary].len + 2) == '-') ){
+                               if ((*(next_boundary + m->b[startary].len) == '-') && 
+                                   (*(next_boundary + m->b[startary].len + 1) == '-') ){
                                        ptr = content_end;
                                }
                                else {
@@ -584,8 +718,11 @@ void the_mime_parser(char *partnum,
                                /* Invalid end of multipart.  Bail out! */
                                ptr = content_end;
                        }
+                       FlushInterestingMimes(SubMimeHeaders);
                } while ( (ptr < content_end) && (next_boundary != NULL) );
 
+               free(SubMimeHeaders);
+
                if (PostMultiPartCallBack != NULL) {
                        PostMultiPartCallBack("", 
                                              "", 
@@ -599,18 +736,12 @@ void the_mime_parser(char *partnum,
                                              m->b[id].Key, 
                                              userdata);
                }
-               goto end_parser;
-       }
-
-       /* If it's not a multipart message, then do something with it */
-       if (!m->is_multipart) {
+       } /* If it's not a multipart message, then do something with it */
+       else {
+               size_t length;
                part_start = ptr;
-               length = 0;
-               while (ptr < content_end) {
-                       ++ptr;
-                       ++length;
-               }
-               part_end = content_end;
+               length = content_end - part_start;
+               ptr = part_end = content_end;
 
 
                /* The following code will truncate the MIME part to the size
@@ -710,11 +841,45 @@ void the_mime_parser(char *partnum,
 
        }
 
-end_parser:    /* free the buffers!  end the oppression!! */
-       free(m);
 }
 
+/*
+ * Break out the components of a multipart message
+ * (This function expects to be fed HEADERS + CONTENT)
+ *
+ * partnum         part number handed on to recurseable_mime_parser()
+ * content_start   first byte of the headers; NULL makes this a no-op
+ * content_end     one past the last byte, or NULL, in which case the
+ *                 message is considered to end at the first 0x00 byte
+ * CallBack, PreMultiPartCallBack, PostMultiPartCallBack,
+ * userdata, dont_decode
+ *                 handed on unchanged to recurseable_mime_parser()
+ *
+ * The top-level headers are parsed here; the body is processed only if
+ * parse_MimeHeaders() reports success (returns 0).
+ */
+void the_mime_parser(char *partnum,
+                    char *content_start, char *content_end,
+                    MimeParserCallBackType CallBack,
+                    MimeParserCallBackType PreMultiPartCallBack,
+                    MimeParserCallBackType PostMultiPartCallBack,
+                    void *userdata,
+                    int dont_decode)
+{
+       interesting_mime_headers *m;
+
+       /* Without a start pointer there is nothing to measure or parse. */
+       if (content_start == NULL)
+               return;
+
+       /* If the caller didn't supply an endpointer, generate one by measure */
+       if (content_end == NULL) {
+               content_end = &content_start[strlen(content_start)];
+       }
+
+       m = InitInterestingMimes();
+       if (m == NULL)
+               return;         /* out of memory: nothing was allocated */
+
+       /* On success parse_MimeHeaders() has moved content_start past the
+        * header block, so the recursive parser starts at the body. */
+       if (parse_MimeHeaders(m, &content_start, content_end) == 0)
+       {
 
+               recurseable_mime_parser(partnum,
+                                       content_start, content_end,
+                                       CallBack,
+                                       PreMultiPartCallBack,
+                                       PostMultiPartCallBack,
+                                       userdata,
+                                       dont_decode,
+                                       m);
+       }
+       free(m);
+}
 
 /*
  * Entry point for the MIME parser.
@@ -892,8 +1057,8 @@ int LoadIconDir(const char *DirName)
        {
                char *MinorPtr;
                char *PStart;
-#ifdef _DIRENT_HAVE_D_NAMELEN
-               d_namelen = filedir_entry->d_namelen;
+#ifdef _DIRENT_HAVE_D_NAMLEN
+               d_namelen = filedir_entry->d_namlen;
 #else
                d_namelen = strlen(filedir_entry->d_name);
 #endif