more f*n places for d_nam<e>len

[citadel.git] / libcitadel / lib / mime_parser.c
diff --git a/libcitadel/lib/mime_parser.c b/libcitadel/lib/mime_parser.c

index d71df777575378f14576835d81a43a29364f5e96..1fc037d43ffd77462434eb93f645001f4dbb251a 100644 (file)
--- a/libcitadel/lib/mime_parser.c
+++ b/libcitadel/lib/mime_parser.c
@@ -2,8 +2,20 @@
   * This is the MIME parser for Citadel.
   *
   * Copyright (c) 1998-2010 by the citadel.org development team.
- * This code is distributed under the GNU General Public License v3.
   *
+ * This program is open source software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
   */
  
  #include <stdlib.h>
@@ -123,7 +135,7 @@ char *fixed_partnum(char *supplied_partnum) {
  
  static inline unsigned int _decode_hex(const char *Source)
  {
-       int ret = '?';
+       unsigned int ret = '?';
         unsigned char LO_NIBBLE;
         unsigned char HI_NIBBLE;
  
@@ -166,7 +178,6 @@ int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
                         }
                         else
                         {
-                               ch = 0;
                                 ch = _decode_hex(&encoded[pos]);
                                 pos += 2;
                                 decoded[decoded_length++] = ch;
@@ -205,11 +216,13 @@ void mime_decode(char *partnum,
  
         /* Some encodings aren't really encodings */
         if (!strcasecmp(encoding, "7bit"))
-               strcpy(encoding, "");
+               *encoding = '\0';
         if (!strcasecmp(encoding, "8bit"))
-               strcpy(encoding, "");
+               *encoding = '\0';
         if (!strcasecmp(encoding, "binary"))
-               strcpy(encoding, "");
+               *encoding = '\0';
+       if (!strcasecmp(encoding, "ISO-8859-1"))
+               *encoding = '\0';
  
         /* If this part is not encoded, send as-is */
         if ( (strlen(encoding) == 0) || (dont_decode)) {
@@ -254,9 +267,20 @@ void mime_decode(char *partnum,
         }
  
         if (bytes_decoded > 0) if (CallBack != NULL) {
-               CallBack(name, filename, fixed_partnum(partnum),
-                       disposition, decoded,
-                       content_type, charset, bytes_decoded, "binary", id, userdata);
+                       char encoding_buf[SIZ];
+
+                       strcpy(encoding_buf, "binary");
+                       CallBack(name, 
+                                filename, 
+                                fixed_partnum(partnum),
+                                disposition, 
+                                decoded,
+                                content_type, 
+                                charset, 
+                                bytes_decoded, 
+                                encoding_buf, 
+                                id, 
+                                userdata);
         }
  
         free(decoded);
@@ -280,11 +304,11 @@ int mime_decode_now (char *part_start,
         *decoded = NULL;
         /* Some encodings aren't really encodings */
         if (!strcasecmp(encoding, "7bit"))
-               strcpy(encoding, "");
+               *encoding = '\0';
         if (!strcasecmp(encoding, "8bit"))
-               strcpy(encoding, "");
+               *encoding = '\0';
         if (!strcasecmp(encoding, "binary"))
-               strcpy(encoding, "");
+               *encoding = '\0';
  
         /* If this part is not encoded, send as-is */
         if (strlen(encoding) == 0) {
@@ -346,40 +370,45 @@ typedef struct _interesting_mime_headers {
         long is_multipart;
  } interesting_mime_headers;
  
-interesting_mime_headers *InitInterestingMimes(void)
+
+/*
+ * Reset a header set so it can be reused for the next MIME part.
+ *
+ * Every slot in m->b[] (indices 0 .. eMax-1) becomes an empty string with
+ * length 0, and content_length is set to -1, the value the rest of this
+ * file tests for to mean "no length known".  m->is_multipart is not
+ * touched here.
+ *
+ * m must point to a valid interesting_mime_headers; it is dereferenced
+ * without a NULL check.
+ */
+static void FlushInterestingMimes(interesting_mime_headers *m)
  {
         int i;
-       interesting_mime_headers *m;
-       m = (interesting_mime_headers*) malloc( sizeof(interesting_mime_headers));
         
+       /* Only the first byte of each key is cleared; that is enough to make it an empty C string. */
         for (i = 0; i < eMax; i++) {
              m->b[i].Key[0] = '\0';
              m->b[i].len = 0;
         }
         m->content_length = -1;
-       return m;
  }
+/*
+ * Allocate a header set on the heap and reset it with
+ * FlushInterestingMimes().
+ *
+ * Returns the new header set, or NULL when the allocation fails.  The
+ * caller owns the result and releases it with free().
+ */
+static interesting_mime_headers *InitInterestingMimes(void)
+{
+       interesting_mime_headers *m;
+
+       m = malloc(sizeof *m);
+       if (m == NULL) {
+               /* FlushInterestingMimes() dereferences its argument. */
+               return NULL;
+       }
  
+       FlushInterestingMimes(m);
  
+       return m;
+}
  
-long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *content_end)
+
+static long parse_MimeHeaders(interesting_mime_headers *m, 
+                             char** pcontent_start, 
+                             char *content_end)
  {
         char buf[SIZ];
         char header[SIZ];
         long headerlen;
-       char *ptr;
-       int buflen;
+       char *ptr, *pch;
+       int buflen = 0;
         int i;
  
         /* Learn interesting things from the headers */
-       ptr = content_start;
+       ptr = *pcontent_start;
         *header = '\0';
         headerlen = 0;
         do {
                 ptr = memreadlinelen(ptr, buf, SIZ, &buflen);
-               if (ptr >= content_end) {
-                       return -1;
-               }
  
                 for (i = 0; i < buflen; ++i) {
                         if (isspace(buf[i])) {
@@ -387,9 +416,10 @@ long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *c
                         }
                 }
  
-               if (!isspace(buf[0])) {
+               if (!isspace(buf[0]) && (headerlen > 0)) {
                         if (!strncasecmp(header, "Content-type:", 13)) {
                                 memcpy (m->b[content_type].Key, &header[13], headerlen - 12);
+                               m->b[content_type].Key[headerlen - 12] = '\0';
                                 m->b[content_type].len = striplt (m->b[content_type].Key);
  
                                 m->b[content_type_name].len = extract_key(m->b[content_type_name].Key, CKEY(m->b[content_type]), HKEY("name"), '=');
@@ -397,20 +427,31 @@ long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *c
                                 m->b[boundary].len          = extract_key(m->b[boundary].Key,          header,       headerlen,  HKEY("boundary"), '=');
  
                                 /* Deal with weird headers */
-                               if (strchr(m->b[content_type].Key, ' '))
-                                       *(strchr(m->b[content_type].Key, ' ')) = '\0';
-                               if (strchr(m->b[content_type].Key, ';'))
-                                       *(strchr(m->b[content_type].Key, ';')) = '\0';
+                               pch = strchr(m->b[content_type].Key, ' ');
+                               if (pch != NULL) {
+                                       *pch = '\0';
+                                       m->b[content_type].len = m->b[content_type].Key - pch;
+                               }
+                               pch = strchr(m->b[content_type].Key, ';');
+                               if (pch != NULL) {
+                                       *pch = '\0';
+                                       m->b[content_type].len = m->b[content_type].Key - pch;
+                               }
                         }
                         else if (!strncasecmp(header, "Content-Disposition:", 20)) {
                                 memcpy (m->b[disposition].Key, &header[20], headerlen - 19);
+                               m->b[disposition].Key[headerlen - 19] = '\0';
                                 m->b[disposition].len = striplt(m->b[disposition].Key);
  
                                 m->b[content_disposition_name].len = extract_key(m->b[content_disposition_name].Key, CKEY(m->b[disposition]), HKEY("name"), '=');
                                 m->b[filename].len                 = extract_key(m->b[filename].Key,                 CKEY(m->b[disposition]), HKEY("filename"), '=');
+                               pch = strchr(m->b[disposition].Key, ';');
+                               if (pch != NULL) *pch = '\0';
+                               m->b[disposition].len = striplt(m->b[disposition].Key);
                         }
                         else if (!strncasecmp(header, "Content-ID:", 11)) {
-                               memcpy(m->b[id].Key, &header[11], headerlen);
+                               memcpy(m->b[id].Key, &header[11], headerlen - 11);
+                               m->b[id].Key[headerlen - 11] = '\0';
                                 striplt(m->b[id].Key);
                                 m->b[id].len = stripallbut(m->b[id].Key, '<', '>');
                         }
@@ -423,6 +464,7 @@ long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *c
                         }
                         else if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
                                 memcpy(m->b[encoding].Key, &header[26], headerlen - 26);
+                               m->b[encoding].Key[headerlen - 26] = '\0';
                                 m->b[encoding].len = striplt(m->b[encoding].Key);
                         }
                         *header = '\0';
@@ -433,66 +475,135 @@ long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *c
                         headerlen += buflen;
                         header[headerlen] = '\0';
                 }
+               if (ptr >= content_end) {
+                       return -1;
+               }
         } while ((!IsEmptyStr(buf)) && (*ptr != 0));
  
-       ptr = strchr(m->b[disposition].Key, ';');
-       if (ptr != NULL) *ptr = '\0';
-       m->b[disposition].len = striplt(m->b[disposition].Key);
+       m->is_multipart = m->b[boundary].len != 0;
+       *pcontent_start = ptr;
  
-       ptr = strchr(m->b[content_type].Key, ';');
-       if (ptr != NULL) *ptr = '\0';
-       m->b[content_type].len = striplt(m->b[content_type].Key);
+       return 0;
+}
  
-       m->is_multipart = m->b[boundary].len != 0;
+
+/*
+ * Tell whether a part's Content-Transfer-Encoding is one of the text
+ * encodings, base64 or quoted-printable.
+ *
+ * m  header set whose b[encoding] slot has been filled in by the header
+ *    parser (an empty slot counts as "not a text encoding").
+ *
+ * Returns 1 for base64 or quoted-printable, 0 otherwise.  Both names are
+ * matched without regard to case, like the other encoding checks in this
+ * file.
+ */
+static int IsAsciiEncoding(interesting_mime_headers *m)
+{
+
+       if ((m->b[encoding].len != 0) &&
+           (strcasecmp(m->b[encoding].Key, "base64") == 0))
+               return 1;
+       if ((m->b[encoding].len != 0) &&
+           (strcasecmp(m->b[encoding].Key, "quoted-printable") == 0))
+               return 1;
  
         return 0;
  }
  
+/*
+ * Find the next boundary marker of the enclosing multipart.
+ *
+ * ptr             start of the current part's body
+ * content_end     one past the last byte that may be searched
+ * SubMimeHeaders  headers of the current part (encoding, content_length)
+ * m               headers of the enclosing multipart; m->b[startary]
+ *                 holds the marker text to look for
+ *
+ * Returns a pointer to the first marker found in [ptr, content_end), or
+ * NULL when there is none.  A marker must lie completely inside that
+ * range to be reported.
+ *
+ * If the part declares a content length above 10, most of the body is
+ * skipped before searching.  The declared value is not trusted, so the
+ * search starts 10 bytes early; for text encodings one further byte per
+ * 80 declared bytes is subtracted, in case line ends were not counted.
+ * The skip is checked as an offset against the bytes available, so an
+ * oversized length can never produce a pointer outside the buffer.
+ */
+static char *FindNextContent(char *ptr,
+                            char *content_end,
+                            interesting_mime_headers *SubMimeHeaders,
+                            interesting_mime_headers *m)
+{
+       char *next_boundary = NULL;
+       long  declared = SubMimeHeaders->content_length;
+       long  skip;
+
+       if (IsAsciiEncoding(SubMimeHeaders)) {
+               char tmp;
+
+               if (declared > 10) {
+                       skip = declared - (declared / 80) - 10;
+                       if (skip < content_end - ptr)
+                               ptr += skip;
+               }
+
+               /*
+                * strstr() needs a terminator: plant one at content_end
+                * for the duration of the search, then put the original
+                * byte back.
+                */
+               tmp = *content_end;
+               *content_end = '\0';
+               next_boundary = strstr(ptr, m->b[startary].Key);
+               *content_end = tmp;
+       }
+       else {
+               char *srch;
+
+               if (declared > 10) {
+                       skip = declared - 10;
+                       if (skip < content_end - ptr)
+                               ptr += skip;
+               }
+
+               /*
+                * The body may contain 0x00 bytes, so walk it with
+                * memchr()/memcmp(): stop at each '-' and compare the
+                * marker there.
+                */
+               for (srch = memchr(ptr, '-', content_end - ptr);
+                    srch != NULL;
+                    srch = memchr(srch + 1, '-', content_end - (srch + 1)))
+               {
+                       /* The marker no longer fits before content_end. */
+                       if (content_end - srch < m->b[startary].len)
+                               break;
+
+                       if (memcmp(srch,
+                                  m->b[startary].Key,
+                                  m->b[startary].len) == 0)
+                       {
+                               next_boundary = srch;
+                               break;
+                       }
+               }
+       }
+       return next_boundary;
+}
+
  /*
   * Break out the components of a multipart message
   * (This function expects to be fed HEADERS + CONTENT)
   * Note: NULL can be supplied as content_end; in this case, the message is
   * considered to have ended when the parser encounters a 0x00 byte.
   */
-void the_mime_parser(char *partnum,
-                    char *content_start, char *content_end,
-                    MimeParserCallBackType CallBack,
-                    MimeParserCallBackType PreMultiPartCallBack,
-                    MimeParserCallBackType PostMultiPartCallBack,
-                    void *userdata,
-                    int dont_decode)
+static void recurseable_mime_parser(char *partnum,
+                                   char *content_start, char *content_end,
+                                   MimeParserCallBackType CallBack,
+                                   MimeParserCallBackType PreMultiPartCallBack,
+                                   MimeParserCallBackType PostMultiPartCallBack,
+                                   void *userdata,
+                                   int dont_decode, 
+                                   interesting_mime_headers *m)
  {
-
-       char *ptr;
-       char *part_start, *part_end = NULL;
-       char *next_boundary;
-       
-       size_t content_length;
-       int part_seq = 0;
-       size_t length;
-       char nested_partnum[256];
-       int crlf_in_use = 0;
-       char *evaluate_crlf_ptr = NULL;
-       
-       interesting_mime_headers *m;
-       CBufStr *chosen_name;
-
-       ptr = content_start;
-       content_length = 0;
-
-       m = InitInterestingMimes();
-
-
-       /* If the caller didn't supply an endpointer, generate one by measure */
-       if (content_end == NULL) {
-               content_end = &content_start[strlen(content_start)];
-       }
+       interesting_mime_headers *SubMimeHeaders;
+       char     *ptr;
+       char     *part_start;
+       char     *part_end = NULL;
+       char     *evaluate_crlf_ptr = NULL;
+       char     *next_boundary;
+       char      nested_partnum[256];
+       int       crlf_in_use = 0;
+       int       part_seq = 0;
+       CBufStr  *chosen_name;
  
  
-       if (parse_MimeHeaders(m, content_start, content_end) != 0)
-               goto end_parser;
-       
         /* If this is a multipart message, then recursively process it */
+       ptr = content_start;
         part_start = NULL;
         if (m->is_multipart) {
  
@@ -513,16 +624,38 @@ void the_mime_parser(char *partnum,
  
                 /* Figure out where the boundaries are */
                 m->b[startary].len = snprintf(m->b[startary].Key, SIZ, "--%s", m->b[boundary].Key);
+               SubMimeHeaders = InitInterestingMimes ();
+
+               while ((*ptr == '\r') || (*ptr == '\n')) ptr ++;
+
+               if (strncmp(ptr, m->b[startary].Key, m->b[startary].len) == 0)
+                       ptr += m->b[startary].len;
+
+               while ((*ptr == '\r') || (*ptr == '\n')) ptr ++;
  
                 part_start = NULL;
                 do {
-                       char tmp;
-
-                       tmp = *content_end;
-                       *content_end = '\0';
+                       char *optr;
+
+                       optr = ptr;
+                       if (parse_MimeHeaders(SubMimeHeaders, &ptr, content_end) != 0)
+                               break;
+                       if ((ptr - optr > 2) && 
+                           (*(ptr - 2) == '\r'))
+                               crlf_in_use = 1;
+                       
+                       part_start = ptr;
                         
-                       next_boundary = strstr(ptr, m->b[startary].Key);
-                       *content_end = tmp;
+                       next_boundary = FindNextContent(ptr,
+                                                       content_end,
+                                                       SubMimeHeaders,
+                                                       m);
+                       if ((next_boundary != NULL) && 
+                           (next_boundary - part_start < 3)) {
+                               FlushInterestingMimes(SubMimeHeaders);
+
+                               continue;
+                       }
  
                         if ( (part_start != NULL) && (next_boundary != NULL) ) {
                                 part_end = next_boundary;
@@ -542,21 +675,22 @@ void the_mime_parser(char *partnum,
                                                  sizeof nested_partnum,
                                                  "%d", ++part_seq);
                                 }
-                               the_mime_parser(nested_partnum,
-                                               part_start, 
-                                               part_end,
-                                               CallBack,
-                                               PreMultiPartCallBack,
-                                               PostMultiPartCallBack,
-                                               userdata,
-                                               dont_decode);
+                               recurseable_mime_parser(nested_partnum,
+                                                       part_start, 
+                                                       part_end,
+                                                       CallBack,
+                                                       PreMultiPartCallBack,
+                                                       PostMultiPartCallBack,
+                                                       userdata,
+                                                       dont_decode, 
+                                                       SubMimeHeaders);
                         }
  
                         if (next_boundary != NULL) {
                                 /* If we pass out of scope, don't attempt to
                                  * read past the end boundary. */
-                               if ((*(next_boundary + m->b[startary].len + 1) == '-') && 
-                                   (*(next_boundary + m->b[startary].len + 2) == '-') ){
+                               if ((*(next_boundary + m->b[startary].len) == '-') && 
+                                   (*(next_boundary + m->b[startary].len + 1) == '-') ){
                                         ptr = content_end;
                                 }
                                 else {
@@ -584,8 +718,11 @@ void the_mime_parser(char *partnum,
                                 /* Invalid end of multipart.  Bail out! */
                                 ptr = content_end;
                         }
+                       FlushInterestingMimes(SubMimeHeaders);
                 } while ( (ptr < content_end) && (next_boundary != NULL) );
  
+               free(SubMimeHeaders);
+
                 if (PostMultiPartCallBack != NULL) {
                         PostMultiPartCallBack("", 
                                               "", 
@@ -599,18 +736,12 @@ void the_mime_parser(char *partnum,
                                               m->b[id].Key, 
                                               userdata);
                 }
-               goto end_parser;
-       }
-
-       /* If it's not a multipart message, then do something with it */
-       if (!m->is_multipart) {
+       } /* If it's not a multipart message, then do something with it */
+       else {
+               size_t length;
                 part_start = ptr;
-               length = 0;
-               while (ptr < content_end) {
-                       ++ptr;
-                       ++length;
-               }
-               part_end = content_end;
+               length = content_end - part_start;
+               ptr = part_end = content_end;
  
  
                 /* The following code will truncate the MIME part to the size
@@ -710,11 +841,45 @@ void the_mime_parser(char *partnum,
  
         }
  
-end_parser:    /* free the buffers!  end the oppression!! */
-       free(m);
  }
  
+/*
+ * Break out the components of a multipart message
+ * (This function expects to be fed HEADERS + CONTENT)
+ *
+ * partnum        part number handed on to the recursive parser
+ * content_start  first byte of the headers; nothing is done when NULL
+ * content_end    one past the last byte of the message.  NULL can be
+ *                supplied; in this case, the message is considered to
+ *                have ended at the first 0x00 byte.
+ * CallBack, PreMultiPartCallBack, PostMultiPartCallBack, userdata,
+ * dont_decode    passed through unchanged to the recursive parser
+ *
+ * The top-level headers are parsed here.  Only when that succeeds is the
+ * body, which starts where header parsing stopped, handed to
+ * recurseable_mime_parser().
+ */
+void the_mime_parser(char *partnum,
+                    char *content_start, char *content_end,
+                    MimeParserCallBackType CallBack,
+                    MimeParserCallBackType PreMultiPartCallBack,
+                    MimeParserCallBackType PostMultiPartCallBack,
+                    void *userdata,
+                    int dont_decode)
+{
+       interesting_mime_headers *m;
+
+       /* Nothing to parse; also keeps strlen() below away from NULL. */
+       if (content_start == NULL) {
+               return;
+       }
+
+       /* If the caller didn't supply an endpointer, generate one by measure */
+       if (content_end == NULL) {
+               content_end = content_start + strlen(content_start);
+       }
+
+       /* InitInterestingMimes() hands back the result of malloc(). */
+       m = InitInterestingMimes();
+       if (m == NULL) {
+               return;
+       }
+
+       /* On success parse_MimeHeaders() moves content_start past the headers. */
+       if (parse_MimeHeaders(m, &content_start, content_end) == 0)
+       {
+               recurseable_mime_parser(partnum,
+                                       content_start, content_end,
+                                       CallBack,
+                                       PreMultiPartCallBack,
+                                       PostMultiPartCallBack,
+                                       userdata,
+                                       dont_decode,
+                                       m);
+       }
+       free(m);
+}
  
  /*
   * Entry point for the MIME parser.
@@ -892,8 +1057,8 @@ int LoadIconDir(const char *DirName)
         {
                 char *MinorPtr;
                 char *PStart;
-#ifdef _DIRENT_HAVE_D_NAMELEN
-               d_namelen = filedir_entry->d_namelen;
+#ifdef _DIRENT_HAVE_D_NAMLEN
+               d_namelen = filedir_entry->d_namlen;
  #else
                 d_namelen = strlen(filedir_entry->d_name);
  #endif