utf8ify_rfc822_string() is in libcitadel now
[citadel.git] / libcitadel / lib / mime_parser.c
index 56336ba6afe445421a1db9b3cb37c9a47780d7b2..318c23fdebc30a1b96446bac2994764e44e78158 100644 (file)
@@ -1,10 +1,9 @@
-/*
- * This is the MIME parser for Citadel.
- *
- * Copyright (c) 1998-2010 by the citadel.org development team.
- * This code is distributed under the GNU General Public License v3.
- *
- */
+// This is the MIME parser for Citadel.
+//
+// Copyright (c) 1998-2022 by the citadel.org development team.
+//
+// This program is open source software.  Use, duplication, or disclosure
+// is subject to the terms of the GNU General Public License, version 3.
 
 #include <stdlib.h>
 #include <unistd.h>
@@ -52,29 +51,23 @@ const unsigned char FromHexTable [256] = {
 };
 
 
-long extract_key(char *target, char *source, long sourcelen, char *key, long keylen, char KeyEnd)
-{
+long extract_key(char *target, char *source, long sourcelen, char *key, long keylen, char KeyEnd) {
        char *sptr, *ptr = NULL;
        int double_quotes = 0;
        long RealKeyLen = keylen;
 
        sptr = source;
 
-       while (sptr != NULL)
-       {
-               ptr = bmstrcasestr_len(sptr, sourcelen - (sptr - source), 
-                                      key, keylen);
-               if(ptr != NULL)
-               {
+       while (sptr != NULL) {
+               ptr = bmstrcasestr_len(sptr, sourcelen - (sptr - source), key, keylen);
+               if (ptr != NULL) {
                        while (isspace(*(ptr + RealKeyLen)))
                                RealKeyLen ++;
-                       if (*(ptr + RealKeyLen) == KeyEnd)
-                       {
+                       if (*(ptr + RealKeyLen) == KeyEnd) {
                                sptr = NULL;
                                RealKeyLen ++;                          
                        }
-                       else
-                       {
+                       else {
                                sptr = ptr + RealKeyLen + 1;
                        }
                }
@@ -121,9 +114,8 @@ char *fixed_partnum(char *supplied_partnum) {
 }
 
 
-static inline unsigned int _decode_hex(const char *Source)
-{
-       int ret = '?';
+static inline unsigned int _decode_hex(const char *Source) {
+       unsigned int ret = '?';
        unsigned char LO_NIBBLE;
        unsigned char HI_NIBBLE;
 
@@ -149,31 +141,24 @@ int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
        int decoded_length = 0;
        int pos = 0;
 
-       while (pos < sourcelen)
-       {
-               if (*(encoded + pos) == '=')
-               {
+       while (pos < sourcelen) {
+               if (*(encoded + pos) == '=') {
                        pos ++;
-                       if (*(encoded + pos) == '\n')
-                       {
+                       if (*(encoded + pos) == '\n') {
                                pos ++;
                        }
-                       else if (*(encoded + pos) == '\r')
-                       {
+                       else if (*(encoded + pos) == '\r') {
                                pos ++;
                                if (*(encoded + pos) == '\n')
                                        pos++;
                        }
-                       else
-                       {
-                               ch = 0;
+                       else {
                                ch = _decode_hex(&encoded[pos]);
                                pos += 2;
                                decoded[decoded_length++] = ch;
                        }
                }
-               else
-               {
+               else {
                        decoded[decoded_length++] = encoded[pos];
                        pos += 1;
                }
@@ -197,21 +182,22 @@ void mime_decode(char *partnum,
                 MimeParserCallBackType PreMultiPartCallBack,
                 MimeParserCallBackType PostMultiPartCallBack,
                 void *userdata,
-                int dont_decode)
-{
-
+                int dont_decode
+) {
        char *decoded;
        size_t bytes_decoded = 0;
 
-       /* Some encodings aren't really encodings */
+       // Some encodings aren't really encodings
        if (!strcasecmp(encoding, "7bit"))
-               strcpy(encoding, "");
+               *encoding = '\0';
        if (!strcasecmp(encoding, "8bit"))
-               strcpy(encoding, "");
+               *encoding = '\0';
        if (!strcasecmp(encoding, "binary"))
-               strcpy(encoding, "");
+               *encoding = '\0';
+       if (!strcasecmp(encoding, "ISO-8859-1"))
+               *encoding = '\0';
 
-       /* If this part is not encoded, send as-is */
+       // If this part is not encoded, send as-is
        if ( (strlen(encoding) == 0) || (dont_decode)) {
                if (CallBack != NULL) {
                        CallBack(name, 
@@ -229,18 +215,15 @@ void mime_decode(char *partnum,
                return;
        }
        
-       /* Fail silently if we hit an unknown encoding. */
-       if ((strcasecmp(encoding, "base64"))
-           && (strcasecmp(encoding, "quoted-printable"))) {
+       // Fail silently if we hit an unknown encoding.
+       if ((strcasecmp(encoding, "base64")) && (strcasecmp(encoding, "quoted-printable"))) {
                return;
        }
 
-       /*
-        * Allocate a buffer for the decoded data.  The output buffer is slightly
-        * larger than the input buffer; this assumes that the decoded data
-        * will never be significantly larger than the encoded data.  This is a
-        * safe assumption with base64, uuencode, and quoted-printable.
-        */
+       // Allocate a buffer for the decoded data.  The output buffer is slightly
+       // larger than the input buffer; this assumes that the decoded data
+       // will never be significantly larger than the encoded data.  This is a
+       // safe assumption with base64, uuencode, and quoted-printable.
        decoded = malloc(length + 32768);
        if (decoded == NULL) {
                return;
@@ -291,11 +274,11 @@ int mime_decode_now (char *part_start,
        *decoded = NULL;
        /* Some encodings aren't really encodings */
        if (!strcasecmp(encoding, "7bit"))
-               strcpy(encoding, "");
+               *encoding = '\0';
        if (!strcasecmp(encoding, "8bit"))
-               strcpy(encoding, "");
+               *encoding = '\0';
        if (!strcasecmp(encoding, "binary"))
-               strcpy(encoding, "");
+               *encoding = '\0';
 
        /* If this part is not encoded, send as-is */
        if (strlen(encoding) == 0) {
@@ -406,6 +389,7 @@ static long parse_MimeHeaders(interesting_mime_headers *m,
                if (!isspace(buf[0]) && (headerlen > 0)) {
                        if (!strncasecmp(header, "Content-type:", 13)) {
                                memcpy (m->b[content_type].Key, &header[13], headerlen - 12);
+                               m->b[content_type].Key[headerlen - 12] = '\0';
                                m->b[content_type].len = striplt (m->b[content_type].Key);
 
                                m->b[content_type_name].len = extract_key(m->b[content_type_name].Key, CKEY(m->b[content_type]), HKEY("name"), '=');
@@ -426,6 +410,7 @@ static long parse_MimeHeaders(interesting_mime_headers *m,
                        }
                        else if (!strncasecmp(header, "Content-Disposition:", 20)) {
                                memcpy (m->b[disposition].Key, &header[20], headerlen - 19);
+                               m->b[disposition].Key[headerlen - 19] = '\0';
                                m->b[disposition].len = striplt(m->b[disposition].Key);
 
                                m->b[content_disposition_name].len = extract_key(m->b[content_disposition_name].Key, CKEY(m->b[disposition]), HKEY("name"), '=');
@@ -435,7 +420,8 @@ static long parse_MimeHeaders(interesting_mime_headers *m,
                                m->b[disposition].len = striplt(m->b[disposition].Key);
                        }
                        else if (!strncasecmp(header, "Content-ID:", 11)) {
-                               memcpy(m->b[id].Key, &header[11], headerlen);
+                               memcpy(m->b[id].Key, &header[11], headerlen - 11);
+                               m->b[id].Key[headerlen - 11] = '\0';
                                striplt(m->b[id].Key);
                                m->b[id].len = stripallbut(m->b[id].Key, '<', '>');
                        }
@@ -448,6 +434,7 @@ static long parse_MimeHeaders(interesting_mime_headers *m,
                        }
                        else if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
                                memcpy(m->b[encoding].Key, &header[26], headerlen - 26);
+                               m->b[encoding].Key[headerlen - 26] = '\0';
                                m->b[encoding].len = striplt(m->b[encoding].Key);
                        }
                        *header = '\0';
@@ -539,7 +526,7 @@ static char *FindNextContent(char *ptr,
                
 
                srch = next_boundary = NULL;
-               for (srch = memchr(ptr, '-',  content_end - srch);
+               for (srch = memchr(ptr, '-',  content_end - ptr);
                     (srch != NULL) && (srch < content_end); 
                     srch = memchr(srch, '-',  content_end - srch)) 
                {
@@ -608,16 +595,14 @@ static void recurseable_mime_parser(char *partnum,
                /* Figure out where the boundaries are */
                m->b[startary].len = snprintf(m->b[startary].Key, SIZ, "--%s", m->b[boundary].Key);
                SubMimeHeaders = InitInterestingMimes ();
-               if (*ptr == '\r')
-                       ptr ++;
-               if (*ptr == '\n')
-                       ptr ++;
+
+               while ((*ptr == '\r') || (*ptr == '\n')) ptr ++;
+
                if (strncmp(ptr, m->b[startary].Key, m->b[startary].len) == 0)
                        ptr += m->b[startary].len;
-               if (*ptr == '\r')
-                       ptr ++;
-               if (*ptr == '\n')
-                       ptr ++;
+
+               while ((*ptr == '\r') || (*ptr == '\n')) ptr ++;
+
                part_start = NULL;
                do {
                        char *optr;
@@ -625,19 +610,17 @@ static void recurseable_mime_parser(char *partnum,
                        optr = ptr;
                        if (parse_MimeHeaders(SubMimeHeaders, &ptr, content_end) != 0)
                                break;
-                       if ((ptr - optr > 2) && 
-                           (*(ptr - 2) == '\r'))
+                       if ((ptr - optr > 2) && (*(ptr - 2) == '\r')) {
                                crlf_in_use = 1;
+                       }
                        
                        part_start = ptr;
                        
-                       next_boundary = FindNextContent(ptr,
-                                                       content_end,
-                                                       SubMimeHeaders,
-                                                       m);
-                       if ((next_boundary != NULL) && 
-                           (next_boundary - part_start < 3))
+                       next_boundary = FindNextContent(ptr, content_end, SubMimeHeaders, m);
+                       if ((next_boundary != NULL) && (next_boundary - part_start < 3)) {
+                               FlushInterestingMimes(SubMimeHeaders);
                                continue;
+                       }
 
                        if ( (part_start != NULL) && (next_boundary != NULL) ) {
                                part_end = next_boundary;
@@ -671,8 +654,8 @@ static void recurseable_mime_parser(char *partnum,
                        if (next_boundary != NULL) {
                                /* If we pass out of scope, don't attempt to
                                 * read past the end boundary. */
-                               if ((*(next_boundary + m->b[startary].len + 1) == '-') && 
-                                   (*(next_boundary + m->b[startary].len + 2) == '-') ){
+                               if ((*(next_boundary + m->b[startary].len) == '-') && 
+                                   (*(next_boundary + m->b[startary].len + 1) == '-') ){
                                        ptr = content_end;
                                }
                                else {
@@ -682,9 +665,7 @@ static void recurseable_mime_parser(char *partnum,
                                        /* Determine whether newlines are LF or CRLF */
                                        evaluate_crlf_ptr = part_start;
                                        --evaluate_crlf_ptr;
-                                       if ((*evaluate_crlf_ptr == '\r') && 
-                                           (*(evaluate_crlf_ptr + 1) == '\n'))
-                                       {
+                                       if ((*evaluate_crlf_ptr == '\r') && (*(evaluate_crlf_ptr + 1) == '\n')) {
                                                crlf_in_use = 1;
                                        }
                                        else {
@@ -725,7 +706,6 @@ static void recurseable_mime_parser(char *partnum,
                length = content_end - part_start;
                ptr = part_end = content_end;
 
-
                /* The following code will truncate the MIME part to the size
                 * specified by the Content-length: header.   We have commented it
                 * out because these headers have a tendency to be wrong.
@@ -746,8 +726,7 @@ static void recurseable_mime_parser(char *partnum,
                        chosen_name = &m->b[content_type_name];
                }
        
-               /* Ok, we've got a non-multipart part here, so do something with it.
-                */
+               // Ok, we've got a non-multipart part here, so do something with it.
                mime_decode(partnum,
                            part_start, 
                            length,
@@ -863,6 +842,7 @@ void the_mime_parser(char *partnum,
        free(m);
 }
 
+
 /*
  * Entry point for the MIME parser.
  * (This function expects to be fed HEADERS + CONTENT)
@@ -964,6 +944,8 @@ const char* GuessMimeByFilename(const char *what, size_t len)
                return "text/x-component";
        else if ((len > 3) && !strncasecmp(&what[len - 4], ".jpg", 4))
                return "image/jpeg";
+       else if ((len > 4) && !strncasecmp(&what[len - 5], ".jpeg", 5))
+               return "image/jpeg";
        else if ((len > 3) && !strncasecmp(&what[len - 4], ".png", 4))
                return "image/png";
        else if ((len > 3) && !strncasecmp(&what[len - 4], ".ico", 4))
@@ -1039,8 +1021,8 @@ int LoadIconDir(const char *DirName)
        {
                char *MinorPtr;
                char *PStart;
-#ifdef _DIRENT_HAVE_D_NAMELEN
-               d_namelen = filedir_entry->d_namelen;
+#ifdef _DIRENT_HAVE_D_NAMLEN
+               d_namelen = filedir_entry->d_namlen;
 #else
                d_namelen = strlen(filedir_entry->d_name);
 #endif