First Step: chop mimeparser into pieces.
authorWilfried Goesgens <dothebart@citadel.org>
Fri, 26 Nov 2010 00:13:32 +0000 (01:13 +0100)
committerWilfried Goesgens <dothebart@citadel.org>
Sat, 27 Nov 2010 12:58:51 +0000 (13:58 +0100)
 - move the cluster of buffers into a struct compatible to the CKEY macro
 - add the interesting other evaluated flags & numbers to that struct
 - move finding & parsing of the 'interesting headers' into its own function.

by all this we hope to get informations about the mimepart before we have to search for its end

libcitadel/lib/libcitadel.h
libcitadel/lib/mime_parser.c

index 79a16ed19ce9be0fb460d4357eca9177c3ea739e..989945a54eae12ee54f4318eb8a7d065d6beed39 100644 (file)
@@ -131,7 +131,7 @@ void ShutDownLibCitadel(void);
  * MIME parser declarations
  */
 
-void extract_key(char *target, char *source, long sourcelen, char *key, long keylen, char KeyEnd);
+long extract_key(char *target, char *source, long sourcelen, char *key, long keylen, char KeyEnd);
 
 
 typedef void (*MimeParserCallBackType)(char *cbname,
index 50b51497152bca582016d8e3ea4691adc2ff19cc..b7249f79ec215b941643dc1f0abde569ce282dc8 100644 (file)
@@ -52,7 +52,7 @@ const unsigned char FromHexTable [256] = {
 };
 
 
-void extract_key(char *target, char *source, long sourcelen, char *key, long keylen, char KeyEnd)
+long extract_key(char *target, char *source, long sourcelen, char *key, long keylen, char KeyEnd)
 {
        char *sptr, *ptr = NULL;
        int double_quotes = 0;
@@ -83,7 +83,7 @@ void extract_key(char *target, char *source, long sourcelen, char *key, long key
        }
        if (ptr == NULL) {
                *target = '\0';
-               return;
+               return 0;
        }
        strcpy(target, (ptr + RealKeyLen));
 
@@ -106,6 +106,7 @@ void extract_key(char *target, char *source, long sourcelen, char *key, long key
                }
        }
        *ptr = '\0';
+       return ptr - target;
 }
 
 
@@ -319,104 +320,65 @@ int mime_decode_now (char *part_start,
        return -1;
 }
 
-
-/*
- * Break out the components of a multipart message
- * (This function expects to be fed HEADERS + CONTENT)
- * Note: NULL can be supplied as content_end; in this case, the message is
- * considered to have ended when the parser encounters a 0x00 byte.
- */
-void the_mime_parser(char *partnum,
-                    char *content_start, char *content_end,
-                    MimeParserCallBackType CallBack,
-                    MimeParserCallBackType PreMultiPartCallBack,
-                    MimeParserCallBackType PostMultiPartCallBack,
-                    void *userdata,
-                    int dont_decode)
+typedef enum _eIntMimeHdrs {
+       boundary,
+       startary,
+       endary,
+       content_type,
+       charset,
+       encoding,
+       content_type_name,
+       content_disposition_name,
+       filename,
+       disposition,
+       id,
+       eMax /* don't move ! */
+} eIntMimeHdrs;
+
+typedef struct _CBufStr {
+       char Key[SIZ];
+       long len;
+}CBufStr;
+
+typedef struct _interesting_mime_headers {
+       CBufStr b[eMax];
+       long content_length;
+       long is_multipart;
+} interesting_mime_headers;
+
+interesting_mime_headers *InitInterestingMimes(void)
 {
-
-       char *ptr;
-       char *part_start, *part_end = NULL;
-       char buf[SIZ];
-       char *header;
-       char *boundary;
-       char *startary;
-       size_t startary_len = 0;
-       char *endary;
-       char *next_boundary;
-       char *content_type;
-       char *charset;
-       size_t content_type_len;
-       size_t content_length;
-       char *encoding;
-       char *disposition;
-       size_t disposition_len;
-       char *id;
-       char *name = NULL;
-       char *content_type_name;
-       char *content_disposition_name;
-       char *filename;
-       int is_multipart;
-       int part_seq = 0;
        int i;
-       size_t length;
-       char nested_partnum[256];
-       int crlf_in_use = 0;
-       char *evaluate_crlf_ptr = NULL;
-       int buflen = 0;
-       int headerlen = 0;
-
-       ptr = content_start;
-       content_length = 0;
-
-       boundary = malloc(SIZ * 12);
-       *boundary = '\0';
-
-       startary = boundary + SIZ * 1;
-       *startary = '\0';
-
-       endary = boundary + SIZ * 2;
-       *endary = '\0';
-
-       header = boundary + SIZ * 3;
-       *header = '\0';
-
-       content_type = boundary + SIZ * 4;
-       *content_type = '\0';
-
-       charset = boundary + SIZ * 5;
-       *charset = '\0';
-
-       encoding = boundary + SIZ * 6;
-       *encoding = '\0';
-
-       content_type_name = boundary + SIZ * 7;
-       *content_type_name = '\0';
-
-       content_disposition_name = boundary + SIZ * 8;
-       *content_disposition_name = '\0';
-
-       filename = boundary + SIZ * 9;
-       *filename = '\0';
+       interesting_mime_headers *m;
+       m = (interesting_mime_headers*) malloc( sizeof(interesting_mime_headers));
+       
+       for (i = 0; i < eMax; i++) {
+            m->b[i].Key[0] = '\0';
+            m->b[i].len = 0;
+       }
+       m->content_length = -1;
+       return m;
+}
 
-       disposition = boundary + SIZ * 10;
-       *disposition = '\0';
 
-       id = boundary + SIZ * 11;
-       *id = '\0';
 
-       /* If the caller didn't supply an endpointer, generate one by measure */
-       if (content_end == NULL) {
-               content_end = &content_start[strlen(content_start)];
-       }
+long parse_MimeHeaders(interesting_mime_headers *m, char* content_start, char *content_end)
+{
+       char buf[SIZ];
+       char header[SIZ];
+       long headerlen;
+       char *ptr;
+       int buflen;
+       int i;
 
        /* Learn interesting things from the headers */
-       strcpy(header, "");
+       ptr = content_start;
+       *header = '\0';
        headerlen = 0;
        do {
                ptr = memreadlinelen(ptr, buf, SIZ, &buflen);
                if (ptr >= content_end) {
-                       goto end_parser;
+                       return -1;
                }
 
                for (i = 0; i < buflen; ++i) {
@@ -427,41 +389,43 @@ void the_mime_parser(char *partnum,
 
                if (!isspace(buf[0])) {
                        if (!strncasecmp(header, "Content-type:", 13)) {
-                               memcpy (content_type, &header[13], headerlen - 12);
-                               content_type_len = striplt (content_type);
+                               memcpy (m->b[content_type].Key, &header[13], headerlen - 12);
+                               m->b[content_type].len = striplt (m->b[content_type].Key);
 
-                               extract_key(content_type_name, content_type, content_type_len, HKEY("name"), '=');
-                               extract_key(charset,           content_type, content_type_len, HKEY("charset"), '=');
-                               extract_key(boundary,          header,       headerlen,        HKEY("boundary"), '=');
+                               m->b[content_type_name].len = extract_key(m->b[content_type_name].Key, CKEY(m->b[content_type]), HKEY("name"), '=');
+                               m->b[charset].len           = extract_key(m->b[charset].Key,           CKEY(m->b[content_type]), HKEY("charset"), '=');
+                               m->b[boundary].len          = extract_key(m->b[boundary].Key,          header,       headerlen,  HKEY("boundary"), '=');
 
                                /* Deal with weird headers */
-                               if (strchr(content_type, ' '))
-                                       *(strchr(content_type, ' ')) = '\0';
-                               if (strchr(content_type, ';'))
-                                       *(strchr(content_type, ';')) = '\0';
+                               if (strchr(m->b[content_type].Key, ' '))
+                                       *(strchr(m->b[content_type].Key, ' ')) = '\0';
+                               if (strchr(m->b[content_type].Key, ';'))
+                                       *(strchr(m->b[content_type].Key, ';')) = '\0';
                        }
                        else if (!strncasecmp(header, "Content-Disposition:", 20)) {
-                               memcpy (disposition, &header[20], headerlen - 19);
-                               disposition_len = striplt(disposition);
-                               extract_key(content_disposition_name, disposition, disposition_len,  HKEY("name"), '=');
-                               extract_key(filename,                 disposition, disposition_len, HKEY("filename"), '=');
+                               memcpy (m->b[disposition].Key, &header[20], headerlen - 19);
+                               m->b[disposition].len = striplt(m->b[disposition].Key);
+
+                               m->b[content_disposition_name].len = extract_key(m->b[content_disposition_name].Key, CKEY(m->b[disposition]), HKEY("name"), '=');
+                               m->b[filename].len                 = extract_key(m->b[filename].Key,                 CKEY(m->b[disposition]), HKEY("filename"), '=');
                        }
                        else if (!strncasecmp(header, "Content-ID:", 11)) {
-                               strcpy(id, &header[11]);
-                               striplt(id);
-                               stripallbut(id, '<', '>');
+                               memcpy(m->b[id].Key, &header[11], headerlen);
+                               striplt(m->b[id].Key);
+                               m->b[id].len = stripallbut(m->b[id].Key, '<', '>');
                        }
                        else if (!strncasecmp(header, "Content-length: ", 15)) {
-                               char clbuf[10];
-                               safestrncpy(clbuf, &header[15], sizeof clbuf);
-                               striplt(clbuf);
-                               content_length = (size_t) atol(clbuf);
+                               char *clbuf;
+                               clbuf = &header[15];
+                               while (isspace(clbuf))
+                                       clbuf ++;
+                               m->content_length = (size_t) atol(clbuf);
                        }
                        else if (!strncasecmp(header, "Content-transfer-encoding: ", 26)) {
-                               strcpy(encoding, &header[26]);
-                               striplt(encoding);
+                               memcpy(m->b[encoding].Key, &header[26], headerlen - 26);
+                               m->b[encoding].len = striplt(m->b[encoding].Key);
                        }
-                       strcpy(header, "");
+                       *header = '\0';
                        headerlen = 0;
                }
                if ((headerlen + buflen + 2) < SIZ) {
@@ -471,33 +435,84 @@ void the_mime_parser(char *partnum,
                }
        } while ((!IsEmptyStr(buf)) && (*ptr != 0));
 
-       if (strchr(disposition, ';'))
-               *(strchr(disposition, ';')) = '\0';
-       striplt(disposition);
-       if (strchr(content_type, ';'))
-               *(strchr(content_type, ';')) = '\0';
-       striplt(content_type);
-
-       if (!IsEmptyStr(boundary)) {
-               is_multipart = 1;
-       } else {
-               is_multipart = 0;
+       ptr = strchr(m->b[disposition].Key, ';');
+       if (ptr != NULL) *ptr = '\0';
+       m->b[disposition].len = striplt(m->b[disposition].Key);
+
+       ptr = strchr(m->b[content_type].Key, ';');
+       if (ptr != NULL) *ptr = '\0';
+       m->b[content_type].len = striplt(m->b[content_type].Key);
+
+       m->is_multipart = m->b[boundary].len != 0;
+
+       return 0;
+}
+
+/*
+ * Break out the components of a multipart message
+ * (This function expects to be fed HEADERS + CONTENT)
+ * Note: NULL can be supplied as content_end; in this case, the message is
+ * considered to have ended when the parser encounters a 0x00 byte.
+ */
+void the_mime_parser(char *partnum,
+                    char *content_start, char *content_end,
+                    MimeParserCallBackType CallBack,
+                    MimeParserCallBackType PreMultiPartCallBack,
+                    MimeParserCallBackType PostMultiPartCallBack,
+                    void *userdata,
+                    int dont_decode)
+{
+
+       char *ptr;
+       char *part_start, *part_end = NULL;
+       char *next_boundary;
+       
+       size_t content_length;
+       int part_seq = 0;
+       size_t length;
+       char nested_partnum[256];
+       int crlf_in_use = 0;
+       char *evaluate_crlf_ptr = NULL;
+       
+       interesting_mime_headers *m;
+       CBufStr *chosen_name;
+
+       ptr = content_start;
+       content_length = 0;
+
+       m = InitInterestingMimes();
+
+
+       /* If the caller didn't supply an endpointer, generate one by measure */
+       if (content_end == NULL) {
+               content_end = &content_start[strlen(content_start)];
        }
 
+
+       if (parse_MimeHeaders(m, content_start, content_end) != 0)
+               goto end_parser;
+       
        /* If this is a multipart message, then recursively process it */
        part_start = NULL;
-       if (is_multipart) {
+       if (m->is_multipart) {
 
                /* Tell the client about this message's multipartedness */
                if (PreMultiPartCallBack != NULL) {
-                       PreMultiPartCallBack("", "", partnum, "",
-                               NULL, content_type, charset,
-                               0, encoding, id, userdata);
+                       PreMultiPartCallBack("", 
+                                            "", 
+                                            partnum, 
+                                            "",
+                                            NULL, 
+                                            m->b[content_type].Key, 
+                                            m->b[charset].Key,
+                                            0, 
+                                            m->b[encoding].Key, 
+                                            m->b[id].Key, 
+                                            userdata);
                }
 
                /* Figure out where the boundaries are */
-               startary_len = snprintf(startary, SIZ, "--%s", boundary);
-               snprintf(endary, SIZ, "--%s--", boundary);
+               m->b[startary].len = snprintf(m->b[startary].Key, SIZ, "--%s", m->b[boundary].Key);
 
                part_start = NULL;
                do {
@@ -506,7 +521,7 @@ void the_mime_parser(char *partnum,
                        tmp = *content_end;
                        *content_end = '\0';
                        
-                       next_boundary = strstr(ptr, startary);
+                       next_boundary = strstr(ptr, m->b[startary].Key);
                        *content_end = tmp;
 
                        if ( (part_start != NULL) && (next_boundary != NULL) ) {
@@ -528,7 +543,8 @@ void the_mime_parser(char *partnum,
                                                 "%d", ++part_seq);
                                }
                                the_mime_parser(nested_partnum,
-                                           part_start, part_end,
+                                               part_start, 
+                                               part_end,
                                                CallBack,
                                                PreMultiPartCallBack,
                                                PostMultiPartCallBack,
@@ -539,7 +555,8 @@ void the_mime_parser(char *partnum,
                        if (next_boundary != NULL) {
                                /* If we pass out of scope, don't attempt to
                                 * read past the end boundary. */
-                               if (!strcmp(next_boundary, endary)) {
+                               if ((*(next_boundary + m->b[startary].len + 1) == '-') && 
+                                   (*(next_boundary + m->b[startary].len + 2) == '-') ){
                                        ptr = content_end;
                                }
                                else {
@@ -570,14 +587,23 @@ void the_mime_parser(char *partnum,
                } while ( (ptr < content_end) && (next_boundary != NULL) );
 
                if (PostMultiPartCallBack != NULL) {
-                       PostMultiPartCallBack("", "", partnum, "", NULL,
-                               content_type, charset, 0, encoding, id, userdata);
+                       PostMultiPartCallBack("", 
+                                             "", 
+                                             partnum, 
+                                             "", 
+                                             NULL,
+                                             m->b[content_type].Key, 
+                                             m->b[charset].Key,
+                                             0, 
+                                             m->b[encoding].Key, 
+                                             m->b[id].Key, 
+                                             userdata);
                }
                goto end_parser;
        }
 
        /* If it's not a multipart message, then do something with it */
-       if (!is_multipart) {
+       if (!m->is_multipart) {
                part_start = ptr;
                length = 0;
                while (ptr < content_end) {
@@ -600,32 +626,48 @@ void the_mime_parser(char *partnum,
                 * and sometimes it's tacked on to Content-disposition.  Use
                 * whichever one we have.
                 */
-               if (strlen(content_disposition_name) > strlen(content_type_name)) {
-                       name = content_disposition_name;
+               if (m->b[content_disposition_name].len > m->b[content_type_name].len) {
+                       chosen_name = &m->b[content_disposition_name];
                }
                else {
-                       name = content_type_name;
+                       chosen_name = &m->b[content_type_name];
                }
        
                /* Ok, we've got a non-multipart part here, so do something with it.
                 */
                mime_decode(partnum,
-                       part_start, length,
-                       content_type, charset, encoding, disposition, id,
-                       name, filename,
-                       CallBack, NULL, NULL,
-                       userdata, dont_decode
-               );
+                           part_start, 
+                           length,
+                           m->b[content_type].Key, 
+                           m->b[charset].Key,
+                           m->b[encoding].Key, 
+                           m->b[disposition].Key, 
+                           m->b[id].Key, 
+                           chosen_name->Key, 
+                           m->b[filename].Key,
+                           CallBack, 
+                           NULL, NULL,
+                           userdata, 
+                           dont_decode
+                       );
 
                /*
                 * Now if it's an encapsulated message/rfc822 then we have to recurse into it
                 */
-               if (!strcasecmp(content_type, "message/rfc822")) {
+               if (!strcasecmp(&m->b[content_type].Key[0], "message/rfc822")) {
 
                        if (PreMultiPartCallBack != NULL) {
-                               PreMultiPartCallBack("", "", partnum, "",
-                                       NULL, content_type, charset,
-                                       0, encoding, id, userdata);
+                               PreMultiPartCallBack("", 
+                                                    "", 
+                                                    partnum, 
+                                                    "",
+                                                    NULL, 
+                                                    m->b[content_type].Key, 
+                                                    m->b[charset].Key,
+                                                    0, 
+                                                    m->b[encoding].Key, 
+                                                    m->b[id].Key, 
+                                                    userdata);
                        }
                        if (CallBack != NULL) {
                                if (strlen(partnum) > 0) {
@@ -640,17 +682,27 @@ void the_mime_parser(char *partnum,
                                                 "%d", ++part_seq);
                                }
                                the_mime_parser(nested_partnum,
-                                       part_start, part_end,
-                                       CallBack,
-                                       PreMultiPartCallBack,
-                                       PostMultiPartCallBack,
-                                       userdata,
-                                       dont_decode
-                               );
+                                               part_start, 
+                                               part_end,
+                                               CallBack,
+                                               PreMultiPartCallBack,
+                                               PostMultiPartCallBack,
+                                               userdata,
+                                               dont_decode
+                                       );
                        }
                        if (PostMultiPartCallBack != NULL) {
-                               PostMultiPartCallBack("", "", partnum, "", NULL,
-                                       content_type, charset, 0, encoding, id, userdata);
+                               PostMultiPartCallBack("", 
+                                                     "", 
+                                                     partnum, 
+                                                     "", 
+                                                     NULL,
+                                                     m->b[content_type].Key, 
+                                                     m->b[charset].Key,
+                                                     0, 
+                                                     m->b[encoding].Key, 
+                                                     m->b[id].Key, 
+                                                     userdata);
                        }
 
 
@@ -659,7 +711,7 @@ void the_mime_parser(char *partnum,
        }
 
 end_parser:    /* free the buffers!  end the oppression!! */
-       free(boundary);
+       free(m);
 }