code.citadel.org Git - citadel.git/blobdiff - citadel/mime_parser.c
fixes for BSDI. see ChangeLog.
[citadel.git] / citadel / mime_parser.c
index f7ad28d8ab08abee5e767d6f2963c84e6571ee42..bc046ba8e27c963c7f2aeeadaacbade62fade08d 100644 (file)
@@ -2,15 +2,14 @@
  * mime_parser.c
  *
  * This is a really bad attempt at writing a parser to handle MIME-encoded
- * data, including multipart messages.  In the case of WebCit, the input data
- * might be a form containing uploaded files.  In the Citadel server, the data
- * is more likely to be an actual MIME-encoded message.
+ * messages.
  *
  * Copyright (c) 1998-1999 by Art Cancro
  * This code is distributed under the terms of the GNU General Public License.
  *
  */
 
+#include "sysdep.h"
 #include <stdlib.h>
 #include <unistd.h>
 #include <stdio.h>
 #include <sys/types.h>
 #include <ctype.h>
 #include <string.h>
+#include <sys/stat.h>
+#include <errno.h>
+#ifdef HAVE_PTHREAD_H
+#include <pthread.h>
+#endif
+#include "citadel.h"
 #include "mime_parser.h"
+#include "sysdep_decls.h"
+#include "server.h"
 
 
 
@@ -41,203 +48,298 @@ void extract_key(char *target, char *source, char *key) {
 
 
 
+/*
+ * Utility function to "readline" from memory.
+ *
+ * Copies one line, starting at start, into buf.  A line ends at a linefeed
+ * (10) or at the terminating 0x00 byte; neither is stored.  At most
+ * maxlen-1 characters are kept (the rest of an over-long line is consumed
+ * and discarded) and buf is always null-terminated when maxlen > 0.  If the
+ * last character kept is a carriage return (13) it is stripped.
+ *
+ * Returns a pointer to the character following the linefeed, or a pointer
+ * to the 0x00 byte itself when the input ended without one.  The pointer is
+ * never advanced past the terminator, so callers may safely test *ptr == 0
+ * on the result and may call again without running off the end.
+ */
+char *memreadline(char *start, char *buf, int maxlen) {
+       char *ptr = start;
+       int len = 0;
+
+       /* A non-positive maxlen leaves no room to store anything */
+       if (maxlen > 0) memset(buf, 0, (size_t)maxlen);
+
+       while ((*ptr != 0) && (*ptr != 10)) {
+               /* track the length ourselves instead of calling strlen() */
+               if ((len + 1) < maxlen) buf[len++] = *ptr;
+               ++ptr;
+               }
+
+       if ((len > 0) && (buf[len-1] == 13)) buf[len-1] = 0;
+
+       /* Step over the linefeed, but stay put on the terminator */
+       if (*ptr == 10) ++ptr;
+       return ptr;
+       }
+
 /*
- * The very back end for the component handler
- * (This function expects to be fed CONTENT ONLY, no headers)
+ * Given a message or message-part body and a length, handle any necessary
+ * decoding and pass the request up the stack.
+ *
+ * "7bit", "8bit" and "binary" are treated as "not encoded" (the encoding
+ * string is blanked in place, so the caller's buffer is modified) and the
+ * part is handed to CallBack untouched.  "base64" and "quoted-printable"
+ * are decoded by a forked child that execs ./base64 -d or ./qpdecode with
+ * its stdin/stdout attached to two pipes; the decoded bytes are collected
+ * in a temporary buffer of length+1024 bytes and passed to CallBack only
+ * if at least one byte came back.  Any other encoding is logged and the
+ * part is dropped.
+ *
+ * CallBack receives, in order: name, filename, partnum, disposition, a
+ * pointer to the (possibly decoded) content, content_type and the content
+ * length.
+ *
+ * NOTE(review): nothing in this function closes recvpipe[0] or waits for
+ * the child, and reads into the buffer are not bounded by its size.
  */
-void do_something_with_it(char *content,
-               int length,
-               char *content_type,
-               char *content_disposition,
+void mime_decode(char *partnum,
+               char *part_start, size_t length,
+               char *content_type, char *encoding,
+               char *disposition,
+               char *name, char *filename,
                void (*CallBack)
                        (char *cbname,
                        char *cbfilename,
-                       char *cbencoding,
+                       char *cbpartnum,
+                       char *cbdisp,
                        void *cbcontent,
                        char *cbtype,
                        size_t cblength)
                ) {
-       char name[256];
-       char filename[256];
 
-       extract_key(name, content_disposition, " name");
-       extract_key(filename, content_disposition, "filename");
+       char *decoded;
+       struct stat statbuf;
+       int sendpipe[2];
+       int recvpipe[2];
+       int childpid;
+       size_t bytes_sent = 0;
+       size_t bytes_recv = 0;
+       size_t blocksize;
+       int write_error = 0;
+
+       lprintf(9, "mime_decode() called\n");
+
+       /* Some encodings aren't really encodings */
+       if (!strcasecmp(encoding, "7bit"))      strcpy(encoding, "");
+       if (!strcasecmp(encoding, "8bit"))      strcpy(encoding, "");
+       if (!strcasecmp(encoding, "binary"))    strcpy(encoding, "");
+
+       /* If this part is not encoded, send as-is */
+       if (strlen(encoding)==0) {
+               CallBack(name, filename, partnum, disposition, part_start,
+                       content_type, length);
+               return;
+               }
+
+       if ( (strcasecmp(encoding, "base64"))
+            && (strcasecmp(encoding, "quoted-printable"))  ) {
+               lprintf(5, "ERROR: unknown MIME encoding '%s'\n", encoding);
+               return;
+               }
+
+       /*
+        * Allocate a buffer for the decoded data.  The output buffer is the
+        * same size as the input buffer; this assumes that the decoded data
+        * will never be larger than the encoded data.  This is a safe
+        * assumption with base64, uuencode, and quoted-printable.  Just to
+        * be safe, we still pad the buffer a bit.
+        */
+       decoded = mallok(length + 1024);
+       if (decoded == NULL) {
+               lprintf(5, "ERROR: cannot allocate memory.\n");
+               return;
+               }
+       if (pipe(sendpipe) != 0) return;        /* NOTE(review): decoded is not freed on this path */
+       if (pipe(recvpipe) != 0) return;        /* NOTE(review): decoded not freed, sendpipe left open */
 
-       /* Nested multipart gets recursively fed back into the parser */
-       if (!strncasecmp(content_type, "multipart", 9)) {
-               mime_parser(content, length, content_type, CallBack);
+       childpid = fork();
+       if (childpid < 0) {
+               phree(decoded);         /* NOTE(review): the four pipe descriptors stay open */
+               return;
                }
 
-       /**** OTHERWISE, HERE'S WHERE WE HANDLE THE STUFF!! *****/
+       if (childpid == 0) {
+               close(2);               /* child process: close stderr */
+               /* send stdio to the pipes */
+               if (dup2(sendpipe[0], 0)<0) lprintf(5, "ERROR dup2()\n");
+               if (dup2(recvpipe[1], 1)<0) lprintf(5, "ERROR dup2()\n");
+               close(sendpipe[1]);      /* Close the ends we're not using */
+               close(recvpipe[0]);
+               if (!strcasecmp(encoding, "base64"))
+                  execlp("./base64", "base64", "-d", NULL);
+               else if (!strcasecmp(encoding, "quoted-printable"))
+                  execlp("./qpdecode", "qpdecode", NULL);
+               lprintf(5, "ERROR: cannot exec decoder for %s\n", encoding);
+               exit(1);                /* only reached when execlp() failed */
+               }
 
-       CallBack(name, filename, "", content, content_type, length);
+       close(sendpipe[0]);      /* Close the ends we're not using  */
+       close(recvpipe[1]);
+
+       while ( (bytes_sent < length) && (write_error == 0) ) {
+               /* Empty the input pipe FIRST.  st_size is used here as
+                * the number of bytes waiting in the pipe.
+                * NOTE(review): POSIX leaves st_size unspecified for
+                * pipes -- confirm on each target platform.
+                */
+               while (fstat(recvpipe[0], &statbuf), (statbuf.st_size > 0) ) {
+                       blocksize = read(recvpipe[0], &decoded[bytes_recv],
+                               statbuf.st_size);
+                       if (blocksize < 0)  /* NOTE(review): blocksize is size_t, so this is never true */
+                               lprintf(5, "ERROR: cannot read from pipe\n");
+                       else
+                               bytes_recv = bytes_recv + blocksize;
+                       }
+               /* Then put some data into the output pipe
+                * (at most 2048 bytes per write)
+                */
+               blocksize = length - bytes_sent;
+               if (blocksize > 2048) blocksize = 2048;
+               if (write(sendpipe[1], &part_start[bytes_sent], blocksize) <0) {
+                       lprintf(5, "ERROR: cannot write to pipe: %s\n",
+                               strerror(errno));
+                       write_error = 1;
+                       }
+               bytes_sent = bytes_sent + blocksize;    /* NOTE(review): counts the whole block even if write() wrote less */
+               }
+       close(sendpipe[1]);
+       /* Empty the input pipe, one byte per read(), until read() returns 0.
+        * NOTE(review): blocksize is size_t, so a -1 error return also
+        * passes the > 0 test.
+        */
+       while ( (blocksize = read(recvpipe[0], &decoded[bytes_recv], 1)),
+             (blocksize > 0) )  {
+               bytes_recv = bytes_recv + blocksize;
+               }
 
-       /**** END OF STUFF-HANDLER ****/
+       if (bytes_recv > 0)
+               CallBack(name, filename, partnum, disposition, decoded,
+                       content_type, bytes_recv);
 
+       phree(decoded);
        }
 
-
 /*
- * Take a part, figure out its length, and do something with it
- * (This function expects to be fed HEADERS+CONTENT)
+ * Break out the components of a multipart message
+ * (This function expects to be fed HEADERS + CONTENT)
+ * Note: NULL can be supplied as content_end; in this case, the message is
+ * considered to have ended when the parser encounters a 0x00 byte.
+ *
+ * partnum is the label reported to CallBack for this part; sub-parts of a
+ * multipart body are parsed recursively with the label "partnum.N", N
+ * counting from 1.
+ *
+ * The headers are read first; a line that starts with whitespace is
+ * appended to the header line before it.  If extract_key() yields a
+ * "boundary" value from any header, the body is split at "--boundary"
+ * lines and each piece is fed back into this function.  Otherwise the body
+ * is passed to mime_decode() together with the content type, transfer
+ * encoding, disposition, name and filename gathered from the headers.
+ *
+ * NOTE(review): disposition is read below without having been cleared
+ * like the other header buffers.
+ * NOTE(review): the end-of-data checks run before a line is compared with
+ * the boundary strings, so a closing boundary on the very last line
+ * appears to leave the final part unprocessed -- confirm.
  */
-void handle_part(char *content,
-               int part_length,
-               char *supplied_content_type,
+void the_mime_parser(char *partnum,
+               char *content_start, char *content_end,
                void (*CallBack)
                        (char *cbname,
                        char *cbfilename,
-                       char *cbencoding,
+                       char *cbpartnum,
+                       char *cbdisp,
                        void *cbcontent,
                        char *cbtype,
                        size_t cblength)
                ) {
+
+       char *ptr;
+       char *part_start, *part_end;
+       char buf[256];
+       char header[256];
+       char boundary[256];
+       char startary[256];
+       char endary[256];
        char content_type[256];
-       char content_disposition[256];
-       char *start;
-       char buf[512];
-       int crlf = 0;   /* set to 1 for crlf-style newlines */
-       int actual_length;
-
-       strcpy(content_type, supplied_content_type);
-
-       /* Strip off any leading blank lines. */
-       start = content;
-       while ((!strncmp(start, "\r", 1)) || (!strncmp(start, "\n", 1))) {
-               ++start;
-               --part_length;
+       char encoding[256];
+       char disposition[256];
+       char name[256];
+       char filename[256];
+       int is_multipart;
+       int part_seq = 0;
+       int i;
+       size_t length;
+       char nested_partnum[256];
+
+       lprintf(9, "the_mime_parser() called\n");
+       ptr = content_start;
+       memset(boundary, 0, sizeof boundary);
+       memset(content_type, 0, sizeof content_type);
+       memset(encoding, 0, sizeof encoding);
+       memset(name, 0, sizeof name);
+       memset(filename, 0, sizeof filename);
+
+       /* Learn interesting things from the headers */
+       strcpy(header, "");
+       do {
+               ptr = memreadline(ptr, buf, sizeof buf);
+               if (*ptr == 0) return; /* premature end of message */
+               if (content_end != NULL)
+                       if (ptr >= content_end) return;
+
+               for (i=0; i<strlen(buf); ++i)
+                       if (isspace(buf[i])) buf[i]=' ';        /* turn tabs etc. into blanks */
+               if (!isspace(buf[0])) { /* not a continuation line: act on the header collected so far */
+                       if (!strncasecmp(header, "Content-type: ", 14)) {
+                               strcpy(content_type, &header[14]);
+                               extract_key(name, content_type, "name");
+                               }
+                       if (!strncasecmp(header, "Content-Disposition: ", 21)) {
+                               strcpy(disposition, &header[21]);
+                               extract_key(filename, disposition, "filename");
+                               }
+                       if (!strncasecmp(header,
+                               "Content-transfer-encoding: ", 27))
+                                       strcpy(encoding, &header[27]);
+                       if (strlen(boundary)==0)
+                               extract_key(boundary, header, "boundary");
+                       strcpy(header, "");
+                       }
+               if ((strlen(header)+strlen(buf)+2)<sizeof(header))
+                       strcat(header, buf);    /* lines that would not fit are silently skipped */
+               } while ((strlen(buf) > 0) && (*ptr != 0));
+
+       for (i=0; i<strlen(disposition); ++i)  /* keep only the text before the first ';' */
+               if (disposition[i]==';') disposition[i] = 0;
+       for (i=0; i<strlen(content_type); ++i) 
+               if (content_type[i]==';') content_type[i] = 0;
+
+       if (strlen(boundary) > 0) {
+               is_multipart = 1;
+               }
+       else {
+               is_multipart = 0;
                }
 
-       /* At this point all we have left is the headers and the content. */
-       do {
-               strcpy(buf, "");
+       /* If this is a multipart message, then recursively process it */
+       part_start = NULL;
+       if (is_multipart) {
+               sprintf(startary, "--%s", boundary);
+               sprintf(endary, "--%s--", boundary);    /* NOTE(review): unbounded; assumes boundary leaves room for the 4 extra bytes */
                do {
-                       buf[strlen(buf)+1] = 0;
-                       if (strlen(buf)<((sizeof buf)-1)) {
-                               strncpy(&buf[strlen(buf)], start, 1);
+                       part_end = ptr; /* start of the line about to be read */
+                       ptr = memreadline(ptr, buf, sizeof buf);
+                       if (*ptr == 0) return; /* premature end of message */
+                       if (content_end != NULL)
+                               if (ptr >= content_end) return;
+                       if ((!strcasecmp(buf, startary))
+                           ||(!strcasecmp(buf, endary))) {
+                               if (part_start != NULL) {
+                                       sprintf(nested_partnum, "%s.%d",
+                                               partnum, ++part_seq);
+                                       the_mime_parser(nested_partnum,
+                                                       part_start, part_end,
+                                                       CallBack);
+                                       }
+                               part_start = ptr;       /* the next part begins after the boundary line */
                                }
-                       ++start;
-                       --part_length;
-                       } while((buf[strlen(buf)-1] != 10) && (part_length>0));
-               if (part_length <= 0) return;
-               buf[strlen(buf)-1] = 0;
-               if (buf[strlen(buf)-1]==13) {
-                       buf[strlen(buf)-1] = 0;
-                       crlf = 1;
-                       }
-               if (!strncasecmp(buf, "Content-type: ", 14)) {
-                       strcpy(content_type, &buf[14]);
-                       }
-               if (!strncasecmp(buf, "Content-disposition: ", 21)) {
-                       strcpy(content_disposition, &buf[21]);
+                       } while (strcasecmp(buf, endary));
+               }
+
+       /* If it's not a multipart message, then do something with it */
+       if (!is_multipart) {
+               part_start = ptr;
+               length = 0;
+               while ((*ptr != 0)&&((content_end==NULL)||(ptr<content_end))) {
+                       ++length;
+                       part_end = ptr++;
                        }
-               } while (strlen(buf)>0);
+               mime_decode(partnum,
+                               part_start, length,
+                               content_type, encoding, disposition,
+                               name, filename, CallBack);
+               }
        
-       if (crlf) actual_length = part_length - 2;
-       else actual_length = part_length - 1;
-
-       /* Now that we've got this component isolated, what to do with it? */
-       do_something_with_it(start, actual_length,
-                       content_type, content_disposition, CallBack);
-
        }
 
-       
 /*
- * Break out the components of a multipart message
- * (This function expects to be fed CONTENT ONLY, no headers)
+ * Entry point for the MIME parser.
+ * (This function expects to be fed HEADERS + CONTENT)
+ * Note: NULL can be supplied as content_end; in this case, the message is
+ * considered to have ended when the parser encounters a 0x00 byte.
+ *
+ * Logs the call and passes the whole message to the_mime_parser() as part
+ * number "1".  content_start, content_end and CallBack are forwarded
+ * unchanged.
  */
-
-
-void mime_parser(char *content,
-               int ContentLength,
-               char *ContentType,
+void mime_parser(char *content_start, char *content_end,
                void (*CallBack)
                        (char *cbname,
                        char *cbfilename,
-                       char *cbencoding,
+                       char *cbpartnum,
+                       char *cbdisp,
                        void *cbcontent,
                        char *cbtype,
                        size_t cblength)
                ) {
-       char boundary[256];
-       char endary[256];
-       int have_boundary = 0;
-       int a;
-       char *ptr;
-       char *beginning;
-       int bytes_processed = 0;
-       int part_length;
-
-       /* If it's not multipart, don't process it as multipart */
-       if (strncasecmp(ContentType, "multipart", 9)) {
-               do_something_with_it(content, ContentLength,
-                               ContentType, "", CallBack);
-               return;
-               }
-
-       /* Figure out what the boundary is */
-       strcpy(boundary, ContentType);
-       for (a=0; a<strlen(boundary); ++a) {
-               if (!strncasecmp(&boundary[a], "boundary=", 9)) {
-                       boundary[0]='-';
-                       boundary[1]='-';
-                       strcpy(&boundary[2], &boundary[a+9]);
-                       have_boundary = 1;
-                       a = 0;
-                       }
-               if ((boundary[a]==13) || (boundary[a]==10)) {
-                       boundary[a] = 0;
-                       }
-               }
-       if (boundary[2]==34) {
-               strcpy(&boundary[2], &boundary[3]);
-               for (a=2; a<strlen(boundary); ++a)
-                       if (boundary[a]==34) boundary[a]=0;
-               }
-
-       /* We can't process multipart messages without a boundary. */
-       if (have_boundary == 0) return;
-       strcpy(endary, boundary);
-       strcat(endary, "--");
-       fprintf(stderr, "BOUNDARY: %s\n", boundary);
-
-       ptr = content;
-
-       /* Seek to the beginning of the next boundary */
-       while (bytes_processed < ContentLength) {
-             /* && (strncasecmp(ptr, boundary, strlen(boundary))) ) { */
 
-               if (strncasecmp(ptr, boundary, strlen(boundary))) {
-                       ++ptr;
-                       ++bytes_processed;
-                       }
-
-               /* See if we're at the end */
-               if (!strncasecmp(ptr, endary, strlen(endary))) {
-                       return;
-                       }
-
-               /* Seek to the end of the boundary string */
-               if (!strncasecmp(ptr, boundary, strlen(boundary))) {
-                       fprintf(stderr, "FOUNDA BOUNDA\n");
-                       while ( (bytes_processed < ContentLength)
-                             && (strncasecmp(ptr, "\n", 1)) ) {
-                               ++ptr;
-                               ++bytes_processed;
-                               }
-                       beginning = ptr;
-                       part_length = 0;
-                       while ( (bytes_processed < ContentLength)
-                         && (strncasecmp(ptr, boundary, strlen(boundary))) ) {
-                               ++ptr;
-                               ++bytes_processed;
-                               ++part_length;
-                               }
-                       handle_part(beginning, part_length, "", CallBack);
-                       /* Back off so we can see the next boundary */
-                       --ptr;
-                       --bytes_processed;
-                       }
-               }
+       lprintf(9, "mime_parser() called\n");
+       the_mime_parser("1", content_start, content_end, CallBack);
        }