* Brought over the newest version of the MIME parser from Citadel, containing an API that supports arbitrary character sets.

author Art Cancro <ajc@citadel.org>

Tue, 19 Jul 2005 04:15:45 +0000 (04:15 +0000)

committer Art Cancro <ajc@citadel.org>

Tue, 19 Jul 2005 04:15:45 +0000 (04:15 +0000)
author Art Cancro <ajc@citadel.org>
Tue, 19 Jul 2005 04:15:45 +0000 (04:15 +0000)
committer Art Cancro <ajc@citadel.org>
Tue, 19 Jul 2005 04:15:45 +0000 (04:15 +0000)
diff --git a/webcit/ChangeLog b/webcit/ChangeLog

index 8516b132a4d6a07491ec2839c41555b7d98a520e..a147319bb56b75999b501af4e0276707380d1081 100644 (file)
--- a/webcit/ChangeLog
+++ b/webcit/ChangeLog
@@ -1,4 +1,8 @@
  $Log$
+Revision 619.25  2005/07/19 04:15:45  ajc
+* Brought over the newest version of the MIME parser from Citadel,
+  containing an API that supports arbitrary character sets.
+
  Revision 619.24  2005/07/14 04:21:53  ajc
  * More intelligent JavaScript to handle the greying out of end date and
    start/end times when "all day event" is selected.
@@ -2703,3 +2707,4 @@ Sun Dec  6 19:50:55 EST 1998 Art Cancro <ajc@uncnsrd.mt-kisco.ny.us>
  
  1998-12-03 Nathan Bryant <bryant@cs.usm.maine.edu>
         * webserver.c: warning fix
+
diff --git a/webcit/mime_parser.c b/webcit/mime_parser.c

index 7067bb1a8d2d684ee470260cda225e7629d582b3..07d89e8a0d8006654661e6121278744df824d9b1 100644 (file)
--- a/webcit/mime_parser.c
+++ b/webcit/mime_parser.c
@@ -3,41 +3,23 @@
   *
   * This is the MIME parser for Citadel.  Sometimes it actually works.
   *
- * Copyright (c) 1998-2003 by Art Cancro
+ * Copyright (c) 1998-2005 by Art Cancro
   * This code is distributed under the terms of the GNU General Public License.
   *
   */
  
-#include <ctype.h>
+
  #include <stdlib.h>
-#ifdef HAVE_UNISTD_H
  #include <unistd.h>
-#endif
  #include <stdio.h>
-#ifdef HAVE_FCNTL_H
-#include <fcntl.h>
-#endif
  #include <signal.h>
  #include <sys/types.h>
-#include <sys/wait.h>
-#include <sys/socket.h>
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
-#ifdef HAVE_LIMITS_H
-#include <limits.h>
-#endif
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <netdb.h>
+#include <ctype.h>
  #include <string.h>
-#include <pwd.h>
+#include <sys/stat.h>
  #include <errno.h>
-#include <stdarg.h>
-#include <pthread.h>
-#include <signal.h>
+
  #include "webcit.h"
-#include "webserver.h"
  
  #include "mime_parser.h"
  
@@ -82,7 +64,7 @@ int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
         char buf[SIZ];
         int buf_length = 0;
         int soft_line_break = 0;
-       int ch;
+       unsigned int ch;
         int decoded_length = 0;
         int i;
  
@@ -145,7 +127,7 @@ int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
   */
  void mime_decode(char *partnum,
                  char *part_start, size_t length,
-                char *content_type, char *encoding,
+                char *content_type, char *charset, char *encoding,
                  char *disposition,
                  char *name, char *filename,
                  void (*CallBack)
@@ -155,6 +137,7 @@ void mime_decode(char *partnum,
                    char *cbdisp,
                    void *cbcontent,
                    char *cbtype,
+                  char *cbcharset,
                    size_t cblength,
                    char *cbencoding,
                    void *cbuserdata),
@@ -165,6 +148,7 @@ void mime_decode(char *partnum,
                    char *cbdisp,
                    void *cbcontent,
                    char *cbtype,
+                  char *cbcharset,
                    size_t cblength,
                    char *cbencoding,
                    void *cbuserdata),
@@ -175,6 +159,7 @@ void mime_decode(char *partnum,
                    char *cbdisp,
                    void *cbcontent,
                    char *cbtype,
+                  char *cbcharset,
                    size_t cblength,
                    char *cbencoding,
                    void *cbuserdata),
@@ -199,7 +184,7 @@ void mime_decode(char *partnum,
                 if (CallBack != NULL) {
                         CallBack(name, filename, fixed_partnum(partnum),
                                 disposition, part_start,
-                               content_type, length, encoding, userdata);
+                               content_type, charset, length, encoding, userdata);
                         }
                 return;
         }
@@ -214,7 +199,7 @@ void mime_decode(char *partnum,
          * will never be larger than the encoded data.  This is a safe
          * assumption with base64, uuencode, and quoted-printable.
          */
-       decoded = mallok(length+2048);
+       decoded = malloc(length+2048);
         if (decoded == NULL) {
                 return;
         }
@@ -230,10 +215,10 @@ void mime_decode(char *partnum,
         if (bytes_decoded > 0) if (CallBack != NULL) {
                 CallBack(name, filename, fixed_partnum(partnum),
                         disposition, decoded,
-                       content_type, bytes_decoded, "binary", userdata);
+                       content_type, charset, bytes_decoded, "binary", userdata);
         }
  
-       phree(decoded);
+       free(decoded);
  }
  
  /*
@@ -251,6 +236,7 @@ void the_mime_parser(char *partnum,
                        char *cbdisp,
                        void *cbcontent,
                        char *cbtype,
+                      char *cbcharset,
                        size_t cblength,
                        char *cbencoding,
                        void *cbuserdata),
@@ -261,6 +247,7 @@ void the_mime_parser(char *partnum,
                        char *cbdisp,
                        void *cbcontent,
                        char *cbtype,
+                      char *cbcharset,
                        size_t cblength,
                        char *cbencoding,
                        void *cbuserdata),
@@ -271,6 +258,7 @@ void the_mime_parser(char *partnum,
                        char *cbdisp,
                        void *cbcontent,
                        char *cbtype,
+                      char *cbcharset,
                        size_t cblength,
                        char *cbencoding,
                        void *cbuserdata),
@@ -286,7 +274,9 @@ void the_mime_parser(char *partnum,
         char *boundary;
         char *startary;
         char *endary;
+       char *next_boundary;
         char *content_type;
+       char *charset;
         size_t content_length;
         char *encoding;
         char *disposition;
@@ -303,34 +293,37 @@ void the_mime_parser(char *partnum,
         ptr = content_start;
         content_length = 0;
  
-       boundary = mallok(SIZ);
+       boundary = malloc(SIZ);
         memset(boundary, 0, SIZ);
  
-       startary = mallok(SIZ);
+       startary = malloc(SIZ);
         memset(startary, 0, SIZ);
  
-       endary = mallok(SIZ);
+       endary = malloc(SIZ);
         memset(endary, 0, SIZ);
  
-       header = mallok(SIZ);
+       header = malloc(SIZ);
         memset(header, 0, SIZ);
  
-       content_type = mallok(SIZ);
+       content_type = malloc(SIZ);
         memset(content_type, 0, SIZ);
  
-       encoding = mallok(SIZ);
+       charset = malloc(SIZ);
+       memset(charset, 0, SIZ);
+
+       encoding = malloc(SIZ);
         memset(encoding, 0, SIZ);
  
-       content_type_name = mallok(SIZ);
+       content_type_name = malloc(SIZ);
         memset(content_type_name, 0, SIZ);
  
-       content_disposition_name = mallok(SIZ);
+       content_disposition_name = malloc(SIZ);
         memset(content_disposition_name, 0, SIZ);
  
-       filename = mallok(SIZ);
+       filename = malloc(SIZ);
         memset(filename, 0, SIZ);
  
-       disposition = mallok(SIZ);
+       disposition = malloc(SIZ);
         memset(disposition, 0, SIZ);
  
         /* If the caller didn't supply an endpointer, generate one by measure */
@@ -346,13 +339,17 @@ void the_mime_parser(char *partnum,
                         goto end_parser;
                 }
  
-               for (i = 0; i < strlen(buf); ++i)
-                       if (isspace(buf[i]))
+               for (i = 0; i < strlen(buf); ++i) {
+                       if (isspace(buf[i])) {
                                 buf[i] = ' ';
+                       }
+               }
+
                 if (!isspace(buf[0])) {
                         if (!strncasecmp(header, "Content-type: ", 14)) {
                                 strcpy(content_type, &header[14]);
                                 extract_key(content_type_name, content_type, "name");
+                               extract_key(charset, content_type, "charset");
                                 /* Deal with weird headers */
                                 if (strchr(content_type, ' '))
                                         *(strchr(content_type, ' ')) = '\0';
@@ -385,6 +382,13 @@ void the_mime_parser(char *partnum,
                 *(strchr(content_type, ';')) = '\0';
         striplt(content_type);
  
+       if (!strlen(content_type)) {
+               strcpy(content_type, "text/plain");
+       }
+       if (!strlen(charset)) {
+               strcpy(charset, "us-ascii");
+       }
+
         if (strlen(boundary) > 0) {
                 is_multipart = 1;
         } else {
@@ -398,56 +402,63 @@ void the_mime_parser(char *partnum,
                 /* Tell the client about this message's multipartedness */
                 if (PreMultiPartCallBack != NULL) {
                         PreMultiPartCallBack("", "", partnum, "",
-                               NULL, content_type,
+                               NULL, content_type, charset,
                                 0, encoding, userdata);
                 }
  
                 /* Figure out where the boundaries are */
                 snprintf(startary, SIZ, "--%s", boundary);
                 snprintf(endary, SIZ, "--%s--", boundary);
+
+               part_start = NULL;
                 do {
-                       if ( (!strncasecmp(ptr, startary, strlen(startary)))
-                          || (!strncasecmp(ptr, endary, strlen(endary))) ) {
-                               if (part_start != NULL) {
-                                       if (strlen(partnum) > 0) {
-                                               snprintf(nested_partnum,
-                                                        sizeof nested_partnum,
-                                                        "%s.%d", partnum,
-                                                        ++part_seq);
-                                       }
-                                       else {
-                                               snprintf(nested_partnum,
-                                                        sizeof nested_partnum,
-                                                        "%d", ++part_seq);
-                                       }
-                                       the_mime_parser(nested_partnum,
-                                                   part_start, part_end,
-                                                       CallBack,
-                                                       PreMultiPartCallBack,
-                                                       PostMultiPartCallBack,
-                                                       userdata,
-                                                       dont_decode);
+                       next_boundary = bmstrstr(ptr, startary, strncmp);
+                       if ( (part_start != NULL) && (next_boundary != NULL) ) {
+                               part_end = next_boundary;
+                               --part_end;
+
+                               if (strlen(partnum) > 0) {
+                                       snprintf(nested_partnum,
+                                                sizeof nested_partnum,
+                                                "%s.%d", partnum,
+                                                ++part_seq);
                                 }
-                               ptr = memreadline(ptr, buf, SIZ);
-                               part_start = ptr;
-                       }
-                       else {
-                               part_end = ptr;
-                               ++ptr;
+                               else {
+                                       snprintf(nested_partnum,
+                                                sizeof nested_partnum,
+                                                "%d", ++part_seq);
+                               }
+                               the_mime_parser(nested_partnum,
+                                           part_start, part_end,
+                                               CallBack,
+                                               PreMultiPartCallBack,
+                                               PostMultiPartCallBack,
+                                               userdata,
+                                               dont_decode);
                         }
-                       /* If we pass out of scope in the MIME multipart (by
-                        * hitting the end boundary), force the pointer out
-                        * of scope so this loop ends.
-                        */
-                       if (ptr < content_end) {
-                               if (!strcasecmp(ptr, endary)) {
-                                       ptr = content_end++;
+
+                       if (next_boundary != NULL) {
+                               /* If we pass out of scope, don't attempt to read
+                                * past the end boundary. */
+                               if (!strcmp(next_boundary, endary)) {
+                                       ptr = content_end;
                                 }
+                               else {
+                                       /* Set up for the next part. */
+                                       part_start = strstr(next_boundary, "\n");
+                                       ++part_start;
+                                       ptr = part_start;
+                               }
+                       }
+                       else {
+                               /* Invalid end of multipart.  Bail out! */
+                               ptr = content_end;
                         }
-               } while (ptr <= content_end);
+               } while ( (ptr < content_end) && (next_boundary != NULL) );
+
                 if (PostMultiPartCallBack != NULL) {
                         PostMultiPartCallBack("", "", partnum, "", NULL,
-                               content_type, 0, encoding, userdata);
+                               content_type, charset, 0, encoding, userdata);
                 }
                 goto end_parser;
         }
@@ -461,9 +472,9 @@ void the_mime_parser(char *partnum,
                         ++length;
                 }
                 part_end = content_end;
-               /* fix an off-by-one error */
-               --part_end;
-               --length;
+                /* fix an off-by-one error */
+                --part_end;
+                --length;
                 
                 /* Truncate if the header told us to */
                 if ( (content_length > 0) && (length > content_length) ) {
@@ -483,23 +494,24 @@ void the_mime_parser(char *partnum,
                 
                 mime_decode(partnum,
                             part_start, length,
-                           content_type, encoding, disposition,
+                           content_type, charset, encoding, disposition,
                             name, filename,
                             CallBack, NULL, NULL,
                             userdata, dont_decode);
         }
  
  end_parser:    /* free the buffers!  end the oppression!! */
-       phree(boundary);
-       phree(startary);
-       phree(endary);  
-       phree(header);
-       phree(content_type);
-       phree(encoding);
-       phree(content_type_name);
-       phree(content_disposition_name);
-       phree(filename);
-       phree(disposition);
+       free(boundary);
+       free(startary);
+       free(endary);   
+       free(header);
+       free(content_type);
+       free(charset);
+       free(encoding);
+       free(content_type_name);
+       free(content_disposition_name);
+       free(filename);
+       free(disposition);
  }
  
  
@@ -520,6 +532,7 @@ void mime_parser(char *content_start,
                    char *cbdisp,
                    void *cbcontent,
                    char *cbtype,
+                  char *cbcharset,
                    size_t cblength,
                    char *cbencoding,
                    void *cbuserdata),
@@ -531,6 +544,7 @@ void mime_parser(char *content_start,
                    char *cbdisp,
                    void *cbcontent,
                    char *cbtype,
+                  char *cbcharset,
                    size_t cblength,
                    char *cbencoding,
                    void *cbuserdata),
@@ -542,6 +556,7 @@ void mime_parser(char *content_start,
                    char *cbdisp,
                    void *cbcontent,
                    char *cbtype,
+                  char *cbcharset,
                    size_t cblength,
                    char *cbencoding,
                    void *cbuserdata),
@@ -557,4 +572,3 @@ void mime_parser(char *content_start,
                         PostMultiPartCallBack,
                         userdata, dont_decode);
  }
-
diff --git a/webcit/mime_parser.h b/webcit/mime_parser.h

index 9464f01fae32802f2c5839328204db87260f6f75..b82cd6884798ddd012646e9dcc101e9b0f746283 100644 (file)
--- a/webcit/mime_parser.h
+++ b/webcit/mime_parser.h
@@ -11,21 +11,6 @@
  #define SIZ    4096
  #endif
  
-#ifndef mallok
-#define mallok(x) malloc(x)
-#endif
-
-#ifndef phree
-#define phree(x) free(x)
-#endif
-
-#ifndef reallok
-#define reallok(x,y) realloc(x,y)
-#endif
-
-#ifndef strdoop
-#define strdoop(x) strdup(x)
-#endif
  
  /* 
   * Declarations for functions in the parser
@@ -41,6 +26,7 @@ void mime_parser(char *content_start, char *content_end,
                         char *cbdisp,
                         void *cbcontent,
                         char *cbtype,
+                       char *cbcharset,
                         size_t cblength,
                         char *cbencoding,
                         void *cbuserdata),
@@ -51,6 +37,7 @@ void mime_parser(char *content_start, char *content_end,
                         char *cbdisp,
                         void *cbcontent,
                         char *cbtype,
+                       char *cbcharset,
                         size_t cblength,
                         char *cbencoding,
                         void *cbuserdata),
@@ -61,6 +48,7 @@ void mime_parser(char *content_start, char *content_end,
                         char *cbdisp,
                         void *cbcontent,
                         char *cbtype,
+                       char *cbcharset,
                         size_t cblength,
                         char *cbencoding,
                         void *cbuserdata),
diff --git a/webcit/tools.c b/webcit/tools.c

index 86fd68ec1e351f47eeddf8fc8e98fac886f32478..041fe027e853c0fb28d78807867f62acc5cfe43b 100644 (file)
--- a/webcit/tools.c
+++ b/webcit/tools.c
@@ -494,3 +494,69 @@ void generate_uuid(char *buf) {
         );
  }
  
+
+
+/*
+ * bmstrstr() is a strstr() variant that uses a Boyer-Moore style skip table.
+ * cmpfunc has strncmp()'s calling syntax (e.g. strncasecmp() for a
+ * case-insensitive search).  Returns a pointer to the match inside text,
+ * text itself if pattern is empty, or NULL if text or pattern is NULL,
+ * pattern is longer than text, or no match is found.
+ * Original code: copyright (c) 1997-1998 by Urs Janssen <urs@tin.org>
+ * Modifications: copyright (c) 2003 by Art Cancro <ajc@uncensored.citadel.org>
+ */
+char *bmstrstr(char *text, char *pattern,
+       int (*cmpfunc)(const char *, const char *, size_t) )
+{
+       register unsigned char *p, *t;  /* cursors into pattern and text */
+       register int i, j, *delta;      /* i: text left to scan, j: current skip */
+       register size_t p1;             /* patlen at first, patlen - 1 later */
+       int deltaspace[256];            /* skip table, one slot per byte value */
+       size_t textlen;
+       size_t patlen;
+
+       if (text == NULL) return(NULL);
+       if (pattern == NULL) return(NULL);
+
+       textlen = strlen(text);
+       patlen = strlen(pattern);
+
+       /* algorithm fails if pattern is empty */
+       if ((p1 = patlen) == 0)
+               return (text);
+
+       /* code below fails (whenever i is unsigned) if pattern too long */
+       if (p1 > textlen)
+               return (NULL);
+
+       /* set up deltas */
+       delta = deltaspace;
+       for (i = 0; i <= 255; i++)
+               delta[i] = p1;          /* default skip: the whole pattern length */
+       for (p = (unsigned char *) pattern, i = p1; --i > 0;)
+               delta[*p++] = i;        /* distance to pattern end; last char is excluded */
+
+       /*
+        * From now on, we want patlen - 1.
+        * In the loop below, p points to the end of the pattern,
+        * t points to the end of the text to be tested against the
+        * pattern, and i counts the amount of text remaining, not
+        * including the part to be tested.
+        */
+       p1--;
+       p = (unsigned char *) pattern + p1;
+       t = (unsigned char *) text + p1;
+       i = textlen - patlen;
+       while (1) {
+               if (tolower(*p) == tolower(*t)  /* last char is always case-folded, even if cmpfunc is case-sensitive */
+                  && cmpfunc((p - p1), (t - p1), p1) == 0)     /* cmpfunc checks only the first patlen - 1 chars */
+                       return ((char *) t - p1);       /* start of the matching window */
+               j = delta[*t];  /* NOTE(review): indexed by the raw byte, so a case-insensitive cmpfunc can skip a match that differs only in case -- confirm */
+               if (i < j)
+                       break;  /* remaining text is shorter than the skip: no match */
+               i -= j;
+               t += j;
+       }
+       return (NULL);
+}
+
+
diff --git a/webcit/webcit.c b/webcit/webcit.c

index e4570532a2408763ca5afc02c73c8ac60e3bda79..22c62ca43b4fad6371f0d53b39642f330df1234d 100644 (file)
--- a/webcit/webcit.c
+++ b/webcit/webcit.c
@@ -817,7 +817,7 @@ void extract_action(char *actbuf, char *cmdbuf)
  
  
  void upload_handler(char *name, char *filename, char *partnum, char *disp,
-                       void *content, char *cbtype, size_t length,
+                       void *content, char *cbtype, char *cbcharset, size_t length,
                         char *encoding, void *userdata)
  {
         struct urlcontent *u;
diff --git a/webcit/webcit.h b/webcit/webcit.h

index 19d62976a9c6cadf4b63bd1c563f3f8ccf9d89b5..699854449c06466af4a84f16c37f35a62c076cd2 100644 (file)
--- a/webcit/webcit.h
+++ b/webcit/webcit.h
@@ -274,6 +274,8 @@ void cookie_to_stuff(char *cookie, int *session,
                  char *user, size_t user_len,
                  char *pass, size_t pass_len,
                  char *room, size_t room_len);
+char *bmstrstr(char *text, char *pattern,
+        int (*cmpfunc)(const char *, const char *, size_t) );
  void locate_host(char *, int);
  void become_logged_in(char *, char *, char *);
  void do_login(void);
author	Art Cancro <ajc@citadel.org>
	Tue, 19 Jul 2005 04:15:45 +0000 (04:15 +0000)
committer	Art Cancro <ajc@citadel.org>
	Tue, 19 Jul 2005 04:15:45 +0000 (04:15 +0000)
webcit/ChangeLog		patch \| blob \| history
webcit/mime_parser.c		patch \| blob \| history
webcit/mime_parser.h		patch \| blob \| history
webcit/tools.c		patch \| blob \| history
webcit/webcit.c		patch \| blob \| history
webcit/webcit.h		patch \| blob \| history