* Brought over the newest version of the MIME parser from Citadel,
author Art Cancro <ajc@citadel.org>
Tue, 19 Jul 2005 04:15:45 +0000 (04:15 +0000)
committer Art Cancro <ajc@citadel.org>
Tue, 19 Jul 2005 04:15:45 +0000 (04:15 +0000)
  containing an API that supports arbitrary character sets.

webcit/ChangeLog
webcit/mime_parser.c
webcit/mime_parser.h
webcit/tools.c
webcit/webcit.c
webcit/webcit.h

index 8516b132a4d6a07491ec2839c41555b7d98a520e..a147319bb56b75999b501af4e0276707380d1081 100644 (file)
@@ -1,4 +1,8 @@
 $Log$
+Revision 619.25  2005/07/19 04:15:45  ajc
+* Brought over the newest version of the MIME parser from Citadel,
+  containing an API that supports arbitrary character sets.
+
 Revision 619.24  2005/07/14 04:21:53  ajc
 * More intelligent JavaScript to handle the greying out of end date and
   start/end times when "all day event" is selected.
@@ -2703,3 +2707,4 @@ Sun Dec  6 19:50:55 EST 1998 Art Cancro <ajc@uncnsrd.mt-kisco.ny.us>
 
 1998-12-03 Nathan Bryant <bryant@cs.usm.maine.edu>
        * webserver.c: warning fix
+
index 7067bb1a8d2d684ee470260cda225e7629d582b3..07d89e8a0d8006654661e6121278744df824d9b1 100644 (file)
@@ -3,41 +3,23 @@
  *
  * This is the MIME parser for Citadel.  Sometimes it actually works.
  *
- * Copyright (c) 1998-2003 by Art Cancro
+ * Copyright (c) 1998-2005 by Art Cancro
  * This code is distributed under the terms of the GNU General Public License.
  *
  */
 
-#include <ctype.h>
+
 #include <stdlib.h>
-#ifdef HAVE_UNISTD_H
 #include <unistd.h>
-#endif
 #include <stdio.h>
-#ifdef HAVE_FCNTL_H
-#include <fcntl.h>
-#endif
 #include <signal.h>
 #include <sys/types.h>
-#include <sys/wait.h>
-#include <sys/socket.h>
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
-#ifdef HAVE_LIMITS_H
-#include <limits.h>
-#endif
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <netdb.h>
+#include <ctype.h>
 #include <string.h>
-#include <pwd.h>
+#include <sys/stat.h>
 #include <errno.h>
-#include <stdarg.h>
-#include <pthread.h>
-#include <signal.h>
+
 #include "webcit.h"
-#include "webserver.h"
 
 #include "mime_parser.h"
 
@@ -82,7 +64,7 @@ int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
        char buf[SIZ];
        int buf_length = 0;
        int soft_line_break = 0;
-       int ch;
+       unsigned int ch;
        int decoded_length = 0;
        int i;
 
@@ -145,7 +127,7 @@ int CtdlDecodeQuotedPrintable(char *decoded, char *encoded, int sourcelen) {
  */
 void mime_decode(char *partnum,
                 char *part_start, size_t length,
-                char *content_type, char *encoding,
+                char *content_type, char *charset, char *encoding,
                 char *disposition,
                 char *name, char *filename,
                 void (*CallBack)
@@ -155,6 +137,7 @@ void mime_decode(char *partnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
+                  char *cbcharset,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
@@ -165,6 +148,7 @@ void mime_decode(char *partnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
+                  char *cbcharset,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
@@ -175,6 +159,7 @@ void mime_decode(char *partnum,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
+                  char *cbcharset,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
@@ -199,7 +184,7 @@ void mime_decode(char *partnum,
                if (CallBack != NULL) {
                        CallBack(name, filename, fixed_partnum(partnum),
                                disposition, part_start,
-                               content_type, length, encoding, userdata);
+                               content_type, charset, length, encoding, userdata);
                        }
                return;
        }
@@ -214,7 +199,7 @@ void mime_decode(char *partnum,
         * will never be larger than the encoded data.  This is a safe
         * assumption with base64, uuencode, and quoted-printable.
         */
-       decoded = mallok(length+2048);
+       decoded = malloc(length+2048);
        if (decoded == NULL) {
                return;
        }
@@ -230,10 +215,10 @@ void mime_decode(char *partnum,
        if (bytes_decoded > 0) if (CallBack != NULL) {
                CallBack(name, filename, fixed_partnum(partnum),
                        disposition, decoded,
-                       content_type, bytes_decoded, "binary", userdata);
+                       content_type, charset, bytes_decoded, "binary", userdata);
        }
 
-       phree(decoded);
+       free(decoded);
 }
 
 /*
@@ -251,6 +236,7 @@ void the_mime_parser(char *partnum,
                       char *cbdisp,
                       void *cbcontent,
                       char *cbtype,
+                      char *cbcharset,
                       size_t cblength,
                       char *cbencoding,
                       void *cbuserdata),
@@ -261,6 +247,7 @@ void the_mime_parser(char *partnum,
                       char *cbdisp,
                       void *cbcontent,
                       char *cbtype,
+                      char *cbcharset,
                       size_t cblength,
                       char *cbencoding,
                       void *cbuserdata),
@@ -271,6 +258,7 @@ void the_mime_parser(char *partnum,
                       char *cbdisp,
                       void *cbcontent,
                       char *cbtype,
+                      char *cbcharset,
                       size_t cblength,
                       char *cbencoding,
                       void *cbuserdata),
@@ -286,7 +274,9 @@ void the_mime_parser(char *partnum,
        char *boundary;
        char *startary;
        char *endary;
+       char *next_boundary;
        char *content_type;
+       char *charset;
        size_t content_length;
        char *encoding;
        char *disposition;
@@ -303,34 +293,37 @@ void the_mime_parser(char *partnum,
        ptr = content_start;
        content_length = 0;
 
-       boundary = mallok(SIZ);
+       boundary = malloc(SIZ);
        memset(boundary, 0, SIZ);
 
-       startary = mallok(SIZ);
+       startary = malloc(SIZ);
        memset(startary, 0, SIZ);
 
-       endary = mallok(SIZ);
+       endary = malloc(SIZ);
        memset(endary, 0, SIZ);
 
-       header = mallok(SIZ);
+       header = malloc(SIZ);
        memset(header, 0, SIZ);
 
-       content_type = mallok(SIZ);
+       content_type = malloc(SIZ);
        memset(content_type, 0, SIZ);
 
-       encoding = mallok(SIZ);
+       charset = malloc(SIZ);
+       memset(charset, 0, SIZ);
+
+       encoding = malloc(SIZ);
        memset(encoding, 0, SIZ);
 
-       content_type_name = mallok(SIZ);
+       content_type_name = malloc(SIZ);
        memset(content_type_name, 0, SIZ);
 
-       content_disposition_name = mallok(SIZ);
+       content_disposition_name = malloc(SIZ);
        memset(content_disposition_name, 0, SIZ);
 
-       filename = mallok(SIZ);
+       filename = malloc(SIZ);
        memset(filename, 0, SIZ);
 
-       disposition = mallok(SIZ);
+       disposition = malloc(SIZ);
        memset(disposition, 0, SIZ);
 
        /* If the caller didn't supply an endpointer, generate one by measure */
@@ -346,13 +339,17 @@ void the_mime_parser(char *partnum,
                        goto end_parser;
                }
 
-               for (i = 0; i < strlen(buf); ++i)
-                       if (isspace(buf[i]))
+               for (i = 0; i < strlen(buf); ++i) {
+                       if (isspace(buf[i])) {
                                buf[i] = ' ';
+                       }
+               }
+
                if (!isspace(buf[0])) {
                        if (!strncasecmp(header, "Content-type: ", 14)) {
                                strcpy(content_type, &header[14]);
                                extract_key(content_type_name, content_type, "name");
+                               extract_key(charset, content_type, "charset");
                                /* Deal with weird headers */
                                if (strchr(content_type, ' '))
                                        *(strchr(content_type, ' ')) = '\0';
@@ -385,6 +382,13 @@ void the_mime_parser(char *partnum,
                *(strchr(content_type, ';')) = '\0';
        striplt(content_type);
 
+       if (!strlen(content_type)) {
+               strcpy(content_type, "text/plain");
+       }
+       if (!strlen(charset)) {
+               strcpy(charset, "us-ascii");
+       }
+
        if (strlen(boundary) > 0) {
                is_multipart = 1;
        } else {
@@ -398,56 +402,63 @@ void the_mime_parser(char *partnum,
                /* Tell the client about this message's multipartedness */
                if (PreMultiPartCallBack != NULL) {
                        PreMultiPartCallBack("", "", partnum, "",
-                               NULL, content_type,
+                               NULL, content_type, charset,
                                0, encoding, userdata);
                }
 
                /* Figure out where the boundaries are */
                snprintf(startary, SIZ, "--%s", boundary);
                snprintf(endary, SIZ, "--%s--", boundary);
+
+               part_start = NULL;
                do {
-                       if ( (!strncasecmp(ptr, startary, strlen(startary)))
-                          || (!strncasecmp(ptr, endary, strlen(endary))) ) {
-                               if (part_start != NULL) {
-                                       if (strlen(partnum) > 0) {
-                                               snprintf(nested_partnum,
-                                                        sizeof nested_partnum,
-                                                        "%s.%d", partnum,
-                                                        ++part_seq);
-                                       }
-                                       else {
-                                               snprintf(nested_partnum,
-                                                        sizeof nested_partnum,
-                                                        "%d", ++part_seq);
-                                       }
-                                       the_mime_parser(nested_partnum,
-                                                   part_start, part_end,
-                                                       CallBack,
-                                                       PreMultiPartCallBack,
-                                                       PostMultiPartCallBack,
-                                                       userdata,
-                                                       dont_decode);
+                       next_boundary = bmstrstr(ptr, startary, strncmp);
+                       if ( (part_start != NULL) && (next_boundary != NULL) ) {
+                               part_end = next_boundary;
+                               --part_end;
+
+                               if (strlen(partnum) > 0) {
+                                       snprintf(nested_partnum,
+                                                sizeof nested_partnum,
+                                                "%s.%d", partnum,
+                                                ++part_seq);
                                }
-                               ptr = memreadline(ptr, buf, SIZ);
-                               part_start = ptr;
-                       }
-                       else {
-                               part_end = ptr;
-                               ++ptr;
+                               else {
+                                       snprintf(nested_partnum,
+                                                sizeof nested_partnum,
+                                                "%d", ++part_seq);
+                               }
+                               the_mime_parser(nested_partnum,
+                                           part_start, part_end,
+                                               CallBack,
+                                               PreMultiPartCallBack,
+                                               PostMultiPartCallBack,
+                                               userdata,
+                                               dont_decode);
                        }
-                       /* If we pass out of scope in the MIME multipart (by
-                        * hitting the end boundary), force the pointer out
-                        * of scope so this loop ends.
-                        */
-                       if (ptr < content_end) {
-                               if (!strcasecmp(ptr, endary)) {
-                                       ptr = content_end++;
+
+                       if (next_boundary != NULL) {
+                               /* If we pass out of scope, don't attempt to read
+                                * past the end boundary. */
+                               if (!strcmp(next_boundary, endary)) {
+                                       ptr = content_end;
                                }
+                               else {
+                                       /* Set up for the next part. */
+                                       part_start = strstr(next_boundary, "\n");
+                                       ++part_start;
+                                       ptr = part_start;
+                               }
+                       }
+                       else {
+                               /* Invalid end of multipart.  Bail out! */
+                               ptr = content_end;
                        }
-               } while (ptr <= content_end);
+               } while ( (ptr < content_end) && (next_boundary != NULL) );
+
                if (PostMultiPartCallBack != NULL) {
                        PostMultiPartCallBack("", "", partnum, "", NULL,
-                               content_type, 0, encoding, userdata);
+                               content_type, charset, 0, encoding, userdata);
                }
                goto end_parser;
        }
@@ -461,9 +472,9 @@ void the_mime_parser(char *partnum,
                        ++length;
                }
                part_end = content_end;
-               /* fix an off-by-one error */
-               --part_end;
-               --length;
+                /* fix an off-by-one error */
+                --part_end;
+                --length;
                
                /* Truncate if the header told us to */
                if ( (content_length > 0) && (length > content_length) ) {
@@ -483,23 +494,24 @@ void the_mime_parser(char *partnum,
                
                mime_decode(partnum,
                            part_start, length,
-                           content_type, encoding, disposition,
+                           content_type, charset, encoding, disposition,
                            name, filename,
                            CallBack, NULL, NULL,
                            userdata, dont_decode);
        }
 
 end_parser:    /* free the buffers!  end the oppression!! */
-       phree(boundary);
-       phree(startary);
-       phree(endary);  
-       phree(header);
-       phree(content_type);
-       phree(encoding);
-       phree(content_type_name);
-       phree(content_disposition_name);
-       phree(filename);
-       phree(disposition);
+       free(boundary);
+       free(startary);
+       free(endary);   
+       free(header);
+       free(content_type);
+       free(charset);
+       free(encoding);
+       free(content_type_name);
+       free(content_disposition_name);
+       free(filename);
+       free(disposition);
 }
 
 
@@ -520,6 +532,7 @@ void mime_parser(char *content_start,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
+                  char *cbcharset,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
@@ -531,6 +544,7 @@ void mime_parser(char *content_start,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
+                  char *cbcharset,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
@@ -542,6 +556,7 @@ void mime_parser(char *content_start,
                   char *cbdisp,
                   void *cbcontent,
                   char *cbtype,
+                  char *cbcharset,
                   size_t cblength,
                   char *cbencoding,
                   void *cbuserdata),
@@ -557,4 +572,3 @@ void mime_parser(char *content_start,
                        PostMultiPartCallBack,
                        userdata, dont_decode);
 }
-
index 9464f01fae32802f2c5839328204db87260f6f75..b82cd6884798ddd012646e9dcc101e9b0f746283 100644 (file)
 #define SIZ    4096
 #endif
 
-#ifndef mallok
-#define mallok(x) malloc(x)
-#endif
-
-#ifndef phree
-#define phree(x) free(x)
-#endif
-
-#ifndef reallok
-#define reallok(x,y) realloc(x,y)
-#endif
-
-#ifndef strdoop
-#define strdoop(x) strdup(x)
-#endif
 
 /* 
  * Declarations for functions in the parser
@@ -41,6 +26,7 @@ void mime_parser(char *content_start, char *content_end,
                        char *cbdisp,
                        void *cbcontent,
                        char *cbtype,
+                       char *cbcharset,
                        size_t cblength,
                        char *cbencoding,
                        void *cbuserdata),
@@ -51,6 +37,7 @@ void mime_parser(char *content_start, char *content_end,
                        char *cbdisp,
                        void *cbcontent,
                        char *cbtype,
+                       char *cbcharset,
                        size_t cblength,
                        char *cbencoding,
                        void *cbuserdata),
@@ -61,6 +48,7 @@ void mime_parser(char *content_start, char *content_end,
                        char *cbdisp,
                        void *cbcontent,
                        char *cbtype,
+                       char *cbcharset,
                        size_t cblength,
                        char *cbencoding,
                        void *cbuserdata),
index 86fd68ec1e351f47eeddf8fc8e98fac886f32478..041fe027e853c0fb28d78807867f62acc5cfe43b 100644 (file)
@@ -494,3 +494,69 @@ void generate_uuid(char *buf) {
        );
 }
 
+
+
+/*
+ * bmstrstr() is a variant of strstr() that uses a Boyer-Moore style search
+ * (bad-character skip table), and can use any caller-supplied string compare
+ * function whose calling syntax is similar to strncmp().  For example, we
+ * can supply it with strncasecmp() to do a case-insensitive search.
+ *
+ * text     NUL-terminated string to search (NULL yields NULL)
+ * pattern  NUL-terminated string to look for (NULL yields NULL)
+ * cmpfunc  called as cmpfunc(pattern, candidate, strlen(pattern)); a zero
+ *          return means "match".  It must not treat two bytes as equal
+ *          unless tolower() also maps them to the same value, because the
+ *          skip table and the quick last-byte test are built on tolower().
+ *
+ * Returns a pointer to the first match inside text, text itself if pattern
+ * is empty, or NULL if there is no match.
+ *
+ * Original code: copyright (c) 1997-1998 by Urs Janssen <urs@tin.org>
+ * Modifications: copyright (c) 2003 by Art Cancro <ajc@uncensored.citadel.org>
+ */
+char *bmstrstr(char *text, char *pattern,
+       int (*cmpfunc)(const char *, const char *, size_t) )
+{
+       const unsigned char *pat;
+       const unsigned char *txt;
+       size_t delta[256];
+       size_t textlen;
+       size_t patlen;
+       size_t last;
+       size_t pos;
+       size_t i;
+       unsigned char c;
+
+       if (text == NULL) return(NULL);
+       if (pattern == NULL) return(NULL);
+
+       textlen = strlen(text);
+       patlen = strlen(pattern);
+
+       /* algorithm fails if pattern is empty */
+       if (patlen == 0)
+               return (text);
+
+       /* a pattern longer than the text can never match */
+       if (patlen > textlen)
+               return (NULL);
+
+       /* without a compare function there is nothing to confirm a match */
+       if (cmpfunc == NULL)
+               return (NULL);
+
+       pat = (const unsigned char *) pattern;
+       txt = (const unsigned char *) text;
+       last = patlen - 1;
+
+       /*
+        * Set up deltas.  The table is keyed on the case-folded byte so that
+        * the skips stay valid for a case-insensitive cmpfunc; for a
+        * case-sensitive cmpfunc folding can only make a skip shorter, never
+        * longer, so no real match is ever jumped over.
+        */
+       for (i = 0; i < 256; i++)
+               delta[i] = patlen;
+       for (i = 0; i < last; i++)
+               delta[(unsigned char) tolower(pat[i])] = last - i;
+
+       /*
+        * pos is the candidate match offset.  Every delta is between 1 and
+        * patlen, so pos always advances and never exceeds textlen.
+        */
+       for (pos = 0; pos <= textlen - patlen; ) {
+               c = txt[pos + last];
+
+               /*
+                * Cheap test on the last byte first, then let cmpfunc judge
+                * the whole pattern (including that last byte, so that a
+                * case-sensitive cmpfunc is not fooled by a case-only
+                * difference there).
+                */
+               if (tolower(c) == tolower(pat[last])
+                  && cmpfunc(pattern, text + pos, patlen) == 0)
+                       return (text + pos);
+
+               pos += delta[(unsigned char) tolower(c)];
+       }
+       return (NULL);
+}
+
index e4570532a2408763ca5afc02c73c8ac60e3bda79..22c62ca43b4fad6371f0d53b39642f330df1234d 100644 (file)
@@ -817,7 +817,7 @@ void extract_action(char *actbuf, char *cmdbuf)
 
 
 void upload_handler(char *name, char *filename, char *partnum, char *disp,
-                       void *content, char *cbtype, size_t length,
+                       void *content, char *cbtype, char *cbcharset, size_t length,
                        char *encoding, void *userdata)
 {
        struct urlcontent *u;
index 19d62976a9c6cadf4b63bd1c563f3f8ccf9d89b5..699854449c06466af4a84f16c37f35a62c076cd2 100644 (file)
@@ -274,6 +274,8 @@ void cookie_to_stuff(char *cookie, int *session,
                 char *user, size_t user_len,
                 char *pass, size_t pass_len,
                 char *room, size_t room_len);
+char *bmstrstr(char *text, char *pattern,
+        int (*cmpfunc)(const char *, const char *, size_t) );
 void locate_host(char *, int);
 void become_logged_in(char *, char *, char *);
 void do_login(void);