* New bmstrcasestr() function to perform very fast case-insensitive
author: Art Cancro <ajc@citadel.org>
Fri, 9 Sep 2005 19:44:21 +0000 (19:44 +0000)
committer: Art Cancro <ajc@citadel.org>
Fri, 9 Sep 2005 19:44:21 +0000 (19:44 +0000)
  substring searches using the Boyer-Moore algorithm.  Like its predecessor,
  it is based on the one written by Urs Jannsen; unlike its predecessor, it
  actually works.

citadel/ChangeLog
citadel/imap_search.c
citadel/internet_addressing.c
citadel/msgbase.c
citadel/serv_imap.c
citadel/tools.c
citadel/tools.h
citadel/user_ops.c

index 6e0a53a9d9170897448b07ae2337e7ccd244d39c..b01d670920abaa57d8cecbcde4e0a35fba6dd3a5 100644 (file)
@@ -1,4 +1,10 @@
 $Log$
+Revision 654.25  2005/09/09 19:44:21  ajc
+* New bmstrcasestr() function to perform very fast case-insensitive
+  substring searches using the Boyer-Moore algorithm.  Like its predecessor,
+  it is based on the one written by Urs Jannsen; unlike its predecessor, it
+  actually works.
+
 Revision 654.24  2005/09/08 03:25:09  ajc
 * messages.c: break up long lines (preferably by substituting spaces with
   newlines) to avoid sending messages to the server containing lines with
@@ -7089,4 +7095,3 @@ Sat Jul 11 00:20:48 EDT 1998 Nathan Bryant <bryant@cs.usm.maine.edu>
 
 Fri Jul 10 1998 Art Cancro <ajc@uncensored.citadel.org>
        * Initial CVS import
-
index 905a84a8541b9ac67088192a288eb75ed9e850fb..50bf6dd785fe3f1f0aab9d082697810e497c3db8 100644 (file)
@@ -113,7 +113,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg,
                }
                fieldptr = rfc822_fetch_field(msg->cm_fields['M'], "Bcc");
                if (fieldptr != NULL) {
-                       if (strcasestr(fieldptr, itemlist[pos+1])) {
+                       if (bmstrcasestr(fieldptr, itemlist[pos+1])) {
                                match = 1;
                        }
                        free(fieldptr);
@@ -140,7 +140,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg,
                        msg = CtdlFetchMessage(IMAP->msgids[seq-1], 1);
                        need_to_free_msg = 1;
                }
-               if (strcasestr(msg->cm_fields['M'], itemlist[pos+1])) {
+               if (bmstrcasestr(msg->cm_fields['M'], itemlist[pos+1])) {
                        match = 1;
                }
                pos += 2;
@@ -153,7 +153,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg,
                }
                fieldptr = rfc822_fetch_field(msg->cm_fields['M'], "Cc");
                if (fieldptr != NULL) {
-                       if (strcasestr(fieldptr, itemlist[pos+1])) {
+                       if (bmstrcasestr(fieldptr, itemlist[pos+1])) {
                                match = 1;
                        }
                        free(fieldptr);
@@ -187,7 +187,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg,
                        msg = CtdlFetchMessage(IMAP->msgids[seq-1], 1);
                        need_to_free_msg = 1;
                }
-               if (strcasestr(msg->cm_fields['A'], itemlist[pos+1])) {
+               if (bmstrcasestr(msg->cm_fields['A'], itemlist[pos+1])) {
                        match = 1;
                }
                pos += 2;
@@ -328,7 +328,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg,
                        msg = CtdlFetchMessage(IMAP->msgids[seq-1], 1);
                        need_to_free_msg = 1;
                }
-               if (strcasestr(msg->cm_fields['U'], itemlist[pos+1])) {
+               if (bmstrcasestr(msg->cm_fields['U'], itemlist[pos+1])) {
                        match = 1;
                }
                pos += 2;
@@ -340,7 +340,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg,
                        need_to_free_msg = 1;
                }
                for (i='A'; i<='Z'; ++i) {
-                       if (strcasestr(msg->cm_fields[i], itemlist[pos+1])) {
+                       if (bmstrcasestr(msg->cm_fields[i], itemlist[pos+1])) {
                                match = 1;
                        }
                }
@@ -352,7 +352,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg,
                        msg = CtdlFetchMessage(IMAP->msgids[seq-1], 1);
                        need_to_free_msg = 1;
                }
-               if (strcasestr(msg->cm_fields['R'], itemlist[pos+1])) {
+               if (bmstrcasestr(msg->cm_fields['R'], itemlist[pos+1])) {
                        match = 1;
                }
                pos += 2;
index e53954dfd4024af0b23a5316c5a127ace4a03cfd..0a0de73a1054b19e091df3c5c845f5b5a8fc2ddb 100644 (file)
@@ -491,13 +491,13 @@ char *rfc822_fetch_field(char *rfc822, char *fieldname) {
        snprintf(fieldhdr, sizeof fieldhdr, "%s:", fieldname);
 
        /* Locate the end of the headers, so we don't run past that point */
-       end_of_headers = strcasestr(rfc822, "\n\r\n");
+       end_of_headers = bmstrcasestr(rfc822, "\n\r\n");
        if (end_of_headers == NULL) {
-               end_of_headers = strcasestr(rfc822, "\n\n");
+               end_of_headers = bmstrcasestr(rfc822, "\n\n");
        }
        if (end_of_headers == NULL) return (NULL);
 
-       field_start = strcasestr(rfc822, fieldhdr);
+       field_start = bmstrcasestr(rfc822, fieldhdr);
        if (field_start == NULL) return(NULL);
        if (field_start > end_of_headers) return(NULL);
 
index 07accb07b810a20443775e7a9a44eaac7eed8865..7544cfadbf067c54abee9160e4f69e5d44e7a481 100644 (file)
@@ -2109,7 +2109,7 @@ long CtdlSubmitMsg(struct CtdlMessage *msg,       /* message to save */
                break;
        case 4:
                strcpy(content_type, "text/plain");
-               mptr = strcasestr(msg->cm_fields['M'], "Content-type: ");
+               mptr = bmstrcasestr(msg->cm_fields['M'], "Content-type: ");
                if (mptr != NULL) {
                        safestrncpy(content_type, &mptr[14], 
                                        sizeof content_type);
index c297ac7886c573c167bc182259996a02367f8bcb..b81732a6885a8d8fd6b6087bc9f12ebc91eb6a81 100644 (file)
@@ -1386,7 +1386,7 @@ void imap_command_loop(void)
        if (IMAP->authstate == imap_as_expecting_password) {
                lprintf(CTDL_INFO, "IMAP: <password>\n");
        }
-       else if (strcasestr(cmdbuf, " LOGIN ")) {
+       else if (bmstrcasestr(cmdbuf, " LOGIN ")) {
                lprintf(CTDL_INFO, "IMAP: LOGIN...\n");
        }
        else {
index 69cb23d443d2382c015af575854250154f85248f..b1c4955327383216185e18a446e41f5ea049b810 100644 (file)
@@ -576,3 +576,65 @@ void generate_uuid(char *buf) {
        );
 }
 
/*
 * bmstrcasestr() -- case-insensitive substring search
 *
 * Uses a Boyer-Moore style "bad character" skip table keyed on the text
 * character aligned with the last pattern position, so most windows are
 * rejected after looking at a single character.  The code is roughly based
 * on the strstr() replacement from 'tin' written by Urs Jannsen.
 *
 * text     NUL-terminated string to search in (may be NULL)
 * pattern  NUL-terminated string to search for (may be NULL)
 *
 * Returns a pointer to the first case-insensitive occurrence of pattern
 * inside text, text itself if pattern is empty, or NULL if there is no
 * match or if either argument is NULL.
 */
char *bmstrcasestr(char *text, char *pattern) {

	size_t delta[256];	/* shift distance per (lowercased) character */
	size_t textlen;
	size_t patlen;
	size_t last;		/* index of the final pattern character */
	size_t start;		/* index in text where the current window begins */
	size_t shift;
	size_t k;
	int last_pat_ch;
	int window_ch;

	/* Callers may hand us fields that were never set; treat as "no match" */
	if ((text == NULL) || (pattern == NULL)) {
		return (NULL);
	}

	textlen = strlen(text);
	patlen = strlen(pattern);

	/* An empty pattern matches at the very beginning of the text */
	if (patlen == 0) {
		return (text);
	}

	/* A pattern longer than the text can never match */
	if (patlen > textlen) {
		return (NULL);
	}

	last = patlen - 1;

	/*
	 * Set up deltas: a character that does not occur in the pattern
	 * (ignoring the final position) lets us skip a whole pattern length;
	 * otherwise skip just far enough to line up its rightmost occurrence.
	 */
	for (k = 0; k < 256; k++) {
		delta[k] = patlen;
	}
	for (k = 0; k < last; k++) {
		delta[tolower((unsigned char) pattern[k])] = last - k;
	}

	last_pat_ch = tolower((unsigned char) pattern[last]);

	for (start = 0; ; start += shift) {
		window_ch = tolower((unsigned char) text[start + last]);

		/*
		 * Cheap test on the last character first; only when it agrees
		 * do we compare the remaining (patlen - 1) leading characters.
		 */
		if ((window_ch == last_pat_ch)
		   && (strncasecmp(pattern, text + start, last) == 0)) {
			return (text + start);
		}

		/* Stop once the next window would run past the end of text */
		shift = delta[window_ch];
		if ((textlen - patlen - start) < shift) {
			break;
		}
	}

	return (NULL);
}
+
+
+
index cb63f75bd7febc7c3782c5ee4ae10b5d8f57941b..41bfb94073db9815caacdd18af65d843a1108a39 100644 (file)
@@ -31,3 +31,4 @@ char *CtdlTempFileName(char *prefix1, int prefix2);
 FILE *CtdlTempFile(void);
 char *ascmonths[12];
 void generate_uuid(char *buf);
+char *bmstrcasestr(char *text, char *pattern);
index abb388924a2fbafbf9e38714b8a590d4ec3c959b..d5880127c1ef38229e692175a7eefbcdc42c4a72 100644 (file)
@@ -1370,7 +1370,7 @@ void ListThisUser(struct ctdluser *usbuf, void *data)
        char *searchstring;
 
        searchstring = (char *)data;
-       if (strcasestr(usbuf->fullname, searchstring) == NULL) {
+       if (bmstrcasestr(usbuf->fullname, searchstring) == NULL) {
                return;
        }