* New bmstrcasestr() function to perform very fast case-insensitive

author Art Cancro <ajc@citadel.org>

Fri, 9 Sep 2005 19:44:21 +0000 (19:44 +0000)

committer Art Cancro <ajc@citadel.org>

Fri, 9 Sep 2005 19:44:21 +0000 (19:44 +0000)
author Art Cancro <ajc@citadel.org>
Fri, 9 Sep 2005 19:44:21 +0000 (19:44 +0000)
committer Art Cancro <ajc@citadel.org>
Fri, 9 Sep 2005 19:44:21 +0000 (19:44 +0000)
diff --git a/citadel/ChangeLog b/citadel/ChangeLog

index 6e0a53a9d9170897448b07ae2337e7ccd244d39c..b01d670920abaa57d8cecbcde4e0a35fba6dd3a5 100644 (file)
--- a/citadel/ChangeLog
+++ b/citadel/ChangeLog
@@ -1,4 +1,10 @@
  $Log$
+Revision 654.25  2005/09/09 19:44:21  ajc
+* New bmstrcasestr() function to perform very fast case-insensitive
+  substring searches using the Boyer-Moore algorithm.  Like its predecessor,
+  it is based on the one written by Urs Janssen; unlike its predecessor, it
+  actually works.
+
  Revision 654.24  2005/09/08 03:25:09  ajc
  * messages.c: break up long lines (preferably by substituting spaces with
    newlines) to avoid sending messages to the server containing lines with
@@ -7089,4 +7095,3 @@ Sat Jul 11 00:20:48 EDT 1998 Nathan Bryant <bryant@cs.usm.maine.edu>
  
  Fri Jul 10 1998 Art Cancro <ajc@uncensored.citadel.org>
         * Initial CVS import
-
diff --git a/citadel/imap_search.c b/citadel/imap_search.c

index 905a84a8541b9ac67088192a288eb75ed9e850fb..50bf6dd785fe3f1f0aab9d082697810e497c3db8 100644 (file)
--- a/citadel/imap_search.c
+++ b/citadel/imap_search.c
@@ -113,7 +113,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg,
                 }
                 fieldptr = rfc822_fetch_field(msg->cm_fields['M'], "Bcc");
                 if (fieldptr != NULL) {
-                       if (strcasestr(fieldptr, itemlist[pos+1])) {
+                       if (bmstrcasestr(fieldptr, itemlist[pos+1])) {
                                 match = 1;
                         }
                         free(fieldptr);
@@ -140,7 +140,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg,
                         msg = CtdlFetchMessage(IMAP->msgids[seq-1], 1);
                         need_to_free_msg = 1;
                 }
-               if (strcasestr(msg->cm_fields['M'], itemlist[pos+1])) {
+               if (bmstrcasestr(msg->cm_fields['M'], itemlist[pos+1])) {
                         match = 1;
                 }
                 pos += 2;
@@ -153,7 +153,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg,
                 }
                 fieldptr = rfc822_fetch_field(msg->cm_fields['M'], "Cc");
                 if (fieldptr != NULL) {
-                       if (strcasestr(fieldptr, itemlist[pos+1])) {
+                       if (bmstrcasestr(fieldptr, itemlist[pos+1])) {
                                 match = 1;
                         }
                         free(fieldptr);
@@ -187,7 +187,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg,
                         msg = CtdlFetchMessage(IMAP->msgids[seq-1], 1);
                         need_to_free_msg = 1;
                 }
-               if (strcasestr(msg->cm_fields['A'], itemlist[pos+1])) {
+               if (bmstrcasestr(msg->cm_fields['A'], itemlist[pos+1])) {
                         match = 1;
                 }
                 pos += 2;
@@ -328,7 +328,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg,
                         msg = CtdlFetchMessage(IMAP->msgids[seq-1], 1);
                         need_to_free_msg = 1;
                 }
-               if (strcasestr(msg->cm_fields['U'], itemlist[pos+1])) {
+               if (bmstrcasestr(msg->cm_fields['U'], itemlist[pos+1])) {
                         match = 1;
                 }
                 pos += 2;
@@ -340,7 +340,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg,
                         need_to_free_msg = 1;
                 }
                 for (i='A'; i<='Z'; ++i) {
-                       if (strcasestr(msg->cm_fields[i], itemlist[pos+1])) {
+                       if (bmstrcasestr(msg->cm_fields[i], itemlist[pos+1])) {
                                 match = 1;
                         }
                 }
@@ -352,7 +352,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg,
                         msg = CtdlFetchMessage(IMAP->msgids[seq-1], 1);
                         need_to_free_msg = 1;
                 }
-               if (strcasestr(msg->cm_fields['R'], itemlist[pos+1])) {
+               if (bmstrcasestr(msg->cm_fields['R'], itemlist[pos+1])) {
                         match = 1;
                 }
                 pos += 2;
diff --git a/citadel/internet_addressing.c b/citadel/internet_addressing.c

index e53954dfd4024af0b23a5316c5a127ace4a03cfd..0a0de73a1054b19e091df3c5c845f5b5a8fc2ddb 100644 (file)
--- a/citadel/internet_addressing.c
+++ b/citadel/internet_addressing.c
@@ -491,13 +491,13 @@ char *rfc822_fetch_field(char *rfc822, char *fieldname) {
         snprintf(fieldhdr, sizeof fieldhdr, "%s:", fieldname);
  
         /* Locate the end of the headers, so we don't run past that point */
-       end_of_headers = strcasestr(rfc822, "\n\r\n");
+       end_of_headers = bmstrcasestr(rfc822, "\n\r\n");
         if (end_of_headers == NULL) {
-               end_of_headers = strcasestr(rfc822, "\n\n");
+               end_of_headers = bmstrcasestr(rfc822, "\n\n");
         }
         if (end_of_headers == NULL) return (NULL);
  
-       field_start = strcasestr(rfc822, fieldhdr);
+       field_start = bmstrcasestr(rfc822, fieldhdr);
         if (field_start == NULL) return(NULL);
         if (field_start > end_of_headers) return(NULL);
  
diff --git a/citadel/msgbase.c b/citadel/msgbase.c

index 07accb07b810a20443775e7a9a44eaac7eed8865..7544cfadbf067c54abee9160e4f69e5d44e7a481 100644 (file)
--- a/citadel/msgbase.c
+++ b/citadel/msgbase.c
@@ -2109,7 +2109,7 @@ long CtdlSubmitMsg(struct CtdlMessage *msg,       /* message to save */
                 break;
         case 4:
                 strcpy(content_type, "text/plain");
-               mptr = strcasestr(msg->cm_fields['M'], "Content-type: ");
+               mptr = bmstrcasestr(msg->cm_fields['M'], "Content-type: ");
                 if (mptr != NULL) {
                         safestrncpy(content_type, &mptr[14], 
                                         sizeof content_type);
diff --git a/citadel/serv_imap.c b/citadel/serv_imap.c

index c297ac7886c573c167bc182259996a02367f8bcb..b81732a6885a8d8fd6b6087bc9f12ebc91eb6a81 100644 (file)
--- a/citadel/serv_imap.c
+++ b/citadel/serv_imap.c
@@ -1386,7 +1386,7 @@ void imap_command_loop(void)
         if (IMAP->authstate == imap_as_expecting_password) {
                 lprintf(CTDL_INFO, "IMAP: <password>\n");
         }
-       else if (strcasestr(cmdbuf, " LOGIN ")) {
+       else if (bmstrcasestr(cmdbuf, " LOGIN ")) {
                 lprintf(CTDL_INFO, "IMAP: LOGIN...\n");
         }
         else {
diff --git a/citadel/tools.c b/citadel/tools.c

index 69cb23d443d2382c015af575854250154f85248f..b1c4955327383216185e18a446e41f5ea049b810 100644 (file)
--- a/citadel/tools.c
+++ b/citadel/tools.c
@@ -576,3 +576,65 @@ void generate_uuid(char *buf) {
         );
  }
  
+/*
+ * bmstrcasestr() -- case-insensitive substring search
+ *
+ * This uses the Boyer-Moore search algorithm and is therefore quite fast.
+ * The code is roughly based on the strstr() replacement from 'tin' written
+ * by Urs Janssen.
+ *
+ * text     NUL-terminated string to search in
+ * pattern  NUL-terminated string to look for; characters are passed
+ *          through tolower() before being compared or used as a table
+ *          index, which is what makes the search ignore case
+ *
+ * Returns text itself when pattern is empty, NULL when pattern is longer
+ * than text or is not found, and otherwise a pointer to the start of the
+ * match inside text.
+ *
+ * NOTE(review): both arguments are handed straight to strlen(), so neither
+ * may be NULL; callers that pass optional message fields should check
+ * them first.
+ */
+char *bmstrcasestr(char *text, char *pattern) {
+
+       register unsigned char *p, *t;
+       register int i, j, *delta;
+       register size_t p1;
+       int deltaspace[256];
+       size_t textlen;
+       size_t patlen;
+
+       textlen = strlen (text);
+       patlen = strlen (pattern);
+
+       /* algorithm fails if pattern is empty */
+       if ((p1 = patlen) == 0)
+               return (text);
+
+       /* code below fails (whenever i is unsigned) if pattern too long */
+       if (p1 > textlen)
+               return (NULL);
+
+       /*
+        * set up deltas
+        *
+        * Every one of the 256 slots starts out as the full pattern length.
+        * Then each pattern character except the last one stores its
+        * distance from the end of the pattern, indexed by its lower-case
+        * form; when a character occurs more than once, the later (smaller)
+        * distance overwrites the earlier one.
+        */
+       delta = deltaspace;
+       for (i = 0; i <= 255; i++)
+               delta[i] = p1;
+       for (p = (unsigned char *) pattern, i = p1; --i > 0;)
+               delta[tolower(*p++)] = i;
+
+       /*
+        * From now on, we want patlen - 1.
+        * In the loop below, p points to the end of the pattern,
+        * t points to the end of the text to be tested against the
+        * pattern, and i counts the amount of text remaining, not
+        * including the part to be tested.
+        *
+        * Each pass compares the last pattern character first and only
+        * then the p1 characters in front of it with strncasecmp().
+        * If that is not a match, the window moves forward by the delta
+        * of the text character currently under the pattern's last
+        * position; the search gives up as soon as fewer than that many
+        * characters remain.
+        */
+       p1--;
+       p = (unsigned char *) pattern + p1;
+       t = (unsigned char *) text + p1;
+       i = textlen - patlen; /* NOTE(review): size_t difference stored in an int */
+       while(1) {
+               if (tolower(p[0]) == tolower(t[0])) {
+                       if (strncasecmp ((const char *)(p - p1), (const char *)(t - p1), p1) == 0) {
+                               return ((char *)t - p1);
+                       }
+               }
+               j = delta[tolower(t[0])];
+               if (i < j)
+                       break;
+               i -= j;
+               t += j;
+       }
+       return (NULL);
+}
+
+
+
diff --git a/citadel/tools.h b/citadel/tools.h

index cb63f75bd7febc7c3782c5ee4ae10b5d8f57941b..41bfb94073db9815caacdd18af65d843a1108a39 100644 (file)
--- a/citadel/tools.h
+++ b/citadel/tools.h
@@ -31,3 +31,4 @@ char *CtdlTempFileName(char *prefix1, int prefix2);
  FILE *CtdlTempFile(void);
  char *ascmonths[12];
  void generate_uuid(char *buf);
+char *bmstrcasestr(char *text, char *pattern);
diff --git a/citadel/user_ops.c b/citadel/user_ops.c

index abb388924a2fbafbf9e38714b8a590d4ec3c959b..d5880127c1ef38229e692175a7eefbcdc42c4a72 100644 (file)
--- a/citadel/user_ops.c
+++ b/citadel/user_ops.c
@@ -1370,7 +1370,7 @@ void ListThisUser(struct ctdluser *usbuf, void *data)
         char *searchstring;
  
         searchstring = (char *)data;
-       if (strcasestr(usbuf->fullname, searchstring) == NULL) {
+       if (bmstrcasestr(usbuf->fullname, searchstring) == NULL) {
                 return;
         }
author	Art Cancro <ajc@citadel.org>
	Fri, 9 Sep 2005 19:44:21 +0000 (19:44 +0000)
committer	Art Cancro <ajc@citadel.org>
	Fri, 9 Sep 2005 19:44:21 +0000 (19:44 +0000)
citadel/ChangeLog		patch \| blob \| history
citadel/imap_search.c		patch \| blob \| history
citadel/internet_addressing.c		patch \| blob \| history
citadel/msgbase.c		patch \| blob \| history
citadel/serv_imap.c		patch \| blob \| history
citadel/tools.c		patch \| blob \| history
citadel/tools.h		patch \| blob \| history
citadel/user_ops.c		patch \| blob \| history