From a9438eac305db65f7bbd4baddef8b9c48c9a1746 Mon Sep 17 00:00:00 2001 From: Art Cancro Date: Fri, 9 Sep 2005 19:44:21 +0000 Subject: [PATCH] * New bmstrcasestr() function to perform very fast case-insensitive substring searches using the Boyer-Moore algorithm. Like its predecessor, it is based on the one written by Urs Janssen; unlike its predecessor, it actually works. --- citadel/ChangeLog | 7 +++- citadel/imap_search.c | 14 ++++---- citadel/internet_addressing.c | 6 ++-- citadel/msgbase.c | 2 +- citadel/serv_imap.c | 2 +- citadel/tools.c | 62 +++++++++++++++++++++++++++++++++++ citadel/tools.h | 1 + citadel/user_ops.c | 2 +- 8 files changed, 82 insertions(+), 14 deletions(-) diff --git a/citadel/ChangeLog b/citadel/ChangeLog index 6e0a53a9d..b01d67092 100644 --- a/citadel/ChangeLog +++ b/citadel/ChangeLog @@ -1,4 +1,10 @@ $Log$ +Revision 654.25 2005/09/09 19:44:21 ajc +* New bmstrcasestr() function to perform very fast case-insensitive + substring searches using the Boyer-Moore algorithm. Like its predecessor, + it is based on the one written by Urs Janssen; unlike its predecessor, it + actually works.
+ Revision 654.24 2005/09/08 03:25:09 ajc * messages.c: break up long lines (preferably by substituting spaces with newlines) to avoid sending messages to the server containing lines with @@ -7089,4 +7095,3 @@ Sat Jul 11 00:20:48 EDT 1998 Nathan Bryant Fri Jul 10 1998 Art Cancro * Initial CVS import - diff --git a/citadel/imap_search.c b/citadel/imap_search.c index 905a84a85..50bf6dd78 100644 --- a/citadel/imap_search.c +++ b/citadel/imap_search.c @@ -113,7 +113,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg, } fieldptr = rfc822_fetch_field(msg->cm_fields['M'], "Bcc"); if (fieldptr != NULL) { - if (strcasestr(fieldptr, itemlist[pos+1])) { + if (bmstrcasestr(fieldptr, itemlist[pos+1])) { match = 1; } free(fieldptr); @@ -140,7 +140,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg, msg = CtdlFetchMessage(IMAP->msgids[seq-1], 1); need_to_free_msg = 1; } - if (strcasestr(msg->cm_fields['M'], itemlist[pos+1])) { + if (bmstrcasestr(msg->cm_fields['M'], itemlist[pos+1])) { match = 1; } pos += 2; @@ -153,7 +153,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg, } fieldptr = rfc822_fetch_field(msg->cm_fields['M'], "Cc"); if (fieldptr != NULL) { - if (strcasestr(fieldptr, itemlist[pos+1])) { + if (bmstrcasestr(fieldptr, itemlist[pos+1])) { match = 1; } free(fieldptr); @@ -187,7 +187,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg, msg = CtdlFetchMessage(IMAP->msgids[seq-1], 1); need_to_free_msg = 1; } - if (strcasestr(msg->cm_fields['A'], itemlist[pos+1])) { + if (bmstrcasestr(msg->cm_fields['A'], itemlist[pos+1])) { match = 1; } pos += 2; @@ -328,7 +328,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg, msg = CtdlFetchMessage(IMAP->msgids[seq-1], 1); need_to_free_msg = 1; } - if (strcasestr(msg->cm_fields['U'], itemlist[pos+1])) { + if (bmstrcasestr(msg->cm_fields['U'], itemlist[pos+1])) { match = 1; } pos += 2; @@ -340,7 +340,7 @@ int imap_do_search_msg(int seq, struct 
CtdlMessage *supplied_msg, need_to_free_msg = 1; } for (i='A'; i<='Z'; ++i) { - if (strcasestr(msg->cm_fields[i], itemlist[pos+1])) { + if (bmstrcasestr(msg->cm_fields[i], itemlist[pos+1])) { match = 1; } } @@ -352,7 +352,7 @@ int imap_do_search_msg(int seq, struct CtdlMessage *supplied_msg, msg = CtdlFetchMessage(IMAP->msgids[seq-1], 1); need_to_free_msg = 1; } - if (strcasestr(msg->cm_fields['R'], itemlist[pos+1])) { + if (bmstrcasestr(msg->cm_fields['R'], itemlist[pos+1])) { match = 1; } pos += 2; diff --git a/citadel/internet_addressing.c b/citadel/internet_addressing.c index e53954dfd..0a0de73a1 100644 --- a/citadel/internet_addressing.c +++ b/citadel/internet_addressing.c @@ -491,13 +491,13 @@ char *rfc822_fetch_field(char *rfc822, char *fieldname) { snprintf(fieldhdr, sizeof fieldhdr, "%s:", fieldname); /* Locate the end of the headers, so we don't run past that point */ - end_of_headers = strcasestr(rfc822, "\n\r\n"); + end_of_headers = bmstrcasestr(rfc822, "\n\r\n"); if (end_of_headers == NULL) { - end_of_headers = strcasestr(rfc822, "\n\n"); + end_of_headers = bmstrcasestr(rfc822, "\n\n"); } if (end_of_headers == NULL) return (NULL); - field_start = strcasestr(rfc822, fieldhdr); + field_start = bmstrcasestr(rfc822, fieldhdr); if (field_start == NULL) return(NULL); if (field_start > end_of_headers) return(NULL); diff --git a/citadel/msgbase.c b/citadel/msgbase.c index 07accb07b..7544cfadb 100644 --- a/citadel/msgbase.c +++ b/citadel/msgbase.c @@ -2109,7 +2109,7 @@ long CtdlSubmitMsg(struct CtdlMessage *msg, /* message to save */ break; case 4: strcpy(content_type, "text/plain"); - mptr = strcasestr(msg->cm_fields['M'], "Content-type: "); + mptr = bmstrcasestr(msg->cm_fields['M'], "Content-type: "); if (mptr != NULL) { safestrncpy(content_type, &mptr[14], sizeof content_type); diff --git a/citadel/serv_imap.c b/citadel/serv_imap.c index c297ac788..b81732a68 100644 --- a/citadel/serv_imap.c +++ b/citadel/serv_imap.c @@ -1386,7 +1386,7 @@ void 
imap_command_loop(void) if (IMAP->authstate == imap_as_expecting_password) { lprintf(CTDL_INFO, "IMAP: \n"); } - else if (strcasestr(cmdbuf, " LOGIN ")) { + else if (bmstrcasestr(cmdbuf, " LOGIN ")) { lprintf(CTDL_INFO, "IMAP: LOGIN...\n"); } else { diff --git a/citadel/tools.c b/citadel/tools.c index 69cb23d44..b1c495532 100644 --- a/citadel/tools.c +++ b/citadel/tools.c @@ -576,3 +576,65 @@ void generate_uuid(char *buf) { ); } +/* + * bmstrcasestr() -- case-insensitive substring search + * + * This uses the Boyer-Moore search algorithm and is therefore quite fast. + * The code is roughly based on the strstr() replacement from 'tin' written + * by Urs Janssen. Returns text itself when pattern is empty, NULL when pattern is longer than text or when no match is found, and otherwise a pointer into text at the start of the match. NOTE(review): text and pattern are handed to strlen() without a NULL check, and the size_t lengths are stored in int variables (delta[], i) -- confirm that callers never pass NULL or extremely long strings. + */ +char *bmstrcasestr(char *text, char *pattern) { + + register unsigned char *p, *t; + register int i, j, *delta; + register size_t p1; + int deltaspace[256]; + size_t textlen; + size_t patlen; + + textlen = strlen (text); + patlen = strlen (pattern); + + /* algorithm fails if pattern is empty */ + if ((p1 = patlen) == 0) + return (text); + + /* code below fails (whenever i is unsigned) if pattern too long */ + if (p1 > textlen) + return (NULL); + + /* set up deltas: every entry defaults to patlen, then each pattern byte except the last one (lower-cased) records its distance from the end of the pattern */ + delta = deltaspace; + for (i = 0; i <= 255; i++) + delta[i] = p1; + for (p = (unsigned char *) pattern, i = p1; --i > 0;) + delta[tolower(*p++)] = i; + + /* + * From now on, we want patlen - 1. + * In the loop below, p points to the end of the pattern, + * t points to the end of the text to be tested against the + * pattern, and i counts the amount of text remaining, not + * including the part to be tested.
+ */ + p1--; + p = (unsigned char *) pattern + p1; + t = (unsigned char *) text + p1; + i = textlen - patlen; + while(1) { + if (tolower(p[0]) == tolower(t[0])) { /* last byte matches; strncasecmp below checks the p1 bytes before it */ + if (strncasecmp ((const char *)(p - p1), (const char *)(t - p1), p1) == 0) { + return ((char *)t - p1); + } + } + j = delta[tolower(t[0])]; /* shift is chosen by the text byte aligned with the pattern's last position */ + if (i < j) /* fewer than j bytes of text remain, so no further alignment is possible */ + break; + i -= j; + t += j; + } + return (NULL); +} + + + diff --git a/citadel/tools.h b/citadel/tools.h index cb63f75bd..41bfb9407 100644 --- a/citadel/tools.h +++ b/citadel/tools.h @@ -31,3 +31,4 @@ char *CtdlTempFileName(char *prefix1, int prefix2); FILE *CtdlTempFile(void); char *ascmonths[12]; void generate_uuid(char *buf); +char *bmstrcasestr(char *text, char *pattern); diff --git a/citadel/user_ops.c b/citadel/user_ops.c index abb388924..d5880127c 100644 --- a/citadel/user_ops.c +++ b/citadel/user_ops.c @@ -1370,7 +1370,7 @@ void ListThisUser(struct ctdluser *usbuf, void *data) char *searchstring; searchstring = (char *)data; - if (strcasestr(usbuf->fullname, searchstring) == NULL) { + if (bmstrcasestr(usbuf->fullname, searchstring) == NULL) { return; } -- 2.39.2