X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Ffulltext%2Fft_wordbreaker.c;h=b236de362643d94d3b61c605c7586c63bb4a8306;hb=7a9b0685e406cc83597171cc39d008c7e5459ca8;hp=968fa9f02b0aa2bf21ddf87890a0e19887d8a56a;hpb=8e944083763c9ddcb32d763cf8f19c966d01f873;p=citadel.git diff --git a/citadel/modules/fulltext/ft_wordbreaker.c b/citadel/modules/fulltext/ft_wordbreaker.c index 968fa9f02..b236de362 100644 --- a/citadel/modules/fulltext/ft_wordbreaker.c +++ b/citadel/modules/fulltext/ft_wordbreaker.c @@ -21,18 +21,7 @@ #include #include #include - -#if TIME_WITH_SYS_TIME -# include -# include -#else -# if HAVE_SYS_TIME_H -# include -# else -# include -# endif -#endif - +#include #include #include #include @@ -56,10 +45,7 @@ * NOTE: if the noise word list is altered in any way, the FT_WORDBREAKER_ID * must also be changed, so that the index is rebuilt. */ - -noise_word *noise_words[26]; - -static char *noise_words_init[] = { +static char *noise_words[] = { "about", "after", "also", @@ -122,29 +108,7 @@ static char *noise_words_init[] = { "would", "your" }; - - -void initialize_noise_words(void) -{ - int i; - int len; - int ch; - noise_word *next; - - memset (noise_words, 0, sizeof(noise_words)); - - for (i=0; i<(sizeof(noise_words_init)/sizeof(char *)); ++i) - { - ch = noise_words_init[i][0] - 'a'; - len = strlen(noise_words_init[i]); - - next = malloc(sizeof(noise_word)); - next->len = len; - next->word = strdup(noise_words_init[i]); - next->next = noise_words[ch]; - noise_words[ch] = next; - } -} +#define NUM_NOISE (sizeof(noise_words) / sizeof(char *)) /* @@ -176,8 +140,6 @@ void wordbreaker(const char *text, int *num_tokens, int **tokens) { char word[256]; int i; int word_crc; - noise_word *noise; - if (text == NULL) { /* no NULL text please */ *num_tokens = 0; @@ -219,30 +181,22 @@ void wordbreaker(const char *text, int *num_tokens, int **tokens) { word_start = NULL; /* are we ok with the length? */ - if ( (word_len >= WB_MIN) - && (word_len <= WB_MAX) ) { + if ( (word_len >= WB_MIN) && (word_len <= WB_MAX) ) { for (i=0; ilen == word_len) - { - if (!strcmp(word, noise->word)) - { - word_len = 0; - break; - } + for (i=0; inext; } + if (word_len == 0) continue; - word_crc = (int) - CalcCRC16Bytes(word_len, word); + word_crc = (int) CalcCRC16Bytes(word_len, word); ++wb_num_tokens; if (wb_num_tokens > wb_num_alloc) {