X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Ffulltext%2Fft_wordbreaker.c;h=b236de362643d94d3b61c605c7586c63bb4a8306;hb=7a9b0685e406cc83597171cc39d008c7e5459ca8;hp=968fa9f02b0aa2bf21ddf87890a0e19887d8a56a;hpb=8e944083763c9ddcb32d763cf8f19c966d01f873;p=citadel.git

diff --git a/citadel/modules/fulltext/ft_wordbreaker.c b/citadel/modules/fulltext/ft_wordbreaker.c
index 968fa9f02..b236de362 100644
--- a/citadel/modules/fulltext/ft_wordbreaker.c
+++ b/citadel/modules/fulltext/ft_wordbreaker.c
@@ -21,18 +21,7 @@
 #include <pwd.h>
 #include <errno.h>
 #include <sys/types.h>
-
-#if TIME_WITH_SYS_TIME
-# include <sys/time.h>
-# include <time.h>
-#else
-# if HAVE_SYS_TIME_H
-#  include <sys/time.h>
-# else
-#  include <time.h>
-# endif
-#endif
-
+#include <time.h>
 #include <sys/wait.h>
 #include <ctype.h>
 #include <string.h>
@@ -56,10 +45,7 @@
  * NOTE: if the noise word list is altered in any way, the FT_WORDBREAKER_ID
  * must also be changed, so that the index is rebuilt.
  */
-
-noise_word *noise_words[26];
-
-static char *noise_words_init[] = {
+static char *noise_words[] = {
 	"about",
 	"after",
 	"also",
@@ -122,29 +108,7 @@ static char *noise_words_init[] = {
 	"would",
 	"your"
 };
-
-
-void initialize_noise_words(void)
-{
-	int i;
-	int len;
-	int ch;
-	noise_word *next;
-	
-	memset (noise_words, 0, sizeof(noise_words));
-	
-	for (i=0; i<(sizeof(noise_words_init)/sizeof(char *)); ++i)
-	{
-		ch = noise_words_init[i][0] - 'a';
-		len = strlen(noise_words_init[i]);
-		
-		next = malloc(sizeof(noise_word));
-		next->len = len;
-		next->word = strdup(noise_words_init[i]);
-		next->next = noise_words[ch];
-		noise_words[ch] = next;
-	}
-}
+#define NUM_NOISE (sizeof(noise_words) / sizeof(char *))
 
 
 /*
@@ -176,8 +140,6 @@ void wordbreaker(const char *text, int *num_tokens, int **tokens) {
 	char word[256];
 	int i;
 	int word_crc;
-	noise_word *noise;
-	
 	
 	if (text == NULL) {		/* no NULL text please */
 		*num_tokens = 0;
@@ -219,30 +181,22 @@ void wordbreaker(const char *text, int *num_tokens, int **tokens) {
 			word_start = NULL;
 
 			/* are we ok with the length? */
-			if ( (word_len >= WB_MIN)
-			   && (word_len <= WB_MAX) ) {
+			if ( (word_len >= WB_MIN) && (word_len <= WB_MAX) ) {
 				for (i=0; i<word_len; ++i) {
 					word[i] = tolower(word[i]);
 				}
 				/* disqualify noise words */
-				noise = noise_words[(int) (word[0]-'a')];
-				while (noise)
-				{
-					if (noise->len == word_len)
-					{
-						if (!strcmp(word, noise->word)) 
-						{
-							word_len = 0;
-							break;
-						}
+				for (i=0; i<NUM_NOISE; ++i) {
+					if (!strcmp(word, noise_words[i])) {
+						word_len = 0;
+						break;
 					}
-					noise = noise->next;
 				}
+
 				if (word_len == 0)
 					continue;
 
-				word_crc = (int)
-					CalcCRC16Bytes(word_len, word);
+				word_crc = (int) CalcCRC16Bytes(word_len, word);
 
 				++wb_num_tokens;
 				if (wb_num_tokens > wb_num_alloc) {