X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Ffulltext%2Fft_wordbreaker.c;h=b236de362643d94d3b61c605c7586c63bb4a8306;hb=7a9b0685e406cc83597171cc39d008c7e5459ca8;hp=44f40a45c58915a2b1748f696c0585b57b949f17;hpb=6106c1da54f0923550c7bdceb45246fb88e9ea19;p=citadel.git

diff --git a/citadel/modules/fulltext/ft_wordbreaker.c b/citadel/modules/fulltext/ft_wordbreaker.c
index 44f40a45c..b236de362 100644
--- a/citadel/modules/fulltext/ft_wordbreaker.c
+++ b/citadel/modules/fulltext/ft_wordbreaker.c
@@ -1,24 +1,17 @@
 /*
  * Default wordbreaker module for full text indexing.
  *
- * Copyright (c) 2005-2009 by the citadel.org team
+ * Copyright (c) 2005-2017 by the citadel.org team
  *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 3 of the License, or
- *  (at your option) any later version.
+ * This program is open source software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 3.
  *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  */
 
-
 #include "sysdep.h"
 #include <stdlib.h>
 #include <unistd.h>
@@ -28,18 +21,7 @@
 #include <pwd.h>
 #include <errno.h>
 #include <sys/types.h>
-
-#if TIME_WITH_SYS_TIME
-# include <sys/time.h>
-# include <time.h>
-#else
-# if HAVE_SYS_TIME_H
-#  include <sys/time.h>
-# else
-#  include <time.h>
-# endif
-#endif
-
+#include <time.h>
 #include <sys/wait.h>
 #include <ctype.h>
 #include <string.h>
@@ -63,10 +45,7 @@
  * NOTE: if the noise word list is altered in any way, the FT_WORDBREAKER_ID
  * must also be changed, so that the index is rebuilt.
  */
-
-noise_word *noise_words[26];
-
-static char *noise_words_init[] = {
+static char *noise_words[] = {
 	"about",
 	"after",
 	"also",
@@ -129,51 +108,9 @@ static char *noise_words_init[] = {
 	"would",
 	"your"
 };
+#define NUM_NOISE (sizeof(noise_words) / sizeof(noise_words[0]))	/* number of entries in noise_words[] */
 
 
-void initialize_noise_words(void)
-{
-	int i;
-	int len;
-	int ch;
-	noise_word *next;
-	
-	memset (noise_words, 0, sizeof(noise_words));
-	
-	for (i=0; i<(sizeof(noise_words_init)/sizeof(char *)); ++i)
-	{
-		ch = noise_words_init[i][0] - 'a';
-		len = strlen(noise_words_init[i]);
-		
-		next = malloc(sizeof(noise_word));
-		next->len = len;
-		next->word = strdup(noise_words_init[i]);
-		next->next = noise_words[ch];
-		noise_words[ch] = next;
-	}
-}
-
-
-void noise_word_cleanup(void)
-{
-	int i;
-	noise_word *cur, *next;
-	
-	CtdlLogPrintf(CTDL_INFO, "Cleaning up fulltext noise words.\n");
-	
-	for (i = 0 ; i < 26 ; i++)
-	{
-		cur = noise_words[i];
-		while (cur)
-		{
-			next = cur->next;
-			free(cur->word);
-			free(cur);
-			cur = next;
-		}
-	}
-}
-
 /*
  * Compare function
  */
@@ -203,8 +140,6 @@ void wordbreaker(const char *text, int *num_tokens, int **tokens) {
 	char word[256];
 	int i;
 	int word_crc;
-	noise_word *noise;
-	
 	
 	if (text == NULL) {		/* no NULL text please */
 		*num_tokens = 0;
@@ -231,12 +166,11 @@ void wordbreaker(const char *text, int *num_tokens, int **tokens) {
 		ch = *ptr;
 		if ( (!isalnum(ch)) && (word_start) ) {
 			word_end = ptr;
-//			--word_end;
 
 			/* extract the word */
 			word_len = word_end - word_start;
 			if (word_len >= sizeof word) {
-				CtdlLogPrintf(CTDL_DEBUG, "Invalid word length: %d\n", word_len);
+				syslog(LOG_DEBUG, "wordbreaker: invalid word length: %d", word_len);
 				safestrncpy(word, word_start, sizeof word);
 				word[(sizeof word) - 1] = 0;
 			}
@@ -247,30 +181,22 @@ void wordbreaker(const char *text, int *num_tokens, int **tokens) {
 			word_start = NULL;
 
 			/* are we ok with the length? */
-			if ( (word_len >= WB_MIN)
-			   && (word_len <= WB_MAX) ) {
+			if ( (word_len >= WB_MIN) && (word_len <= WB_MAX) ) {
 				for (i=0; i<word_len; ++i) {
 					word[i] = tolower(word[i]);
 				}
 				/* disqualify noise words */
-				noise = noise_words[(int) (word[0]-'a')];
-				while (noise)
-				{
-					if (noise->len == word_len)
-					{
-						if (!strcmp(word, noise->word)) 
-						{
-							word_len = 0;
-							break;
-						}
+				for (i=0; i<NUM_NOISE; ++i) {
+					if (!strcmp(word, noise_words[i])) {
+						word_len = 0;
+						break;
 					}
-					noise = noise->next;
 				}
+
 				if (word_len == 0)
 					continue;
 
-				word_crc = (int)
-					CalcCRC16Bytes(word_len, word);
+				word_crc = (int) CalcCRC16Bytes(word_len, word);
 
 				++wb_num_tokens;
 				if (wb_num_tokens > wb_num_alloc) {