X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Ffulltext%2Fft_wordbreaker.c;h=4e1059a7f63ad982f2ffc186d5c5d5c987f59e3f;hb=a520d904d0069654c57d2b69618bee1225ee3067;hp=aaa92a208052925c017fb0da64063c570a40bd4e;hpb=a2fda4eafb51bbf58c04471522aa2d0f116c797e;p=citadel.git diff --git a/citadel/modules/fulltext/ft_wordbreaker.c b/citadel/modules/fulltext/ft_wordbreaker.c index aaa92a208..4e1059a7f 100644 --- a/citadel/modules/fulltext/ft_wordbreaker.c +++ b/citadel/modules/fulltext/ft_wordbreaker.c @@ -1,11 +1,17 @@ /* - * $Id$ - * * Default wordbreaker module for full text indexing. * + * Copyright (c) 2005-2017 by the citadel.org team + * + * This program is open source software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. */ - #include "sysdep.h" #include #include @@ -113,6 +119,8 @@ static char *noise_words[] = { "would", "your" }; +#define NUM_NOISE (sizeof(noise_words) / sizeof(char *)) + /* * Compare function @@ -129,21 +137,21 @@ int intcmp(const void *rec1, const void *rec2) { } -void wordbreaker(char *text, int *num_tokens, int **tokens) { +void wordbreaker(const char *text, int *num_tokens, int **tokens) { int wb_num_tokens = 0; int wb_num_alloc = 0; int *wb_tokens = NULL; - char *ptr; - char *word_start; - char *word_end; + const char *ptr; + const char *word_start; + const char *word_end; char ch; int word_len; char word[256]; int i; int word_crc; - + if (text == NULL) { /* no NULL text please */ *num_tokens = 0; *tokens = NULL; @@ -169,12 +177,11 @@ void wordbreaker(char *text, int *num_tokens, int **tokens) { ch = *ptr; if ( (!isalnum(ch)) && (word_start) ) { word_end = ptr; -// --word_end; /* extract the word */ word_len = word_end - word_start; if (word_len >= sizeof word) { - CtdlLogPrintf(CTDL_DEBUG, "Invalid word length: %d\n", word_len); + syslog(LOG_DEBUG, "wordbreaker: invalid word length: %d", word_len); safestrncpy(word, word_start, sizeof word); word[(sizeof word) - 1] = 0; } @@ -185,23 +192,22 @@ void wordbreaker(char *text, int *num_tokens, int **tokens) { word_start = NULL; /* are we ok with the length? */ - if ( (word_len >= WB_MIN) - && (word_len <= WB_MAX) ) { + if ( (word_len >= WB_MIN) && (word_len <= WB_MAX) ) { for (i=0; i wb_num_alloc) {