X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fserver%2Fmodules%2Ffulltext%2Fft_wordbreaker.c;h=5f510e82027c3d9a1dd124a9182be4681449f5cc;hb=2bf4ae9ae7e2c80380b4c8369754b2b5fa455e52;hp=d8a11e55cfd437ddfab4a104a13609d3862190bf;hpb=d0fa5441a64b2ce2be78b3a72e726fa6b0c416cd;p=citadel.git diff --git a/citadel/server/modules/fulltext/ft_wordbreaker.c b/citadel/server/modules/fulltext/ft_wordbreaker.c index d8a11e55c..5f510e820 100644 --- a/citadel/server/modules/fulltext/ft_wordbreaker.c +++ b/citadel/server/modules/fulltext/ft_wordbreaker.c @@ -1,16 +1,7 @@ -/* - * Default wordbreaker module for full text indexing. - * - * Copyright (c) 2005-2017 by the citadel.org team - * - * This program is open source software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 3. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ +// Default wordbreaker module for full text indexing. +// +// Copyright (c) 2005-2024 by the citadel.org team +// This program is open source software. Use, duplication, or disclosure is subject to the GNU General Public License v3. #include "../../sysdep.h" #include @@ -40,11 +31,8 @@ #include "crc16.h" #include "../../ctdl_module.h" -/* - * Noise words are not included in search indices. - * NOTE: if the noise word list is altered in any way, the FT_WORDBREAKER_ID - * must also be changed, so that the index is rebuilt. - */ +// Noise words are not included in search indices. +// NOTE: if the noise word list is altered in any way, the FT_WORDBREAKER_ID must also be changed, so that the index is rebuilt. static char *noise_words[] = { "about", "after", @@ -111,9 +99,7 @@ static char *noise_words[] = { #define NUM_NOISE (sizeof(noise_words) / sizeof(char *)) -/* - * Compare function - */ +// Compare function int intcmp(const void *rec1, const void *rec2) { int i1, i2; @@ -136,11 +122,11 @@ Array *wordbreaker(const char *text) { int i; int word_crc; - if (text == NULL) { /* no NULL text please */ + if (text == NULL) { // no NULL text please return(NULL); } - if (text[0] == 0) { /* no empty text either */ + if (text[0] == 0) { // no empty text either return(NULL); } @@ -163,7 +149,7 @@ Array *wordbreaker(const char *text) { if ( (!isalnum(ch)) && (word_start) ) { word_end = ptr; - /* extract the word */ + // extract the word word_len = word_end - word_start; if (word_len >= sizeof word) { syslog(LOG_DEBUG, "wordbreaker: invalid word length: %d", word_len); @@ -176,20 +162,19 @@ Array *wordbreaker(const char *text) { } word_start = NULL; - /* are we ok with the length? */ + // are we ok with the length? if ( (word_len >= WB_MIN) && (word_len <= WB_MAX) ) { for (i=0; i 0) { word_crc = (int) CalcCRC16Bytes(word_len, word); array_append(found_tokens, &word_crc); @@ -198,7 +183,7 @@ Array *wordbreaker(const char *text) { } } - /* sort and purge dups */ + // sort and purge dups if (array_len(found_tokens) > 1) { array_sort(found_tokens, intcmp); for (i=0; i<(array_len(found_tokens)); ++i) { @@ -210,4 +195,3 @@ Array *wordbreaker(const char *text) { } return(found_tokens); } -