-/*
- * Default wordbreaker module for full text indexing.
- *
- * Copyright (c) 2005-2017 by the citadel.org team
- *
- * This program is open source software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 3.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
+// Default wordbreaker module for full text indexing.
+//
+// Copyright (c) 2005-2024 by the citadel.org team
+// This program is open source software. Use, duplication, or disclosure is subject to the GNU General Public License v3.
#include "../../sysdep.h"
#include <stdlib.h>
#include "crc16.h"
#include "../../ctdl_module.h"
-/*
- * Noise words are not included in search indices.
- * NOTE: if the noise word list is altered in any way, the FT_WORDBREAKER_ID
- * must also be changed, so that the index is rebuilt.
- */
+// Noise words are not included in search indices.
+// NOTE: if the noise word list is altered in any way, the FT_WORDBREAKER_ID must also be changed, so that the index is rebuilt.
static char *noise_words[] = {
"about",
"after",
// Number of entries in noise_words[]. The divisor is the array's own element size,
// so the count stays correct even if the element type of noise_words ever changes.
#define NUM_NOISE (sizeof(noise_words) / sizeof(noise_words[0]))
-/*
- * Compare function
- */
+// Comparison function passed to array_sort() to order the found tokens
int intcmp(const void *rec1, const void *rec2) {
int i1, i2;
int i;
int word_crc;
- if (text == NULL) { /* no NULL text please */
+ if (text == NULL) { // no NULL text please
return(NULL);
}
- if (text[0] == 0) { /* no empty text either */
+ if (text[0] == 0) { // no empty text either
return(NULL);
}
if ( (!isalnum(ch)) && (word_start) ) {
word_end = ptr;
- /* extract the word */
+ // extract the word
word_len = word_end - word_start;
if (word_len >= sizeof word) {
syslog(LOG_DEBUG, "wordbreaker: invalid word length: %d", word_len);
}
word_start = NULL;
- /* are we ok with the length? */
+ // are we ok with the length?
if ( (word_len >= WB_MIN) && (word_len <= WB_MAX) ) {
for (i=0; i<word_len; ++i) {
word[i] = tolower(word[i]);
}
- /* disqualify noise words */
+ // disqualify noise words
for (i=0; i<NUM_NOISE; ++i) {
if (!strcasecmp(word, noise_words[i])) {
word_len = 0;
break;
}
}
- /* FIXME make this case insensitive */
- /* add it to the array */
+ // add it to the array (FIXME make this case insensitive)
if (word_len > 0) {
word_crc = (int) CalcCRC16Bytes(word_len, word);
array_append(found_tokens, &word_crc);
}
}
- /* sort and purge dups */
+ // sort and purge dups
if (array_len(found_tokens) > 1) {
array_sort(found_tokens, intcmp);
for (i=0; i<(array_len(found_tokens)); ++i) {
}
return(found_tokens);
}
-