/*
- * $Id$
- *
* This module handles fulltext indexing of the message base.
+ * Copyright (c) 2005-2009 by the citadel.org team
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
*
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "database.h"
#include "msgbase.h"
#include "control.h"
-#include "room_ops.h"
#include "serv_fulltext.h"
#include "ft_wordbreaker.h"
#include "threads.h"
+#include "context.h"
#include "ctdl_module.h"
int *tokens = NULL;
int i, j;
struct cdbdata *cdb_bucket;
- char *msgtext;
+ StrBuf *msgtext;
+ char *txt;
int tok;
+ struct CtdlMessage *msg = NULL;
+
+ msg = CtdlFetchMessage(msgnum, 1);
+ if (msg == NULL) {
+ CtdlLogPrintf(CTDL_ERR, "ft_index_message() could not load msg %ld\n", msgnum);
+ return;
+ }
+
+ if (msg->cm_fields['1'] != NULL) {
+ CtdlLogPrintf(CTDL_DEBUG, "ft_index_message() excluded msg %ld\n", msgnum);
+ CtdlFreeMessage(msg);
+ return;
+ }
CtdlLogPrintf(CTDL_DEBUG, "ft_index_message() %s msg %ld\n",
(op ? "adding" : "removing") , msgnum
/* Output the message as text before indexing it, so we don't end up
* indexing a bunch of encoded base64, etc.
*/
- CC->redirect_buffer = malloc(SIZ);
- CC->redirect_len = 0;
- CC->redirect_alloc = SIZ;
- CtdlOutputMsg(msgnum, MT_CITADEL, HEADERS_ALL, 0, 1, NULL, 0);
+ CC->redirect_buffer = NewStrBufPlain(NULL, SIZ);
+ CtdlOutputPreLoadedMsg(msg, MT_CITADEL, HEADERS_ALL, 0, 1, 0);
+ CtdlFreeMessage(msg);
msgtext = CC->redirect_buffer;
CC->redirect_buffer = NULL;
- CC->redirect_len = 0;
- CC->redirect_alloc = 0;
CtdlLogPrintf(CTDL_DEBUG, "Wordbreaking message %ld...\n", msgnum);
- wordbreaker(msgtext, &num_tokens, &tokens);
- free(msgtext);
+ txt = SmashStrBuf(&msgtext);
+ wordbreaker(txt, &num_tokens, &tokens);
+ free(txt);
CtdlLogPrintf(CTDL_DEBUG, "Indexing message %ld [%d tokens]\n", msgnum, num_tokens);
if (num_tokens > 0) {
if (CtdlThreadCheckStop())
return;
- getroom(&CC->room, qrbuf->QRname);
+ CtdlGetRoom(&CC->room, qrbuf->QRname);
CtdlForEachMessage(MSGS_ALL, 0L, NULL, NULL, NULL, ft_index_msg, NULL);
}
/*
- * Begin the fulltext indexing process. (Called as an EVT_TIMER event)
+ * Begin the fulltext indexing process.
*/
void do_fulltext_indexing(void) {
int i;
* Check to see whether the fulltext index is up to date; if there
* are no messages to index, don't waste any more time trying.
*/
- if (CitControl.MMfulltext >= CitControl.MMhighest) {
+ if ((CitControl.MMfulltext >= CitControl.MMhighest) && (CitControl.fulltext_wordbreaker == FT_WORDBREAKER_ID)) {
return; /* nothing to do! */
}
* Now go through each room and find messages to index.
*/
ft_newhighest = CitControl.MMhighest;
- ForEachRoom(ft_index_room, NULL); /* load all msg pointers */
+ CtdlForEachRoom(ft_index_room, NULL); /* load all msg pointers */
if (ft_num_msgs > 0) {
qsort(ft_newmsgs, ft_num_msgs, sizeof(long), longcmp);
/*
 * Entry point of the fulltext indexer thread.
 *
 * Fills a stack-allocated CitContext named "indexer", registers it for
 * this thread under MyConKey, and then calls do_fulltext_indexing()
 * repeatedly until CtdlThreadCheckStop() returns nonzero.
 *
 * arg is not used.  Always returns NULL.
 *
 * NOTE(review): this excerpt is in patch form; lines starting with '-'
 * appear to be the old code and lines starting with '+' the new code.
 * NOTE(review): no delay between indexing passes is visible in this
 * excerpt; confirm that the full source throttles the loop.
 */
void *indexer_thread(void *arg) {
struct CitContext indexerCC;
- CtdlLogPrintf(CTDL_DEBUG, "indexer_thread() initializing\n");
- CtdlFillPrivateContext(&indexerCC, "indexer");
+ CtdlFillSystemContext(&indexerCC, "indexer");
/* Register indexerCC as this thread's value for MyConKey. */
citthread_setspecific(MyConKey, (void *)&indexerCC );
/*
 * The startup message is now logged after the context is registered
 * (presumably so the logger runs with the thread context in place;
 * TODO confirm).
 */
+ CtdlLogPrintf(CTDL_DEBUG, "indexer_thread() initializing\n");
/* Keep indexing until the thread is asked to stop. */
while (!CtdlThreadCheckStop()) {
do_fulltext_indexing();
}
CtdlLogPrintf(CTDL_DEBUG, "indexer_thread() exiting\n");
/*
 * indexerCC lives in this stack frame; the new code calls
 * CtdlClearSystemContext() before returning (presumably to detach
 * the context from the thread; TODO confirm).
 */
+ CtdlClearSystemContext();
return NULL;
}
* (This one does the "all of these words" search.)
* Caller is responsible for freeing the message list.
*/
-void ft_search(int *fts_num_msgs, long **fts_msgs, char *search_string) {
+void ft_search(int *fts_num_msgs, long **fts_msgs, const char *search_string) {
int num_tokens = 0;
int *tokens = NULL;
int i, j;
if (!threading)
{
initialize_ft_cache();
+ initialize_noise_words();
CtdlRegisterProtoHook(cmd_srch, "SRCH", "Full text search");
CtdlRegisterDeleteHook(ft_delete_remove);
CtdlRegisterSearchFuncHook(ft_search, "fulltext");
+ CtdlRegisterCleanupHook(noise_word_cleanup);
}
else
{
CtdlThreadCreate("Indexer", CTDLTHREAD_BIGSTACK, indexer_thread, NULL);
}
/* return our module name for the log */
- return "$Id$";
+ return "fulltext";
}