* More license declarations
[citadel.git] / citadel / modules / fulltext / serv_fulltext.c
index b5753fbf63c95e4e156082f36da568d6d8ea9485..74e0b22eebe65334452710467a0fc48598547257 100644 (file)
@@ -3,6 +3,22 @@
  *
  * This module handles fulltext indexing of the message base.
  *
+ *
+ * Copyright (c) 2005-2009 by the citadel.org team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 
@@ -30,6 +46,7 @@
 #include <sys/wait.h>
 #include <string.h>
 #include <limits.h>
+#include <libcitadel.h>
 #include "citadel.h"
 #include "server.h"
 #include "citserver.h"
 #include "msgbase.h"
 #include "control.h"
 #include "room_ops.h"
-#include "tools.h"
 #include "serv_fulltext.h"
 #include "ft_wordbreaker.h"
-
+#include "threads.h"
 
 #include "ctdl_module.h"
 
@@ -81,7 +97,7 @@ void ft_flush_cache(void) {
 
        for (i=0; i<65536; ++i) {
                if ((time(NULL) - last_update) >= 10) {
-                       lprintf(CTDL_INFO,
+                       CtdlLogPrintf(CTDL_INFO,
                                "Flushing index cache to disk (%d%% complete)\n",
                                (i * 100 / 65536)
                        );
@@ -95,7 +111,7 @@ void ft_flush_cache(void) {
                        ftc_msgs[i] = NULL;
                }
        }
-       lprintf(CTDL_INFO, "Flushed index cache to disk (100%% complete)\n");
+       CtdlLogPrintf(CTDL_INFO, "Flushed index cache to disk (100%% complete)\n");
 }
 
 
@@ -110,7 +126,7 @@ void ft_index_message(long msgnum, int op) {
        char *msgtext;
        int tok;
 
-       lprintf(CTDL_DEBUG, "ft_index_message() %s msg %ld\n",
+       CtdlLogPrintf(CTDL_DEBUG, "ft_index_message() %s msg %ld\n",
                (op ? "adding" : "removing") , msgnum
        );
 
@@ -120,16 +136,16 @@ void ft_index_message(long msgnum, int op) {
        CC->redirect_buffer = malloc(SIZ);
        CC->redirect_len = 0;
        CC->redirect_alloc = SIZ;
-       CtdlOutputMsg(msgnum, MT_CITADEL, HEADERS_ALL, 0, 1, NULL);
+       CtdlOutputMsg(msgnum, MT_CITADEL, HEADERS_ALL, 0, 1, NULL, 0);
        msgtext = CC->redirect_buffer;
        CC->redirect_buffer = NULL;
        CC->redirect_len = 0;
        CC->redirect_alloc = 0;
-       lprintf(CTDL_DEBUG, "Wordbreaking message %ld...\n", msgnum);
+       CtdlLogPrintf(CTDL_DEBUG, "Wordbreaking message %ld...\n", msgnum);
        wordbreaker(msgtext, &num_tokens, &tokens);
        free(msgtext);
 
-       lprintf(CTDL_DEBUG, "Indexing message %ld [%d tokens]\n", msgnum, num_tokens);
+       CtdlLogPrintf(CTDL_DEBUG, "Indexing message %ld [%d tokens]\n", msgnum, num_tokens);
        if (num_tokens > 0) {
                for (i=0; i<num_tokens; ++i) {
 
@@ -175,7 +191,7 @@ void ft_index_message(long msgnum, int op) {
                                }
                        }
                        else {
-                               lprintf(CTDL_ALERT, "Invalid token %d !!\n", tok);
+                               CtdlLogPrintf(CTDL_ALERT, "Invalid token %d !!\n", tok);
                        }
                }
 
@@ -207,6 +223,9 @@ void ft_index_msg(long msgnum, void *userdata) {
  */
 void ft_index_room(struct ctdlroom *qrbuf, void *data)
 {
+       if (CtdlThreadCheckStop())      /* same check the indexer loop uses to quit early */
+               return;                 /* leave before getroom(), so this room is not walked */
+               
        getroom(&CC->room, qrbuf->QRname);
        CtdlForEachMessage(MSGS_ALL, 0L, NULL, NULL, NULL, ft_index_msg, NULL);
 }
@@ -219,7 +238,9 @@ void do_fulltext_indexing(void) {
        int i;
        static time_t last_index = 0L;
        static time_t last_progress = 0L;
-
+       time_t run_time = 0L;
+       time_t end_time = 0L;
+       
        /*
         * Don't do this if the site doesn't have it enabled.
         */
@@ -231,29 +252,34 @@ void do_fulltext_indexing(void) {
         * Make sure we don't run the indexer too frequently.
         * FIXME move the setting into config
         */
+/*
+ * The indexer thread sleeps for 300 seconds between passes (see indexer_thread), so this check is no longer needed here.
        if ( (time(NULL) - last_index) < 300L) {
                return;
        }
+*/
 
        /*
         * Check to see whether the fulltext index is up to date; if there
         * are no messages to index, don't waste any more time trying.
         */
-       if (CitControl.MMfulltext >= CitControl.MMhighest) {
+       if ((CitControl.MMfulltext >= CitControl.MMhighest) && (CitControl.fulltext_wordbreaker == FT_WORDBREAKER_ID)) {
                return;         /* nothing to do! */
        }
-
-       lprintf(CTDL_DEBUG, "do_fulltext_indexing() started\n");
+       
+       run_time = time(NULL);
+       CtdlLogPrintf(CTDL_DEBUG, "do_fulltext_indexing() started (%ld)\n", run_time);
        
        /*
         * If we've switched wordbreaker modules, burn the index and start
         * over.
         */
        begin_critical_section(S_CONTROL);
-       lprintf(CTDL_DEBUG, "wb ver on disk = %d, code ver = %d\n",
-                       CitControl.fulltext_wordbreaker, FT_WORDBREAKER_ID);
        if (CitControl.fulltext_wordbreaker != FT_WORDBREAKER_ID) {
-               lprintf(CTDL_INFO, "(re)initializing full text index\n");
+               CtdlLogPrintf(CTDL_DEBUG, "wb ver on disk = %d, code ver = %d\n",
+                       CitControl.fulltext_wordbreaker, FT_WORDBREAKER_ID);
+               CtdlLogPrintf(CTDL_INFO, "(re)initializing full text index\n");
                cdb_trunc(CDB_FULLTEXT);
                CitControl.MMfulltext = 0L;
                put_control();
@@ -280,7 +306,7 @@ void do_fulltext_indexing(void) {
                /* Here it is ... do each message! */
                for (i=0; i<ft_num_msgs; ++i) {
                        if (time(NULL) != last_progress) {
-                               lprintf(CTDL_DEBUG,
+                               CtdlLogPrintf(CTDL_DEBUG,
                                        "Indexed %d of %d messages (%d%%)\n",
                                                i, ft_num_msgs,
                                                ((i*100) / ft_num_msgs)
@@ -290,15 +316,15 @@ void do_fulltext_indexing(void) {
                        ft_index_message(ft_newmsgs[i], 1);
 
                        /* Check to see if we need to quit early */
-                       if (time_to_die) {
-                               lprintf(CTDL_DEBUG, "Indexer quitting early\n");
+                       if (CtdlThreadCheckStop()) {
+                               CtdlLogPrintf(CTDL_DEBUG, "Indexer quitting early\n");
                                ft_newhighest = ft_newmsgs[i];
                                break;
                        }
 
                        /* Check to see if we have to maybe flush to disk */
                        if (i >= FT_MAX_CACHE) {
-                               lprintf(CTDL_DEBUG, "Time to flush.\n");
+                               CtdlLogPrintf(CTDL_DEBUG, "Time to flush.\n");
                                ft_newhighest = ft_newmsgs[i];
                                break;
                        }
@@ -310,7 +336,13 @@ void do_fulltext_indexing(void) {
                ft_num_alloc = 0;
                ft_newmsgs = NULL;
        }
+       end_time = time(NULL);
 
+       if (CtdlThreadCheckStop())
+               return;
+       
+       CtdlLogPrintf(CTDL_DEBUG, "do_fulltext_indexing() duration (%ld)\n", end_time - run_time);
+               
        /* Save our place so we don't have to do this again */
        ft_flush_cache();
        begin_critical_section(S_CONTROL);
@@ -320,7 +352,7 @@ void do_fulltext_indexing(void) {
        end_critical_section(S_CONTROL);
        last_index = time(NULL);
 
-       lprintf(CTDL_DEBUG, "do_fulltext_indexing() finished\n");
+       CtdlLogPrintf(CTDL_DEBUG, "do_fulltext_indexing() finished\n");
        return;
 }
 
@@ -330,22 +362,18 @@ void do_fulltext_indexing(void) {
 void *indexer_thread(void *arg) {
        struct CitContext indexerCC;
 
-       lprintf(CTDL_DEBUG, "indexer_thread() initializing\n");
+       CtdlLogPrintf(CTDL_DEBUG, "indexer_thread() initializing\n");
 
-       memset(&indexerCC, 0, sizeof(struct CitContext));
-       indexerCC.internal_pgm = 1;
-       indexerCC.cs_pid = 0;
-       pthread_setspecific(MyConKey, (void *)&indexerCC );
+       CtdlFillSystemContext(&indexerCC, "indexer");   /* NOTE(review): presumably initializes indexerCC as an internal session named "indexer" -- confirm */
+       citthread_setspecific(MyConKey, (void *)&indexerCC );   /* store the local context under MyConKey; it must stay valid while this function runs */
 
-       cdb_allocate_tsd();
-
-       while (!time_to_die) {
+       while (!CtdlThreadCheckStop()) {        /* keep indexing until CtdlThreadCheckStop() returns nonzero */
                do_fulltext_indexing();
-               sleep(1);
+               CtdlThreadSleep(300);   /* 300 seconds between indexing passes */
        }
 
-       lprintf(CTDL_DEBUG, "indexer_thread() exiting\n");
-       pthread_exit(NULL);
+       CtdlLogPrintf(CTDL_DEBUG, "indexer_thread() exiting\n");
+       return NULL;    /* thread result is unused; end by returning from the start routine */
 }
 
 
@@ -467,14 +495,33 @@ void initialize_ft_cache(void) {
 }
 
 
+void ft_delete_remove(char *room, long msgnum) /* delete hook (registered in CTDL_MODULE_INIT): drops msgnum from the fulltext index */
+{
+       if (room) return;       /* acts only when room is NULL; NOTE(review): presumably NULL means the message itself is being purged, not just unlinked from one room -- confirm against the hook's caller */
+       
+       /* Remove from fulltext index, but only when fulltext indexing is enabled */
+       if (config.c_enable_fulltext) {
+               ft_index_message(msgnum, 0);    /* op 0 selects the "removing" path of ft_index_message() */
+       }
+}
+
 /*****************************************************************************/
 
 CTDL_MODULE_INIT(fulltext)
 {
-       initialize_ft_cache();
-       CtdlRegisterProtoHook(cmd_srch, "SRCH", "Full text search");
-       CtdlRegisterMaintenanceThread ("indexer", indexer_thread);
-
+       if (!threading) /* NOTE(review): "threading" is not declared in this view -- presumably supplied by the CTDL_MODULE_INIT macro; confirm */
+       {               /* threading == 0: set up the cache and noise words, then register hooks */
+               initialize_ft_cache();
+               initialize_noise_words();
+               CtdlRegisterProtoHook(cmd_srch, "SRCH", "Full text search");
+               CtdlRegisterDeleteHook(ft_delete_remove);       /* ft_delete_remove() removes a message from the index */
+               CtdlRegisterSearchFuncHook(ft_search, "fulltext");
+               CtdlRegisterCleanupHook(noise_word_cleanup);
+       }
+       else
+       {               /* threading != 0: only start the indexer thread */
+               CtdlThreadCreate("Indexer", CTDLTHREAD_BIGSTACK, indexer_thread, NULL);
+       }
        /* return our Subversion id for the Log */
        return "$Id$";
 }