From: Art Cancro Date: Wed, 30 Aug 2023 02:30:32 +0000 (-0400) Subject: Totally ripping apart the indexer. X-Git-Tag: v989~14 X-Git-Url: https://code.citadel.org/?a=commitdiff_plain;h=6347938a3309d486d5e01bc21cbf21520576d4f4;p=citadel.git Totally ripping apart the indexer. And yet, it doesn't seem to be anything 'deep' that's making it deadlock. If I get desperate I may have to just give it its own thread or something. --- diff --git a/citadel/server/modules/fulltext/serv_fulltext.c b/citadel/server/modules/fulltext/serv_fulltext.c index c71232e08..91dc30eef 100644 --- a/citadel/server/modules/fulltext/serv_fulltext.c +++ b/citadel/server/modules/fulltext/serv_fulltext.c @@ -24,6 +24,7 @@ #include "../../citserver.h" #include "../../support.h" #include "../../config.h" +#include "../../room_ops.h" #include "../../database.h" #include "../../msgbase.h" #include "../../control.h" @@ -33,15 +34,6 @@ #include "../../context.h" #include "../../ctdl_module.h" -long ft_newhighest = 0L; -long *ft_newmsgs = NULL; -int ft_num_msgs = 0; -int ft_num_alloc = 0; - -int ftc_num_msgs[65536]; -long *ftc_msgs[65536]; - - // Compare function int longcmp(const void *rec1, const void *rec2) { long i1, i2; @@ -55,39 +47,23 @@ int longcmp(const void *rec1, const void *rec2) { } -// Flush our index cache out to disk. -void ft_flush_cache(void) { - int i; - time_t last_update = 0; + //if (ftc_msgs[i] != NULL) { + //cdb_store(CDB_FULLTEXT, &i, sizeof(int), ftc_msgs[i], (ftc_num_msgs[i] * sizeof(long))); - cdb_begin_transaction(); - for (i=0; i<65536; ++i) { - if ((time(NULL) - last_update) >= 10) { - syslog(LOG_INFO, "fulltext: flushing index cache to disk (%d%% complete)", (i * 100 / 65536)); - last_update = time(NULL); - } - if (ftc_msgs[i] != NULL) { - cdb_store(CDB_FULLTEXT, &i, sizeof(int), ftc_msgs[i], (ftc_num_msgs[i] * sizeof(long))); - ftc_num_msgs[i] = 0; - free(ftc_msgs[i]); - ftc_msgs[i] = NULL; - } - } - cdb_end_transaction(); - syslog(LOG_INFO, "fulltext: flushed index cache to disk (100%% complete)"); -} // Index or de-index a message. (op == 1 to index, 0 to de-index) void ft_index_message(long msgnum, int op) { int i, j; - Array *t = NULL; + Array *tokens_in_this_message = NULL; struct cdbdata cdb_bucket; StrBuf *msgtext; char *txt; int tok; struct CtdlMessage *msg = NULL; + if (msgnum == 0) return; + msg = CtdlFetchMessage(msgnum, 1); if (msg == NULL) { syslog(LOG_ERR, "fulltext: ft_index_message() could not load msg %ld", msgnum); @@ -102,8 +78,7 @@ void ft_index_message(long msgnum, int op) { syslog(LOG_DEBUG, "fulltext: ft_index_message() %s msg %ld", (op ? "adding" : "removing") , msgnum); - // Output the message as text before indexing it, so we don't end up - // indexing a bunch of encoded base64, etc. + // Output the message as text before indexing it, so we don't end up indexing a bunch of encoded base64, etc. CC->redirect_buffer = NewStrBufPlain(NULL, SIZ); CtdlOutputPreLoadedMsg(msg, MT_CITADEL, HEADERS_ALL, 0, 1, 0); CM_Free(msg); @@ -113,83 +88,63 @@ void ft_index_message(long msgnum, int op) { syslog(LOG_DEBUG, "fulltext: wordbreaking message %ld (%d bytes)", msgnum, StrLength(msgtext)); } txt = SmashStrBuf(&msgtext); - t = wordbreaker(txt); + tokens_in_this_message = wordbreaker(txt); free(txt); - syslog(LOG_DEBUG, "fulltext: %sindexing message %ld [%d tokens]", (op ? "" : "de"), msgnum, array_len(t)); - if (array_len(t) > 0) { - for (i=0; i 0) { + for (i=0; i= 0) && (tok <= 65535) ) { - // fetch the bucket, Liza - if (ftc_msgs[tok] == NULL) { - cdb_bucket = cdb_fetch(CDB_FULLTEXT, &tok, sizeof(int)); - if (cdb_bucket.ptr != NULL) { - ftc_num_msgs[tok] = cdb_bucket.len / sizeof(long); - ftc_msgs[tok] = malloc(cdb_bucket.len); - memcpy(ftc_msgs[tok], cdb_bucket.ptr, cdb_bucket.len); - } - else { - ftc_num_msgs[tok] = 0; - ftc_msgs[tok] = malloc(sizeof(long)); - } - } + // Identify the bucket which we will be modifying + memcpy(&tok, array_get_element_at(tokens_in_this_message, i), sizeof(int)); - if (op == 1) { // add to index - ++ftc_num_msgs[tok]; - ftc_msgs[tok] = realloc(ftc_msgs[tok], ftc_num_msgs[tok]*sizeof(long)); - ftc_msgs[tok][ftc_num_msgs[tok] - 1] = msgnum; - } - - if (op == 0) { // remove from index - if (ftc_num_msgs[tok] >= 1) { - for (j=0; j CtdlGetConfigLong("MMfulltext")) && (msgnum <= ft_newhighest)) { - ++ft_num_msgs; - if (ft_num_msgs > ft_num_alloc) { - ft_num_alloc += 1024; - ft_newmsgs = realloc(ft_newmsgs, (ft_num_alloc * sizeof(long))); - } - ft_newmsgs[ft_num_msgs - 1] = msgnum; - } - -} +Array *messages_to_be_indexed = NULL; +long highest_msg_already_indexed = 0; +long highest_msg_to_be_indexed = 0; // Scan a room for messages to index. void ft_index_room(struct ctdlroom *qrbuf, void *data) { - if (server_shutting_down) + if (server_shutting_down) { return; - + } CtdlGetRoom(&CC->room, qrbuf->QRname); - CtdlForEachMessage(MSGS_ALL, 0L, NULL, NULL, NULL, ft_index_msg, NULL); + +#if 0 + int num_msgs = 0; + long *msglist; + int i; + num_msgs = CtdlFetchMsgList(CC->room.QRnumber, &msglist); + if (msglist != NULL) { + for (i=0; i 0) + && (msglist[i] > highest_msg_already_indexed) + && (msglist[i] <= highest_msg_to_be_indexed) + ) { + array_append(messages_to_be_indexed, &msglist[i]); + } + } + } +#endif } @@ -205,6 +160,7 @@ void do_fulltext_indexing(void) { if (!CtdlGetConfigInt("c_enable_fulltext")) { return; } + syslog(LOG_DEBUG, "fulltext: indexing started"); // If we've switched wordbreaker modules, burn the index and start over. begin_critical_section(S_CONTROL); @@ -223,62 +179,46 @@ void do_fulltext_indexing(void) { return; // nothing to do! } - // Now go through each room and find messages to index. - ft_newhighest = CtdlGetConfigLong("MMhighest"); - CtdlForEachRoom(ft_index_room, NULL); // load all msg pointers - - if (ft_num_msgs > 0) { - qsort(ft_newmsgs, ft_num_msgs, sizeof(long), longcmp); - for (i=0; i<(ft_num_msgs-1); ++i) { // purge dups - if (ft_newmsgs[i] == ft_newmsgs[i+1]) { - memmove(&ft_newmsgs[i], &ft_newmsgs[i+1], ((ft_num_msgs - i - 1)*sizeof(long))); - --ft_num_msgs; - --i; - } - } - - // Here it is ... do each message! - for (i=0; i= FT_MAX_CACHE) { - syslog(LOG_DEBUG, "fulltext: time to flush."); - ft_newhighest = ft_newmsgs[i]; - break; - } + // Here it is ... do each message! + long msgnum = 0; + long prev_msgnum = 0; + for (i=0; i 0) { for (i=0; i 0) { for (i=0; i