#include "../../context.h"
#include "../../ctdl_module.h"
+
+// These can be global variables because only one indexer runs at a time.
+Array *messages_to_be_indexed = NULL;	// released with array_free() elsewhere in this file; presumably holds message numbers queued for indexing -- confirm
+long highest_msg_already_indexed = 0;	// presumably the high-water mark of messages already indexed -- confirm where it is loaded
+long highest_msg_to_be_indexed = 0;	// presumably the highest message number found during the room scan -- confirm
+
+
// Compare function
int longcmp(const void *rec1, const void *rec2) {
long i1, i2;
}
- //if (ftc_msgs[i] != NULL) {
- //cdb_store(CDB_FULLTEXT, &i, sizeof(int), ftc_msgs[i], (ftc_num_msgs[i] * sizeof(long)));
-
-
-
// Index or de-index a message. (op == 1 to index, 0 to de-index)
void ft_index_message(long msgnum, int op) {
int i, j;
msg = CtdlFetchMessage(msgnum, 1);
if (msg == NULL) {
+ // This is not necessarily an error condition; it could simply mean that the message was
+ // deleted before it could be indexed. This happens often when the load tester is running.
syslog(LOG_ERR, "fulltext: ft_index_message() could not load msg %ld", msgnum);
return;
}
);
if (array_len(tokens_in_this_message) > 0) {
+ cdb_begin_transaction();
for (i=0; i<array_len(tokens_in_this_message); ++i) {
// Identify the bucket which we will be modifying
memcpy(&tok, array_get_element_at(tokens_in_this_message, i), sizeof(int));
// fetch the bucket
- // cdb_bucket = cdb_fetch(CDB_FULLTEXT, &tok, sizeof(int));
- // if op == 1 , add this msgnum to the record.
- // if op == 0 , remove this msgnum from the record.
- // FIXME do this
- // FIXME then write it back to disk
+ cdb_bucket = cdb_fetch(CDB_FULLTEXT, &tok, sizeof(int));
+ // Work on a private copy with room for one extra entry, so the append case always fits.
+ // NOTE(review): the malloc() result is not checked before use.
+ long *newbucket = malloc(cdb_bucket.len + sizeof(long));
+ int nmsgs = cdb_bucket.len / sizeof(long);
+
+ if (op == 1) { // indexing, add this message to the bucket
+ memcpy(&newbucket[0], cdb_bucket.ptr, cdb_bucket.len);
+ memcpy(&newbucket[nmsgs++], &msgnum, sizeof(long));
+ }
+
+ else if (op == 0) { // deindexing, remove this message from the bucket
+ memcpy(newbucket, cdb_bucket.ptr, cdb_bucket.len);
+ for (j=0; j<nmsgs; ++j) {
+ if ((newbucket[j] == msgnum) || (newbucket[j] == 0)) {
+ fprintf(stderr, "removing %ld\n", newbucket[j]);
+ // Close the gap left by entry j. Source and destination overlap, so this must be
+ // memmove() (memcpy() is undefined for overlapping ranges), and only the
+ // (nmsgs-j-1) entries that follow slot j hold live data.
+ memmove(&newbucket[j], &newbucket[j+1], ((nmsgs-j-1)*sizeof(long)));
+ // Re-examine slot j, which now holds the entry that used to follow it.
+ --j;
+ --nmsgs;
+ }
+ }
+ }
+
+ // Then write it back to disk
+ cdb_store(CDB_FULLTEXT, &tok, sizeof(int), newbucket, (nmsgs*sizeof(long)));
+ free(newbucket);
+
+ if (server_shutting_down) break;
}
+ cdb_end_transaction();
+ // Record this message number under the "MMfulltext" config key.
+ CtdlSetConfigLong("MMfulltext", msgnum);
}
array_free(tokens_in_this_message);
}
-
-Array *messages_to_be_indexed = NULL;
-long highest_msg_already_indexed = 0;
-long highest_msg_to_be_indexed = 0;
-
-
// Scan a room for messages to index.
void ft_index_room(struct ctdlroom *qrbuf, void *data) {
if (server_shutting_down) {
int num_msgs = 0;
long *msglist;
int i;
+
+ // 2023aug30 ajc - old code did another CtdlGetRoom() here. Not only is that redundant,
+ // but for some reason it also made Berkeley DB deadlock after a while. I don't know why.
+
+ // qrbuf is already populated. fetch the list of messages in this room.
num_msgs = CtdlFetchMsgList(qrbuf->QRnumber, &msglist);
+
+ // Identify messages which have NOT yet been seen by the indexer.
if (msglist != NULL) {
for (i=0; i<num_msgs; ++i) {
if (
array_free(messages_to_be_indexed);
-#if 0
- syslog(LOG_DEBUG,
- "fulltext: indexed %d of %d messages (%d%%)", i, ft_num_msgs, ((i*100) / ft_num_msgs));
- last_progress = time(NULL);
-
- // Check to see if we need to quit early
- if (server_shutting_down) {
- syslog(LOG_DEBUG, "fulltext: indexer quitting early");
- ft_newhighest = ft_newmsgs[i];
- break;
- }
-
- begin_critical_section(S_CONTROL);
- CtdlSetConfigLong("MMfulltext", ft_newhighest);
CtdlSetConfigInt("MM_fulltext_wordbreaker", FT_WORDBREAKER_ID);
- end_critical_section(S_CONTROL);
-#endif
syslog(LOG_DEBUG, "fulltext: indexing finished");
is_running = 0;
}
-
void ft_delete_remove(char *room, long msgnum) {
if (room) return;