code.citadel.org Git - citadel.git/commitdiff
* Indexer is completed; also began work on the search function itself.
author Art Cancro <ajc@citadel.org>
Tue, 17 May 2005 20:36:49 +0000 (20:36 +0000)
committer Art Cancro <ajc@citadel.org>
Tue, 17 May 2005 20:36:49 +0000 (20:36 +0000)
  Still need to add de-indexing so deleted messages are removed from index.

citadel/ChangeLog
citadel/ft_wordbreaker.c
citadel/ft_wordbreaker.h
citadel/ft_wordbreaker.o
citadel/serv_fulltext.c
citadel/serv_fulltext.o

index 2ae16a9a407062095094c1a965204c81d094279a..08e13c69a1b24ce52baf6a3503200b9c6f57f9f8 100644 (file)
@@ -1,4 +1,8 @@
  $Log$
+ Revision 647.7  2005/05/17 20:36:48  ajc
+ * Indexer is completed; also began work on the search function itself.
+   Still need to add de-indexing so deleted messages are removed from index.
+
  Revision 647.6  2005/05/17 16:25:23  ajc
  * Completed the wordbreaker for the fulltext indexer.
 
@@ -6705,3 +6709,4 @@ Sat Jul 11 00:20:48 EDT 1998 Nathan Bryant <bryant@cs.usm.maine.edu>
 
 Fri Jul 10 1998 Art Cancro <ajc@uncensored.citadel.org>
        * Initial CVS import
+
index 6c69175d1b93b79066d9fa330bef48d33025dd9b..f330ac573b162dc189eb70c5e990de639cfa1f1a 100644 (file)
@@ -75,40 +75,40 @@ void wordbreaker(char *text, int *num_tokens, int **tokens) {
 
        ptr = text;
        word_start = NULL;
-       while (ptr++, *ptr) {
+       while (*ptr) {
                ch = *ptr;
                if (isalnum(ch)) {
                        if (!word_start) {
                                word_start = ptr;
                        }
                }
-               else {
-                       if (word_start) {
-                               word_end = ptr;
-                               --word_end;
-
-                               /* extract the word */
-                               word_len = word_end - word_start + 1;
-                               safestrncpy(word, word_start, sizeof word);
-                               word[word_len] = 0;
-                               word_start = NULL;
-
-                               /* are we ok with the length? */
-                               if ( (word_len >= WB_MIN)
-                                  && (word_len <= WB_MAX) ) {
-                                       for (i=0; i<word_len; ++i) {
-                                               word[i] = tolower(word[i]);
-                                       }
-                                       word_crc = (int)
-                                               CalcCRC16Bytes(word_len, word);
-
-                                       ++wb_num_tokens;
-                                       if (wb_num_tokens > wb_num_alloc) {
-                                               wb_num_alloc += 512;
-                                               wb_tokens = realloc(wb_tokens, (sizeof(int) * wb_num_alloc));
-                                       }
-                                       wb_tokens[wb_num_tokens - 1] = word_crc;
+               ++ptr;
+               ch = *ptr;
+               if ( (!isalnum(ch)) && (word_start) ) {
+                       word_end = ptr;
+                       --word_end;
+
+                       /* extract the word */
+                       word_len = word_end - word_start + 1;
+                       safestrncpy(word, word_start, sizeof word);
+                       word[word_len] = 0;
+                       word_start = NULL;
+
+                       /* are we ok with the length? */
+                       if ( (word_len >= WB_MIN)
+                          && (word_len <= WB_MAX) ) {
+                               for (i=0; i<word_len; ++i) {
+                                       word[i] = tolower(word[i]);
+                               }
+                               word_crc = (int)
+                                       CalcCRC16Bytes(word_len, word);
+
+                               ++wb_num_tokens;
+                               if (wb_num_tokens > wb_num_alloc) {
+                                       wb_num_alloc += 512;
+                                       wb_tokens = realloc(wb_tokens, (sizeof(int) * wb_num_alloc));
                                }
+                               wb_tokens[wb_num_tokens - 1] = word_crc;
                        }
                }
        }
index 57bd97b7aeb8f0b20773f33f44b4a2855e5044bb..03b2be10e7f2960cb0b28e0e1c3e12d2f3a77758 100644 (file)
@@ -9,7 +9,7 @@
  * later on, or even if we update this one, we can use a different ID so the
  * system knows it needs to throw away the existing index and rebuild it.
  */
-#define        FT_WORDBREAKER_ID       0x0001
+#define        FT_WORDBREAKER_ID       0x0002
 
 /*
  * Minimum and maximum length of words to index
index a90949c621edab9c32d56581ea9dbd7c369d17db..9c10c42c68f8ec3967df2e577f6a30d1cfb85a59 100644 (file)
Binary files a/citadel/ft_wordbreaker.o and b/citadel/ft_wordbreaker.o differ
index 018ca9ab6b711554a0f05ae95de1904337e62733..c47e2cb264076d728a72f030b9e1ddf95c5bb811 100644 (file)
@@ -62,6 +62,7 @@ void ft_index_message(long msgnum, int op) {
        int i;
        struct cdbdata *cdb_bucket;
        int num_msgs;
+       long *msgs;
 
        msg = CtdlFetchMessage(msgnum, 1);
        if (msg == NULL) return;
@@ -75,10 +76,11 @@ void ft_index_message(long msgnum, int op) {
                for (i=0; i<num_tokens; ++i) {
 
                        /* Add the message to the relevant token bucket */
-                       lprintf(CTDL_DEBUG, "msg %ld, token %d\n", msgnum, tokens[i]);
+
+                       /* FIXME do "if op=1" ... */
 
                        /* FIXME lock the file */
-                       cdb_bucket = cdb_fetch(CDB_FULLTEXT, &tokens[i], sizeof(long));
+                       cdb_bucket = cdb_fetch(CDB_FULLTEXT, &tokens[i], sizeof(int));
                        if (cdb_bucket == NULL) {
                                cdb_bucket = malloc(sizeof(struct cdbdata));
                                cdb_bucket->len = 0;
@@ -86,7 +88,13 @@ void ft_index_message(long msgnum, int op) {
                        }
                        num_msgs = cdb_bucket->len / sizeof(long);
 
-                       /* FIXME finish this */
+                       ++num_msgs;
+                       cdb_bucket->ptr = realloc(cdb_bucket->ptr, num_msgs*sizeof(long) );
+                       msgs = (long *) cdb_bucket->ptr;
+                       msgs[num_msgs - 1] = msgnum;
+
+                       cdb_store(CDB_FULLTEXT, &tokens[i], sizeof(int),
+                               cdb_bucket->ptr, num_msgs*sizeof(long) );
 
                        cdb_free(cdb_bucket);
 
@@ -209,15 +217,48 @@ void do_fulltext_indexing(void) {
                ft_newmsgs = NULL;
        }
 
+       /* Save our place so we don't have to do this again */
+       CitControl.MMfulltext = ft_newhighest;
+       CitControl.fulltext_wordbreaker = FT_WORDBREAKER_ID;
+       put_control();
+       last_index = time(NULL);
+
        lprintf(CTDL_DEBUG, "do_fulltext_indexing() finished\n");
        return;
 }
 
 
+/*
+ * Tentative form of our search command (registered as "SRCH" in
+ * serv_fulltext_init).
+ *
+ * argbuf: raw command arguments; only field 0 of the '|'-delimited
+ *         argument string is used, as the search string.
+ *
+ * The search string is run through wordbreaker() to obtain its tokens.
+ * No index lookup is performed yet: the listing sent to the client
+ * contains one placeholder "FIXME" line per token.
+ */
+void cmd_srch(char *argbuf) {
+       char search_string[256];        /* field 0 of argbuf; copy is bounded by sizeof search_string */
+       int num_tokens = 0;             /* filled in by wordbreaker() */
+       int *tokens = NULL;             /* filled in by wordbreaker(); free()d below when non-empty */
+       int i;
+
+       if (CtdlAccessCheck(ac_logged_in)) return;      /* nonzero presumably means access denied (confirm); nothing more is sent from here */
+       extract_token(search_string, argbuf, 0, '|', sizeof search_string);
+       wordbreaker(search_string, &num_tokens, &tokens);
+
+       cprintf("%d msgs matching search words:\n", LISTING_FOLLOWS);   /* listing header, sent even when there are no tokens */
+       if (num_tokens > 0) {
+               for (i=0; i<num_tokens; ++i) {
+
+                       cprintf("FIXME search for token %d\n", tokens[i]);      /* placeholder: echoes the token value instead of searching */
+
+               }
+               free(tokens);   /* NOTE(review): assumes wordbreaker() heap-allocates the array and hands ownership to the caller -- confirm */
+       }
+       cprintf("000\n");       /* presumably the end-of-listing marker -- confirm against the protocol docs */
+}
+
+
 /*****************************************************************************/
 
 char *serv_fulltext_init(void)  /* registers this module's hooks and returns its version string */
 {
        CtdlRegisterSessionHook(do_fulltext_indexing, EVT_TIMER);       /* run do_fulltext_indexing on EVT_TIMER events */
+        CtdlRegisterProtoHook(cmd_srch, "SRCH", "Full text search");    /* expose cmd_srch as the "SRCH" command */
        return "$Id$";  /* presumably a CVS keyword placeholder expanded at checkout -- confirm */
 }
index 935d5092a8565a6dd2b12d1133d1b2db900450e4..4601893f801d6580d0f135fad943aaaeabc03186 100644 (file)
Binary files a/citadel/serv_fulltext.o and b/citadel/serv_fulltext.o differ