SEEN-Database: refactor database interface for remembering whether we already aggrega...
[citadel.git] / citadel / modules / rssclient / serv_rssclient.c
index 638300feb439abdbee4bb7cb42afa3fbcd825f82..7cc8160fbb09fe172e8f4bbec9224c4560bc1c7f 100644 (file)
@@ -164,7 +164,9 @@ void DeleteRssCfg(void *vptr)
 {
        rss_aggregator *RSSAggr = (rss_aggregator *)vptr;
        AsyncIO *IO = &RSSAggr->IO;
-       EVRSSCM_syslog(LOG_DEBUG, "RSS: destroying\n");
+
+       if (IO->CitContext != NULL)
+               EVRSSCM_syslog(LOG_DEBUG, "RSS: destroying\n");
 
        FreeStrBuf(&RSSAggr->Url);
        FreeStrBuf(&RSSAggr->rooms);
@@ -186,6 +188,7 @@ void DeleteRssCfg(void *vptr)
        }
 
        FreeAsyncIOContents(&RSSAggr->IO);
+       memset(RSSAggr, 0, sizeof(rss_aggregator));
        free(RSSAggr);
 }
 
@@ -195,7 +198,7 @@ eNextState RSSAggregator_Terminate(AsyncIO *IO)
 
        EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n");
 
-
+       StopCurlWatchers(IO);
        UnlinkRSSAggregator(RSSAggr);
        return eAbort;
 }
@@ -207,6 +210,7 @@ eNextState RSSAggregator_TerminateDB(AsyncIO *IO)
        EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n");
 
 
+       StopDBWatchers(&RSSAggr->IO);
        UnlinkRSSAggregator(RSSAggr);
        return eAbort;
 }
@@ -222,15 +226,135 @@ eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO)
 
        EVRSSC_syslog(LOG_DEBUG, "RSS: Aborting by shutdown: %s.\n", pUrl);
 
-
+       StopCurlWatchers(IO);
        UnlinkRSSAggregator(RSSAggr);
        return eAbort;
 }
 
+/*
+ * Append an HTML anchor pointing at `link` to Message, followed by "<br>\n".
+ * Nothing is appended when `link` is empty.
+ *
+ * The visible label is, in order of preference: LinkTitle, the plain
+ * string Title, or the link itself.
+ *
+ * NOTE(review): link and LinkTitle are copied into the markup as-is;
+ * presumably they originate from the fetched feed - confirm whether they
+ * need HTML escaping before being placed inside href="...".
+ */
+void AppendLink(StrBuf *Message,
+               StrBuf *link,
+               StrBuf *LinkTitle,
+               const char *Title)
+{
+       /* without a target there is nothing to render */
+       if (StrLength(link) <= 0)
+               return;
+
+       /* opening tag */
+       StrBufAppendBufPlain(Message, HKEY("<a href=\""), 0);
+       StrBufAppendBuf(Message, link, 0);
+       StrBufAppendBufPlain(Message, HKEY("\">"), 0);
+
+       /* label */
+       if (StrLength(LinkTitle) > 0)
+       {
+               StrBufAppendBuf(Message, LinkTitle, 0);
+       }
+       else if ((Title == NULL) || IsEmptyStr(Title))
+       {
+               StrBufAppendBuf(Message, link, 0);
+       }
+       else
+       {
+               StrBufAppendBufPlain(Message, Title, -1, 0);
+       }
+
+       /* closing tag */
+       StrBufAppendBufPlain(Message, HKEY("</a><br>\n"), 0);
+}
+
 
-eNextState AbortNetworkSaveMessage (AsyncIO *IO)
+/*
+ * Turn the collected item data in SaveMsg into message fields.
+ *
+ * Fills SaveMsg->Msg.cm_fields 'A' and 'P' from the author information,
+ * 'N' from NODENAME and 'U' from the title, then builds an HTML body from
+ * the description plus the link / reLink anchors and leaves it in
+ * SaveMsg->Message.
+ *
+ * NOTE(review): several StrBufs of SaveMsg are handed to SmashStrBuf()
+ * here; presumably that transfers their buffer into the cm_fields entry
+ * and clears the StrBuf pointer - confirm against libcitadel.
+ */
+void rss_format_item(networker_save_message *SaveMsg)
 {
-       return eAbort; ///TODO
+       StrBuf *Message;
+       int msglen = 0;
+
+       if (SaveMsg->author_or_creator != NULL) {
+
+               char *From;
+               StrBuf *Encoded = NULL;
+               int FromAt;
+
+               /* replace the author by its html_to_ascii() rendition, trimmed */
+               From = html_to_ascii(ChrPtr(SaveMsg->author_or_creator),
+                                    StrLength(SaveMsg->author_or_creator),
+                                    512, 0);
+               StrBufPlain(SaveMsg->author_or_creator, From, -1);
+               StrBufTrim(SaveMsg->author_or_creator);
+               free(From);
+
+               /* non-zero if the author string itself contains an '@' */
+               FromAt = strchr(ChrPtr(SaveMsg->author_or_creator), '@') != NULL;
+               if (!FromAt && StrLength (SaveMsg->author_email) > 0)
+               {
+                       /* name without '@' plus a separate e-mail:
+                        * RFC2047-encoded name -> 'A', e-mail -> 'P' */
+                       StrBufRFC2047encode(&Encoded, SaveMsg->author_or_creator);
+                       SaveMsg->Msg.cm_fields['A'] = SmashStrBuf(&Encoded);
+                       SaveMsg->Msg.cm_fields['P'] =
+                               SmashStrBuf(&SaveMsg->author_email);
+               }
+               else
+               {
+                       if (FromAt)
+                       {
+                               /* author contains an '@': used unencoded
+                                * for 'A', and duplicated into 'P' */
+                               SaveMsg->Msg.cm_fields['A'] =
+                                       SmashStrBuf(&SaveMsg->author_or_creator);
+                               SaveMsg->Msg.cm_fields['P'] =
+                                       strdup(SaveMsg->Msg.cm_fields['A']);
+                       }
+                       else
+                       {
+                               /* no '@' and no e-mail: encoded name -> 'A',
+                                * fixed placeholder address -> 'P' */
+                               StrBufRFC2047encode(&Encoded,
+                                                   SaveMsg->author_or_creator);
+                               SaveMsg->Msg.cm_fields['A'] =
+                                       SmashStrBuf(&Encoded);
+                               SaveMsg->Msg.cm_fields['P'] =
+                                       strdup("rss@localhost");
+
+                       }
+               }
+       }
+       else {
+               /* no author at all: 'A' becomes "rss", 'P' is left untouched */
+               SaveMsg->Msg.cm_fields['A'] = strdup("rss");
+       }
+
+       SaveMsg->Msg.cm_fields['N'] = strdup(NODENAME);
+       if (SaveMsg->title != NULL) {
+               long len;
+               char *Sbj;
+               StrBuf *Encoded, *QPEncoded;
+
+               QPEncoded = NULL;
+               StrBufSpaceToBlank(SaveMsg->title);
+               len = StrLength(SaveMsg->title);
+               Sbj = html_to_ascii(ChrPtr(SaveMsg->title), len, 512, 0);
+               len = strlen(Sbj);
+               /* drop one trailing newline from the converted title */
+               if ((len > 0) && (Sbj[len - 1] == '\n'))
+               {
+                       len --;
+                       Sbj[len] = '\0';
+               }
+               Encoded = NewStrBufPlain(Sbj, len);
+               free(Sbj);
+
+               /* trimmed, RFC2047-encoded title -> 'U' */
+               StrBufTrim(Encoded);
+               StrBufRFC2047encode(&QPEncoded, Encoded);
+
+               SaveMsg->Msg.cm_fields['U'] = SmashStrBuf(&QPEncoded);
+               FreeStrBuf(&Encoded);
+       }
+       /* make sure link is a (possibly empty) StrBuf from here on */
+       if (SaveMsg->link == NULL)
+               SaveMsg->link = NewStrBufPlain(HKEY(""));
+
+#if 0 /* temporarily disable shorter urls. */
+       SaveMsg->Msg.cm_fields[TMP_SHORTER_URLS] =
+               GetShorterUrls(SaveMsg->description);
+#endif
+
+       /* link + description + 1024 extra bytes; passed to NewStrBufPlain()
+        * below, presumably as the initial buffer size - TODO confirm */
+       msglen += 1024 + StrLength(SaveMsg->link) + StrLength(SaveMsg->description) ;
+
+       Message = NewStrBufPlain(NULL, msglen);
+
+       /* header block and opening markup of the HTML body */
+       StrBufPlain(Message, HKEY(
+                           "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n"
+                           "<html><body>\n"));
+#if 0 /* disable shorter url for now. */
+       SaveMsg->Msg.cm_fields[TMP_SHORTER_URL_OFFSET] = StrLength(Message);
+#endif
+       StrBufAppendBuf(Message, SaveMsg->description, 0);
+       StrBufAppendBufPlain(Message, HKEY("<br><br>\n"), 0);
+
+       /* anchors for the item link and the reply link; each is skipped by
+        * AppendLink() when its link is empty */
+       AppendLink(Message, SaveMsg->link, SaveMsg->linkTitle, NULL);
+       AppendLink(Message, SaveMsg->reLink, SaveMsg->reLinkTitle, "Reply to this");
+       StrBufAppendBufPlain(Message, HKEY("</body></html>\n"), 0);
+
+
+       /* the finished body is stored in SaveMsg->Message */
+       SaveMsg->Message = Message;
 }
 
 eNextState RSSSaveMessage(AsyncIO *IO)
@@ -239,16 +363,16 @@ eNextState RSSSaveMessage(AsyncIO *IO)
        const char *Key;
        rss_aggregator *RSSAggr = (rss_aggregator *) IO->Data;
 
+       rss_format_item(RSSAggr->ThisMsg);
+
        RSSAggr->ThisMsg->Msg.cm_fields['M'] =
                SmashStrBuf(&RSSAggr->ThisMsg->Message);
 
        CtdlSubmitMsg(&RSSAggr->ThisMsg->Msg, &RSSAggr->recp, NULL, 0);
 
        /* write the uidl to the use table so we don't store this item again */
-       cdb_store(CDB_USETABLE,
-                 SKEY(RSSAggr->ThisMsg->MsgGUID),
-                 &RSSAggr->ThisMsg->ut,
-                 sizeof(struct UseTable) );
+
+       CheckIfAlreadySeen("RSS Item Insert", RSSAggr->ThisMsg->MsgGUID, IO->Now, 0, eWrite, IO->ID, CCID);
 
        if (GetNextHashPos(RSSAggr->Messages,
                           RSSAggr->Pos,
@@ -263,27 +387,23 @@ eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO)
 {
        const char *Key;
        long len;
-       struct cdbdata *cdbut;
        rss_aggregator *Ctx = (rss_aggregator *) IO->Data;
 
        /* Find out if we've already seen this item */
-       strcpy(Ctx->ThisMsg->ut.ut_msgid,
-              ChrPtr(Ctx->ThisMsg->MsgGUID)); /// TODO
-       Ctx->ThisMsg->ut.ut_timestamp = time(NULL);
-
-       cdbut = cdb_fetch(CDB_USETABLE, SKEY(Ctx->ThisMsg->MsgGUID));
+// todo: expiry?
 #ifndef DEBUG_RSS
-       if (cdbut != NULL) {
+       if (CheckIfAlreadySeen("RSS Item Seen",
+                              Ctx->ThisMsg->MsgGUID,
+                              IO->Now,
+                              IO->Now - USETABLE_ANTIEXPIRE,
+                              eCheckUpdate,
+                              IO->ID, CCID)
+           != 0)
+       {
                /* Item has already been seen */
                EVRSSC_syslog(LOG_DEBUG,
                          "%s has already been seen\n",
                          ChrPtr(Ctx->ThisMsg->MsgGUID));
-               cdb_free(cdbut);
-
-               /* rewrite the record anyway, to update the timestamp */
-               cdb_store(CDB_USETABLE,
-                         SKEY(Ctx->ThisMsg->MsgGUID),
-                         &Ctx->ThisMsg->ut, sizeof(struct UseTable) );
 
                if (GetNextHashPos(Ctx->Messages,
                                   Ctx->Pos,
@@ -301,6 +421,88 @@ eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO)
                NextDBOperation(IO, RSSSaveMessage);
                return eSendMore;
        }
+       return eSendMore;
+}
+
+/*
+ * Inspect a finished HTTP reply before it is parsed.
+ *
+ * Queued by RSSAggregator_FinishHttp() through QueueDBOperation().
+ *
+ * - Any HTTP status other than 200 is logged, reported through
+ *   CtdlAideFPMessage() (with the feed URL and room list as extra strings)
+ *   and the run is aborted.
+ * - Otherwise an MD5 over the reply body followed by the feed URL is
+ *   turned into a key ("<hex digest>_rssFM", cut to 40 characters) and
+ *   looked up with CheckIfAlreadySeen(); a non-zero result aborts the run.
+ *   This lookup is compiled out when DEBUG_RSS is defined.
+ *
+ * Returns eAbort in the cases above, else the result of
+ * RSSAggregator_ParseReply(IO).
+ */
+eNextState RSSAggregator_AnalyseReply(AsyncIO *IO)
+{
+       u_char rawdigest[MD5_DIGEST_LEN];
+       struct MD5Context md5context;
+       StrBuf *guid;
+       rss_aggregator *Ctx = (rss_aggregator *) IO->Data;
+
+       if (IO->HttpReq.httpcode != 200)
+       {
+               StrBuf *ErrMsg;
+               long lens[2];
+               const char *strs[2];
+
+               ErrMsg = NewStrBuf();
+               EVRSSC_syslog(LOG_ALERT, "need a 200, got a %ld !\n",
+                             IO->HttpReq.httpcode);
+
+               strs[0] = ChrPtr(Ctx->Url);
+               lens[0] = StrLength(Ctx->Url);
+
+               strs[1] = ChrPtr(Ctx->rooms);
+               lens[1] = StrLength(Ctx->rooms);
+               StrBufPrintf(ErrMsg,
+                            "Error while RSS-Aggregation Run of %s\n"
+                            " need a 200, got a %ld !\n"
+                            " Response text was: \n"
+                            " \n %s\n",
+                            ChrPtr(Ctx->Url),
+                            IO->HttpReq.httpcode,
+                            ChrPtr(IO->HttpReq.ReplyData));
+               /* lens decays to long *; no pointer-to-array cast needed */
+               CtdlAideFPMessage(
+                       ChrPtr(ErrMsg),
+                       "RSS Aggregation run failure",
+                       2, strs, lens,
+                       IO->Now,
+                       IO->ID, CCID);
+               FreeStrBuf(&ErrMsg);
+               return eAbort;
+       }
+
+       /* digest over reply body + URL identifies this exact feed content */
+       MD5Init(&md5context);
+
+       MD5Update(&md5context,
+                 (const unsigned char*)SKEY(IO->HttpReq.ReplyData));
+
+       MD5Update(&md5context,
+                 (const unsigned char*)SKEY(Ctx->Url));
+
+       MD5Final(rawdigest, &md5context);
+       guid = NewStrBufPlain(NULL,
+                             MD5_DIGEST_LEN * 2 + 12 /* hex digest + "_rssFM" suffix, with slack */);
+       StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN);
+       StrBufAppendBufPlain(guid, HKEY("_rssFM"), 0);
+       if (StrLength(guid) > 40)
+               StrBufCutAt(guid, 40, NULL);
+
+#ifndef DEBUG_RSS
+       /* Find out if we've already seen this whole reply */
+       if (CheckIfAlreadySeen("RSS Whole",
+                              guid,
+                              IO->Now,
+                              IO->Now - USETABLE_ANTIEXPIRE,
+                              eCheckUpdate,
+                              IO->ID, CCID)
+           != 0)
+       {
+               FreeStrBuf(&guid);
+
+               return eAbort;
+       }
+#endif
+       /* released outside the #ifndef so DEBUG_RSS builds do not leak it */
+       FreeStrBuf(&guid);
+       return RSSAggregator_ParseReply(IO);
+}
+
+/*
+ * Handed to InitcURLIOStruct() in rss_do_fetching().  Does no work of its
+ * own: it queues RSSAggregator_AnalyseReply through QueueDBOperation() and
+ * returns that call's result.
+ */
+eNextState RSSAggregator_FinishHttp(AsyncIO *IO)
+{
+       return QueueDBOperation(IO, RSSAggregator_AnalyseReply);
 }
 
 /*
@@ -324,7 +526,7 @@ int rss_do_fetching(rss_aggregator *RSSAggr)
        if (! InitcURLIOStruct(&RSSAggr->IO,
                               RSSAggr,
                               "Citadel RSS Client",
-                              RSSAggregator_ParseReply,
+                              RSSAggregator_FinishHttp,
                               RSSAggregator_Terminate,
                               RSSAggregator_TerminateDB,
                               RSSAggregator_ShutdownAbort))
@@ -348,21 +550,12 @@ int rss_do_fetching(rss_aggregator *RSSAggr)
 /*
  * Scan a room's netconfig to determine whether it is requesting any RSS feeds
  */
-void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
+void rssclient_scan_room(struct ctdlroom *qrbuf, void *data, OneRoomNetCfg *OneRNCFG)
 {
-       StrBuf *CfgData=NULL;
-       StrBuf *CfgType;
-       StrBuf *Line;
-       rss_room_counter *Count = NULL;
-       struct stat statbuf;
-       char filename[PATH_MAX];
-       int fd;
-       int Done;
+       const RoomNetCfgLine *pLine;
        rss_aggregator *RSSAggr = NULL;
        rss_aggregator *use_this_RSSAggr = NULL;
        void *vptr;
-       const char *CfgPtr, *lPtr;
-       const char *Err;
 
        pthread_mutex_lock(&RSSQueueMutex);
        if (GetHash(RSSQueueRooms, LKEY(qrbuf->QRnumber), &vptr))
@@ -376,143 +569,77 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
        }
        pthread_mutex_unlock(&RSSQueueMutex);
 
-       assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir);
-
-       if (server_shutting_down)
-               return;
+       if (server_shutting_down) return;
 
-       /* Only do net processing for rooms that have netconfigs */
-       fd = open(filename, 0);
-       if (fd <= 0) {
-               /* syslog(LOG_DEBUG,
-                  "rssclient: %s no config.\n",
-                  qrbuf->QRname); */
-               return;
-       }
+       pLine = OneRNCFG->NetConfigs[rssclient];
 
-       if (server_shutting_down)
-               return;
-
-       if (fstat(fd, &statbuf) == -1) {
-               EVRSSQ_syslog(LOG_DEBUG,
-                             "ERROR: could not stat configfile '%s' - %s\n",
-                             filename,
-                             strerror(errno));
-               return;
-       }
+       while (pLine != NULL)
+       {
+               const char *lPtr = NULL;
 
-       if (server_shutting_down)
-               return;
+               RSSAggr = (rss_aggregator *) malloc(
+                       sizeof(rss_aggregator));
 
-       CfgData = NewStrBufPlain(NULL, statbuf.st_size + 1);
+               memset (RSSAggr, 0, sizeof(rss_aggregator));
+               RSSAggr->QRnumber = qrbuf->QRnumber;
+               RSSAggr->roomlist_parts = 1;
+               RSSAggr->Url = NewStrBufPlain(NULL, StrLength(pLine->Value[0]));
+               StrBufExtract_NextToken(RSSAggr->Url,
+                                       pLine->Value[0],
+                                       &lPtr,
+                                       '|');
 
-       if (StrBufReadBLOB(CfgData, &fd, 1, statbuf.st_size, &Err) < 0) {
-               close(fd);
-               FreeStrBuf(&CfgData);
-               EVRSSQ_syslog(LOG_ERR, "ERROR: reading config '%s' - %s<br>\n",
-                             filename, strerror(errno));
-               return;
-       }
-       close(fd);
-       if (server_shutting_down)
-               return;
+               pthread_mutex_lock(&RSSQueueMutex);
+               GetHash(RSSFetchUrls,
+                       SKEY(RSSAggr->Url),
+                       &vptr);
 
-       CfgPtr = NULL;
-       CfgType = NewStrBuf();
-       Line = NewStrBufPlain(NULL, StrLength(CfgData));
-       Done = 0;
-       while (!Done)
-       {
-               Done = StrBufSipLine(Line, CfgData, &CfgPtr) == 0;
-               if (StrLength(Line) > 0)
+               use_this_RSSAggr = (rss_aggregator *)vptr;
+               if (use_this_RSSAggr != NULL)
                {
-                       lPtr = NULL;
-                       StrBufExtract_NextToken(CfgType, Line, &lPtr, '|');
-                       if (!strcasecmp("rssclient", ChrPtr(CfgType)))
+                       long *QRnumber;
+                       StrBufAppendBufPlain(
+                               use_this_RSSAggr->rooms,
+                               qrbuf->QRname,
+                               -1, 0);
+                       if (use_this_RSSAggr->roomlist_parts==1)
                        {
-                               if (Count == NULL)
-                               {
-                                       Count = malloc(
-                                               sizeof(rss_room_counter));
-                                       Count->count = 0;
-                               }
-                               Count->count ++;
-                               RSSAggr = (rss_aggregator *) malloc(
-                                       sizeof(rss_aggregator));
-
-                               memset (RSSAggr, 0, sizeof(rss_aggregator));
-                               RSSAggr->QRnumber = qrbuf->QRnumber;
-                               RSSAggr->roomlist_parts = 1;
-                               RSSAggr->Url = NewStrBuf();
-
-                               StrBufExtract_NextToken(RSSAggr->Url,
-                                                       Line,
-                                                       &lPtr,
-                                                       '|');
-
-                               pthread_mutex_lock(&RSSQueueMutex);
-                               GetHash(RSSFetchUrls,
-                                       SKEY(RSSAggr->Url),
-                                       &vptr);
-
-                               use_this_RSSAggr = (rss_aggregator *)vptr;
-                               if (use_this_RSSAggr != NULL)
-                               {
-                                       long *QRnumber;
-                                       StrBufAppendBufPlain(
-                                               use_this_RSSAggr->rooms,
-                                               qrbuf->QRname,
-                                               -1, 0);
-                                       if (use_this_RSSAggr->roomlist_parts==1)
-                                       {
-                                               use_this_RSSAggr->OtherQRnumbers
-                                                       = NewHash(1, lFlathash);
-                                       }
-                                       QRnumber = (long*)malloc(sizeof(long));
-                                       *QRnumber = qrbuf->QRnumber;
-                                       Put(use_this_RSSAggr->OtherQRnumbers,
-                                           LKEY(qrbuf->QRnumber),
-                                           QRnumber,
-                                           NULL);
-                                       use_this_RSSAggr->roomlist_parts++;
-
-                                       pthread_mutex_unlock(&RSSQueueMutex);
-
-                                       FreeStrBuf(&RSSAggr->Url);
-                                       free(RSSAggr);
-                                       RSSAggr = NULL;
-                                       continue;
-                               }
-                               pthread_mutex_unlock(&RSSQueueMutex);
-
-                               RSSAggr->ItemType = RSS_UNSET;
-
-                               RSSAggr->rooms = NewStrBufPlain(
-                                       qrbuf->QRname, -1);
-
-                               pthread_mutex_lock(&RSSQueueMutex);
-
-                               Put(RSSFetchUrls,
-                                   SKEY(RSSAggr->Url),
-                                   RSSAggr,
-                                   DeleteRssCfg);
-
-                               pthread_mutex_unlock(&RSSQueueMutex);
+                               use_this_RSSAggr->OtherQRnumbers
+                                       = NewHash(1, lFlathash);
                        }
+                       QRnumber = (long*)malloc(sizeof(long));
+                       *QRnumber = qrbuf->QRnumber;
+                       Put(use_this_RSSAggr->OtherQRnumbers,
+                           LKEY(qrbuf->QRnumber),
+                           QRnumber,
+                           NULL);
+                       use_this_RSSAggr->roomlist_parts++;
+
+                       pthread_mutex_unlock(&RSSQueueMutex);
+
+                       FreeStrBuf(&RSSAggr->Url);
+                       free(RSSAggr);
+                       RSSAggr = NULL;
+                       pLine = pLine->next;
+                       continue;
                }
-       }
-       if (Count != NULL)
-       {
-               Count->QRnumber = qrbuf->QRnumber;
+               pthread_mutex_unlock(&RSSQueueMutex);
+
+               RSSAggr->ItemType = RSS_UNSET;
+
+               RSSAggr->rooms = NewStrBufPlain(
+                       qrbuf->QRname, -1);
+
                pthread_mutex_lock(&RSSQueueMutex);
-               EVRSSQ_syslog(LOG_DEBUG, "client: [%ld] %s now starting.\n",
-                             qrbuf->QRnumber, qrbuf->QRname);
-               Put(RSSQueueRooms, LKEY(qrbuf->QRnumber), Count, NULL);
+
+               Put(RSSFetchUrls,
+                   SKEY(RSSAggr->Url),
+                   RSSAggr,
+                   DeleteRssCfg);
+
                pthread_mutex_unlock(&RSSQueueMutex);
+               pLine = pLine->next;
        }
-       FreeStrBuf(&CfgData);
-       FreeStrBuf(&CfgType);
-       FreeStrBuf(&Line);
 }
 
 /*
@@ -556,7 +683,7 @@ void rssclient_scan(void) {
 
        become_session(&rss_CC);
        EVRSSQM_syslog(LOG_DEBUG, "rssclient started\n");
-       CtdlForEachRoom(rssclient_scan_room, NULL);
+       CtdlForEachNetCfgRoom(rssclient_scan_room, NULL, rssclient);
 
        pthread_mutex_lock(&RSSQueueMutex);
 
@@ -582,23 +709,27 @@ void rss_cleanup(void)
        DeleteHash(&RSSQueueRooms);
 }
 
-void LogDebugEnableRSSClient(void)
+/*
+ * Hook registered under "rssclient" with CtdlRegisterDebugFlagHook();
+ * stores n in RSSClientDebugEnabled.
+ */
+void LogDebugEnableRSSClient(const int n)
 {
-       RSSClientDebugEnabled = 1;
+       RSSClientDebugEnabled = n;
 }
 
+/*
+ * Module entry point of the RSS client; always returns "rssclient".
+ * Which half runs depends on the `threading` value at the time of the call.
+ */
 CTDL_MODULE_INIT(rssclient)
 {
-       if (threading)
+       if (!threading)
        {
-               CtdlFillSystemContext(&rss_CC, "rssclient");
+               /* threading unset: register the rssclient room config type,
+                * create the queue mutex and both hashes, log the curl
+                * version and install the timer / cleanup / debug hooks */
+               CtdlREGISTERRoomCfgType(rssclient, ParseGeneric, 0, 1, SerializeGeneric, DeleteGenericCfgLine); /// todo: implement rss specific parser
                pthread_mutex_init(&RSSQueueMutex, NULL);
                RSSQueueRooms = NewHash(1, lFlathash);
                RSSFetchUrls = NewHash(1, NULL);
                syslog(LOG_INFO, "%s\n", curl_version());
-               CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER);
+               CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER, PRIO_AGGR + 300);
                CtdlRegisterEVCleanupHook(rss_cleanup);
-               CtdlRegisterDebugFlagHook(HKEY("rssclient"), LogDebugEnableRSSClient);
+               CtdlRegisterDebugFlagHook(HKEY("rssclient"), LogDebugEnableRSSClient, &RSSClientDebugEnabled);
+       }
+       else
+       {
+               /* threading set: only fill in the module's system context */
+               CtdlFillSystemContext(&rss_CC, "rssclient");
        }
        return "rssclient";
 }