RSS-CLIENT: remember checksum for whole feed, so we can save parsing work if we know...
[citadel.git] / citadel / modules / rssclient / serv_rssclient.c
index 638300feb439abdbee4bb7cb42afa3fbcd825f82..68b70a2652c5037997721c950bfe0d53783d6a03 100644 (file)
@@ -186,6 +186,7 @@ void DeleteRssCfg(void *vptr)
        }
 
        FreeAsyncIOContents(&RSSAggr->IO);
+       memset(RSSAggr, 0, sizeof(rss_aggregator));
        free(RSSAggr);
 }
 
@@ -195,7 +196,7 @@ eNextState RSSAggregator_Terminate(AsyncIO *IO)
 
        EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n");
 
-
+       StopCurlWatchers(IO);
        UnlinkRSSAggregator(RSSAggr);
        return eAbort;
 }
@@ -207,6 +208,7 @@ eNextState RSSAggregator_TerminateDB(AsyncIO *IO)
        EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n");
 
 
+       StopDBWatchers(&RSSAggr->IO);
        UnlinkRSSAggregator(RSSAggr);
        return eAbort;
 }
@@ -222,17 +224,11 @@ eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO)
 
        EVRSSC_syslog(LOG_DEBUG, "RSS: Aborting by shutdown: %s.\n", pUrl);
 
-
+       StopCurlWatchers(IO);
        UnlinkRSSAggregator(RSSAggr);
        return eAbort;
 }
 
-
-eNextState AbortNetworkSaveMessage (AsyncIO *IO)
-{
-       return eAbort; ///TODO
-}
-
 eNextState RSSSaveMessage(AsyncIO *IO)
 {
        long len;
@@ -303,6 +299,68 @@ eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO)
        }
 }
 
+/*
+ * Check a completed feed download against the use table before parsing it.
+ *
+ * The MD5 of the reply body plus the feed URL, hex-encoded and suffixed
+ * with "_rssFM", is used as the use-table key. If that key is already
+ * stored, the feed is treated as unchanged since it was last seen: the
+ * record is rewritten to refresh its timestamp and eAbort is returned so
+ * the parser is skipped. Otherwise the key is recorded and the reply is
+ * handed to RSSAggregator_ParseReply().
+ *
+ * A reply whose HTTP status is not 200 is logged and aborted.
+ * When DEBUG_RSS is defined the use table is not written and the feed is
+ * always parsed.
+ */
+eNextState RSSAggregator_AnalyseReply(AsyncIO *IO)
+{
+       struct UseTable ut;
+       u_char rawdigest[MD5_DIGEST_LEN];
+       struct MD5Context md5context;
+       StrBuf *guid;
+       struct cdbdata *cdbut;
+       int seen;
+       rss_aggregator *Ctx = (rss_aggregator *) IO->Data;
+
+       if (IO->HttpReq.httpcode != 200)
+       {
+
+               EVRSSC_syslog(LOG_ALERT, "need a 200, got a %ld !\n",
+                             IO->HttpReq.httpcode);
+// TODO: aide error message with rate limit
+               return eAbort;
+       }
+
+       /* checksum covers the reply body and the URL it was fetched from */
+       MD5Init(&md5context);
+
+       MD5Update(&md5context,
+                 (const unsigned char*)SKEY(IO->HttpReq.ReplyData));
+
+       MD5Update(&md5context,
+                 (const unsigned char*)SKEY(Ctx->Url));
+
+       MD5Final(rawdigest, &md5context);
+       guid = NewStrBufPlain(NULL,
+                             MD5_DIGEST_LEN * 2 + 12 /* hex digest + "_rssFM" */);
+       StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN);
+       StrBufAppendBufPlain(guid, HKEY("_rssFM"), 0);
+       if (StrLength(guid) > 40)
+               StrBufCutAt(guid, 40, NULL);
+
+       /*
+        * Zero the whole record first: the memcpy() below copies only the
+        * key bytes SKEY() supplies, and the complete struct is written to
+        * the database, so nothing in it may be left uninitialised.
+        * NOTE(review): assumes ut_msgid holds more than 40 bytes - confirm.
+        */
+       memset(&ut, 0, sizeof(ut));
+       memcpy(ut.ut_msgid, SKEY(guid));
+       ut.ut_timestamp = time(NULL);
+
+       /* Find out if we've already seen this feed content */
+       cdbut = cdb_fetch(CDB_USETABLE, SKEY(guid));
+       seen = (cdbut != NULL);
+       if (seen) {
+               /* only the existence of the record matters; release it now */
+               cdb_free(cdbut);
+       }
+
+#ifndef DEBUG_RSS
+       if (seen) {
+               /* Item has already been seen */
+               EVRSSC_syslog(LOG_DEBUG,
+                             "%s has already been seen\n",
+                             ChrPtr(Ctx->Url));
+       }
+
+       /* rewrite the record anyway, to update the timestamp */
+       cdb_store(CDB_USETABLE,
+                 SKEY(guid),
+                 &ut, sizeof(struct UseTable) );
+#else
+       /* debug builds re-parse every download */
+       seen = 0;
+#endif
+
+       /* the key is no longer needed on either path */
+       FreeStrBuf(&guid);
+
+       if (seen) return eAbort;
+       return RSSAggregator_ParseReply(IO);
+}
+
+/*
+ * Callback handed to InitcURLIOStruct() by rss_do_fetching().
+ * It does no work itself: it passes the IO to QueueDBOperation() with
+ * RSSAggregator_AnalyseReply as the handler and returns whatever state
+ * that call reports.
+ */
+eNextState RSSAggregator_FinishHttp(AsyncIO *IO)
+{
+       eNextState next;
+
+       next = QueueDBOperation(IO, RSSAggregator_AnalyseReply);
+       return next;
+}
+
 /*
  * Begin a feed parse
  */
@@ -324,7 +382,7 @@ int rss_do_fetching(rss_aggregator *RSSAggr)
        if (! InitcURLIOStruct(&RSSAggr->IO,
                               RSSAggr,
                               "Citadel RSS Client",
-                              RSSAggregator_ParseReply,
+                              RSSAggregator_FinishHttp,
                               RSSAggregator_Terminate,
                               RSSAggregator_TerminateDB,
                               RSSAggregator_ShutdownAbort))
@@ -582,9 +640,9 @@ void rss_cleanup(void)
        DeleteHash(&RSSQueueRooms);
 }
 
-void LogDebugEnableRSSClient(void)
+void LogDebugEnableRSSClient(const int n)
 {
-       RSSClientDebugEnabled = 1;
+       RSSClientDebugEnabled = n; /* store the caller-supplied value instead of the old fixed 1 */
 }
 
 CTDL_MODULE_INIT(rssclient)
@@ -596,9 +654,9 @@ CTDL_MODULE_INIT(rssclient)
                RSSQueueRooms = NewHash(1, lFlathash);
                RSSFetchUrls = NewHash(1, NULL);
                syslog(LOG_INFO, "%s\n", curl_version());
-               CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER);
+               CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER, PRIO_AGGR + 300);
                CtdlRegisterEVCleanupHook(rss_cleanup);
-               CtdlRegisterDebugFlagHook(HKEY("rssclient"), LogDebugEnableRSSClient);
+               CtdlRegisterDebugFlagHook(HKEY("rssclient"), LogDebugEnableRSSClient, &RSSClientDebugEnabled);
        }
        return "rssclient";
 }