X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Fserv_rssclient.c;h=68b70a2652c5037997721c950bfe0d53783d6a03;hb=e296d96701baded9253071b142887af824a1db84;hp=c6f065fa9ca7497a1a38c79a13edc57a3b736397;hpb=66f72c07b70ed9500c49e8ff3c3f895e5269d339;p=citadel.git diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index c6f065fa9..68b70a265 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -1,21 +1,15 @@ /* * Bring external RSS feeds into rooms. * - * Copyright (c) 2007-2010 by the citadel.org team + * Copyright (c) 2007-2012 by the citadel.org team * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. + * This program is open source software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA02111-1307USA */ #include @@ -69,11 +63,35 @@ HashList *RSSQueueRooms = NULL; /* rss_room_counter */ HashList *RSSFetchUrls = NULL; /*->rss_aggregator;->RefCount access locked*/ eNextState RSSAggregator_Terminate(AsyncIO *IO); +eNextState RSSAggregator_TerminateDB(AsyncIO *IO); eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO); struct CitContext rss_CC; struct rssnetcfg *rnclist = NULL; +int RSSClientDebugEnabled = 0; +#define N ((rss_aggregator*)IO->Data)->QRnumber + +#define DBGLOG(LEVEL) if ((LEVEL != LOG_DEBUG) || (RSSClientDebugEnabled != 0)) + +#define EVRSSC_syslog(LEVEL, FORMAT, ...) \ + DBGLOG(LEVEL) syslog(LEVEL, \ + "IO[%ld]CC[%d][%ld]RSS" FORMAT, \ + IO->ID, CCID, N, __VA_ARGS__) + +#define EVRSSCM_syslog(LEVEL, FORMAT) \ + DBGLOG(LEVEL) syslog(LEVEL, \ + "IO[%ld]CC[%d][%ld]RSS" FORMAT, \ + IO->ID, CCID, N) +#define EVRSSQ_syslog(LEVEL, FORMAT, ...) \ + DBGLOG(LEVEL) syslog(LEVEL, "RSS" FORMAT, \ + __VA_ARGS__) +#define EVRSSQM_syslog(LEVEL, FORMAT) \ + DBGLOG(LEVEL) syslog(LEVEL, "RSS" FORMAT) + +#define EVRSSCSM_syslog(LEVEL, FORMAT) \ + DBGLOG(LEVEL) syslog(LEVEL, "IO[%ld][%ld]RSS" FORMAT, \ + IO->ID, N) void DeleteRoomReference(long QRnumber) { @@ -99,19 +117,19 @@ void DeleteRoomReference(long QRnumber) DeleteHashPos(&At); } -void UnlinkRooms(rss_aggregator *Cfg) +void UnlinkRooms(rss_aggregator *RSSAggr) { - DeleteRoomReference(Cfg->QRnumber); - if (Cfg->OtherQRnumbers != NULL) + DeleteRoomReference(RSSAggr->QRnumber); + if (RSSAggr->OtherQRnumbers != NULL) { long HKLen; const char *HK; HashPos *At; void *vData; - At = GetNewHashPos(Cfg->OtherQRnumbers, 0); + At = GetNewHashPos(RSSAggr->OtherQRnumbers, 0); while (! server_shutting_down && - GetNextHashPos(Cfg->OtherQRnumbers, + GetNextHashPos(RSSAggr->OtherQRnumbers, At, &HKLen, &HK, &vData) && @@ -125,26 +143,28 @@ void UnlinkRooms(rss_aggregator *Cfg) } } -void UnlinkRSSAggregator(rss_aggregator *Cfg) +void UnlinkRSSAggregator(rss_aggregator *RSSAggr) { HashPos *At; - UnlinkRooms(Cfg); + pthread_mutex_lock(&RSSQueueMutex); + UnlinkRooms(RSSAggr); At = GetNewHashPos(RSSFetchUrls, 0); - if (GetHashPosFromKey(RSSFetchUrls, SKEY(Cfg->Url), At)) + if (GetHashPosFromKey(RSSFetchUrls, SKEY(RSSAggr->Url), At)) { DeleteEntryFromHash(RSSFetchUrls, At); } DeleteHashPos(&At); last_run = time(NULL); + pthread_mutex_unlock(&RSSQueueMutex); } void DeleteRssCfg(void *vptr) { rss_aggregator *RSSAggr = (rss_aggregator *)vptr; AsyncIO *IO = &RSSAggr->IO; - EVM_syslog(LOG_DEBUG, "RSS: destroying\n"); + EVRSSCM_syslog(LOG_DEBUG, "RSS: destroying\n"); FreeStrBuf(&RSSAggr->Url); FreeStrBuf(&RSSAggr->rooms); @@ -166,6 +186,7 @@ void DeleteRssCfg(void *vptr) } FreeAsyncIOContents(&RSSAggr->IO); + memset(RSSAggr, 0, sizeof(rss_aggregator)); free(RSSAggr); } @@ -173,9 +194,21 @@ eNextState RSSAggregator_Terminate(AsyncIO *IO) { rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; - EVM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); + EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); + + StopCurlWatchers(IO); + UnlinkRSSAggregator(RSSAggr); + return eAbort; +} + +eNextState RSSAggregator_TerminateDB(AsyncIO *IO) +{ + rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; + EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); + + StopDBWatchers(&RSSAggr->IO); UnlinkRSSAggregator(RSSAggr); return eAbort; } @@ -189,19 +222,13 @@ eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO) if (pUrl == NULL) pUrl = ""; - EV_syslog(LOG_DEBUG, "RSS: Aborting by shutdown: %s.\n", pUrl); - + EVRSSC_syslog(LOG_DEBUG, "RSS: Aborting by shutdown: %s.\n", pUrl); + StopCurlWatchers(IO); UnlinkRSSAggregator(RSSAggr); return eAbort; } - -eNextState AbortNetworkSaveMessage (AsyncIO *IO) -{ - return eAbort; ///TODO -} - eNextState RSSSaveMessage(AsyncIO *IO) { long len; @@ -244,7 +271,7 @@ eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO) #ifndef DEBUG_RSS if (cdbut != NULL) { /* Item has already been seen */ - EV_syslog(LOG_DEBUG, + EVRSSC_syslog(LOG_DEBUG, "%s has already been seen\n", ChrPtr(Ctx->ThisMsg->MsgGUID)); cdb_free(cdbut); @@ -272,43 +299,107 @@ eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO) } } +eNextState RSSAggregator_AnalyseReply(AsyncIO *IO) +{ + struct UseTable ut; + u_char rawdigest[MD5_DIGEST_LEN]; + struct MD5Context md5context; + StrBuf *guid; + struct cdbdata *cdbut; + rss_aggregator *Ctx = (rss_aggregator *) IO->Data; + + if (IO->HttpReq.httpcode != 200) + { + + EVRSSC_syslog(LOG_ALERT, "need a 200, got a %ld !\n", + IO->HttpReq.httpcode); +// TODO: aide error message with rate limit + return eAbort; + } + + MD5Init(&md5context); + + MD5Update(&md5context, + (const unsigned char*)SKEY(IO->HttpReq.ReplyData)); + + MD5Update(&md5context, + (const unsigned char*)SKEY(Ctx->Url)); + + MD5Final(rawdigest, &md5context); + guid = NewStrBufPlain(NULL, + MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/); + StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN); + StrBufAppendBufPlain(guid, HKEY("_rssFM"), 0); + if (StrLength(guid) > 40) + StrBufCutAt(guid, 40, NULL); + /* Find out if we've already seen this item */ + memcpy(ut.ut_msgid, SKEY(guid)); + ut.ut_timestamp = time(NULL); + + cdbut = cdb_fetch(CDB_USETABLE, SKEY(guid)); +#ifndef DEBUG_RSS + if (cdbut != NULL) { + /* Item has already been seen */ + EVRSSC_syslog(LOG_DEBUG, + "%s has already been seen\n", + ChrPtr(Ctx->Url)); + cdb_free(cdbut); + } + + /* rewrite the record anyway, to update the timestamp */ + cdb_store(CDB_USETABLE, + SKEY(guid), + &ut, sizeof(struct UseTable) ); + + if (cdbut != NULL) return eAbort; +#endif + return RSSAggregator_ParseReply(IO); +} + +eNextState RSSAggregator_FinishHttp(AsyncIO *IO) +{ + return QueueDBOperation(IO, RSSAggregator_AnalyseReply); +} + /* * Begin a feed parse */ -int rss_do_fetching(rss_aggregator *Cfg) +int rss_do_fetching(rss_aggregator *RSSAggr) { + AsyncIO *IO = &RSSAggr->IO; rss_item *ri; time_t now; now = time(NULL); - if ((Cfg->next_poll != 0) && (now < Cfg->next_poll)) + if ((RSSAggr->next_poll != 0) && (now < RSSAggr->next_poll)) return 0; ri = (rss_item*) malloc(sizeof(rss_item)); memset(ri, 0, sizeof(rss_item)); - Cfg->Item = ri; + RSSAggr->Item = ri; - if (! InitcURLIOStruct(&Cfg->IO, - Cfg, + if (! InitcURLIOStruct(&RSSAggr->IO, + RSSAggr, "Citadel RSS Client", - RSSAggregator_ParseReply, + RSSAggregator_FinishHttp, RSSAggregator_Terminate, + RSSAggregator_TerminateDB, RSSAggregator_ShutdownAbort)) { - syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); + EVRSSCM_syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); return 0; } - safestrncpy(((CitContext*)Cfg->IO.CitContext)->cs_host, - ChrPtr(Cfg->Url), - sizeof(((CitContext*)Cfg->IO.CitContext)->cs_host)); + safestrncpy(((CitContext*)RSSAggr->IO.CitContext)->cs_host, + ChrPtr(RSSAggr->Url), + sizeof(((CitContext*)RSSAggr->IO.CitContext)->cs_host)); - syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(Cfg->Url)); - ParseURL(&Cfg->IO.ConnectMe, Cfg->Url, 80); - CurlPrepareURL(Cfg->IO.ConnectMe); + EVRSSC_syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(RSSAggr->Url)); + ParseURL(&RSSAggr->IO.ConnectMe, RSSAggr->Url, 80); + CurlPrepareURL(RSSAggr->IO.ConnectMe); - QueueCurlContext(&Cfg->IO); + QueueCurlContext(&RSSAggr->IO); return 1; } @@ -334,10 +425,10 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) pthread_mutex_lock(&RSSQueueMutex); if (GetHash(RSSQueueRooms, LKEY(qrbuf->QRnumber), &vptr)) { - syslog(LOG_DEBUG, - "rssclient: [%ld] %s already in progress.\n", - qrbuf->QRnumber, - qrbuf->QRname); + EVRSSQ_syslog(LOG_DEBUG, + "rssclient: [%ld] %s already in progress.\n", + qrbuf->QRnumber, + qrbuf->QRname); pthread_mutex_unlock(&RSSQueueMutex); return; } @@ -361,10 +452,10 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) return; if (fstat(fd, &statbuf) == -1) { - syslog(LOG_DEBUG, - "ERROR: could not stat configfile '%s' - %s\n", - filename, - strerror(errno)); + EVRSSQ_syslog(LOG_DEBUG, + "ERROR: could not stat configfile '%s' - %s\n", + filename, + strerror(errno)); return; } @@ -376,8 +467,8 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) if (StrBufReadBLOB(CfgData, &fd, 1, statbuf.st_size, &Err) < 0) { close(fd); FreeStrBuf(&CfgData); - syslog(LOG_DEBUG, "ERROR: reading config '%s' - %s
\n", - filename, strerror(errno)); + EVRSSQ_syslog(LOG_ERR, "ERROR: reading config '%s' - %s
\n", + filename, strerror(errno)); return; } close(fd); @@ -408,6 +499,7 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) sizeof(rss_aggregator)); memset (RSSAggr, 0, sizeof(rss_aggregator)); + RSSAggr->QRnumber = qrbuf->QRnumber; RSSAggr->roomlist_parts = 1; RSSAggr->Url = NewStrBuf(); @@ -471,8 +563,8 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) { Count->QRnumber = qrbuf->QRnumber; pthread_mutex_lock(&RSSQueueMutex); - syslog(LOG_DEBUG, "rssclient: [%ld] %s now starting.\n", - qrbuf->QRnumber, qrbuf->QRname); + EVRSSQ_syslog(LOG_DEBUG, "client: [%ld] %s now starting.\n", + qrbuf->QRnumber, qrbuf->QRname); Put(RSSQueueRooms, LKEY(qrbuf->QRnumber), Count, NULL); pthread_mutex_unlock(&RSSQueueMutex); } @@ -485,31 +577,43 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) * Scan for rooms that have RSS client requests configured */ void rssclient_scan(void) { - static int doing_rssclient = 0; + int RSSRoomCount, RSSCount; rss_aggregator *rptr = NULL; void *vrptr = NULL; HashPos *it; long len; const char *Key; + time_t now = time(NULL); /* Run no more than once every 15 minutes. */ - if ((time(NULL) - last_run) < 900) { + if ((now - last_run) < 900) { + EVRSSQ_syslog(LOG_DEBUG, + "Client: polling interval not yet reached; last run was %ldm%lds ago", + ((now - last_run) / 60), + ((now - last_run) % 60) + ); return; } /* * This is a simple concurrency check to make sure only one rssclient - * run is done at a time.We could do this with a mutex, but since we - * don't really require extremely fine granularity here, we'll do it - * with a static variable instead. + * run is done at a time. */ - if (doing_rssclient) return; - doing_rssclient = 1; - if ((GetCount(RSSQueueRooms) > 0) || (GetCount(RSSFetchUrls) > 0)) + pthread_mutex_lock(&RSSQueueMutex); + RSSCount = GetCount(RSSFetchUrls); + RSSRoomCount = GetCount(RSSQueueRooms); + pthread_mutex_unlock(&RSSQueueMutex); + + if ((RSSRoomCount > 0) || (RSSCount > 0)) { + EVRSSQ_syslog(LOG_DEBUG, + "rssclient: concurrency check failed; %d rooms and %d url's are queued", + RSSRoomCount, RSSCount + ); return; + } become_session(&rss_CC); - syslog(LOG_DEBUG, "rssclient started\n"); + EVRSSQM_syslog(LOG_DEBUG, "rssclient started\n"); CtdlForEachRoom(rssclient_scan_room, NULL); pthread_mutex_lock(&RSSQueueMutex); @@ -525,8 +629,7 @@ void rssclient_scan(void) { DeleteHashPos(&it); pthread_mutex_unlock(&RSSQueueMutex); - syslog(LOG_DEBUG, "rssclient ended\n"); - doing_rssclient = 0; + EVRSSQM_syslog(LOG_DEBUG, "rssclient ended\n"); return; } @@ -537,6 +640,10 @@ void rss_cleanup(void) DeleteHash(&RSSQueueRooms); } +void LogDebugEnableRSSClient(const int n) +{ + RSSClientDebugEnabled = n; +} CTDL_MODULE_INIT(rssclient) { @@ -547,8 +654,9 @@ CTDL_MODULE_INIT(rssclient) RSSQueueRooms = NewHash(1, lFlathash); RSSFetchUrls = NewHash(1, NULL); syslog(LOG_INFO, "%s\n", curl_version()); - CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER); - CtdlRegisterCleanupHook(rss_cleanup); + CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER, PRIO_AGGR + 300); + CtdlRegisterEVCleanupHook(rss_cleanup); + CtdlRegisterDebugFlagHook(HKEY("rssclient"), LogDebugEnableRSSClient, &RSSClientDebugEnabled); } return "rssclient"; }