X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Fserv_rssclient.c;h=82e206792bb77ea32d404e609769be0911341929;hb=222d2f63b0f523ce6b79e4b2ca9767f80e0bf85e;hp=8a3ba63549a5aadacfa2bfd88d0cc25138541a01;hpb=cdc34fa661ed54fdcebf86521496220cc28f4943;p=citadel.git diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index 8a3ba6354..82e206792 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -5,17 +5,11 @@ * * This program is open source software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 3. - * - * * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the * GNU General Public License for more details. - * - * - * - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA02111-1307USA */ #include @@ -69,11 +63,35 @@ HashList *RSSQueueRooms = NULL; /* rss_room_counter */ HashList *RSSFetchUrls = NULL; /*->rss_aggregator;->RefCount access locked*/ eNextState RSSAggregator_Terminate(AsyncIO *IO); +eNextState RSSAggregator_TerminateDB(AsyncIO *IO); eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO); struct CitContext rss_CC; struct rssnetcfg *rnclist = NULL; +int RSSClientDebugEnabled = 0; +#define N ((rss_aggregator*)IO->Data)->QRnumber + +#define DBGLOG(LEVEL) if ((LEVEL != LOG_DEBUG) || (RSSClientDebugEnabled != 0)) + +#define EVRSSC_syslog(LEVEL, FORMAT, ...) \ + DBGLOG(LEVEL) syslog(LEVEL, \ + "IO[%ld]CC[%d][%ld]RSS" FORMAT, \ + IO->ID, CCID, N, __VA_ARGS__) +#define EVRSSCM_syslog(LEVEL, FORMAT) \ + DBGLOG(LEVEL) syslog(LEVEL, \ + "IO[%ld]CC[%d][%ld]RSS" FORMAT, \ + IO->ID, CCID, N) + +#define EVRSSQ_syslog(LEVEL, FORMAT, ...) \ + DBGLOG(LEVEL) syslog(LEVEL, "RSS" FORMAT, \ + __VA_ARGS__) +#define EVRSSQM_syslog(LEVEL, FORMAT) \ + DBGLOG(LEVEL) syslog(LEVEL, "RSS" FORMAT) + +#define EVRSSCSM_syslog(LEVEL, FORMAT) \ + DBGLOG(LEVEL) syslog(LEVEL, "IO[%ld][%ld]RSS" FORMAT, \ + IO->ID, N) void DeleteRoomReference(long QRnumber) { @@ -99,19 +117,19 @@ void DeleteRoomReference(long QRnumber) DeleteHashPos(&At); } -void UnlinkRooms(rss_aggregator *Cfg) +void UnlinkRooms(rss_aggregator *RSSAggr) { - DeleteRoomReference(Cfg->QRnumber); - if (Cfg->OtherQRnumbers != NULL) + DeleteRoomReference(RSSAggr->QRnumber); + if (RSSAggr->OtherQRnumbers != NULL) { long HKLen; const char *HK; HashPos *At; void *vData; - At = GetNewHashPos(Cfg->OtherQRnumbers, 0); + At = GetNewHashPos(RSSAggr->OtherQRnumbers, 0); while (! server_shutting_down && - GetNextHashPos(Cfg->OtherQRnumbers, + GetNextHashPos(RSSAggr->OtherQRnumbers, At, &HKLen, &HK, &vData) && @@ -125,26 +143,28 @@ void UnlinkRooms(rss_aggregator *Cfg) } } -void UnlinkRSSAggregator(rss_aggregator *Cfg) +void UnlinkRSSAggregator(rss_aggregator *RSSAggr) { HashPos *At; - UnlinkRooms(Cfg); + pthread_mutex_lock(&RSSQueueMutex); + UnlinkRooms(RSSAggr); At = GetNewHashPos(RSSFetchUrls, 0); - if (GetHashPosFromKey(RSSFetchUrls, SKEY(Cfg->Url), At)) + if (GetHashPosFromKey(RSSFetchUrls, SKEY(RSSAggr->Url), At)) { DeleteEntryFromHash(RSSFetchUrls, At); } DeleteHashPos(&At); last_run = time(NULL); + pthread_mutex_unlock(&RSSQueueMutex); } void DeleteRssCfg(void *vptr) { rss_aggregator *RSSAggr = (rss_aggregator *)vptr; AsyncIO *IO = &RSSAggr->IO; - EVM_syslog(LOG_DEBUG, "RSS: destroying\n"); + EVRSSCM_syslog(LOG_DEBUG, "RSS: destroying\n"); FreeStrBuf(&RSSAggr->Url); FreeStrBuf(&RSSAggr->rooms); @@ -166,6 +186,7 @@ void DeleteRssCfg(void *vptr) } FreeAsyncIOContents(&RSSAggr->IO); + memset(RSSAggr, 0, sizeof(rss_aggregator)); free(RSSAggr); } @@ -173,9 +194,21 @@ eNextState RSSAggregator_Terminate(AsyncIO *IO) { rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; - EVM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); + EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); + + StopCurlWatchers(IO); + UnlinkRSSAggregator(RSSAggr); + return eAbort; +} + +eNextState RSSAggregator_TerminateDB(AsyncIO *IO) +{ + rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; + + EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); + StopDBWatchers(&RSSAggr->IO); UnlinkRSSAggregator(RSSAggr); return eAbort; } @@ -189,17 +222,137 @@ eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO) if (pUrl == NULL) pUrl = ""; - EV_syslog(LOG_DEBUG, "RSS: Aborting by shutdown: %s.\n", pUrl); - + EVRSSC_syslog(LOG_DEBUG, "RSS: Aborting by shutdown: %s.\n", pUrl); + StopCurlWatchers(IO); UnlinkRSSAggregator(RSSAggr); return eAbort; } +void AppendLink(StrBuf *Message, + StrBuf *link, + StrBuf *LinkTitle, + const char *Title) +{ + if (StrLength(link) > 0) + { + StrBufAppendBufPlain(Message, HKEY(""), 0); + if (StrLength(LinkTitle) > 0) + StrBufAppendBuf(Message, LinkTitle, 0); + else if ((Title != NULL) && !IsEmptyStr(Title)) + StrBufAppendBufPlain(Message, Title, -1, 0); + else + StrBufAppendBuf(Message, link, 0); + StrBufAppendBufPlain(Message, HKEY("
\n"), 0); + } +} + -eNextState AbortNetworkSaveMessage (AsyncIO *IO) +void rss_format_item(networker_save_message *SaveMsg) { - return eAbort; ///TODO + StrBuf *Message; + int msglen = 0; + + if (SaveMsg->author_or_creator != NULL) { + + char *From; + StrBuf *Encoded = NULL; + int FromAt; + + From = html_to_ascii(ChrPtr(SaveMsg->author_or_creator), + StrLength(SaveMsg->author_or_creator), + 512, 0); + StrBufPlain(SaveMsg->author_or_creator, From, -1); + StrBufTrim(SaveMsg->author_or_creator); + free(From); + + FromAt = strchr(ChrPtr(SaveMsg->author_or_creator), '@') != NULL; + if (!FromAt && StrLength (SaveMsg->author_email) > 0) + { + StrBufRFC2047encode(&Encoded, SaveMsg->author_or_creator); + SaveMsg->Msg.cm_fields['A'] = SmashStrBuf(&Encoded); + SaveMsg->Msg.cm_fields['P'] = + SmashStrBuf(&SaveMsg->author_email); + } + else + { + if (FromAt) + { + SaveMsg->Msg.cm_fields['A'] = + SmashStrBuf(&SaveMsg->author_or_creator); + SaveMsg->Msg.cm_fields['P'] = + strdup(SaveMsg->Msg.cm_fields['A']); + } + else + { + StrBufRFC2047encode(&Encoded, + SaveMsg->author_or_creator); + SaveMsg->Msg.cm_fields['A'] = + SmashStrBuf(&Encoded); + SaveMsg->Msg.cm_fields['P'] = + strdup("rss@localhost"); + + } + } + } + else { + SaveMsg->Msg.cm_fields['A'] = strdup("rss"); + } + + SaveMsg->Msg.cm_fields['N'] = strdup(NODENAME); + if (SaveMsg->title != NULL) { + long len; + char *Sbj; + StrBuf *Encoded, *QPEncoded; + + QPEncoded = NULL; + StrBufSpaceToBlank(SaveMsg->title); + len = StrLength(SaveMsg->title); + Sbj = html_to_ascii(ChrPtr(SaveMsg->title), len, 512, 0); + len = strlen(Sbj); + if ((len > 0) && (Sbj[len - 1] == '\n')) + { + len --; + Sbj[len] = '\0'; + } + Encoded = NewStrBufPlain(Sbj, len); + free(Sbj); + + StrBufTrim(Encoded); + StrBufRFC2047encode(&QPEncoded, Encoded); + + SaveMsg->Msg.cm_fields['U'] = SmashStrBuf(&QPEncoded); + FreeStrBuf(&Encoded); + } + if (SaveMsg->link == NULL) + SaveMsg->link = NewStrBufPlain(HKEY("")); + +#if 0 /* temporarily disable shorter urls. */ + SaveMsg->Msg.cm_fields[TMP_SHORTER_URLS] = + GetShorterUrls(SaveMsg->description); +#endif + + msglen += 1024 + StrLength(SaveMsg->link) + StrLength(SaveMsg->description) ; + + Message = NewStrBufPlain(NULL, msglen); + + StrBufPlain(Message, HKEY( + "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n" + "\n")); +#if 0 /* disable shorter url for now. */ + SaveMsg->Msg.cm_fields[TMP_SHORTER_URL_OFFSET] = StrLength(Message); +#endif + StrBufAppendBuf(Message, SaveMsg->description, 0); + StrBufAppendBufPlain(Message, HKEY("

\n"), 0); + + AppendLink(Message, SaveMsg->link, SaveMsg->linkTitle, NULL); + AppendLink(Message, SaveMsg->reLink, SaveMsg->reLinkTitle, "Reply to this"); + StrBufAppendBufPlain(Message, HKEY("\n"), 0); + + + SaveMsg->Message = Message; } eNextState RSSSaveMessage(AsyncIO *IO) @@ -208,6 +361,8 @@ eNextState RSSSaveMessage(AsyncIO *IO) const char *Key; rss_aggregator *RSSAggr = (rss_aggregator *) IO->Data; + rss_format_item(RSSAggr->ThisMsg); + RSSAggr->ThisMsg->Msg.cm_fields['M'] = SmashStrBuf(&RSSAggr->ThisMsg->Message); @@ -244,7 +399,7 @@ eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO) #ifndef DEBUG_RSS if (cdbut != NULL) { /* Item has already been seen */ - EV_syslog(LOG_DEBUG, + EVRSSC_syslog(LOG_DEBUG, "%s has already been seen\n", ChrPtr(Ctx->ThisMsg->MsgGUID)); cdb_free(cdbut); @@ -272,43 +427,129 @@ eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO) } } +eNextState RSSAggregator_AnalyseReply(AsyncIO *IO) +{ + struct UseTable ut; + u_char rawdigest[MD5_DIGEST_LEN]; + struct MD5Context md5context; + StrBuf *guid; + struct cdbdata *cdbut; + rss_aggregator *Ctx = (rss_aggregator *) IO->Data; + + if (IO->HttpReq.httpcode != 200) + { + StrBuf *ErrMsg; + long lens[2]; + const char *strs[2]; + + ErrMsg = NewStrBuf(); + EVRSSC_syslog(LOG_ALERT, "need a 200, got a %ld !\n", + IO->HttpReq.httpcode); + + strs[0] = ChrPtr(Ctx->Url); + lens[0] = StrLength(Ctx->Url); + + strs[1] = ChrPtr(Ctx->rooms); + lens[1] = StrLength(Ctx->rooms); + StrBufPrintf(ErrMsg, + "Error while RSS-Aggregation Run of %s\n" + " need a 200, got a %ld !\n" + " Response text was: \n" + " \n %s\n", + ChrPtr(Ctx->Url), + IO->HttpReq.httpcode, + ChrPtr(IO->HttpReq.ReplyData)); + CtdlAideFPMessage( + ChrPtr(ErrMsg), + "RSS Aggregation run failure", + 2, strs, (long*) &lens); + FreeStrBuf(&ErrMsg); + return eAbort; + } + + MD5Init(&md5context); + + MD5Update(&md5context, + (const unsigned char*)SKEY(IO->HttpReq.ReplyData)); + + MD5Update(&md5context, + (const unsigned char*)SKEY(Ctx->Url)); + + MD5Final(rawdigest, &md5context); + guid = NewStrBufPlain(NULL, + MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/); + StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN); + StrBufAppendBufPlain(guid, HKEY("_rssFM"), 0); + if (StrLength(guid) > 40) + StrBufCutAt(guid, 40, NULL); + /* Find out if we've already seen this item */ + memcpy(ut.ut_msgid, SKEY(guid)); + ut.ut_timestamp = time(NULL); + + cdbut = cdb_fetch(CDB_USETABLE, SKEY(guid)); +#ifndef DEBUG_RSS + if (cdbut != NULL) { + /* Item has already been seen */ + EVRSSC_syslog(LOG_DEBUG, + "%s has already been seen\n", + ChrPtr(Ctx->Url)); + cdb_free(cdbut); + } + + /* rewrite the record anyway, to update the timestamp */ + cdb_store(CDB_USETABLE, + SKEY(guid), + &ut, sizeof(struct UseTable) ); + FreeStrBuf(&guid); + if (cdbut != NULL) return eAbort; +#endif + return RSSAggregator_ParseReply(IO); +} + +eNextState RSSAggregator_FinishHttp(AsyncIO *IO) +{ + return QueueDBOperation(IO, RSSAggregator_AnalyseReply); +} + /* * Begin a feed parse */ -int rss_do_fetching(rss_aggregator *Cfg) +int rss_do_fetching(rss_aggregator *RSSAggr) { + AsyncIO *IO = &RSSAggr->IO; rss_item *ri; time_t now; now = time(NULL); - if ((Cfg->next_poll != 0) && (now < Cfg->next_poll)) + if ((RSSAggr->next_poll != 0) && (now < RSSAggr->next_poll)) return 0; ri = (rss_item*) malloc(sizeof(rss_item)); memset(ri, 0, sizeof(rss_item)); - Cfg->Item = ri; + RSSAggr->Item = ri; - if (! InitcURLIOStruct(&Cfg->IO, - Cfg, + if (! InitcURLIOStruct(&RSSAggr->IO, + RSSAggr, "Citadel RSS Client", - RSSAggregator_ParseReply, + RSSAggregator_FinishHttp, RSSAggregator_Terminate, + RSSAggregator_TerminateDB, RSSAggregator_ShutdownAbort)) { - syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); + EVRSSCM_syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); return 0; } - safestrncpy(((CitContext*)Cfg->IO.CitContext)->cs_host, - ChrPtr(Cfg->Url), - sizeof(((CitContext*)Cfg->IO.CitContext)->cs_host)); + safestrncpy(((CitContext*)RSSAggr->IO.CitContext)->cs_host, + ChrPtr(RSSAggr->Url), + sizeof(((CitContext*)RSSAggr->IO.CitContext)->cs_host)); - syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(Cfg->Url)); - ParseURL(&Cfg->IO.ConnectMe, Cfg->Url, 80); - CurlPrepareURL(Cfg->IO.ConnectMe); + EVRSSC_syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(RSSAggr->Url)); + ParseURL(&RSSAggr->IO.ConnectMe, RSSAggr->Url, 80); + CurlPrepareURL(RSSAggr->IO.ConnectMe); - QueueCurlContext(&Cfg->IO); + QueueCurlContext(&RSSAggr->IO); return 1; } @@ -334,10 +575,10 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) pthread_mutex_lock(&RSSQueueMutex); if (GetHash(RSSQueueRooms, LKEY(qrbuf->QRnumber), &vptr)) { - syslog(LOG_DEBUG, - "rssclient: [%ld] %s already in progress.\n", - qrbuf->QRnumber, - qrbuf->QRname); + EVRSSQ_syslog(LOG_DEBUG, + "rssclient: [%ld] %s already in progress.\n", + qrbuf->QRnumber, + qrbuf->QRname); pthread_mutex_unlock(&RSSQueueMutex); return; } @@ -361,10 +602,10 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) return; if (fstat(fd, &statbuf) == -1) { - syslog(LOG_DEBUG, - "ERROR: could not stat configfile '%s' - %s\n", - filename, - strerror(errno)); + EVRSSQ_syslog(LOG_DEBUG, + "ERROR: could not stat configfile '%s' - %s\n", + filename, + strerror(errno)); return; } @@ -376,8 +617,8 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) if (StrBufReadBLOB(CfgData, &fd, 1, statbuf.st_size, &Err) < 0) { close(fd); FreeStrBuf(&CfgData); - syslog(LOG_DEBUG, "ERROR: reading config '%s' - %s
\n", - filename, strerror(errno)); + EVRSSQ_syslog(LOG_ERR, "ERROR: reading config '%s' - %s
\n", + filename, strerror(errno)); return; } close(fd); @@ -472,8 +713,8 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) { Count->QRnumber = qrbuf->QRnumber; pthread_mutex_lock(&RSSQueueMutex); - syslog(LOG_DEBUG, "rssclient: [%ld] %s now starting.\n", - qrbuf->QRnumber, qrbuf->QRname); + EVRSSQ_syslog(LOG_DEBUG, "client: [%ld] %s now starting.\n", + qrbuf->QRnumber, qrbuf->QRname); Put(RSSQueueRooms, LKEY(qrbuf->QRnumber), Count, NULL); pthread_mutex_unlock(&RSSQueueMutex); } @@ -486,6 +727,7 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) * Scan for rooms that have RSS client requests configured */ void rssclient_scan(void) { + int RSSRoomCount, RSSCount; rss_aggregator *rptr = NULL; void *vrptr = NULL; HashPos *it; @@ -495,21 +737,33 @@ void rssclient_scan(void) { /* Run no more than once every 15 minutes. */ if ((now - last_run) < 900) { + EVRSSQ_syslog(LOG_DEBUG, + "Client: polling interval not yet reached; last run was %ldm%lds ago", + ((now - last_run) / 60), + ((now - last_run) % 60) + ); return; } /* * This is a simple concurrency check to make sure only one rssclient - * run is done at a time.We could do this with a mutex, but since we - * don't really require extremely fine granularity here, we'll do it - * with a static variable instead. + * run is done at a time. */ + pthread_mutex_lock(&RSSQueueMutex); + RSSCount = GetCount(RSSFetchUrls); + RSSRoomCount = GetCount(RSSQueueRooms); + pthread_mutex_unlock(&RSSQueueMutex); - if ((GetCount(RSSQueueRooms) > 0) || (GetCount(RSSFetchUrls) > 0)) + if ((RSSRoomCount > 0) || (RSSCount > 0)) { + EVRSSQ_syslog(LOG_DEBUG, + "rssclient: concurrency check failed; %d rooms and %d url's are queued", + RSSRoomCount, RSSCount + ); return; + } become_session(&rss_CC); - syslog(LOG_DEBUG, "rssclient started\n"); + EVRSSQM_syslog(LOG_DEBUG, "rssclient started\n"); CtdlForEachRoom(rssclient_scan_room, NULL); pthread_mutex_lock(&RSSQueueMutex); @@ -525,7 +779,7 @@ void rssclient_scan(void) { DeleteHashPos(&it); pthread_mutex_unlock(&RSSQueueMutex); - syslog(LOG_DEBUG, "rssclient ended\n"); + EVRSSQM_syslog(LOG_DEBUG, "rssclient ended\n"); return; } @@ -536,6 +790,10 @@ void rss_cleanup(void) DeleteHash(&RSSQueueRooms); } +void LogDebugEnableRSSClient(const int n) +{ + RSSClientDebugEnabled = n; +} CTDL_MODULE_INIT(rssclient) { @@ -546,8 +804,9 @@ CTDL_MODULE_INIT(rssclient) RSSQueueRooms = NewHash(1, lFlathash); RSSFetchUrls = NewHash(1, NULL); syslog(LOG_INFO, "%s\n", curl_version()); - CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER); + CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER, PRIO_AGGR + 300); CtdlRegisterEVCleanupHook(rss_cleanup); + CtdlRegisterDebugFlagHook(HKEY("rssclient"), LogDebugEnableRSSClient, &RSSClientDebugEnabled); } return "rssclient"; }