X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Fserv_rssclient.c;h=5f1357603df48ffdb838948c6f301a4e5fa308a6;hb=15fc2bf9cd4d2a34fd91aa16c4f632ee46e72dd8;hp=530bed03fe6f689b8bb96eb6af547744f980f33c;hpb=c855d497545dad80942a194624c111a54cd1fdc7;p=citadel.git diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index 530bed03f..5f1357603 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -1,21 +1,15 @@ /* * Bring external RSS feeds into rooms. * - * Copyright (c) 2007-2010 by the citadel.org team + * Copyright (c) 2007-2012 by the citadel.org team * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. + * This program is open source software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include @@ -27,9 +21,9 @@ # include #else # if HAVE_SYS_TIME_H -# include +#include # else -# include +#include # endif #endif @@ -69,31 +63,58 @@ HashList *RSSQueueRooms = NULL; /* rss_room_counter */ HashList *RSSFetchUrls = NULL; /*->rss_aggregator;->RefCount access locked*/ eNextState RSSAggregator_Terminate(AsyncIO *IO); +eNextState RSSAggregator_TerminateDB(AsyncIO *IO); eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO); struct CitContext rss_CC; struct rssnetcfg *rnclist = NULL; -void AppendLink(StrBuf *Message, - StrBuf *link, - StrBuf *LinkTitle, - const char *Title) +int RSSClientDebugEnabled = 0; +#define N ((rss_aggregator*)IO->Data)->Cfg.QRnumber + +#define DBGLOG(LEVEL) if ((LEVEL != LOG_DEBUG) || (RSSClientDebugEnabled != 0)) + +#define EVRSSC_syslog(LEVEL, FORMAT, ...) \ + DBGLOG(LEVEL) syslog(LEVEL, \ + "IO[%ld]CC[%d][%ld]RSS" FORMAT, \ + IO->ID, CCID, N, __VA_ARGS__) + +#define EVRSSCM_syslog(LEVEL, FORMAT) \ + DBGLOG(LEVEL) syslog(LEVEL, \ + "IO[%ld]CC[%d][%ld]RSS" FORMAT, \ + IO->ID, CCID, N) + +#define EVRSSQ_syslog(LEVEL, FORMAT, ...) \ + DBGLOG(LEVEL) syslog(LEVEL, "RSS" FORMAT, \ + __VA_ARGS__) +#define EVRSSQM_syslog(LEVEL, FORMAT) \ + DBGLOG(LEVEL) syslog(LEVEL, "RSS" FORMAT) + +#define EVRSSCSM_syslog(LEVEL, FORMAT) \ + DBGLOG(LEVEL) syslog(LEVEL, "IO[%ld][%ld]RSS" FORMAT, \ + IO->ID, N) + +typedef enum _RSSState { + eRSSCreated, + eRSSFetching, + eRSSFailure, + eRSSParsing, + eRSSUT +} RSSState; +ConstStr RSSStates[] = { + {HKEY("Aggregator created")}, + {HKEY("Fetching content")}, + {HKEY("Failed")}, + {HKEY("parsing content")}, + {HKEY("checking usetable")} +}; + +static void SetRSSState(AsyncIO *IO, RSSState State) { - if (StrLength(link) > 0) - { - StrBufAppendBufPlain(Message, HKEY(""), 0); - if (StrLength(LinkTitle) > 0) - StrBufAppendBuf(Message, LinkTitle, 0); - else if ((Title != NULL) && !IsEmptyStr(Title)) - StrBufAppendBufPlain(Message, Title, -1, 0); - else - StrBufAppendBuf(Message, link, 0); - StrBufAppendBufPlain(Message, HKEY("
\n"), 0); - } + CitContext* CCC = IO->CitContext; + if (CCC != NULL) + memcpy(CCC->cs_clientname, RSSStates[State].Key, RSSStates[State].len + 1); } - void DeleteRoomReference(long QRnumber) { HashPos *At; @@ -118,230 +139,186 @@ void DeleteRoomReference(long QRnumber) DeleteHashPos(&At); } -void UnlinkRooms(rss_aggregator *Cfg) +void UnlinkRooms(rss_aggregator *RSSAggr) { - DeleteRoomReference(Cfg->QRnumber); - if (Cfg->OtherQRnumbers != NULL) + DeleteRoomReference(RSSAggr->Cfg.QRnumber); + if (RSSAggr->OtherQRnumbers != NULL) { long HKLen; const char *HK; HashPos *At; void *vData; - At = GetNewHashPos(Cfg->OtherQRnumbers, 0); + At = GetNewHashPos(RSSAggr->OtherQRnumbers, 0); while (! server_shutting_down && - GetNextHashPos(Cfg->OtherQRnumbers, + GetNextHashPos(RSSAggr->OtherQRnumbers, At, &HKLen, &HK, &vData) && (vData != NULL)) { - long *lData = (long*) vData; - DeleteRoomReference(*lData); + pRSSConfig *Data = (pRSSConfig*) vData; + DeleteRoomReference(Data->QRnumber); } DeleteHashPos(&At); } } -void UnlinkRSSAggregator(rss_aggregator *Cfg) +void UnlinkRSSAggregator(rss_aggregator *RSSAggr) { HashPos *At; - UnlinkRooms(Cfg); + pthread_mutex_lock(&RSSQueueMutex); + UnlinkRooms(RSSAggr); At = GetNewHashPos(RSSFetchUrls, 0); - if (GetHashPosFromKey(RSSFetchUrls, SKEY(Cfg->Url), At)) + if (GetHashPosFromKey(RSSFetchUrls, SKEY(RSSAggr->Url), At)) { DeleteEntryFromHash(RSSFetchUrls, At); } DeleteHashPos(&At); last_run = time(NULL); + pthread_mutex_unlock(&RSSQueueMutex); } -void FreeNetworkSaveMessage (void *vMsg) +void DeleteRssCfg(void *vptr) { - networker_save_message *Msg = (networker_save_message *) vMsg; + rss_aggregator *RSSAggr = (rss_aggregator *)vptr; + AsyncIO *IO = &RSSAggr->IO; + + if (IO->CitContext != NULL) + EVRSSCM_syslog(LOG_DEBUG, "RSS: destroying\n"); + + FreeStrBuf(&RSSAggr->Url); + FreeStrBuf(&RSSAggr->rooms); + FreeStrBuf(&RSSAggr->CData); + FreeStrBuf(&RSSAggr->Key); + DeleteHash(&RSSAggr->OtherQRnumbers); + + DeleteHashPos (&RSSAggr->Pos); + DeleteHash (&RSSAggr->Messages); + if (RSSAggr->recp.recp_room != NULL) + free(RSSAggr->recp.recp_room); + + + if (RSSAggr->Item != NULL) + { + flush_rss_item(RSSAggr->Item); + + free(RSSAggr->Item); + } - CtdlFreeMessageContents(&Msg->Msg); - FreeStrBuf(&Msg->Message); - FreeStrBuf(&Msg->MsgGUID); - free(Msg); + FreeAsyncIOContents(&RSSAggr->IO); + memset(RSSAggr, 0, sizeof(rss_aggregator)); + free(RSSAggr); } -eNextState AbortNetworkSaveMessage (AsyncIO *IO) +eNextState RSSAggregator_Terminate(AsyncIO *IO) { - return eAbort; ///TODO + rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; + + EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); + + StopCurlWatchers(IO); + UnlinkRSSAggregator(RSSAggr); + return eAbort; } -eNextState RSSSaveMessage(AsyncIO *IO) +eNextState RSSAggregator_TerminateDB(AsyncIO *IO) { - long len; - const char *Key; - rss_aggregator *Ctx = (rss_aggregator *) IO->Data; + rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; - Ctx->ThisMsg->Msg.cm_fields['M'] = SmashStrBuf(&Ctx->ThisMsg->Message); + EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); - CtdlSubmitMsg(&Ctx->ThisMsg->Msg, &Ctx->recp, NULL, 0); - - /* write the uidl to the use table so we don't store this item again */ - cdb_store(CDB_USETABLE, - SKEY(Ctx->ThisMsg->MsgGUID), - &Ctx->ThisMsg->ut, - sizeof(struct UseTable) ); - if (GetNextHashPos(Ctx->Messages, - Ctx->Pos, - &len, &Key, - (void**) &Ctx->ThisMsg)) - return NextDBOperation(IO, RSS_FetchNetworkUsetableEntry); - else - return eAbort; + StopDBWatchers(&RSSAggr->IO); + UnlinkRSSAggregator(RSSAggr); + return eAbort; } -eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO) +eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO) { - const char *Key; - long len; - struct cdbdata *cdbut; - rss_aggregator *Ctx = (rss_aggregator *) IO->Data; + const char *pUrl; + rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; - /* Find out if we've already seen this item */ - strcpy(Ctx->ThisMsg->ut.ut_msgid, - ChrPtr(Ctx->ThisMsg->MsgGUID)); /// TODO - Ctx->ThisMsg->ut.ut_timestamp = time(NULL); + pUrl = IO->ConnectMe->PlainUrl; + if (pUrl == NULL) + pUrl = ""; - cdbut = cdb_fetch(CDB_USETABLE, SKEY(Ctx->ThisMsg->MsgGUID)); -#ifndef DEBUG_RSS - if (cdbut != NULL) { - /* Item has already been seen */ - EV_syslog(LOG_DEBUG, - "%s has already been seen\n", - ChrPtr(Ctx->ThisMsg->MsgGUID)); - cdb_free(cdbut); + EVRSSC_syslog(LOG_DEBUG, "RSS: Aborting by shutdown: %s.\n", pUrl); - /* rewrite the record anyway, to update the timestamp */ - cdb_store(CDB_USETABLE, - SKEY(Ctx->ThisMsg->MsgGUID), - &Ctx->ThisMsg->ut, sizeof(struct UseTable) ); + StopCurlWatchers(IO); + UnlinkRSSAggregator(RSSAggr); + return eAbort; +} - if (GetNextHashPos(Ctx->Messages, - Ctx->Pos, - &len, &Key, - (void**) &Ctx->ThisMsg)) - return NextDBOperation( - IO, - RSS_FetchNetworkUsetableEntry); - else - return eAbort; - } - else -#endif +void AppendLink(StrBuf *Message, + StrBuf *link, + StrBuf *LinkTitle, + const char *Title) +{ + if (StrLength(link) > 0) { - NextDBOperation(IO, RSSSaveMessage); - return eSendMore; + StrBufAppendBufPlain(Message, HKEY(""), 0); + if (StrLength(LinkTitle) > 0) + StrBufAppendBuf(Message, LinkTitle, 0); + else if ((Title != NULL) && !IsEmptyStr(Title)) + StrBufAppendBufPlain(Message, Title, -1, 0); + else + StrBufAppendBuf(Message, link, 0); + StrBufAppendBufPlain(Message, HKEY("
\n"), 0); } } -/* - * Commit a fetched and parsed RSS item to disk - */ -void rss_save_item(rss_item *ri, rss_aggregator *Cfg) + +void rss_format_item(networker_save_message *SaveMsg) { - networker_save_message *SaveMsg; - struct MD5Context md5context; - u_char rawdigest[MD5_DIGEST_LEN]; - int msglen = 0; StrBuf *Message; - StrBuf *guid; - AsyncIO *IO = &Cfg->IO; - int n; - - - SaveMsg = (networker_save_message *) malloc( - sizeof(networker_save_message)); - memset(SaveMsg, 0, sizeof(networker_save_message)); - - /* Construct a GUID to use in the S_USETABLE table. - * If one is not present in the item itself, make one up. - */ - if (ri->guid != NULL) { - StrBufSpaceToBlank(ri->guid); - StrBufTrim(ri->guid); - guid = NewStrBufPlain(HKEY("rss/")); - StrBufAppendBuf(guid, ri->guid, 0); - } - else { - MD5Init(&md5context); - if (ri->title != NULL) { - MD5Update(&md5context, - (const unsigned char*)SKEY(ri->title)); - } - if (ri->link != NULL) { - MD5Update(&md5context, - (const unsigned char*)SKEY(ri->link)); - } - MD5Final(rawdigest, &md5context); - guid = NewStrBufPlain(NULL, - MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/); - StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN); - StrBufAppendBufPlain(guid, HKEY("_rss2ctdl"), 0); - } - - /* translate Item into message. */ - EVM_syslog(LOG_DEBUG, "RSS: translating item...\n"); - if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY("")); - StrBufSpaceToBlank(ri->description); - SaveMsg->Msg.cm_magic = CTDLMESSAGE_MAGIC; - SaveMsg->Msg.cm_anon_type = MES_NORMAL; - SaveMsg->Msg.cm_format_type = FMT_RFC822; + int msglen = 0; - if (ri->guid != NULL) { - SaveMsg->Msg.cm_fields['E'] = strdup(ChrPtr(ri->guid)); - } + if (SaveMsg->author_or_creator != NULL) { - if (ri->author_or_creator != NULL) { char *From; StrBuf *Encoded = NULL; int FromAt; - From = html_to_ascii(ChrPtr(ri->author_or_creator), - StrLength(ri->author_or_creator), + From = html_to_ascii(ChrPtr(SaveMsg->author_or_creator), + StrLength(SaveMsg->author_or_creator), 512, 0); - StrBufPlain(ri->author_or_creator, From, -1); - StrBufTrim(ri->author_or_creator); + StrBufPlain(SaveMsg->author_or_creator, From, -1); + StrBufTrim(SaveMsg->author_or_creator); free(From); - FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL; - if (!FromAt && StrLength (ri->author_email) > 0) + FromAt = strchr(ChrPtr(SaveMsg->author_or_creator), '@') != NULL; + if (!FromAt && StrLength (SaveMsg->author_email) > 0) { - StrBufRFC2047encode(&Encoded, ri->author_or_creator); + StrBufRFC2047encode(&Encoded, SaveMsg->author_or_creator); SaveMsg->Msg.cm_fields['A'] = SmashStrBuf(&Encoded); SaveMsg->Msg.cm_fields['P'] = - SmashStrBuf(&ri->author_email); + SmashStrBuf(&SaveMsg->author_email); } else { if (FromAt) { SaveMsg->Msg.cm_fields['A'] = - SmashStrBuf(&ri->author_or_creator); + SmashStrBuf(&SaveMsg->author_or_creator); SaveMsg->Msg.cm_fields['P'] = strdup(SaveMsg->Msg.cm_fields['A']); } else { StrBufRFC2047encode(&Encoded, - ri->author_or_creator); + SaveMsg->author_or_creator); SaveMsg->Msg.cm_fields['A'] = SmashStrBuf(&Encoded); SaveMsg->Msg.cm_fields['P'] = strdup("rss@localhost"); } - if (ri->pubdate <= 0) { - ri->pubdate = time(NULL); - } } } else { @@ -349,17 +326,17 @@ void rss_save_item(rss_item *ri, rss_aggregator *Cfg) } SaveMsg->Msg.cm_fields['N'] = strdup(NODENAME); - if (ri->title != NULL) { + if (SaveMsg->title != NULL) { long len; char *Sbj; StrBuf *Encoded, *QPEncoded; QPEncoded = NULL; - StrBufSpaceToBlank(ri->title); - len = StrLength(ri->title); - Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0); + StrBufSpaceToBlank(SaveMsg->title); + len = StrLength(SaveMsg->title); + Sbj = html_to_ascii(ChrPtr(SaveMsg->title), len, 512, 0); len = strlen(Sbj); - if (Sbj[len - 1] == '\n') + if ((len > 0) && (Sbj[len - 1] == '\n')) { len --; Sbj[len] = '\0'; @@ -373,25 +350,17 @@ void rss_save_item(rss_item *ri, rss_aggregator *Cfg) SaveMsg->Msg.cm_fields['U'] = SmashStrBuf(&QPEncoded); FreeStrBuf(&Encoded); } - SaveMsg->Msg.cm_fields['T'] = malloc(64); - snprintf(SaveMsg->Msg.cm_fields['T'], 64, "%ld", ri->pubdate); - if (ri->channel_title != NULL) { - if (StrLength(ri->channel_title) > 0) { - SaveMsg->Msg.cm_fields['O'] = - strdup(ChrPtr(ri->channel_title)); - } - } - if (ri->link == NULL) - ri->link = NewStrBufPlain(HKEY("")); + if (SaveMsg->link == NULL) + SaveMsg->link = NewStrBufPlain(HKEY("")); #if 0 /* temporarily disable shorter urls. */ SaveMsg->Msg.cm_fields[TMP_SHORTER_URLS] = - GetShorterUrls(ri->description); + GetShorterUrls(SaveMsg->description); #endif - msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ; + msglen += 1024 + StrLength(SaveMsg->link) + StrLength(SaveMsg->description) ; - Message = NewStrBufPlain(NULL, StrLength(ri->description)); + Message = NewStrBufPlain(NULL, msglen); StrBufPlain(Message, HKEY( "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n" @@ -399,326 +368,432 @@ void rss_save_item(rss_item *ri, rss_aggregator *Cfg) #if 0 /* disable shorter url for now. */ SaveMsg->Msg.cm_fields[TMP_SHORTER_URL_OFFSET] = StrLength(Message); #endif - StrBufAppendBuf(Message, ri->description, 0); + StrBufAppendBuf(Message, SaveMsg->description, 0); StrBufAppendBufPlain(Message, HKEY("

\n"), 0); - AppendLink(Message, ri->link, ri->linkTitle, NULL); - AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this"); + AppendLink(Message, SaveMsg->link, SaveMsg->linkTitle, NULL); + AppendLink(Message, SaveMsg->reLink, SaveMsg->reLinkTitle, "Reply to this"); StrBufAppendBufPlain(Message, HKEY("\n"), 0); - SaveMsg->MsgGUID = guid; + SaveMsg->Message = Message; +} + +eNextState RSSSaveMessage(AsyncIO *IO) +{ + long len; + const char *Key; + rss_aggregator *RSSAggr = (rss_aggregator *) IO->Data; - n = GetCount(Cfg->Messages) + 1; - Put(Cfg->Messages, IKEY(n), SaveMsg, FreeNetworkSaveMessage); + rss_format_item(RSSAggr->ThisMsg); + + RSSAggr->ThisMsg->Msg.cm_fields['M'] = + SmashStrBuf(&RSSAggr->ThisMsg->Message); + + CtdlSubmitMsg(&RSSAggr->ThisMsg->Msg, &RSSAggr->recp, NULL, 0); + + /* write the uidl to the use table so we don't store this item again */ + + CheckIfAlreadySeen("RSS Item Insert", RSSAggr->ThisMsg->MsgGUID, IO->Now, 0, eWrite, IO->ID, CCID); + + if (GetNextHashPos(RSSAggr->Messages, + RSSAggr->Pos, + &len, &Key, + (void**) &RSSAggr->ThisMsg)) + return NextDBOperation(IO, RSS_FetchNetworkUsetableEntry); + else + return eAbort; } +eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO) +{ + const char *Key; + long len; + rss_aggregator *Ctx = (rss_aggregator *) IO->Data; + /* Find out if we've already seen this item */ +// todo: expiry? +#ifndef DEBUG_RSS + SetRSSState(IO, eRSSUT); + if (CheckIfAlreadySeen("RSS Item Seen", + Ctx->ThisMsg->MsgGUID, + IO->Now, + IO->Now - USETABLE_ANTIEXPIRE, + eCheckUpdate, + IO->ID, CCID) + != 0) + { + /* Item has already been seen */ + EVRSSC_syslog(LOG_DEBUG, + "%s has already been seen\n", + ChrPtr(Ctx->ThisMsg->MsgGUID)); + SetRSSState(IO, eRSSParsing); -/* - * Begin a feed parse - */ -int rss_do_fetching(rss_aggregator *Cfg) + if (GetNextHashPos(Ctx->Messages, + Ctx->Pos, + &len, &Key, + (void**) &Ctx->ThisMsg)) + return NextDBOperation( + IO, + RSS_FetchNetworkUsetableEntry); + else + return eAbort; + } + else +#endif + { + SetRSSState(IO, eRSSParsing); + + NextDBOperation(IO, RSSSaveMessage); + return eSendMore; + } + return eSendMore; +} + +void UpdateLastKnownGood(pRSSConfig *pCfg, time_t now) { - rss_item *ri; - time_t now; + OneRoomNetCfg* pRNCfg; + begin_critical_section(S_NETCONFIGS); + pRNCfg = CtdlGetNetCfgForRoom (pCfg->QRnumber); + if (pRNCfg != NULL) + { + RSSCfgLine *RSSCfg = (RSSCfgLine *)pRNCfg->NetConfigs[rssclient]; - now = time(NULL); + while (RSSCfg != NULL) + { + if (RSSCfg == pCfg->pCfg) + break; - if ((Cfg->next_poll != 0) && (now < Cfg->next_poll)) - return 0; + RSSCfg = RSSCfg->next; + } + if (RSSCfg != NULL) + { + pRNCfg->changed = 1; + RSSCfg->last_known_good = now; + } + } - ri = (rss_item*) malloc(sizeof(rss_item)); - memset(ri, 0, sizeof(rss_item)); - Cfg->Item = ri; + end_critical_section(S_NETCONFIGS); +} - if (! InitcURLIOStruct(&Cfg->IO, - Cfg, - "Citadel RSS Client", - RSSAggregator_ParseReply, - RSSAggregator_Terminate, - RSSAggregator_ShutdownAbort)) +eNextState RSSAggregator_AnalyseReply(AsyncIO *IO) +{ + HashPos *it = NULL; + long len; + const char *Key; + pRSSConfig *pCfg; + u_char rawdigest[MD5_DIGEST_LEN]; + struct MD5Context md5context; + StrBuf *guid; + rss_aggregator *Ctx = (rss_aggregator *) IO->Data; + + if (IO->HttpReq.httpcode != 200) { - syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); - return 0; + StrBuf *ErrMsg; + long lens[2]; + const char *strs[2]; + + SetRSSState(IO, eRSSFailure); + ErrMsg = NewStrBuf(); + EVRSSC_syslog(LOG_ALERT, "need a 200, got a %ld !\n", + IO->HttpReq.httpcode); + + strs[0] = ChrPtr(Ctx->Url); + lens[0] = StrLength(Ctx->Url); + + strs[1] = ChrPtr(Ctx->rooms); + lens[1] = StrLength(Ctx->rooms); + StrBufPrintf(ErrMsg, + "Error while RSS-Aggregation Run of %s\n" + " need a 200, got a %ld !\n" + " Response text was: \n" + " \n %s\n", + ChrPtr(Ctx->Url), + IO->HttpReq.httpcode, + ChrPtr(IO->HttpReq.ReplyData)); + CtdlAideFPMessage( + ChrPtr(ErrMsg), + "RSS Aggregation run failure", + 2, strs, (long*) &lens, + IO->Now, + IO->ID, CCID); + + FreeStrBuf(&ErrMsg); + EVRSSC_syslog(LOG_DEBUG, + "RSS feed returned an invalid http status code. <%s>\n", + ChrPtr(Ctx->Url), + IO->HttpReq.httpcode); + return eAbort; } - safestrncpy(((CitContext*)Cfg->IO.CitContext)->cs_host, - ChrPtr(Cfg->Url), - sizeof(((CitContext*)Cfg->IO.CitContext)->cs_host)); + pCfg = &Ctx->Cfg; - syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(Cfg->Url)); - ParseURL(&Cfg->IO.ConnectMe, Cfg->Url, 80); - CurlPrepareURL(Cfg->IO.ConnectMe); + while (pCfg != NULL) + { + UpdateLastKnownGood (pCfg, IO->Now); + if ((Ctx->roomlist_parts > 1) && + (it == NULL)) + { + it = GetNewHashPos(RSSFetchUrls, 0); + } + if (it != NULL) + { + void *vptr; + if (GetNextHashPos(Ctx->OtherQRnumbers, it, &len, &Key, &vptr)) + pCfg = vptr; + else + pCfg = NULL; + } + else + pCfg = NULL; + } + DeleteHashPos (&it); - QueueCurlContext(&Cfg->IO); - return 1; -} + SetRSSState(IO, eRSSUT); + MD5Init(&md5context); -void DeleteRssCfg(void *vptr) -{ - rss_aggregator *rncptr = (rss_aggregator *)vptr; - AsyncIO *IO = &rncptr->IO; - EVM_syslog(LOG_DEBUG, "RSS: destroying\n"); + MD5Update(&md5context, + (const unsigned char*)SKEY(IO->HttpReq.ReplyData)); - FreeStrBuf(&rncptr->Url); - FreeStrBuf(&rncptr->rooms); - FreeStrBuf(&rncptr->CData); - FreeStrBuf(&rncptr->Key); - FreeStrBuf(&rncptr->IO.HttpReq.ReplyData); - DeleteHash(&rncptr->OtherQRnumbers); - FreeURL(&rncptr->IO.ConnectMe); + MD5Update(&md5context, + (const unsigned char*)SKEY(Ctx->Url)); - DeleteHashPos (&rncptr->Pos); - DeleteHash (&rncptr->Messages); - if (rncptr->recp.recp_room != NULL) - free(rncptr->recp.recp_room); + MD5Final(rawdigest, &md5context); + guid = NewStrBufPlain(NULL, + MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/); + StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN); + StrBufAppendBufPlain(guid, HKEY("_rssFM"), 0); + if (StrLength(guid) > 40) + StrBufCutAt(guid, 40, NULL); + /* Find out if we've already seen this item */ +#ifndef DEBUG_RSS - if (rncptr->Item != NULL) + if (CheckIfAlreadySeen("RSS Whole", + guid, + IO->Now, + IO->Now - USETABLE_ANTIEXPIRE, + eCheckUpdate, + IO->ID, CCID) + != 0) { - FreeStrBuf(&rncptr->Item->guid); - FreeStrBuf(&rncptr->Item->title); - FreeStrBuf(&rncptr->Item->link); - FreeStrBuf(&rncptr->Item->linkTitle); - FreeStrBuf(&rncptr->Item->reLink); - FreeStrBuf(&rncptr->Item->reLinkTitle); - FreeStrBuf(&rncptr->Item->description); - FreeStrBuf(&rncptr->Item->channel_title); - FreeStrBuf(&rncptr->Item->author_or_creator); - FreeStrBuf(&rncptr->Item->author_url); - FreeStrBuf(&rncptr->Item->author_email); - - free(rncptr->Item); + FreeStrBuf(&guid); + + EVRSSC_syslog(LOG_DEBUG, "RSS feed already seen. <%s>\n", ChrPtr(Ctx->Url)); + return eAbort; } - free(rncptr); + FreeStrBuf(&guid); +#endif + SetRSSState(IO, eRSSParsing); + return RSSAggregator_ParseReply(IO); } -eNextState RSSAggregator_Terminate(AsyncIO *IO) +eNextState RSSAggregator_FinishHttp(AsyncIO *IO) { - rss_aggregator *rncptr = (rss_aggregator *)IO->Data; + StopCurlWatchers(IO); + return CurlQueueDBOperation(IO, RSSAggregator_AnalyseReply); +} - EVM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); +/* + * Begin a feed parse + */ +int rss_do_fetching(rss_aggregator *RSSAggr) +{ + AsyncIO *IO = &RSSAggr->IO; + rss_item *ri; + time_t now; + now = time(NULL); - UnlinkRSSAggregator(rncptr); - return eAbort; -} -eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO) -{ - const char *pUrl; - rss_aggregator *rncptr = (rss_aggregator *)IO->Data; + if ((RSSAggr->next_poll != 0) && (now < RSSAggr->next_poll)) + return 0; - pUrl = IO->ConnectMe->PlainUrl; - if (pUrl == NULL) - pUrl = ""; + ri = (rss_item*) malloc(sizeof(rss_item)); + memset(ri, 0, sizeof(rss_item)); + RSSAggr->Item = ri; - EV_syslog(LOG_DEBUG, "RSS: Aborting by shutdown: %s.\n", pUrl); + if (! InitcURLIOStruct(&RSSAggr->IO, + RSSAggr, + "Citadel RSS Client", + RSSAggregator_FinishHttp, + RSSAggregator_Terminate, + RSSAggregator_TerminateDB, + RSSAggregator_ShutdownAbort)) + { + EVRSSCM_syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); + return 0; + } + SetRSSState(IO, eRSSCreated); + safestrncpy(((CitContext*)RSSAggr->IO.CitContext)->cs_host, + ChrPtr(RSSAggr->Url), + sizeof(((CitContext*)RSSAggr->IO.CitContext)->cs_host)); - UnlinkRSSAggregator(rncptr); - return eAbort; + EVRSSC_syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(RSSAggr->Url)); + ParseURL(&RSSAggr->IO.ConnectMe, RSSAggr->Url, 80); + CurlPrepareURL(RSSAggr->IO.ConnectMe); + + SetRSSState(IO, eRSSFetching); + QueueCurlContext(&RSSAggr->IO); + return 1; } /* * Scan a room's netconfig to determine whether it is requesting any RSS feeds */ -void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) +void rssclient_scan_room(struct ctdlroom *qrbuf, void *data, OneRoomNetCfg *OneRNCFG) { - StrBuf *CfgData=NULL; - StrBuf *CfgType; - StrBuf *Line; - rss_room_counter *Count = NULL; - struct stat statbuf; - char filename[PATH_MAX]; - int fd; - int Done; - rss_aggregator *rncptr = NULL; - rss_aggregator *use_this_rncptr = NULL; + const RSSCfgLine *RSSCfg = (RSSCfgLine *)OneRNCFG->NetConfigs[rssclient]; + rss_aggregator *RSSAggr = NULL; + rss_aggregator *use_this_RSSAggr = NULL; void *vptr; - const char *CfgPtr, *lPtr; - const char *Err; pthread_mutex_lock(&RSSQueueMutex); if (GetHash(RSSQueueRooms, LKEY(qrbuf->QRnumber), &vptr)) { - syslog(LOG_DEBUG, - "rssclient: [%ld] %s already in progress.\n", - qrbuf->QRnumber, - qrbuf->QRname); + EVRSSQ_syslog(LOG_DEBUG, + "rssclient: [%ld] %s already in progress.\n", + qrbuf->QRnumber, + qrbuf->QRname); pthread_mutex_unlock(&RSSQueueMutex); return; } pthread_mutex_unlock(&RSSQueueMutex); - assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir); + if (server_shutting_down) return; - if (server_shutting_down) - return; + while (RSSCfg != NULL) + { + pthread_mutex_lock(&RSSQueueMutex); + GetHash(RSSFetchUrls, + SKEY(RSSCfg->Url), + &vptr); - /* Only do net processing for rooms that have netconfigs */ - fd = open(filename, 0); - if (fd <= 0) { - /* syslog(LOG_DEBUG, - "rssclient: %s no config.\n", - qrbuf->QRname); */ - return; - } + use_this_RSSAggr = (rss_aggregator *)vptr; + if (use_this_RSSAggr != NULL) + { + pRSSConfig *pRSSCfg; - if (server_shutting_down) - return; + StrBufAppendBufPlain( + use_this_RSSAggr->rooms, + qrbuf->QRname, + -1, 0); + if (use_this_RSSAggr->roomlist_parts==1) + { + use_this_RSSAggr->OtherQRnumbers + = NewHash(1, lFlathash); + } - if (fstat(fd, &statbuf) == -1) { - syslog(LOG_DEBUG, - "ERROR: could not stat configfile '%s' - %s\n", - filename, - strerror(errno)); - return; - } + pRSSCfg = (pRSSConfig *) malloc(sizeof(pRSSConfig)); - if (server_shutting_down) - return; + pRSSCfg->QRnumber = qrbuf->QRnumber; + pRSSCfg->pCfg = RSSCfg; - CfgData = NewStrBufPlain(NULL, statbuf.st_size + 1); + Put(use_this_RSSAggr->OtherQRnumbers, + LKEY(qrbuf->QRnumber), + pRSSCfg, + NULL); + use_this_RSSAggr->roomlist_parts++; - if (StrBufReadBLOB(CfgData, &fd, 1, statbuf.st_size, &Err) < 0) { - close(fd); - FreeStrBuf(&CfgData); - syslog(LOG_DEBUG, "ERROR: reading config '%s' - %s
\n", - filename, strerror(errno)); - return; - } - close(fd); - if (server_shutting_down) - return; + pthread_mutex_unlock(&RSSQueueMutex); - CfgPtr = NULL; - CfgType = NewStrBuf(); - Line = NewStrBufPlain(NULL, StrLength(CfgData)); - Done = 0; - while (!Done) - { - Done = StrBufSipLine(Line, CfgData, &CfgPtr) == 0; - if (StrLength(Line) > 0) - { - lPtr = NULL; - StrBufExtract_NextToken(CfgType, Line, &lPtr, '|'); - if (!strcasecmp("rssclient", ChrPtr(CfgType))) - { - if (Count == NULL) - { - Count = malloc(sizeof(rss_room_counter)); - Count->count = 0; - } - Count->count ++; - rncptr = (rss_aggregator *) malloc(sizeof(rss_aggregator)); - memset (rncptr, 0, sizeof(rss_aggregator)); - rncptr->roomlist_parts = 1; - rncptr->Url = NewStrBuf(); - StrBufExtract_NextToken(rncptr->Url, Line, &lPtr, '|'); - - pthread_mutex_lock(&RSSQueueMutex); - GetHash(RSSFetchUrls, SKEY(rncptr->Url), &vptr); - use_this_rncptr = (rss_aggregator *)vptr; - if (use_this_rncptr != NULL) - { - long *QRnumber; - StrBufAppendBufPlain(use_this_rncptr->rooms, - qrbuf->QRname, - -1, 0); - if (use_this_rncptr->roomlist_parts == 1) - { - use_this_rncptr->OtherQRnumbers = - NewHash(1, lFlathash); - } - QRnumber = (long*)malloc(sizeof(long)); - *QRnumber = qrbuf->QRnumber; - Put(use_this_rncptr->OtherQRnumbers, - LKEY(qrbuf->QRnumber), - QRnumber, - NULL); - use_this_rncptr->roomlist_parts++; - - pthread_mutex_unlock(&RSSQueueMutex); - - FreeStrBuf(&rncptr->Url); - free(rncptr); - rncptr = NULL; - continue; - } - pthread_mutex_unlock(&RSSQueueMutex); - - rncptr->ItemType = RSS_UNSET; - - rncptr->rooms = NewStrBufPlain(qrbuf->QRname, -1); - - pthread_mutex_lock(&RSSQueueMutex); - Put(RSSFetchUrls, SKEY(rncptr->Url), rncptr, DeleteRssCfg); - pthread_mutex_unlock(&RSSQueueMutex); + RSSCfg = RSSCfg->next; + continue; } - } - } - if (Count != NULL) - { - Count->QRnumber = qrbuf->QRnumber; + pthread_mutex_unlock(&RSSQueueMutex); + + RSSAggr = (rss_aggregator *) malloc( + sizeof(rss_aggregator)); + + memset (RSSAggr, 0, sizeof(rss_aggregator)); + RSSAggr->Cfg.QRnumber = qrbuf->QRnumber; + RSSAggr->Cfg.pCfg = RSSCfg; + RSSAggr->roomlist_parts = 1; + RSSAggr->Url = NewStrBufDup(RSSCfg->Url); + + RSSAggr->ItemType = RSS_UNSET; + + RSSAggr->rooms = NewStrBufPlain( + qrbuf->QRname, -1); + pthread_mutex_lock(&RSSQueueMutex); - syslog(LOG_DEBUG, "rssclient: [%ld] %s now starting.\n", - qrbuf->QRnumber, qrbuf->QRname); - Put(RSSQueueRooms, LKEY(qrbuf->QRnumber), Count, NULL); + + Put(RSSFetchUrls, + SKEY(RSSAggr->Url), + RSSAggr, + DeleteRssCfg); + pthread_mutex_unlock(&RSSQueueMutex); + RSSCfg = RSSCfg->next; } - FreeStrBuf(&CfgData); - FreeStrBuf(&CfgType); - FreeStrBuf(&Line); } /* * Scan for rooms that have RSS client requests configured */ void rssclient_scan(void) { - static int doing_rssclient = 0; + int RSSRoomCount, RSSCount; rss_aggregator *rptr = NULL; void *vrptr = NULL; - HashPos *it; + HashPos *it; long len; const char *Key; + time_t now = time(NULL); /* Run no more than once every 15 minutes. */ - if ((time(NULL) - last_run) < 900) { + if ((now - last_run) < 900) { + EVRSSQ_syslog(LOG_DEBUG, + "Client: polling interval not yet reached; last run was %ldm%lds ago", + ((now - last_run) / 60), + ((now - last_run) % 60) + ); return; } /* * This is a simple concurrency check to make sure only one rssclient - * run is done at a time. We could do this with a mutex, but since we - * don't really require extremely fine granularity here, we'll do it - * with a static variable instead. + * run is done at a time. */ - if (doing_rssclient) return; - doing_rssclient = 1; - if ((GetCount(RSSQueueRooms) > 0) || (GetCount(RSSFetchUrls) > 0)) + pthread_mutex_lock(&RSSQueueMutex); + RSSCount = GetCount(RSSFetchUrls); + RSSRoomCount = GetCount(RSSQueueRooms); + pthread_mutex_unlock(&RSSQueueMutex); + + if ((RSSRoomCount > 0) || (RSSCount > 0)) { + EVRSSQ_syslog(LOG_DEBUG, + "rssclient: concurrency check failed; %d rooms and %d url's are queued", + RSSRoomCount, RSSCount + ); return; + } become_session(&rss_CC); - syslog(LOG_DEBUG, "rssclient started\n"); - CtdlForEachRoom(rssclient_scan_room, NULL); + EVRSSQM_syslog(LOG_DEBUG, "rssclient started"); + CtdlForEachNetCfgRoom(rssclient_scan_room, NULL, rssclient); - pthread_mutex_lock(&RSSQueueMutex); - - it = GetNewHashPos(RSSFetchUrls, 0); - while (!server_shutting_down && - GetNextHashPos(RSSFetchUrls, it, &len, &Key, &vrptr) && - (vrptr != NULL)) { - rptr = (rss_aggregator *)vrptr; - if (!rss_do_fetching(rptr)) - UnlinkRSSAggregator(rptr); + if (GetCount(RSSFetchUrls) > 0) + { + pthread_mutex_lock(&RSSQueueMutex); + EVRSSQ_syslog(LOG_DEBUG, + "rssclient starting %d Clients", + GetCount(RSSFetchUrls)); + + it = GetNewHashPos(RSSFetchUrls, 0); + while (!server_shutting_down && + GetNextHashPos(RSSFetchUrls, it, &len, &Key, &vrptr) && + (vrptr != NULL)) { + rptr = (rss_aggregator *)vrptr; + if (!rss_do_fetching(rptr)) + UnlinkRSSAggregator(rptr); + } + DeleteHashPos(&it); + pthread_mutex_unlock(&RSSQueueMutex); } - DeleteHashPos(&it); - pthread_mutex_unlock(&RSSQueueMutex); + else + EVRSSQM_syslog(LOG_DEBUG, "Nothing to do."); - syslog(LOG_DEBUG, "rssclient ended\n"); - doing_rssclient = 0; + EVRSSQM_syslog(LOG_DEBUG, "rssclient ended\n"); return; } @@ -729,18 +804,109 @@ void rss_cleanup(void) DeleteHash(&RSSQueueRooms); } +void LogDebugEnableRSSClient(const int n) +{ + RSSClientDebugEnabled = n; +} + + +typedef struct __RSSVetoInfo { + StrBuf *ErrMsg; + time_t Now; + int Veto; +}RSSVetoInfo; + +void rssclient_veto_scan_room(struct ctdlroom *qrbuf, void *data, OneRoomNetCfg *OneRNCFG) +{ + RSSVetoInfo *Info = (RSSVetoInfo *) data; + const RSSCfgLine *RSSCfg = (RSSCfgLine *)OneRNCFG->NetConfigs[rssclient]; + + while (RSSCfg != NULL) + { + if ((RSSCfg->last_known_good != 0) && + (RSSCfg->last_known_good + USETABLE_ANTIEXPIRE < Info->Now)) + { + StrBufAppendPrintf(Info->ErrMsg, + "RSS feed not seen for a %d days:: <", + (Info->Now - RSSCfg->last_known_good) / (24 * 60 * 60)); + + StrBufAppendBuf(Info->ErrMsg, RSSCfg->Url, 0); + StrBufAppendBufPlain(Info->ErrMsg, HKEY(">\n"), 0); + } + RSSCfg = RSSCfg->next; + } +} + +int RSSCheckUsetableVeto(StrBuf *ErrMsg) +{ + RSSVetoInfo Info; + + Info.ErrMsg = ErrMsg; + Info.Now = time (NULL); + Info.Veto = 0; + + CtdlForEachNetCfgRoom(rssclient_veto_scan_room, &Info, rssclient); + + return Info.Veto;; +} + + + + +void ParseRSSClientCfgLine(const CfgLineType *ThisOne, StrBuf *Line, const char *LinePos, OneRoomNetCfg *OneRNCFG) +{ + RSSCfgLine *RSSCfg; + + RSSCfg = (RSSCfgLine *) malloc (sizeof(RSSCfgLine)); + RSSCfg->Url = NewStrBufPlain (NULL, StrLength (Line)); + + + StrBufExtract_NextToken(RSSCfg->Url, Line, &LinePos, '|'); + RSSCfg->last_known_good = StrBufExtractNext_long(Line, &LinePos, '|'); + + + RSSCfg->next = (RSSCfgLine *)OneRNCFG->NetConfigs[ThisOne->C]; + OneRNCFG->NetConfigs[ThisOne->C] = (RoomNetCfgLine*) RSSCfg; +} + +void SerializeRSSClientCfgLine(const CfgLineType *ThisOne, StrBuf *OutputBuffer, OneRoomNetCfg *RNCfg, RoomNetCfgLine *data) +{ + RSSCfgLine *RSSCfg = (RSSCfgLine*) data; + + StrBufAppendBufPlain(OutputBuffer, CKEY(ThisOne->Str), 0); + StrBufAppendBufPlain(OutputBuffer, HKEY("|"), 0); + StrBufAppendBufPlain(OutputBuffer, SKEY(RSSCfg->Url), 0); + StrBufAppendPrintf(OutputBuffer, "|%ld\n", RSSCfg->last_known_good); +} + +void DeleteRSSClientCfgLine(const CfgLineType *ThisOne, RoomNetCfgLine **data) +{ + RSSCfgLine *RSSCfg = (RSSCfgLine*) *data; + + FreeStrBuf(&RSSCfg->Url); + free(*data); + *data = NULL; +} + CTDL_MODULE_INIT(rssclient) { - if (threading) + if (!threading) { - CtdlFillSystemContext(&rss_CC, "rssclient"); + CtdlRegisterTDAPVetoHook (RSSCheckUsetableVeto, CDB_USETABLE, 0); + + CtdlREGISTERRoomCfgType(rssclient, ParseRSSClientCfgLine, 0, 1, SerializeRSSClientCfgLine, DeleteRSSClientCfgLine); pthread_mutex_init(&RSSQueueMutex, NULL); RSSQueueRooms = NewHash(1, lFlathash); RSSFetchUrls = NewHash(1, NULL); syslog(LOG_INFO, "%s\n", curl_version()); - CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER); - CtdlRegisterCleanupHook(rss_cleanup); + CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER, PRIO_AGGR + 300); + CtdlRegisterEVCleanupHook(rss_cleanup); + CtdlRegisterDebugFlagHook(HKEY("rssclient"), LogDebugEnableRSSClient, &RSSClientDebugEnabled); + } + else + { + CtdlFillSystemContext(&rss_CC, "rssclient"); } return "rssclient"; }