X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Fserv_rssclient.c;h=82e206792bb77ea32d404e609769be0911341929;hb=222d2f63b0f523ce6b79e4b2ca9767f80e0bf85e;hp=4466d7f621431e869eb11261fd91cb6a7fb0e36b;hpb=f1ee61891901850ebbdee1e9440b363dc6df540a;p=citadel.git diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index 4466d7f62..82e206792 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -1,21 +1,15 @@ /* * Bring external RSS feeds into rooms. * - * Copyright (c) 2007-2010 by the citadel.org team + * Copyright (c) 2007-2012 by the citadel.org team * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. + * This program is open source software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include @@ -27,9 +21,9 @@ # include #else # if HAVE_SYS_TIME_H -# include +#include # else -# include +#include # endif #endif @@ -62,40 +56,42 @@ #define TMP_SHORTER_URL_OFFSET 0xFE #define TMP_SHORTER_URLS 0xFD -citthread_mutex_t RSSQueueMutex; /* locks the access to the following vars: */ -HashList *RSSQueueRooms = NULL; /* rss_room_counter */ -HashList *RSSFetchUrls = NULL; /* -> rss_aggregator; ->RefCount access to be locked too. */ +time_t last_run = 0L; -eNextState RSSAggregatorTerminate(AsyncIO *IO); +pthread_mutex_t RSSQueueMutex; /* locks the access to the following vars: */ +HashList *RSSQueueRooms = NULL; /* rss_room_counter */ +HashList *RSSFetchUrls = NULL; /*->rss_aggregator;->RefCount access locked*/ +eNextState RSSAggregator_Terminate(AsyncIO *IO); +eNextState RSSAggregator_TerminateDB(AsyncIO *IO); +eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO); +struct CitContext rss_CC; struct rssnetcfg *rnclist = NULL; -void AppendLink(StrBuf *Message, StrBuf *link, StrBuf *LinkTitle, const char *Title) -{ - if (StrLength(link) > 0) - { - StrBufAppendBufPlain(Message, HKEY(""), 0); - if (StrLength(LinkTitle) > 0) - StrBufAppendBuf(Message, LinkTitle, 0); - else if ((Title != NULL) && !IsEmptyStr(Title)) - StrBufAppendBufPlain(Message, Title, -1, 0); - else - StrBufAppendBuf(Message, link, 0); - StrBufAppendBufPlain(Message, HKEY("
\n"), 0); - } -} -typedef struct __networker_save_message { - AsyncIO IO; - struct CtdlMessage *Msg; - struct recptypes *recp; - rss_aggregator *Cfg; - StrBuf *MsgGUID; - StrBuf *Message; - struct UseTable ut; -} networker_save_message; +int RSSClientDebugEnabled = 0; +#define N ((rss_aggregator*)IO->Data)->QRnumber + +#define DBGLOG(LEVEL) if ((LEVEL != LOG_DEBUG) || (RSSClientDebugEnabled != 0)) + +#define EVRSSC_syslog(LEVEL, FORMAT, ...) \ + DBGLOG(LEVEL) syslog(LEVEL, \ + "IO[%ld]CC[%d][%ld]RSS" FORMAT, \ + IO->ID, CCID, N, __VA_ARGS__) +#define EVRSSCM_syslog(LEVEL, FORMAT) \ + DBGLOG(LEVEL) syslog(LEVEL, \ + "IO[%ld]CC[%d][%ld]RSS" FORMAT, \ + IO->ID, CCID, N) + +#define EVRSSQ_syslog(LEVEL, FORMAT, ...) \ + DBGLOG(LEVEL) syslog(LEVEL, "RSS" FORMAT, \ + __VA_ARGS__) +#define EVRSSQM_syslog(LEVEL, FORMAT) \ + DBGLOG(LEVEL) syslog(LEVEL, "RSS" FORMAT) + +#define EVRSSCSM_syslog(LEVEL, FORMAT) \ + DBGLOG(LEVEL) syslog(LEVEL, "IO[%ld][%ld]RSS" FORMAT, \ + IO->ID, N) void DeleteRoomReference(long QRnumber) { @@ -107,31 +103,36 @@ void DeleteRoomReference(long QRnumber) At = GetNewHashPos(RSSQueueRooms, 0); - GetHashPosFromKey(RSSQueueRooms, LKEY(QRnumber), At); - GetHashPos(RSSQueueRooms, At, &HKLen, &HK, &vData); - if (vData != NULL) + if (GetHashPosFromKey(RSSQueueRooms, LKEY(QRnumber), At)) { - pRoomC = (rss_room_counter *) vData; - pRoomC->count --; - if (pRoomC->count == 0) - DeleteEntryFromHash(RSSQueueRooms, At); + GetHashPos(RSSQueueRooms, At, &HKLen, &HK, &vData); + if (vData != NULL) + { + pRoomC = (rss_room_counter *) vData; + pRoomC->count --; + if (pRoomC->count == 0) + DeleteEntryFromHash(RSSQueueRooms, At); + } } DeleteHashPos(&At); } -void UnlinkRooms(rss_aggregator *Cfg) +void UnlinkRooms(rss_aggregator *RSSAggr) { - - DeleteRoomReference(Cfg->QRnumber); - if (Cfg->OtherQRnumbers != NULL) + DeleteRoomReference(RSSAggr->QRnumber); + if (RSSAggr->OtherQRnumbers != NULL) { long HKLen; const char *HK; HashPos *At; void *vData; - At = GetNewHashPos(Cfg->OtherQRnumbers, 0); - while (GetNextHashPos(Cfg->OtherQRnumbers, At, &HKLen, &HK, &vData) && + At = GetNewHashPos(RSSAggr->OtherQRnumbers, 0); + while (! server_shutting_down && + GetNextHashPos(RSSAggr->OtherQRnumbers, + At, + &HKLen, &HK, + &vData) && (vData != NULL)) { long *lData = (long*) vData; @@ -140,223 +141,178 @@ void UnlinkRooms(rss_aggregator *Cfg) DeleteHashPos(&At); } - } -void UnlinkRSSAggregator(rss_aggregator *Cfg) +void UnlinkRSSAggregator(rss_aggregator *RSSAggr) { HashPos *At; - UnlinkRooms(Cfg); + pthread_mutex_lock(&RSSQueueMutex); + UnlinkRooms(RSSAggr); At = GetNewHashPos(RSSFetchUrls, 0); - if (GetHashPosFromKey(RSSFetchUrls, SKEY(Cfg->Url), At) == 0) + if (GetHashPosFromKey(RSSFetchUrls, SKEY(RSSAggr->Url), At)) { DeleteEntryFromHash(RSSFetchUrls, At); } DeleteHashPos(&At); + last_run = time(NULL); + pthread_mutex_unlock(&RSSQueueMutex); } -eNextState FreeNetworkSaveMessage (AsyncIO *IO) +void DeleteRssCfg(void *vptr) { - networker_save_message *Ctx = (networker_save_message *) IO->Data; + rss_aggregator *RSSAggr = (rss_aggregator *)vptr; + AsyncIO *IO = &RSSAggr->IO; + EVRSSCM_syslog(LOG_DEBUG, "RSS: destroying\n"); - citthread_mutex_lock(&RSSQueueMutex); - Ctx->Cfg->RefCount --; + FreeStrBuf(&RSSAggr->Url); + FreeStrBuf(&RSSAggr->rooms); + FreeStrBuf(&RSSAggr->CData); + FreeStrBuf(&RSSAggr->Key); + DeleteHash(&RSSAggr->OtherQRnumbers); - if (Ctx->Cfg->RefCount == 0) + DeleteHashPos (&RSSAggr->Pos); + DeleteHash (&RSSAggr->Messages); + if (RSSAggr->recp.recp_room != NULL) + free(RSSAggr->recp.recp_room); + + + if (RSSAggr->Item != NULL) { - UnlinkRSSAggregator(Ctx->Cfg); + flush_rss_item(RSSAggr->Item); + free(RSSAggr->Item); } - citthread_mutex_unlock(&RSSQueueMutex); - CtdlFreeMessage(Ctx->Msg); - free_recipients(Ctx->recp); - FreeStrBuf(&Ctx->Message); - FreeStrBuf(&Ctx->MsgGUID); - free(Ctx); - return eAbort; + FreeAsyncIOContents(&RSSAggr->IO); + memset(RSSAggr, 0, sizeof(rss_aggregator)); + free(RSSAggr); } -eNextState AbortNetworkSaveMessage (AsyncIO *IO) +eNextState RSSAggregator_Terminate(AsyncIO *IO) { - return eAbort; ///TODO + rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; + + EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); + + StopCurlWatchers(IO); + UnlinkRSSAggregator(RSSAggr); + return eAbort; } -eNextState RSSSaveMessage(AsyncIO *IO) +eNextState RSSAggregator_TerminateDB(AsyncIO *IO) { - networker_save_message *Ctx = (networker_save_message *) IO->Data; - - Ctx->Msg->cm_fields['M'] = SmashStrBuf(&Ctx->Message); + rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; - CtdlSubmitMsg(Ctx->Msg, Ctx->recp, NULL, 0); + EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); - /* write the uidl to the use table so we don't store this item again */ - cdb_store(CDB_USETABLE, SKEY(Ctx->MsgGUID), &Ctx->ut, sizeof(struct UseTable) ); - return eTerminateConnection; + StopDBWatchers(&RSSAggr->IO); + UnlinkRSSAggregator(RSSAggr); + return eAbort; } -// TODO: relink me: ExpandShortUrls(ri->description); - -eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO) +eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO) { - struct cdbdata *cdbut; - networker_save_message *Ctx = (networker_save_message *) IO->Data; + const char *pUrl; + rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; - /* Find out if we've already seen this item */ - strcpy(Ctx->ut.ut_msgid, ChrPtr(Ctx->MsgGUID)); /// TODO - Ctx->ut.ut_timestamp = time(NULL); + pUrl = IO->ConnectMe->PlainUrl; + if (pUrl == NULL) + pUrl = ""; - cdbut = cdb_fetch(CDB_USETABLE, SKEY(Ctx->MsgGUID)); -#ifndef DEBUG_RSS - if (cdbut != NULL) { - /* Item has already been seen */ - syslog(LOG_DEBUG, "%s has already been seen\n", ChrPtr(Ctx->MsgGUID)); - cdb_free(cdbut); + EVRSSC_syslog(LOG_DEBUG, "RSS: Aborting by shutdown: %s.\n", pUrl); - /* rewrite the record anyway, to update the timestamp */ - cdb_store(CDB_USETABLE, - SKEY(Ctx->MsgGUID), - &Ctx->ut, sizeof(struct UseTable) ); - return eAbort; - } - else -#endif - { - NextDBOperation(IO, RSSSaveMessage); - return eSendMore; - } + StopCurlWatchers(IO); + UnlinkRSSAggregator(RSSAggr); + return eAbort; } -void RSSQueueSaveMessage(struct CtdlMessage *Msg, struct recptypes *recp, StrBuf *MsgGUID, StrBuf *MessageBody, rss_aggregator *Cfg) + +void AppendLink(StrBuf *Message, + StrBuf *link, + StrBuf *LinkTitle, + const char *Title) { - networker_save_message *Ctx; - - Ctx = (networker_save_message *) malloc(sizeof(networker_save_message)); - memset(Ctx, 0, sizeof(networker_save_message)); - - Ctx->MsgGUID = MsgGUID; - Ctx->Message = MessageBody; - Ctx->Msg = Msg; - Ctx->Cfg = Cfg; - Ctx->recp = recp; - Ctx->IO.Data = Ctx; - Ctx->IO.CitContext = CloneContext(CC); - Ctx->IO.Terminate = FreeNetworkSaveMessage; - Ctx->IO.ShutdownAbort = AbortNetworkSaveMessage; - QueueDBOperation(&Ctx->IO, RSS_FetchNetworkUsetableEntry); + if (StrLength(link) > 0) + { + StrBufAppendBufPlain(Message, HKEY(""), 0); + if (StrLength(LinkTitle) > 0) + StrBufAppendBuf(Message, LinkTitle, 0); + else if ((Title != NULL) && !IsEmptyStr(Title)) + StrBufAppendBufPlain(Message, Title, -1, 0); + else + StrBufAppendBuf(Message, link, 0); + StrBufAppendBufPlain(Message, HKEY("
\n"), 0); + } } -/* - * Commit a fetched and parsed RSS item to disk - */ -void rss_save_item(rss_item *ri, rss_aggregator *Cfg) +void rss_format_item(networker_save_message *SaveMsg) { - - struct MD5Context md5context; - u_char rawdigest[MD5_DIGEST_LEN]; - struct CtdlMessage *msg; - struct recptypes *recp = NULL; - int msglen = 0; StrBuf *Message; - StrBuf *guid; - StrBuf *Buf; - - recp = (struct recptypes *) malloc(sizeof(struct recptypes)); - if (recp == NULL) return; - memset(recp, 0, sizeof(struct recptypes)); - Buf = NewStrBufDup(Cfg->rooms); - recp->recp_room = SmashStrBuf(&Buf); - recp->num_room = Cfg->roomlist_parts; - recp->recptypes_magic = RECPTYPES_MAGIC; - - Cfg->RefCount ++; - /* Construct a GUID to use in the S_USETABLE table. - * If one is not present in the item itself, make one up. - */ - if (ri->guid != NULL) { - StrBufSpaceToBlank(ri->guid); - StrBufTrim(ri->guid); - guid = NewStrBufPlain(HKEY("rss/")); - StrBufAppendBuf(guid, ri->guid, 0); - } - else { - MD5Init(&md5context); - if (ri->title != NULL) { - MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->title), StrLength(ri->title)); - } - if (ri->link != NULL) { - MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->link), StrLength(ri->link)); - } - MD5Final(rawdigest, &md5context); - guid = NewStrBufPlain(NULL, MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/); - StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN); - StrBufAppendBufPlain(guid, HKEY("_rss2ctdl"), 0); - } + int msglen = 0; - /* translate Item into message. */ - syslog(LOG_DEBUG, "RSS: translating item...\n"); - if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY("")); - StrBufSpaceToBlank(ri->description); - msg = malloc(sizeof(struct CtdlMessage)); - memset(msg, 0, sizeof(struct CtdlMessage)); - msg->cm_magic = CTDLMESSAGE_MAGIC; - msg->cm_anon_type = MES_NORMAL; - msg->cm_format_type = FMT_RFC822; - - if (ri->guid != NULL) { - msg->cm_fields['E'] = strdup(ChrPtr(ri->guid)); - } + if (SaveMsg->author_or_creator != NULL) { - if (ri->author_or_creator != NULL) { char *From; StrBuf *Encoded = NULL; int FromAt; - - From = html_to_ascii(ChrPtr(ri->author_or_creator), - StrLength(ri->author_or_creator), + + From = html_to_ascii(ChrPtr(SaveMsg->author_or_creator), + StrLength(SaveMsg->author_or_creator), 512, 0); - StrBufPlain(ri->author_or_creator, From, -1); - StrBufTrim(ri->author_or_creator); + StrBufPlain(SaveMsg->author_or_creator, From, -1); + StrBufTrim(SaveMsg->author_or_creator); free(From); - FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL; - if (!FromAt && StrLength (ri->author_email) > 0) + FromAt = strchr(ChrPtr(SaveMsg->author_or_creator), '@') != NULL; + if (!FromAt && StrLength (SaveMsg->author_email) > 0) { - StrBufRFC2047encode(&Encoded, ri->author_or_creator); - msg->cm_fields['A'] = SmashStrBuf(&Encoded); - msg->cm_fields['P'] = SmashStrBuf(&ri->author_email); + StrBufRFC2047encode(&Encoded, SaveMsg->author_or_creator); + SaveMsg->Msg.cm_fields['A'] = SmashStrBuf(&Encoded); + SaveMsg->Msg.cm_fields['P'] = + SmashStrBuf(&SaveMsg->author_email); } else { if (FromAt) - msg->cm_fields['P'] = SmashStrBuf(&ri->author_or_creator); - else { - StrBufRFC2047encode(&Encoded, ri->author_or_creator); - msg->cm_fields['A'] = SmashStrBuf(&Encoded); - msg->cm_fields['P'] = strdup("rss@localhost"); + SaveMsg->Msg.cm_fields['A'] = + SmashStrBuf(&SaveMsg->author_or_creator); + SaveMsg->Msg.cm_fields['P'] = + strdup(SaveMsg->Msg.cm_fields['A']); + } + else + { + StrBufRFC2047encode(&Encoded, + SaveMsg->author_or_creator); + SaveMsg->Msg.cm_fields['A'] = + SmashStrBuf(&Encoded); + SaveMsg->Msg.cm_fields['P'] = + strdup("rss@localhost"); + } } } else { - msg->cm_fields['A'] = strdup("rss"); + SaveMsg->Msg.cm_fields['A'] = strdup("rss"); } - msg->cm_fields['N'] = strdup(NODENAME); - if (ri->title != NULL) { + SaveMsg->Msg.cm_fields['N'] = strdup(NODENAME); + if (SaveMsg->title != NULL) { long len; char *Sbj; StrBuf *Encoded, *QPEncoded; QPEncoded = NULL; - StrBufSpaceToBlank(ri->title); - len = StrLength(ri->title); - Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0); + StrBufSpaceToBlank(SaveMsg->title); + len = StrLength(SaveMsg->title); + Sbj = html_to_ascii(ChrPtr(SaveMsg->title), len, 512, 0); len = strlen(Sbj); - if (Sbj[len - 1] == '\n') + if ((len > 0) && (Sbj[len - 1] == '\n')) { len --; Sbj[len] = '\0'; @@ -367,148 +323,234 @@ void rss_save_item(rss_item *ri, rss_aggregator *Cfg) StrBufTrim(Encoded); StrBufRFC2047encode(&QPEncoded, Encoded); - msg->cm_fields['U'] = SmashStrBuf(&QPEncoded); + SaveMsg->Msg.cm_fields['U'] = SmashStrBuf(&QPEncoded); FreeStrBuf(&Encoded); } - msg->cm_fields['T'] = malloc(64); - snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate); - if (ri->channel_title != NULL) { - if (StrLength(ri->channel_title) > 0) { - msg->cm_fields['O'] = strdup(ChrPtr(ri->channel_title)); - } - } - if (ri->link == NULL) - ri->link = NewStrBufPlain(HKEY("")); + if (SaveMsg->link == NULL) + SaveMsg->link = NewStrBufPlain(HKEY("")); #if 0 /* temporarily disable shorter urls. */ - msg->cm_fields[TMP_SHORTER_URLS] = GetShorterUrls(ri->description); + SaveMsg->Msg.cm_fields[TMP_SHORTER_URLS] = + GetShorterUrls(SaveMsg->description); #endif - msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ; + msglen += 1024 + StrLength(SaveMsg->link) + StrLength(SaveMsg->description) ; - Message = NewStrBufPlain(NULL, StrLength(ri->description)); + Message = NewStrBufPlain(NULL, msglen); StrBufPlain(Message, HKEY( "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n" "\n")); #if 0 /* disable shorter url for now. */ - msg->cm_fields[TMP_SHORTER_URL_OFFSET] = StrLength(Message); + SaveMsg->Msg.cm_fields[TMP_SHORTER_URL_OFFSET] = StrLength(Message); #endif - StrBufAppendBuf(Message, ri->description, 0); + StrBufAppendBuf(Message, SaveMsg->description, 0); StrBufAppendBufPlain(Message, HKEY("

\n"), 0); - AppendLink(Message, ri->link, ri->linkTitle, NULL); - AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this"); + AppendLink(Message, SaveMsg->link, SaveMsg->linkTitle, NULL); + AppendLink(Message, SaveMsg->reLink, SaveMsg->reLinkTitle, "Reply to this"); StrBufAppendBufPlain(Message, HKEY("\n"), 0); - RSSQueueSaveMessage(msg, recp, guid, Message, Cfg); + + SaveMsg->Message = Message; } +eNextState RSSSaveMessage(AsyncIO *IO) +{ + long len; + const char *Key; + rss_aggregator *RSSAggr = (rss_aggregator *) IO->Data; + rss_format_item(RSSAggr->ThisMsg); -/* - * Begin a feed parse - */ -int rss_do_fetching(rss_aggregator *Cfg) + RSSAggr->ThisMsg->Msg.cm_fields['M'] = + SmashStrBuf(&RSSAggr->ThisMsg->Message); + + CtdlSubmitMsg(&RSSAggr->ThisMsg->Msg, &RSSAggr->recp, NULL, 0); + + /* write the uidl to the use table so we don't store this item again */ + cdb_store(CDB_USETABLE, + SKEY(RSSAggr->ThisMsg->MsgGUID), + &RSSAggr->ThisMsg->ut, + sizeof(struct UseTable) ); + + if (GetNextHashPos(RSSAggr->Messages, + RSSAggr->Pos, + &len, &Key, + (void**) &RSSAggr->ThisMsg)) + return NextDBOperation(IO, RSS_FetchNetworkUsetableEntry); + else + return eAbort; +} + +eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO) { - rss_item *ri; - - time_t now; - AsyncIO *IO; + const char *Key; + long len; + struct cdbdata *cdbut; + rss_aggregator *Ctx = (rss_aggregator *) IO->Data; - now = time(NULL); + /* Find out if we've already seen this item */ + strcpy(Ctx->ThisMsg->ut.ut_msgid, + ChrPtr(Ctx->ThisMsg->MsgGUID)); /// TODO + Ctx->ThisMsg->ut.ut_timestamp = time(NULL); - if ((Cfg->next_poll != 0) && (now < Cfg->next_poll)) - return 0; - Cfg->RefCount = 1; + cdbut = cdb_fetch(CDB_USETABLE, SKEY(Ctx->ThisMsg->MsgGUID)); +#ifndef DEBUG_RSS + if (cdbut != NULL) { + /* Item has already been seen */ + EVRSSC_syslog(LOG_DEBUG, + "%s has already been seen\n", + ChrPtr(Ctx->ThisMsg->MsgGUID)); + cdb_free(cdbut); - ri = (rss_item*) malloc(sizeof(rss_item)); - memset(ri, 0, sizeof(rss_item)); - Cfg->Item = ri; - IO = &Cfg->IO; - IO->CitContext = CloneContext(CC); - IO->Data = Cfg; - - - syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(Cfg->Url)); - ParseURL(&IO->ConnectMe, Cfg->Url, 80); - CurlPrepareURL(IO->ConnectMe); - - if (! evcurl_init(IO, -// Ctx, - NULL, - "Citadel RSS Client", - ParseRSSReply, - RSSAggregatorTerminate)) + /* rewrite the record anyway, to update the timestamp */ + cdb_store(CDB_USETABLE, + SKEY(Ctx->ThisMsg->MsgGUID), + &Ctx->ThisMsg->ut, sizeof(struct UseTable) ); + + if (GetNextHashPos(Ctx->Messages, + Ctx->Pos, + &len, &Key, + (void**) &Ctx->ThisMsg)) + return NextDBOperation( + IO, + RSS_FetchNetworkUsetableEntry); + else + return eAbort; + } + else +#endif { - syslog(LOG_DEBUG, "Unable to initialize libcurl.\n"); - return 0; + NextDBOperation(IO, RSSSaveMessage); + return eSendMore; } - - evcurl_handle_start(IO); - return 1; } +eNextState RSSAggregator_AnalyseReply(AsyncIO *IO) +{ + struct UseTable ut; + u_char rawdigest[MD5_DIGEST_LEN]; + struct MD5Context md5context; + StrBuf *guid; + struct cdbdata *cdbut; + rss_aggregator *Ctx = (rss_aggregator *) IO->Data; + + if (IO->HttpReq.httpcode != 200) + { + StrBuf *ErrMsg; + long lens[2]; + const char *strs[2]; + ErrMsg = NewStrBuf(); + EVRSSC_syslog(LOG_ALERT, "need a 200, got a %ld !\n", + IO->HttpReq.httpcode); + + strs[0] = ChrPtr(Ctx->Url); + lens[0] = StrLength(Ctx->Url); + + strs[1] = ChrPtr(Ctx->rooms); + lens[1] = StrLength(Ctx->rooms); + StrBufPrintf(ErrMsg, + "Error while RSS-Aggregation Run of %s\n" + " need a 200, got a %ld !\n" + " Response text was: \n" + " \n %s\n", + ChrPtr(Ctx->Url), + IO->HttpReq.httpcode, + ChrPtr(IO->HttpReq.ReplyData)); + CtdlAideFPMessage( + ChrPtr(ErrMsg), + "RSS Aggregation run failure", + 2, strs, (long*) &lens); + FreeStrBuf(&ErrMsg); + return eAbort; + } -void DeleteRssCfg(void *vptr) -{ - rss_aggregator *rncptr = (rss_aggregator *)vptr; + MD5Init(&md5context); - FreeStrBuf(&rncptr->Url); - FreeStrBuf(&rncptr->rooms); - FreeStrBuf(&rncptr->CData); - FreeStrBuf(&rncptr->Key); + MD5Update(&md5context, + (const unsigned char*)SKEY(IO->HttpReq.ReplyData)); - DeleteHash(&rncptr->OtherQRnumbers); + MD5Update(&md5context, + (const unsigned char*)SKEY(Ctx->Url)); - if (rncptr->Item != NULL) - { - FreeStrBuf(&rncptr->Item->guid); - FreeStrBuf(&rncptr->Item->title); - FreeStrBuf(&rncptr->Item->link); - FreeStrBuf(&rncptr->Item->linkTitle); - FreeStrBuf(&rncptr->Item->reLink); - FreeStrBuf(&rncptr->Item->reLinkTitle); - FreeStrBuf(&rncptr->Item->description); - FreeStrBuf(&rncptr->Item->channel_title); - FreeStrBuf(&rncptr->Item->author_or_creator); - FreeStrBuf(&rncptr->Item->author_url); - FreeStrBuf(&rncptr->Item->author_email); - - free(rncptr->Item); + MD5Final(rawdigest, &md5context); + guid = NewStrBufPlain(NULL, + MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/); + StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN); + StrBufAppendBufPlain(guid, HKEY("_rssFM"), 0); + if (StrLength(guid) > 40) + StrBufCutAt(guid, 40, NULL); + /* Find out if we've already seen this item */ + memcpy(ut.ut_msgid, SKEY(guid)); + ut.ut_timestamp = time(NULL); + + cdbut = cdb_fetch(CDB_USETABLE, SKEY(guid)); +#ifndef DEBUG_RSS + if (cdbut != NULL) { + /* Item has already been seen */ + EVRSSC_syslog(LOG_DEBUG, + "%s has already been seen\n", + ChrPtr(Ctx->Url)); + cdb_free(cdbut); } - free(rncptr); + + /* rewrite the record anyway, to update the timestamp */ + cdb_store(CDB_USETABLE, + SKEY(guid), + &ut, sizeof(struct UseTable) ); + FreeStrBuf(&guid); + if (cdbut != NULL) return eAbort; +#endif + return RSSAggregator_ParseReply(IO); } -eNextState RSSAggregatorTerminate(AsyncIO *IO) +eNextState RSSAggregator_FinishHttp(AsyncIO *IO) { - rss_aggregator *rncptr = (rss_aggregator *)IO->Data; - HashPos *At; - long HKLen; - const char *HK; - void *vData; + return QueueDBOperation(IO, RSSAggregator_AnalyseReply); +} - citthread_mutex_lock(&RSSQueueMutex); - rncptr->RefCount --; - if (rncptr->RefCount == 0) - { - UnlinkRSSAggregator(rncptr); +/* + * Begin a feed parse + */ +int rss_do_fetching(rss_aggregator *RSSAggr) +{ + AsyncIO *IO = &RSSAggr->IO; + rss_item *ri; + time_t now; + + now = time(NULL); + + if ((RSSAggr->next_poll != 0) && (now < RSSAggr->next_poll)) + return 0; + ri = (rss_item*) malloc(sizeof(rss_item)); + memset(ri, 0, sizeof(rss_item)); + RSSAggr->Item = ri; + + if (! InitcURLIOStruct(&RSSAggr->IO, + RSSAggr, + "Citadel RSS Client", + RSSAggregator_FinishHttp, + RSSAggregator_Terminate, + RSSAggregator_TerminateDB, + RSSAggregator_ShutdownAbort)) + { + EVRSSCM_syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); + return 0; } - citthread_mutex_unlock(&RSSQueueMutex); -/* - At = GetNewHashPos(RSSFetchUrls, 0); - citthread_mutex_lock(&RSSQueueMutex); - GetHashPosFromKey(RSSFetchUrls, SKEY(rncptr->Url), At); - GetHashPos(RSSFetchUrls, At, &HKLen, &HK, &vData); - DeleteEntryFromHash(RSSFetchUrls, At); - citthread_mutex_unlock(&RSSQueueMutex); + safestrncpy(((CitContext*)RSSAggr->IO.CitContext)->cs_host, + ChrPtr(RSSAggr->Url), + sizeof(((CitContext*)RSSAggr->IO.CitContext)->cs_host)); - DeleteHashPos(&At); -*/ - return eAbort; + EVRSSC_syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(RSSAggr->Url)); + ParseURL(&RSSAggr->IO.ConnectMe, RSSAggr->Url, 80); + CurlPrepareURL(RSSAggr->IO.ConnectMe); + + QueueCurlContext(&RSSAggr->IO); + return 1; } /* @@ -516,138 +558,165 @@ eNextState RSSAggregatorTerminate(AsyncIO *IO) */ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) { - StrBuf *CfgData; + StrBuf *CfgData=NULL; StrBuf *CfgType; StrBuf *Line; rss_room_counter *Count = NULL; struct stat statbuf; char filename[PATH_MAX]; - int fd; + int fd; int Done; - rss_aggregator *rncptr = NULL; - rss_aggregator *use_this_rncptr = NULL; + rss_aggregator *RSSAggr = NULL; + rss_aggregator *use_this_RSSAggr = NULL; void *vptr; const char *CfgPtr, *lPtr; const char *Err; - citthread_mutex_lock(&RSSQueueMutex); + pthread_mutex_lock(&RSSQueueMutex); if (GetHash(RSSQueueRooms, LKEY(qrbuf->QRnumber), &vptr)) { - syslog(LOG_DEBUG, - "rssclient: [%ld] %s already in progress.\n", - qrbuf->QRnumber, + EVRSSQ_syslog(LOG_DEBUG, + "rssclient: [%ld] %s already in progress.\n", + qrbuf->QRnumber, qrbuf->QRname); - citthread_mutex_unlock(&RSSQueueMutex); + pthread_mutex_unlock(&RSSQueueMutex); return; } - citthread_mutex_unlock(&RSSQueueMutex); + pthread_mutex_unlock(&RSSQueueMutex); assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir); - if (CtdlThreadCheckStop()) + if (server_shutting_down) return; - + /* Only do net processing for rooms that have netconfigs */ fd = open(filename, 0); if (fd <= 0) { - //syslog(LOG_DEBUG, "rssclient: %s no config.\n", qrbuf->QRname); + /* syslog(LOG_DEBUG, + "rssclient: %s no config.\n", + qrbuf->QRname); */ return; } - if (CtdlThreadCheckStop()) + + if (server_shutting_down) return; + if (fstat(fd, &statbuf) == -1) { - syslog(LOG_DEBUG, "ERROR: could not stat configfile '%s' - %s\n", - filename, strerror(errno)); + EVRSSQ_syslog(LOG_DEBUG, + "ERROR: could not stat configfile '%s' - %s\n", + filename, + strerror(errno)); return; } - if (CtdlThreadCheckStop()) + + if (server_shutting_down) return; + CfgData = NewStrBufPlain(NULL, statbuf.st_size + 1); + if (StrBufReadBLOB(CfgData, &fd, 1, statbuf.st_size, &Err) < 0) { close(fd); FreeStrBuf(&CfgData); - syslog(LOG_DEBUG, "ERROR: reading config '%s' - %s
\n", - filename, strerror(errno)); + EVRSSQ_syslog(LOG_ERR, "ERROR: reading config '%s' - %s
\n", + filename, strerror(errno)); return; } close(fd); - if (CtdlThreadCheckStop()) + if (server_shutting_down) return; - + CfgPtr = NULL; CfgType = NewStrBuf(); Line = NewStrBufPlain(NULL, StrLength(CfgData)); Done = 0; while (!Done) { - Done = StrBufSipLine(Line, CfgData, &CfgPtr) == 0; - if (StrLength(Line) > 0) - { - lPtr = NULL; - StrBufExtract_NextToken(CfgType, Line, &lPtr, '|'); - if (!strcasecmp("rssclient", ChrPtr(CfgType))) + Done = StrBufSipLine(Line, CfgData, &CfgPtr) == 0; + if (StrLength(Line) > 0) { - if (Count == NULL) - { - Count = malloc(sizeof(rss_room_counter)); - Count->count = 0; - } - Count->count ++; - rncptr = (rss_aggregator *) malloc(sizeof(rss_aggregator)); - memset (rncptr, 0, sizeof(rss_aggregator)); - rncptr->roomlist_parts = 1; - rncptr->Url = NewStrBuf(); - StrBufExtract_NextToken(rncptr->Url, Line, &lPtr, '|'); - - citthread_mutex_lock(&RSSQueueMutex); - GetHash(RSSFetchUrls, SKEY(rncptr->Url), &vptr); - use_this_rncptr = (rss_aggregator *)vptr; - if (use_this_rncptr != NULL) - { - /* mustn't attach to an active session */ - if (use_this_rncptr->RefCount > 0) - { - DeleteRssCfg(rncptr); - Count->count--; - } - else - { - long *QRnumber; - StrBufAppendBufPlain(use_this_rncptr->rooms, - qrbuf->QRname, - -1, 0); - if (use_this_rncptr->roomlist_parts == 1) - { - use_this_rncptr->OtherQRnumbers = NewHash(1, lFlathash); - } - QRnumber = (long*)malloc(sizeof(long)); - *QRnumber = qrbuf->QRnumber; - Put(use_this_rncptr->OtherQRnumbers, LKEY(qrbuf->QRnumber), QRnumber, NULL); - use_this_rncptr->roomlist_parts++; - } - citthread_mutex_unlock(&RSSQueueMutex); - continue; - } - citthread_mutex_unlock(&RSSQueueMutex); - - rncptr->ItemType = RSS_UNSET; - - rncptr->rooms = NewStrBufPlain(qrbuf->QRname, -1); - - citthread_mutex_lock(&RSSQueueMutex); - Put(RSSFetchUrls, SKEY(rncptr->Url), rncptr, DeleteRssCfg); - citthread_mutex_unlock(&RSSQueueMutex); + lPtr = NULL; + StrBufExtract_NextToken(CfgType, Line, &lPtr, '|'); + if (!strcasecmp("rssclient", ChrPtr(CfgType))) + { + if (Count == NULL) + { + Count = malloc( + sizeof(rss_room_counter)); + Count->count = 0; + } + Count->count ++; + RSSAggr = (rss_aggregator *) malloc( + sizeof(rss_aggregator)); + + memset (RSSAggr, 0, sizeof(rss_aggregator)); + RSSAggr->QRnumber = qrbuf->QRnumber; + RSSAggr->roomlist_parts = 1; + RSSAggr->Url = NewStrBuf(); + + StrBufExtract_NextToken(RSSAggr->Url, + Line, + &lPtr, + '|'); + + pthread_mutex_lock(&RSSQueueMutex); + GetHash(RSSFetchUrls, + SKEY(RSSAggr->Url), + &vptr); + + use_this_RSSAggr = (rss_aggregator *)vptr; + if (use_this_RSSAggr != NULL) + { + long *QRnumber; + StrBufAppendBufPlain( + use_this_RSSAggr->rooms, + qrbuf->QRname, + -1, 0); + if (use_this_RSSAggr->roomlist_parts==1) + { + use_this_RSSAggr->OtherQRnumbers + = NewHash(1, lFlathash); + } + QRnumber = (long*)malloc(sizeof(long)); + *QRnumber = qrbuf->QRnumber; + Put(use_this_RSSAggr->OtherQRnumbers, + LKEY(qrbuf->QRnumber), + QRnumber, + NULL); + use_this_RSSAggr->roomlist_parts++; + + pthread_mutex_unlock(&RSSQueueMutex); + + FreeStrBuf(&RSSAggr->Url); + free(RSSAggr); + RSSAggr = NULL; + continue; + } + pthread_mutex_unlock(&RSSQueueMutex); + + RSSAggr->ItemType = RSS_UNSET; + + RSSAggr->rooms = NewStrBufPlain( + qrbuf->QRname, -1); + + pthread_mutex_lock(&RSSQueueMutex); + + Put(RSSFetchUrls, + SKEY(RSSAggr->Url), + RSSAggr, + DeleteRssCfg); + + pthread_mutex_unlock(&RSSQueueMutex); + } } - } } if (Count != NULL) { Count->QRnumber = qrbuf->QRnumber; - citthread_mutex_lock(&RSSQueueMutex); - syslog(LOG_DEBUG, "rssclient: [%ld] %s now starting.\n", + pthread_mutex_lock(&RSSQueueMutex); + EVRSSQ_syslog(LOG_DEBUG, "client: [%ld] %s now starting.\n", qrbuf->QRnumber, qrbuf->QRname); Put(RSSQueueRooms, LKEY(qrbuf->QRnumber), Count, NULL); - citthread_mutex_unlock(&RSSQueueMutex); + pthread_mutex_unlock(&RSSQueueMutex); } FreeStrBuf(&CfgData); FreeStrBuf(&CfgType); @@ -658,61 +727,86 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) * Scan for rooms that have RSS client requests configured */ void rssclient_scan(void) { - static int doing_rssclient = 0; + int RSSRoomCount, RSSCount; rss_aggregator *rptr = NULL; void *vrptr = NULL; - HashPos *it; + HashPos *it; long len; const char *Key; + time_t now = time(NULL); + + /* Run no more than once every 15 minutes. */ + if ((now - last_run) < 900) { + EVRSSQ_syslog(LOG_DEBUG, + "Client: polling interval not yet reached; last run was %ldm%lds ago", + ((now - last_run) / 60), + ((now - last_run) % 60) + ); + return; + } /* - * This is a simple concurrency check to make sure only one rssclient run - * is done at a time. We could do this with a mutex, but since we - * don't really require extremely fine granularity here, we'll do it - * with a static variable instead. + * This is a simple concurrency check to make sure only one rssclient + * run is done at a time. */ - if (doing_rssclient) return; - doing_rssclient = 1; + pthread_mutex_lock(&RSSQueueMutex); + RSSCount = GetCount(RSSFetchUrls); + RSSRoomCount = GetCount(RSSQueueRooms); + pthread_mutex_unlock(&RSSQueueMutex); + + if ((RSSRoomCount > 0) || (RSSCount > 0)) { + EVRSSQ_syslog(LOG_DEBUG, + "rssclient: concurrency check failed; %d rooms and %d url's are queued", + RSSRoomCount, RSSCount + ); + return; + } - syslog(LOG_DEBUG, "rssclient started\n"); + become_session(&rss_CC); + EVRSSQM_syslog(LOG_DEBUG, "rssclient started\n"); CtdlForEachRoom(rssclient_scan_room, NULL); - citthread_mutex_lock(&RSSQueueMutex); + pthread_mutex_lock(&RSSQueueMutex); it = GetNewHashPos(RSSFetchUrls, 0); - while (GetNextHashPos(RSSFetchUrls, it, &len, &Key, &vrptr) && + while (!server_shutting_down && + GetNextHashPos(RSSFetchUrls, it, &len, &Key, &vrptr) && (vrptr != NULL)) { rptr = (rss_aggregator *)vrptr; - if (rptr->RefCount == 0) - if (!rss_do_fetching(rptr)) - UnlinkRSSAggregator(rptr); + if (!rss_do_fetching(rptr)) + UnlinkRSSAggregator(rptr); } DeleteHashPos(&it); - citthread_mutex_unlock(&RSSQueueMutex); + pthread_mutex_unlock(&RSSQueueMutex); - syslog(LOG_DEBUG, "rssclient ended\n"); - doing_rssclient = 0; + EVRSSQM_syslog(LOG_DEBUG, "rssclient ended\n"); return; } void rss_cleanup(void) { - citthread_mutex_destroy(&RSSQueueMutex); + /* citthread_mutex_destroy(&RSSQueueMutex); TODO */ DeleteHash(&RSSFetchUrls); DeleteHash(&RSSQueueRooms); } +void LogDebugEnableRSSClient(const int n) +{ + RSSClientDebugEnabled = n; +} CTDL_MODULE_INIT(rssclient) { if (threading) { - citthread_mutex_init(&RSSQueueMutex, NULL); + CtdlFillSystemContext(&rss_CC, "rssclient"); + pthread_mutex_init(&RSSQueueMutex, NULL); RSSQueueRooms = NewHash(1, lFlathash); RSSFetchUrls = NewHash(1, NULL); syslog(LOG_INFO, "%s\n", curl_version()); - CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER); - CtdlRegisterCleanupHook(rss_cleanup); + CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER, PRIO_AGGR + 300); + CtdlRegisterEVCleanupHook(rss_cleanup); + CtdlRegisterDebugFlagHook(HKEY("rssclient"), LogDebugEnableRSSClient, &RSSClientDebugEnabled); } return "rssclient"; }