X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Fserv_rssclient.c;h=72ce11ed280f8164cdaa538ce73b0bf6994d7529;hb=684e3a06c4d2ab35e39760f328a0d354ad330622;hp=f9355025a09ceb65b1268d1c3a712c081998a9c5;hpb=a80ade9d4c5b3e0dcd31b44418c5271e0c89a337;p=citadel.git diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index f9355025a..72ce11ed2 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -1,21 +1,15 @@ /* * Bring external RSS feeds into rooms. * - * Copyright (c) 2007-2010 by the citadel.org team + * Copyright (c) 2007-2017 by the citadel.org team * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. + * This program is open source software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include @@ -27,9 +21,9 @@ # include #else # if HAVE_SYS_TIME_H -# include +#include # else -# include +#include # endif #endif @@ -54,697 +48,416 @@ #include "citadel_dirs.h" #include "md5.h" #include "context.h" -#include "event_client.h" -#include "rss_atom_parser.h" - +#include "internet_addressing.h" -#define TMP_MSGDATA 0xFF -#define TMP_SHORTER_URL_OFFSET 0xFE -#define TMP_SHORTER_URLS 0xFD +struct rssroom { + struct rssroom *next; + char *room; +}; -time_t last_run = 0L; +struct rssurl { + struct rssurl *next; + char *url; + struct rssroom *rooms; +}; -pthread_mutex_t RSSQueueMutex; /* locks the access to the following vars: */ -HashList *RSSQueueRooms = NULL; /* rss_room_counter */ -HashList *RSSFetchUrls = NULL; /* -> rss_aggregator; ->RefCount access to be locked too. */ - -eNextState RSSAggregatorTerminate(AsyncIO *IO); +struct rssparser { + StrBuf *CData; + struct CtdlMessage *msg; + char *link; + char *description; + char *item_id; + struct rssroom *rooms; +}; +time_t last_run = 0L; struct CitContext rss_CC; +struct rssurl *rsstodo = NULL; -struct rssnetcfg *rnclist = NULL; -void AppendLink(StrBuf *Message, StrBuf *link, StrBuf *LinkTitle, const char *Title) -{ - if (StrLength(link) > 0) - { - StrBufAppendBufPlain(Message, HKEY(""), 0); - if (StrLength(LinkTitle) > 0) - StrBufAppendBuf(Message, LinkTitle, 0); - else if ((Title != NULL) && !IsEmptyStr(Title)) - StrBufAppendBufPlain(Message, Title, -1, 0); - else - StrBufAppendBuf(Message, link, 0); - StrBufAppendBufPlain(Message, HKEY("
\n"), 0); - } -} -typedef struct __networker_save_message { - AsyncIO IO; - struct CtdlMessage *Msg; - struct recptypes *recp; - rss_aggregator *Cfg; - StrBuf *MsgGUID; - StrBuf *Message; - struct UseTable ut; -} networker_save_message; - - -void DeleteRoomReference(long QRnumber) -{ - HashPos *At; - long HKLen; - const char *HK; - void *vData = NULL; - rss_room_counter *pRoomC; - - At = GetNewHashPos(RSSQueueRooms, 0); - GetHashPosFromKey(RSSQueueRooms, LKEY(QRnumber), At); - GetHashPos(RSSQueueRooms, At, &HKLen, &HK, &vData); - if (vData != NULL) - { - pRoomC = (rss_room_counter *) vData; - pRoomC->count --; - if (pRoomC->count == 0) - DeleteEntryFromHash(RSSQueueRooms, At); - } - DeleteHashPos(&At); -} - -void UnlinkRooms(rss_aggregator *Cfg) +// This handler is called whenever an XML tag opens. +// +void rss_start_element(void *data, const char *el, const char **attribute) { - - DeleteRoomReference(Cfg->QRnumber); - if (Cfg->OtherQRnumbers != NULL) - { - long HKLen; - const char *HK; - HashPos *At; - void *vData; - - At = GetNewHashPos(Cfg->OtherQRnumbers, 0); - while (GetNextHashPos(Cfg->OtherQRnumbers, At, &HKLen, &HK, &vData) && - (vData != NULL)) - { - long *lData = (long*) vData; - DeleteRoomReference(*lData); + struct rssparser *r = (struct rssparser *)data; + int i; + + if ( + (!strcasecmp(el, "entry")) + || (!strcasecmp(el, "item")) + ) { + // this is the start of a new item(rss) or entry(atom) + if (r->msg != NULL) { + CM_Free(r->msg); + r->msg = NULL; } -/* - if (server_shutting_down) - break; / * TODO */ - - DeleteHashPos(&At); + r->msg = malloc(sizeof(struct CtdlMessage)); + memset(r->msg, 0, sizeof(struct CtdlMessage)); + r->msg->cm_magic = CTDLMESSAGE_MAGIC; + r->msg->cm_anon_type = MES_NORMAL; + r->msg->cm_format_type = FMT_RFC822; } -} - -void UnlinkRSSAggregator(rss_aggregator *Cfg) -{ - HashPos *At; - - UnlinkRooms(Cfg); - - At = GetNewHashPos(RSSFetchUrls, 0); - if (GetHashPosFromKey(RSSFetchUrls, SKEY(Cfg->Url), At) == 0) - { - DeleteEntryFromHash(RSSFetchUrls, At); - } - DeleteHashPos(&At); - last_run = time(NULL); -} - -eNextState FreeNetworkSaveMessage (AsyncIO *IO) -{ - networker_save_message *Ctx = (networker_save_message *) IO->Data; - - pthread_mutex_lock(&RSSQueueMutex); - Ctx->Cfg->RefCount --; - - if (Ctx->Cfg->RefCount == 0) - { - UnlinkRSSAggregator(Ctx->Cfg); + else if (!strcasecmp(el, "link")) { // atom feeds have the link as an attribute + for(i = 0; attribute[i]; i += 2) { + if (!strcasecmp(attribute[i], "href")) { + if (r->link != NULL) { + free(r->link); + r->link = NULL; + } + r->link = strdup(attribute[i+1]); + striplt(r->link); + } + } } - pthread_mutex_unlock(&RSSQueueMutex); - - CtdlFreeMessage(Ctx->Msg); - free_recipients(Ctx->recp); - FreeStrBuf(&Ctx->Message); - FreeStrBuf(&Ctx->MsgGUID); - ((struct CitContext*)IO->CitContext)->state = CON_IDLE; - ((struct CitContext*)IO->CitContext)->kill_me = 1; - free(Ctx); - last_run = time(NULL); - return eAbort; } -eNextState AbortNetworkSaveMessage (AsyncIO *IO) -{ - return eAbort; ///TODO -} -eNextState RSSSaveMessage(AsyncIO *IO) +// This handler is called whenever an XML tag closes. +// +void rss_end_element(void *data, const char *el) { - networker_save_message *Ctx = (networker_save_message *) IO->Data; - - Ctx->Msg->cm_fields['M'] = SmashStrBuf(&Ctx->Message); + struct rssparser *r = (struct rssparser *)data; - CtdlSubmitMsg(Ctx->Msg, Ctx->recp, NULL, 0); + if ( // end of a new item(rss) or entry(atom) + (!strcasecmp(el, "entry")) + || (!strcasecmp(el, "item")) + ) { - /* write the uidl to the use table so we don't store this item again */ - cdb_store(CDB_USETABLE, SKEY(Ctx->MsgGUID), &Ctx->ut, sizeof(struct UseTable) ); - - return eTerminateConnection; -} + if (r->msg != NULL) { // Save the message to the rooms -eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO) -{ - struct cdbdata *cdbut; - networker_save_message *Ctx = (networker_save_message *) IO->Data; - - /* Find out if we've already seen this item */ - strcpy(Ctx->ut.ut_msgid, ChrPtr(Ctx->MsgGUID)); /// TODO - Ctx->ut.ut_timestamp = time(NULL); - - cdbut = cdb_fetch(CDB_USETABLE, SKEY(Ctx->MsgGUID)); -#ifndef DEBUG_RSS - if (cdbut != NULL) { - /* Item has already been seen */ - syslog(LOG_DEBUG, "%s has already been seen\n", ChrPtr(Ctx->MsgGUID)); - cdb_free(cdbut); - - /* rewrite the record anyway, to update the timestamp */ - cdb_store(CDB_USETABLE, - SKEY(Ctx->MsgGUID), - &Ctx->ut, sizeof(struct UseTable) ); - return eAbort; - } - else -#endif - { - NextDBOperation(IO, RSSSaveMessage); - return eSendMore; - } -} -void RSSQueueSaveMessage(struct CtdlMessage *Msg, struct recptypes *recp, StrBuf *MsgGUID, StrBuf *MessageBody, rss_aggregator *Cfg) -{ - networker_save_message *Ctx; + // use the link as an item id if nothing else is available + if ((r->item_id == NULL) && (r->link != NULL)) { + r->item_id = strdup(r->link); + } - Ctx = (networker_save_message *) malloc(sizeof(networker_save_message)); - memset(Ctx, 0, sizeof(networker_save_message)); + // check the use table + StrBuf *u = NewStrBuf(); + StrBufAppendPrintf(u, "rss/%s", r->item_id); + int already_seen = CheckIfAlreadySeen(u); + FreeStrBuf(&u); + + if (already_seen == 0) { + + // Compose the message text + StrBuf *TheMessage = NewStrBuf(); + StrBufAppendPrintf(TheMessage, + "Content-type: text/html\n\n" + "\n\n" + "" + ); + + if (r->description != NULL) { + StrBufAppendPrintf(TheMessage, "%s

\r\n", r->description); + free(r->description); + r->description = NULL; + } + + if (r->link != NULL) { + StrBufAppendPrintf(TheMessage, "%s\r\n", r->link, r->link); + free(r->link); + r->link = NULL; + } - Ctx->MsgGUID = MsgGUID; - Ctx->Message = MessageBody; - Ctx->Msg = Msg; - Ctx->Cfg = Cfg; - Ctx->recp = recp; - Ctx->IO.Data = Ctx; - Ctx->IO.CitContext = CloneContext(&rss_CC); - Ctx->IO.Terminate = FreeNetworkSaveMessage; - Ctx->IO.ShutdownAbort = AbortNetworkSaveMessage; - QueueDBOperation(&Ctx->IO, RSS_FetchNetworkUsetableEntry); -} - - -/* - * Commit a fetched and parsed RSS item to disk - */ -void rss_save_item(rss_item *ri, rss_aggregator *Cfg) -{ - - struct MD5Context md5context; - u_char rawdigest[MD5_DIGEST_LEN]; - struct CtdlMessage *msg; - struct recptypes *recp = NULL; - int msglen = 0; - StrBuf *Message; - StrBuf *guid; - StrBuf *Buf; - - recp = (struct recptypes *) malloc(sizeof(struct recptypes)); - if (recp == NULL) return; - memset(recp, 0, sizeof(struct recptypes)); - Buf = NewStrBufDup(Cfg->rooms); - recp->recp_room = SmashStrBuf(&Buf); - recp->num_room = Cfg->roomlist_parts; - recp->recptypes_magic = RECPTYPES_MAGIC; - - Cfg->RefCount ++; - /* Construct a GUID to use in the S_USETABLE table. - * If one is not present in the item itself, make one up. - */ - if (ri->guid != NULL) { - StrBufSpaceToBlank(ri->guid); - StrBufTrim(ri->guid); - guid = NewStrBufPlain(HKEY("rss/")); - StrBufAppendBuf(guid, ri->guid, 0); - } - else { - MD5Init(&md5context); - if (ri->title != NULL) { - MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->title), StrLength(ri->title)); + StrBufAppendPrintf(TheMessage, "\r\n"); + CM_SetField(r->msg, eMesageText, ChrPtr(TheMessage), StrLength(TheMessage)); + FreeStrBuf(&TheMessage); + + if (CM_IsEmpty(r->msg, eAuthor)) { + CM_SetField(r->msg, eAuthor, HKEY("rss")); + } + + if (CM_IsEmpty(r->msg, eTimestamp)) { + CM_SetFieldLONG(r->msg, eTimestamp, time(NULL)); + } + + // Save it to the room(s) + struct rssroom *rr = NULL; + long msgnum = (-1); + for (rr=r->rooms; rr!=NULL; rr=rr->next) { + if (rr == r->rooms) { + msgnum = CtdlSubmitMsg(r->msg, NULL, rr->room, 0); + } + else { + CtdlSaveMsgPointerInRoom(rr->room, msgnum, 0, NULL); + } + syslog(LOG_DEBUG, "Saved message %ld to %s", msgnum, rr->room); + } + } + else { + syslog(LOG_DEBUG, "%s was already seen", r->item_id); + } + + CM_Free(r->msg); + r->msg = NULL; } - if (ri->link != NULL) { - MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->link), StrLength(ri->link)); + + if (r->item_id != NULL) { + free(r->item_id); + r->item_id = NULL; } - MD5Final(rawdigest, &md5context); - guid = NewStrBufPlain(NULL, MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/); - StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN); - StrBufAppendBufPlain(guid, HKEY("_rss2ctdl"), 0); } - /* translate Item into message. */ - syslog(LOG_DEBUG, "RSS: translating item...\n"); - if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY("")); - StrBufSpaceToBlank(ri->description); - msg = malloc(sizeof(struct CtdlMessage)); - memset(msg, 0, sizeof(struct CtdlMessage)); - msg->cm_magic = CTDLMESSAGE_MAGIC; - msg->cm_anon_type = MES_NORMAL; - msg->cm_format_type = FMT_RFC822; - - if (ri->guid != NULL) { - msg->cm_fields['E'] = strdup(ChrPtr(ri->guid)); + else if (!strcasecmp(el, "title")) { // item subject (rss and atom) + if ((r->msg != NULL) && (CM_IsEmpty(r->msg, eMsgSubject))) { + CM_SetField(r->msg, eMsgSubject, ChrPtr(r->CData), StrLength(r->CData)); + striplt(r->msg->cm_fields[eMsgSubject]); + } } - if (ri->author_or_creator != NULL) { - char *From; - StrBuf *Encoded = NULL; - int FromAt; - - From = html_to_ascii(ChrPtr(ri->author_or_creator), - StrLength(ri->author_or_creator), - 512, 0); - StrBufPlain(ri->author_or_creator, From, -1); - StrBufTrim(ri->author_or_creator); - free(From); - - FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL; - if (!FromAt && StrLength (ri->author_email) > 0) - { - StrBufRFC2047encode(&Encoded, ri->author_or_creator); - msg->cm_fields['A'] = SmashStrBuf(&Encoded); - msg->cm_fields['P'] = SmashStrBuf(&ri->author_email); + else if (!strcasecmp(el, "author")) { // author of item (rss and maybe atom) + if ((r->msg != NULL) && (CM_IsEmpty(r->msg, eAuthor))) { + CM_SetField(r->msg, eAuthor, ChrPtr(r->CData), StrLength(r->CData)); + striplt(r->msg->cm_fields[eAuthor]); } - else - { - if (FromAt) - { - msg->cm_fields['A'] = SmashStrBuf(&ri->author_or_creator); - msg->cm_fields['P'] = strdup(msg->cm_fields['A']); - } - else - { - StrBufRFC2047encode(&Encoded, ri->author_or_creator); - msg->cm_fields['A'] = SmashStrBuf(&Encoded); - msg->cm_fields['P'] = strdup("rss@localhost"); + } - } - if (ri->pubdate <= 0) { - ri->pubdate = time(NULL); - } + else if (!strcasecmp(el, "pubdate")) { // date/time stamp (rss) Sat, 25 Feb 2017 14:28:01 EST + if ((r->msg)&&(r->msg->cm_fields[eTimestamp]==NULL)) { + CM_SetFieldLONG(r->msg, eTimestamp, parsedate(ChrPtr(r->CData))); } } - else { - msg->cm_fields['A'] = strdup("rss"); - } - msg->cm_fields['N'] = strdup(NODENAME); - if (ri->title != NULL) { - long len; - char *Sbj; - StrBuf *Encoded, *QPEncoded; - - QPEncoded = NULL; - StrBufSpaceToBlank(ri->title); - len = StrLength(ri->title); - Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0); - len = strlen(Sbj); - if (Sbj[len - 1] == '\n') - { - len --; - Sbj[len] = '\0'; + else if (!strcasecmp(el, "updated")) { // date/time stamp (atom) 2003-12-13T18:30:02Z + if ((r->msg)&&(r->msg->cm_fields[eTimestamp]==NULL)) { + struct tm t; + char zulu; + memset(&t, 0, sizeof t); + sscanf(ChrPtr(r->CData), "%d-%d-%dT%d:%d:%d%c", &t.tm_year, &t.tm_mon, &t.tm_mday, &t.tm_hour, &t.tm_min, &t.tm_sec, &zulu); + t.tm_year -= 1900; + t.tm_mon -= 1; + CM_SetFieldLONG(r->msg, eTimestamp, mktime(&t)); } - Encoded = NewStrBufPlain(Sbj, len); - free(Sbj); - - StrBufTrim(Encoded); - StrBufRFC2047encode(&QPEncoded, Encoded); + } - msg->cm_fields['U'] = SmashStrBuf(&QPEncoded); - FreeStrBuf(&Encoded); + else if (!strcasecmp(el, "link")) { // link to story (rss) + if (r->link != NULL) { + free(r->link); + r->link = NULL; + } + r->link = strdup(ChrPtr(r->CData)); + striplt(r->link); } - msg->cm_fields['T'] = malloc(64); - snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate); - if (ri->channel_title != NULL) { - if (StrLength(ri->channel_title) > 0) { - msg->cm_fields['O'] = strdup(ChrPtr(ri->channel_title)); + + else if ( + (!strcasecmp(el, "guid")) // unique item id (rss) + || (!strcasecmp(el, "id")) // unique item id (atom) + ) { + if (r->item_id != NULL) { + free(r->item_id); + r->item_id = NULL; } + r->item_id = strdup(ChrPtr(r->CData)); + striplt(r->item_id); } - if (ri->link == NULL) - ri->link = NewStrBufPlain(HKEY("")); -#if 0 /* temporarily disable shorter urls. */ - msg->cm_fields[TMP_SHORTER_URLS] = GetShorterUrls(ri->description); -#endif + else if ( + (!strcasecmp(el, "description")) // message text (rss) + || (!strcasecmp(el, "summary")) // message text (atom) + || (!strcasecmp(el, "content")) // message text (atom) + ) { + if (r->description != NULL) { + free(r->description); + r->description = NULL; + } + r->description = strdup(ChrPtr(r->CData)); + striplt(r->description); + } - msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ; + if (r->CData != NULL) { + FreeStrBuf(&r->CData); + r->CData = NULL; + } +} - Message = NewStrBufPlain(NULL, StrLength(ri->description)); - StrBufPlain(Message, HKEY( - "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n" - "\n")); -#if 0 /* disable shorter url for now. */ - msg->cm_fields[TMP_SHORTER_URL_OFFSET] = StrLength(Message); -#endif - StrBufAppendBuf(Message, ri->description, 0); - StrBufAppendBufPlain(Message, HKEY("

\n"), 0); +// This handler is called whenever data appears between opening and closing tags. +// +void rss_handle_data(void *data, const char *content, int length) +{ + struct rssparser *r = (struct rssparser *)data; - AppendLink(Message, ri->link, ri->linkTitle, NULL); - AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this"); - StrBufAppendBufPlain(Message, HKEY("\n"), 0); + if (r->CData == NULL) { + r->CData = NewStrBuf(); + } - RSSQueueSaveMessage(msg, recp, guid, Message, Cfg); + StrBufAppendBufPlain(r->CData, content, length, 0); } - -/* - * Begin a feed parse - */ -int rss_do_fetching(rss_aggregator *Cfg) +// Feed has been downloaded, now parse it. +// +void rss_parse_feed(StrBuf *Feed, struct rssroom *rooms) { - rss_item *ri; - - time_t now; - AsyncIO *IO; - - now = time(NULL); - - if ((Cfg->next_poll != 0) && (now < Cfg->next_poll)) - return 0; - Cfg->RefCount = 1; + struct rssparser r; + + memset(&r, 0, sizeof r); + r.rooms = rooms; + XML_Parser p = XML_ParserCreate("UTF-8"); + XML_SetElementHandler(p, rss_start_element, rss_end_element); + XML_SetCharacterDataHandler(p, rss_handle_data); + XML_SetUserData(p, (void *)&r); + XML_Parse(p, ChrPtr(Feed), StrLength(Feed), XML_TRUE); + XML_ParserFree(p); +} - ri = (rss_item*) malloc(sizeof(rss_item)); - memset(ri, 0, sizeof(rss_item)); - Cfg->Item = ri; - IO = &Cfg->IO; - IO->CitContext = CloneContext(&rss_CC); - IO->Data = Cfg; +// Add a feed/room pair into the todo list +// +void rssclient_push_todo(char *rssurl, char *roomname) +{ + struct rssurl *r = NULL; + struct rssurl *thisone = NULL; + struct rssroom *newroom = NULL; - syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(Cfg->Url)); - ParseURL(&IO->ConnectMe, Cfg->Url, 80); - CurlPrepareURL(IO->ConnectMe); + syslog(LOG_DEBUG, "rssclient_push_todo(%s, %s)", rssurl, roomname); - if (! evcurl_init(IO, -// Ctx, - NULL, - "Citadel RSS Client", - ParseRSSReply, - RSSAggregatorTerminate)) - { - syslog(LOG_DEBUG, "Unable to initialize libcurl.\n"); - return 0; + for (r=rsstodo; r!=NULL; r=r->next) { + if (!strcasecmp(r->url, rssurl)) { + thisone = r; + } + } + if (thisone == NULL) { + thisone = malloc(sizeof(struct rssurl)); + thisone->url = strdup(rssurl); + thisone->rooms = NULL; + thisone->next = rsstodo; + rsstodo = thisone; } - evcurl_handle_start(IO); - return 1; + newroom = malloc(sizeof(struct rssroom)); + newroom->room = strdup(roomname); + newroom->next = thisone->rooms; + thisone->rooms = newroom; } -void DeleteRssCfg(void *vptr) +// pull one feed (possibly multiple rooms) +// +void rss_pull_one_feed(struct rssurl *url) { - rss_aggregator *rncptr = (rss_aggregator *)vptr; + CURL *curl; + CURLcode res; + StrBuf *Downloaded = NULL; - FreeStrBuf(&rncptr->Url); - FreeStrBuf(&rncptr->rooms); - FreeStrBuf(&rncptr->CData); - FreeStrBuf(&rncptr->Key); - FreeStrBuf(&rncptr->IO.HttpReq.ReplyData); - DeleteHash(&rncptr->OtherQRnumbers); - FreeURL(&rncptr->IO.ConnectMe); + syslog(LOG_DEBUG, "rss_pull_one_feed(%s)", url->url); - if (rncptr->Item != NULL) - { - FreeStrBuf(&rncptr->Item->guid); - FreeStrBuf(&rncptr->Item->title); - FreeStrBuf(&rncptr->Item->link); - FreeStrBuf(&rncptr->Item->linkTitle); - FreeStrBuf(&rncptr->Item->reLink); - FreeStrBuf(&rncptr->Item->reLinkTitle); - FreeStrBuf(&rncptr->Item->description); - FreeStrBuf(&rncptr->Item->channel_title); - FreeStrBuf(&rncptr->Item->author_or_creator); - FreeStrBuf(&rncptr->Item->author_url); - FreeStrBuf(&rncptr->Item->author_email); - - free(rncptr->Item); + curl = curl_easy_init(); + if (!curl) { + return; } - free(rncptr); -} - -eNextState RSSAggregatorTerminate(AsyncIO *IO) -{ - rss_aggregator *rncptr = (rss_aggregator *)IO->Data; - /* - HashPos *At; - long HKLen; - const char *HK; - void *vData; - */ - pthread_mutex_lock(&RSSQueueMutex); - rncptr->RefCount --; - if (rncptr->RefCount == 0) - { - UnlinkRSSAggregator(rncptr); + Downloaded = NewStrBuf(); + + curl_easy_setopt(curl, CURLOPT_URL, url->url); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); // Follow redirects + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, CurlFillStrBuf_callback); // What to do with downloaded data + curl_easy_setopt(curl, CURLOPT_WRITEDATA, Downloaded); // Give it our StrBuf to work with + curl_easy_setopt(curl, CURLOPT_TIMEOUT, 20L); // Time out after 20 seconds + res = curl_easy_perform(curl); // Perform the request + if (res != CURLE_OK) { + syslog(LOG_WARNING, "Failed to load feed: %s", curl_easy_strerror(res)); } - pthread_mutex_unlock(&RSSQueueMutex); -/* - At = GetNewHashPos(RSSFetchUrls, 0); + curl_easy_cleanup(curl); - pthread_mutex_lock(&RSSQueueMutex); - GetHashPosFromKey(RSSFetchUrls, SKEY(rncptr->Url), At); - GetHashPos(RSSFetchUrls, At, &HKLen, &HK, &vData); - DeleteEntryFromHash(RSSFetchUrls, At); - pthread_mutex_unlock(&RSSQueueMutex); - - DeleteHashPos(&At); -*/ - return eAbort; + rss_parse_feed(Downloaded, url->rooms); // parse the feed + FreeStrBuf(&Downloaded); // free the downloaded feed data } -/* - * Scan a room's netconfig to determine whether it is requesting any RSS feeds - */ -void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) + +// We have a list, now download the feeds +// +void rss_pull_feeds(void) { - StrBuf *CfgData=NULL; - StrBuf *CfgType; - StrBuf *Line; - rss_room_counter *Count = NULL; - struct stat statbuf; - char filename[PATH_MAX]; - int fd; - int Done; - rss_aggregator *rncptr = NULL; - rss_aggregator *use_this_rncptr = NULL; - void *vptr; - const char *CfgPtr, *lPtr; - const char *Err; - - pthread_mutex_lock(&RSSQueueMutex); - if (GetHash(RSSQueueRooms, LKEY(qrbuf->QRnumber), &vptr)) - { - syslog(LOG_DEBUG, - "rssclient: [%ld] %s already in progress.\n", - qrbuf->QRnumber, - qrbuf->QRname); - pthread_mutex_unlock(&RSSQueueMutex); - return; + struct rssurl *r; + struct rssroom *rr; + + while (rsstodo != NULL) { + rss_pull_one_feed(rsstodo); + r = rsstodo; + rsstodo = rsstodo->next; + while (r->rooms != NULL) { + rr = r->rooms; + r->rooms = r->rooms->next; + free(rr->room); + free(rr); + } + free(r->url); + free(r); } - pthread_mutex_unlock(&RSSQueueMutex); - - assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir); +} - if (server_shutting_down) - return; - - /* Only do net processing for rooms that have netconfigs */ - fd = open(filename, 0); - if (fd <= 0) { - //syslog(LOG_DEBUG, "rssclient: %s no config.\n", qrbuf->QRname); - return; - } - if (server_shutting_down) - return; +// Scan a room's netconfig looking for RSS feed parsing requests +// +void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) +{ + char *serialized_config = NULL; + int num_configs = 0; + char cfgline[SIZ]; + int i = 0; - if (fstat(fd, &statbuf) == -1) { - syslog(LOG_DEBUG, "ERROR: could not stat configfile '%s' - %s\n", - filename, strerror(errno)); + serialized_config = LoadRoomNetConfigFile(qrbuf->QRnumber); + if (!serialized_config) { return; } - if (server_shutting_down) - return; - - CfgData = NewStrBufPlain(NULL, statbuf.st_size + 1); - - if (StrBufReadBLOB(CfgData, &fd, 1, statbuf.st_size, &Err) < 0) { - close(fd); - FreeStrBuf(&CfgData); - syslog(LOG_DEBUG, "ERROR: reading config '%s' - %s
\n", - filename, strerror(errno)); - return; - } - close(fd); - if (server_shutting_down) - return; - - CfgPtr = NULL; - CfgType = NewStrBuf(); - Line = NewStrBufPlain(NULL, StrLength(CfgData)); - Done = 0; - while (!Done) - { - Done = StrBufSipLine(Line, CfgData, &CfgPtr) == 0; - if (StrLength(Line) > 0) - { - lPtr = NULL; - StrBufExtract_NextToken(CfgType, Line, &lPtr, '|'); - if (!strcasecmp("rssclient", ChrPtr(CfgType))) - { - if (Count == NULL) - { - Count = malloc(sizeof(rss_room_counter)); - Count->count = 0; - } - Count->count ++; - rncptr = (rss_aggregator *) malloc(sizeof(rss_aggregator)); - memset (rncptr, 0, sizeof(rss_aggregator)); - rncptr->roomlist_parts = 1; - rncptr->Url = NewStrBuf(); - StrBufExtract_NextToken(rncptr->Url, Line, &lPtr, '|'); - - pthread_mutex_lock(&RSSQueueMutex); - GetHash(RSSFetchUrls, SKEY(rncptr->Url), &vptr); - use_this_rncptr = (rss_aggregator *)vptr; - if (use_this_rncptr != NULL) - { - /* mustn't attach to an active session */ - if (use_this_rncptr->RefCount > 0) - { - DeleteRssCfg(rncptr); - Count->count--; - } - else - { - long *QRnumber; - StrBufAppendBufPlain(use_this_rncptr->rooms, - qrbuf->QRname, - -1, 0); - if (use_this_rncptr->roomlist_parts == 1) - { - use_this_rncptr->OtherQRnumbers = NewHash(1, lFlathash); - } - QRnumber = (long*)malloc(sizeof(long)); - *QRnumber = qrbuf->QRnumber; - Put(use_this_rncptr->OtherQRnumbers, LKEY(qrbuf->QRnumber), QRnumber, NULL); - use_this_rncptr->roomlist_parts++; - } - pthread_mutex_unlock(&RSSQueueMutex); - - - FreeStrBuf(&rncptr->Url); - free(rncptr); - rncptr = NULL; - continue; - } - pthread_mutex_unlock(&RSSQueueMutex); - - rncptr->ItemType = RSS_UNSET; - - rncptr->rooms = NewStrBufPlain(qrbuf->QRname, -1); - - pthread_mutex_lock(&RSSQueueMutex); - Put(RSSFetchUrls, SKEY(rncptr->Url), rncptr, DeleteRssCfg); - pthread_mutex_unlock(&RSSQueueMutex); + num_configs = num_tokens(serialized_config, '\n'); + for (i=0; iQRname); } - } - } - if (Count != NULL) - { - Count->QRnumber = qrbuf->QRnumber; - pthread_mutex_lock(&RSSQueueMutex); - syslog(LOG_DEBUG, "rssclient: [%ld] %s now starting.\n", - qrbuf->QRnumber, qrbuf->QRname); - Put(RSSQueueRooms, LKEY(qrbuf->QRnumber), Count, NULL); - pthread_mutex_unlock(&RSSQueueMutex); } - FreeStrBuf(&CfgData); - FreeStrBuf(&CfgType); - FreeStrBuf(&Line); + + free(serialized_config); } + /* * Scan for rooms that have RSS client requests configured */ void rssclient_scan(void) { - static int doing_rssclient = 0; - rss_aggregator *rptr = NULL; - void *vrptr = NULL; - HashPos *it; - long len; - const char *Key; + time_t now = time(NULL); /* Run no more than once every 15 minutes. */ - if ((time(NULL) - last_run) < 900) { + if ((now - last_run) < 900) { + syslog(LOG_DEBUG, + "Client: polling interval not yet reached; last run was %ldm%lds ago", + ((now - last_run) / 60), + ((now - last_run) % 60) + ); return; } - /* - * This is a simple concurrency check to make sure only one rssclient run - * is done at a time. We could do this with a mutex, but since we - * don't really require extremely fine granularity here, we'll do it - * with a static variable instead. - */ - if (doing_rssclient) return; - doing_rssclient = 1; - - syslog(LOG_DEBUG, "rssclient started\n"); + become_session(&rss_CC); + syslog(LOG_DEBUG, "rssclient started"); CtdlForEachRoom(rssclient_scan_room, NULL); - - pthread_mutex_lock(&RSSQueueMutex); - - it = GetNewHashPos(RSSFetchUrls, 0); - while (!server_shutting_down && - GetNextHashPos(RSSFetchUrls, it, &len, &Key, &vrptr) && - (vrptr != NULL)) { - rptr = (rss_aggregator *)vrptr; - if (rptr->RefCount == 0) - if (!rss_do_fetching(rptr)) - UnlinkRSSAggregator(rptr); - } - DeleteHashPos(&it); - pthread_mutex_unlock(&RSSQueueMutex); - - syslog(LOG_DEBUG, "rssclient ended\n"); - doing_rssclient = 0; + rss_pull_feeds(); + syslog(LOG_DEBUG, "rssclient ended"); + last_run = time(NULL); return; } -void rss_cleanup(void) -{ - /* citthread_mutex_destroy(&RSSQueueMutex); TODO */ - DeleteHash(&RSSFetchUrls); - DeleteHash(&RSSQueueRooms); -} - CTDL_MODULE_INIT(rssclient) { - if (threading) + if (!threading) + { + syslog(LOG_INFO, "%s", curl_version()); + CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER, PRIO_AGGR + 300); + } + else { CtdlFillSystemContext(&rss_CC, "rssclient"); - pthread_mutex_init(&RSSQueueMutex, NULL); - RSSQueueRooms = NewHash(1, lFlathash); - RSSFetchUrls = NewHash(1, NULL); - syslog(LOG_INFO, "%s\n", curl_version()); - CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER); - CtdlRegisterCleanupHook(rss_cleanup); } return "rssclient"; } +