X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Fserv_rssclient.c;h=530bed03fe6f689b8bb96eb6af547744f980f33c;hb=c855d497545dad80942a194624c111a54cd1fdc7;hp=605b0772d70524977cae7784f783954ecf8784ea;hpb=a2fda4eafb51bbf58c04471522aa2d0f116c797e;p=citadel.git diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index 605b0772d..530bed03f 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -1,8 +1,21 @@ /* - * $Id: serv_rssclient.c 5652 2007-10-29 20:14:48Z ajc $ - * * Bring external RSS feeds into rooms. * + * Copyright (c) 2007-2010 by the citadel.org team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include @@ -26,6 +39,7 @@ #include #include #include +#include #include #include "citadel.h" #include "server.h" @@ -33,637 +47,686 @@ #include "support.h" #include "config.h" #include "threads.h" -#include "room_ops.h" #include "ctdl_module.h" -#include "clientsocket.h" #include "msgbase.h" #include "parsedate.h" #include "database.h" #include "citadel_dirs.h" #include "md5.h" +#include "context.h" +#include "event_client.h" +#include "rss_atom_parser.h" + + +#define TMP_MSGDATA 0xFF +#define TMP_SHORTER_URL_OFFSET 0xFE +#define TMP_SHORTER_URLS 0xFD +time_t last_run = 0L; -struct rssnetcfg { - struct rssnetcfg *next; - char url[256]; - char *rooms; -}; - -struct rss_item { - char *chardata; - int chardata_len; - char *roomlist; - int done_parsing; - char *guid; - char *title; - char *link; - char *description; - time_t pubdate; - char channel_title[256]; - int item_tag_nesting; -}; +pthread_mutex_t RSSQueueMutex; /* locks the access to the following vars: */ +HashList *RSSQueueRooms = NULL; /* rss_room_counter */ +HashList *RSSFetchUrls = NULL; /*->rss_aggregator;->RefCount access locked*/ + +eNextState RSSAggregator_Terminate(AsyncIO *IO); +eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO); +struct CitContext rss_CC; struct rssnetcfg *rnclist = NULL; +void AppendLink(StrBuf *Message, + StrBuf *link, + StrBuf *LinkTitle, + const char *Title) +{ + if (StrLength(link) > 0) + { + StrBufAppendBufPlain(Message, HKEY(""), 0); + if (StrLength(LinkTitle) > 0) + StrBufAppendBuf(Message, LinkTitle, 0); + else if ((Title != NULL) && !IsEmptyStr(Title)) + StrBufAppendBufPlain(Message, Title, -1, 0); + else + StrBufAppendBuf(Message, link, 0); + StrBufAppendBufPlain(Message, HKEY("
\n"), 0); + } +} -/* - * Commit a fetched and parsed RSS item to disk - */ -void rss_save_item(struct rss_item *ri) { +void DeleteRoomReference(long QRnumber) +{ + HashPos *At; + long HKLen; + const char *HK; + void *vData = NULL; + rss_room_counter *pRoomC; - struct MD5Context md5context; - u_char rawdigest[MD5_DIGEST_LEN]; - int i; - char utmsgid[SIZ]; - struct cdbdata *cdbut; - struct UseTable ut; - struct CtdlMessage *msg; - struct recptypes *recp = NULL; - int msglen = 0; + At = GetNewHashPos(RSSQueueRooms, 0); - recp = (struct recptypes *) malloc(sizeof(struct recptypes)); - if (recp == NULL) return; - memset(recp, 0, sizeof(struct recptypes)); - recp->recp_room = strdup(ri->roomlist); - recp->num_room = num_tokens(ri->roomlist, '|'); - recp->recptypes_magic = RECPTYPES_MAGIC; - - /* Construct a GUID to use in the S_USETABLE table. - * If one is not present in the item itself, make one up. - */ - if (ri->guid != NULL) { - snprintf(utmsgid, sizeof utmsgid, "rss/%s", ri->guid); - } - else { - MD5Init(&md5context); - if (ri->title != NULL) { - MD5Update(&md5context, ri->title, strlen(ri->title)); - } - if (ri->link != NULL) { - MD5Update(&md5context, ri->link, strlen(ri->link)); - } - MD5Final(rawdigest, &md5context); - for (i=0; icount --; + if (pRoomC->count == 0) + DeleteEntryFromHash(RSSQueueRooms, At); } - strcat(utmsgid, "_rss2ctdl"); } + DeleteHashPos(&At); +} - /* Find out if we've already seen this item */ - cdbut = cdb_fetch(CDB_USETABLE, utmsgid, strlen(utmsgid)); - if (cdbut != NULL) { - /* Item has already been seen */ - CtdlLogPrintf(CTDL_DEBUG, "%s has already been seen\n", utmsgid); - cdb_free(cdbut); +void UnlinkRooms(rss_aggregator *Cfg) +{ + DeleteRoomReference(Cfg->QRnumber); + if (Cfg->OtherQRnumbers != NULL) + { + long HKLen; + const char *HK; + HashPos *At; + void *vData; + + At = GetNewHashPos(Cfg->OtherQRnumbers, 0); + while (! server_shutting_down && + GetNextHashPos(Cfg->OtherQRnumbers, + At, + &HKLen, &HK, + &vData) && + (vData != NULL)) + { + long *lData = (long*) vData; + DeleteRoomReference(*lData); + } - /* rewrite the record anyway, to update the timestamp */ - strcpy(ut.ut_msgid, utmsgid); - ut.ut_timestamp = time(NULL); - cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) ); + DeleteHashPos(&At); } - else { - /* Item has not been seen, so save it. */ +} - if (ri->description == NULL) ri->description = strdup(""); - for (i=strlen(ri->description); i>=0; --i) { - if (isspace(ri->description[i])) { - ri->description[i] = ' '; - } - } +void UnlinkRSSAggregator(rss_aggregator *Cfg) +{ + HashPos *At; - msg = malloc(sizeof(struct CtdlMessage)); - memset(msg, 0, sizeof(struct CtdlMessage)); - msg->cm_magic = CTDLMESSAGE_MAGIC; - msg->cm_anon_type = MES_NORMAL; - msg->cm_format_type = FMT_RFC822; - msg->cm_fields['A'] = strdup("rss"); - msg->cm_fields['N'] = strdup(NODENAME); - msg->cm_fields['U'] = strdup(ri->title); - msg->cm_fields['T'] = malloc(64); - snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate); - if (!IsEmptyStr(ri->channel_title)) { - msg->cm_fields['O'] = strdup(ri->channel_title); - } + UnlinkRooms(Cfg); - msglen = 1024 + strlen(ri->link) + strlen(ri->description) ; - msg->cm_fields['M'] = malloc(msglen); - snprintf(msg->cm_fields['M'], msglen, - "Content-type: text/html\r\n\r\n" - "\n" - "%s

\n" - "%s\n" - "\n" - , - ri->description, - ri->link, ri->link - ); - - CtdlSubmitMsg(msg, recp, NULL); - CtdlFreeMessage(msg); - - /* write the uidl to the use table so we don't store this item again */ - strcpy(ut.ut_msgid, utmsgid); - ut.ut_timestamp = time(NULL); - cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) ); + At = GetNewHashPos(RSSFetchUrls, 0); + if (GetHashPosFromKey(RSSFetchUrls, SKEY(Cfg->Url), At)) + { + DeleteEntryFromHash(RSSFetchUrls, At); } - free_recipients(recp); + DeleteHashPos(&At); + last_run = time(NULL); } +void FreeNetworkSaveMessage (void *vMsg) +{ + networker_save_message *Msg = (networker_save_message *) vMsg; + CtdlFreeMessageContents(&Msg->Msg); + FreeStrBuf(&Msg->Message); + FreeStrBuf(&Msg->MsgGUID); + free(Msg); +} -/* - * Convert an RDF/RSS datestamp into a time_t - */ -time_t rdf_parsedate(char *p) +eNextState AbortNetworkSaveMessage (AsyncIO *IO) { - struct tm tm; - time_t t = 0; - - if (!p) return 0L; - if (strlen(p) < 10) return 0L; + return eAbort; ///TODO +} - memset(&tm, 0, sizeof tm); +eNextState RSSSaveMessage(AsyncIO *IO) +{ + long len; + const char *Key; + rss_aggregator *Ctx = (rss_aggregator *) IO->Data; - /* YYYY-MM-DDTHH:MM format... - */ - if ( (p[4] == '-') && (p[7] == '-') ) { - tm.tm_year = atoi(&p[0]) - 1900; - tm.tm_mon = atoi(&p[5]) - 1; - tm.tm_mday = atoi(&p[8]); - if ( (p[10] == 'T') && (p[13] == ':') ) { - tm.tm_hour = atoi(&p[11]); - tm.tm_min = atoi(&p[14]); - } - return mktime(&tm); - } + Ctx->ThisMsg->Msg.cm_fields['M'] = SmashStrBuf(&Ctx->ThisMsg->Message); - /* hmm... try RFC822 date stamp format */ + CtdlSubmitMsg(&Ctx->ThisMsg->Msg, &Ctx->recp, NULL, 0); - t = parsedate(p); - if (t > 0) return(t); + /* write the uidl to the use table so we don't store this item again */ + cdb_store(CDB_USETABLE, + SKEY(Ctx->ThisMsg->MsgGUID), + &Ctx->ThisMsg->ut, + sizeof(struct UseTable) ); - /* yeesh. ok, just return the current date and time. */ - return(time(NULL)); + if (GetNextHashPos(Ctx->Messages, + Ctx->Pos, + &len, &Key, + (void**) &Ctx->ThisMsg)) + return NextDBOperation(IO, RSS_FetchNetworkUsetableEntry); + else + return eAbort; } +eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO) +{ + const char *Key; + long len; + struct cdbdata *cdbut; + rss_aggregator *Ctx = (rss_aggregator *) IO->Data; + /* Find out if we've already seen this item */ + strcpy(Ctx->ThisMsg->ut.ut_msgid, + ChrPtr(Ctx->ThisMsg->MsgGUID)); /// TODO + Ctx->ThisMsg->ut.ut_timestamp = time(NULL); -void rss_xml_start(void *data, const char *supplied_el, const char **attr) { - struct rss_item *ri = (struct rss_item *) data; - char el[256]; - char *sep = NULL; + cdbut = cdb_fetch(CDB_USETABLE, SKEY(Ctx->ThisMsg->MsgGUID)); +#ifndef DEBUG_RSS + if (cdbut != NULL) { + /* Item has already been seen */ + EV_syslog(LOG_DEBUG, + "%s has already been seen\n", + ChrPtr(Ctx->ThisMsg->MsgGUID)); + cdb_free(cdbut); - /* Axe the namespace, we don't care about it */ - safestrncpy(el, supplied_el, sizeof el); - while (sep = strchr(el, ':'), sep) { - strcpy(el, ++sep); + /* rewrite the record anyway, to update the timestamp */ + cdb_store(CDB_USETABLE, + SKEY(Ctx->ThisMsg->MsgGUID), + &Ctx->ThisMsg->ut, sizeof(struct UseTable) ); + + if (GetNextHashPos(Ctx->Messages, + Ctx->Pos, + &len, &Key, + (void**) &Ctx->ThisMsg)) + return NextDBOperation( + IO, + RSS_FetchNetworkUsetableEntry); + else + return eAbort; } - - if (!strcasecmp(el, "item")) { - ++ri->item_tag_nesting; - - /* Initialize the feed item data structure */ - if (ri->guid != NULL) free(ri->guid); - ri->guid = NULL; - if (ri->title != NULL) free(ri->title); - ri->title = NULL; - if (ri->link != NULL) free(ri->link); - ri->link = NULL; - if (ri->description != NULL) free(ri->description); - ri->description = NULL; - - /* Throw away any existing character data */ - if (ri->chardata_len > 0) { - free(ri->chardata); - ri->chardata = 0; - ri->chardata_len = 0; - } + else +#endif + { + NextDBOperation(IO, RSSSaveMessage); + return eSendMore; } +} +/* + * Commit a fetched and parsed RSS item to disk + */ +void rss_save_item(rss_item *ri, rss_aggregator *Cfg) +{ + networker_save_message *SaveMsg; + struct MD5Context md5context; + u_char rawdigest[MD5_DIGEST_LEN]; + int msglen = 0; + StrBuf *Message; + StrBuf *guid; + AsyncIO *IO = &Cfg->IO; + int n; -} - -void rss_xml_end(void *data, const char *supplied_el) { - struct rss_item *ri = (struct rss_item *) data; - char el[256]; - char *sep = NULL; + SaveMsg = (networker_save_message *) malloc( + sizeof(networker_save_message)); + memset(SaveMsg, 0, sizeof(networker_save_message)); - /* Axe the namespace, we don't care about it */ - safestrncpy(el, supplied_el, sizeof el); - while (sep = strchr(el, ':'), sep) { - strcpy(el, ++sep); + /* Construct a GUID to use in the S_USETABLE table. + * If one is not present in the item itself, make one up. + */ + if (ri->guid != NULL) { + StrBufSpaceToBlank(ri->guid); + StrBufTrim(ri->guid); + guid = NewStrBufPlain(HKEY("rss/")); + StrBufAppendBuf(guid, ri->guid, 0); } - - if ( (!strcasecmp(el, "title")) && (ri->item_tag_nesting == 0) && (ri->chardata != NULL) ) { - safestrncpy(ri->channel_title, ri->chardata, sizeof ri->channel_title); - striplt(ri->channel_title); + else { + MD5Init(&md5context); + if (ri->title != NULL) { + MD5Update(&md5context, + (const unsigned char*)SKEY(ri->title)); + } + if (ri->link != NULL) { + MD5Update(&md5context, + (const unsigned char*)SKEY(ri->link)); + } + MD5Final(rawdigest, &md5context); + guid = NewStrBufPlain(NULL, + MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/); + StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN); + StrBufAppendBufPlain(guid, HKEY("_rss2ctdl"), 0); } - if ( (!strcasecmp(el, "guid")) && (ri->chardata != NULL) ) { - if (ri->guid != NULL) free(ri->guid); - striplt(ri->chardata); - ri->guid = strdup(ri->chardata); - } + /* translate Item into message. */ + EVM_syslog(LOG_DEBUG, "RSS: translating item...\n"); + if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY("")); + StrBufSpaceToBlank(ri->description); + SaveMsg->Msg.cm_magic = CTDLMESSAGE_MAGIC; + SaveMsg->Msg.cm_anon_type = MES_NORMAL; + SaveMsg->Msg.cm_format_type = FMT_RFC822; - if ( (!strcasecmp(el, "title")) && (ri->chardata != NULL) ) { - if (ri->title != NULL) free(ri->title); - striplt(ri->chardata); - ri->title = strdup(ri->chardata); - } + if (ri->guid != NULL) { + SaveMsg->Msg.cm_fields['E'] = strdup(ChrPtr(ri->guid)); + } + + if (ri->author_or_creator != NULL) { + char *From; + StrBuf *Encoded = NULL; + int FromAt; + + From = html_to_ascii(ChrPtr(ri->author_or_creator), + StrLength(ri->author_or_creator), + 512, 0); + StrBufPlain(ri->author_or_creator, From, -1); + StrBufTrim(ri->author_or_creator); + free(From); + + FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL; + if (!FromAt && StrLength (ri->author_email) > 0) + { + StrBufRFC2047encode(&Encoded, ri->author_or_creator); + SaveMsg->Msg.cm_fields['A'] = SmashStrBuf(&Encoded); + SaveMsg->Msg.cm_fields['P'] = + SmashStrBuf(&ri->author_email); + } + else + { + if (FromAt) + { + SaveMsg->Msg.cm_fields['A'] = + SmashStrBuf(&ri->author_or_creator); + SaveMsg->Msg.cm_fields['P'] = + strdup(SaveMsg->Msg.cm_fields['A']); + } + else + { + StrBufRFC2047encode(&Encoded, + ri->author_or_creator); + SaveMsg->Msg.cm_fields['A'] = + SmashStrBuf(&Encoded); + SaveMsg->Msg.cm_fields['P'] = + strdup("rss@localhost"); - if ( (!strcasecmp(el, "link")) && (ri->chardata != NULL) ) { - if (ri->link != NULL) free(ri->link); - striplt(ri->chardata); - ri->link = strdup(ri->chardata); + } + if (ri->pubdate <= 0) { + ri->pubdate = time(NULL); + } + } } + else { + SaveMsg->Msg.cm_fields['A'] = strdup("rss"); + } + + SaveMsg->Msg.cm_fields['N'] = strdup(NODENAME); + if (ri->title != NULL) { + long len; + char *Sbj; + StrBuf *Encoded, *QPEncoded; + + QPEncoded = NULL; + StrBufSpaceToBlank(ri->title); + len = StrLength(ri->title); + Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0); + len = strlen(Sbj); + if (Sbj[len - 1] == '\n') + { + len --; + Sbj[len] = '\0'; + } + Encoded = NewStrBufPlain(Sbj, len); + free(Sbj); - if ( (!strcasecmp(el, "description")) && (ri->chardata != NULL) ) { - if (ri->description != NULL) free(ri->description); - ri->description = strdup(ri->chardata); - } + StrBufTrim(Encoded); + StrBufRFC2047encode(&QPEncoded, Encoded); - if ( ((!strcasecmp(el, "pubdate")) || (!strcasecmp(el, "date"))) && (ri->chardata != NULL) ) { - striplt(ri->chardata); - ri->pubdate = rdf_parsedate(ri->chardata); + SaveMsg->Msg.cm_fields['U'] = SmashStrBuf(&QPEncoded); + FreeStrBuf(&Encoded); } - - if (!strcasecmp(el, "item")) { - --ri->item_tag_nesting; - rss_save_item(ri); + SaveMsg->Msg.cm_fields['T'] = malloc(64); + snprintf(SaveMsg->Msg.cm_fields['T'], 64, "%ld", ri->pubdate); + if (ri->channel_title != NULL) { + if (StrLength(ri->channel_title) > 0) { + SaveMsg->Msg.cm_fields['O'] = + strdup(ChrPtr(ri->channel_title)); + } } + if (ri->link == NULL) + ri->link = NewStrBufPlain(HKEY("")); - if ( (!strcasecmp(el, "rss")) || (!strcasecmp(el, "rdf")) ) { - CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n"); - ri->done_parsing = 1; - } +#if 0 /* temporarily disable shorter urls. */ + SaveMsg->Msg.cm_fields[TMP_SHORTER_URLS] = + GetShorterUrls(ri->description); +#endif - if (ri->chardata_len > 0) { - free(ri->chardata); - ri->chardata = 0; - ri->chardata_len = 0; - } + msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ; -} + Message = NewStrBufPlain(NULL, StrLength(ri->description)); + + StrBufPlain(Message, HKEY( + "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n" + "\n")); +#if 0 /* disable shorter url for now. */ + SaveMsg->Msg.cm_fields[TMP_SHORTER_URL_OFFSET] = StrLength(Message); +#endif + StrBufAppendBuf(Message, ri->description, 0); + StrBufAppendBufPlain(Message, HKEY("

\n"), 0); + AppendLink(Message, ri->link, ri->linkTitle, NULL); + AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this"); + StrBufAppendBufPlain(Message, HKEY("\n"), 0); -/* - * This callback stores up the data which appears in between tags. - */ -void rss_xml_chardata(void *data, const XML_Char *s, int len) { - struct rss_item *ri = (struct rss_item *) data; - int old_len; - int new_len; - char *new_buffer; - - old_len = ri->chardata_len; - new_len = old_len + len; - new_buffer = realloc(ri->chardata, new_len + 1); - if (new_buffer != NULL) { - memcpy(&new_buffer[old_len], s, len); - new_buffer[new_len] = 0; - ri->chardata = new_buffer; - ri->chardata_len = new_len; - } + SaveMsg->MsgGUID = guid; + SaveMsg->Message = Message; + + n = GetCount(Cfg->Messages) + 1; + Put(Cfg->Messages, IKEY(n), SaveMsg, FreeNetworkSaveMessage); } -/* - * Parse a URL into host, port number, and resource identifier. +/* + * Begin a feed parse */ -int parse_url(char *url, char *hostname, int *port, char *identifier) +int rss_do_fetching(rss_aggregator *Cfg) { - char protocol[1024]; - char scratch[1024]; - char *ptr = NULL; - char *nptr = NULL; - - strcpy(scratch, url); - ptr = (char *)strchr(scratch, ':'); - if (!ptr) { - return(1); /* no protocol specified */ - } + rss_item *ri; + time_t now; - strcpy(ptr, ""); - strcpy(protocol, scratch); - if (strcmp(protocol, "http")) { - return(2); /* not HTTP */ - } + now = time(NULL); - strcpy(scratch, url); - ptr = (char *) strstr(scratch, "//"); - if (!ptr) { - return(3); /* no server specified */ - } - ptr += 2; + if ((Cfg->next_poll != 0) && (now < Cfg->next_poll)) + return 0; - strcpy(hostname, ptr); - nptr = (char *)strchr(ptr, ':'); - if (!nptr) { - *port = 80; /* default */ - nptr = (char *)strchr(hostname, '/'); - } - else { - sscanf(nptr, ":%d", port); - nptr = (char *)strchr(hostname, ':'); - } + ri = (rss_item*) malloc(sizeof(rss_item)); + memset(ri, 0, sizeof(rss_item)); + Cfg->Item = ri; - if (nptr) { - *nptr = '\0'; + if (! InitcURLIOStruct(&Cfg->IO, + Cfg, + "Citadel RSS Client", + RSSAggregator_ParseReply, + RSSAggregator_Terminate, + RSSAggregator_ShutdownAbort)) + { + syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); + return 0; } - nptr = (char *)strchr(ptr, '/'); - - if (!nptr) { - return(4); /* no url specified */ - } - - strcpy(identifier, nptr); - return(0); + safestrncpy(((CitContext*)Cfg->IO.CitContext)->cs_host, + ChrPtr(Cfg->Url), + sizeof(((CitContext*)Cfg->IO.CitContext)->cs_host)); + + syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(Cfg->Url)); + ParseURL(&Cfg->IO.ConnectMe, Cfg->Url, 80); + CurlPrepareURL(Cfg->IO.ConnectMe); + + QueueCurlContext(&Cfg->IO); + return 1; } -/* - * Begin a feed parse - */ -void rss_do_fetching(char *url, char *rooms) { - char buf[1024]; - char rsshost[1024]; - int rssport = 80; - char rssurl[1024]; - struct rss_item ri; - XML_Parser xp; - int sock = (-1); - int got_bytes = (-1); - int redirect_count = 0; - - /* Parse the URL */ - if (parse_url(url, rsshost, &rssport, rssurl) != 0) { - CtdlLogPrintf(CTDL_ALERT, "Invalid URL: %s\n", url); - } - - if (CtdlThreadCheckStop()) - return; +void DeleteRssCfg(void *vptr) +{ + rss_aggregator *rncptr = (rss_aggregator *)vptr; + AsyncIO *IO = &rncptr->IO; + EVM_syslog(LOG_DEBUG, "RSS: destroying\n"); - xp = XML_ParserCreateNS("UTF-8", ':'); - if (!xp) { - CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n"); - return; - } + FreeStrBuf(&rncptr->Url); + FreeStrBuf(&rncptr->rooms); + FreeStrBuf(&rncptr->CData); + FreeStrBuf(&rncptr->Key); + FreeStrBuf(&rncptr->IO.HttpReq.ReplyData); + DeleteHash(&rncptr->OtherQRnumbers); + FreeURL(&rncptr->IO.ConnectMe); + + DeleteHashPos (&rncptr->Pos); + DeleteHash (&rncptr->Messages); + if (rncptr->recp.recp_room != NULL) + free(rncptr->recp.recp_room); - memset(&ri, 0, sizeof(struct rss_item)); - ri.roomlist = rooms; - XML_SetElementHandler(xp, rss_xml_start, rss_xml_end); - XML_SetCharacterDataHandler(xp, rss_xml_chardata); - XML_SetUserData(xp, &ri); - if (CtdlThreadCheckStop()) + if (rncptr->Item != NULL) { - XML_ParserFree(xp); - return; - } - -retry: CtdlLogPrintf(CTDL_NOTICE, "Connecting to <%s>\n", rsshost); - sprintf(buf, "%d", rssport); - sock = sock_connect(rsshost, buf, "tcp"); - if (sock >= 0) { - CtdlLogPrintf(CTDL_DEBUG, "Connected!\n"); - - if (CtdlThreadCheckStop()) - goto shutdown ; - - snprintf(buf, sizeof buf, "GET %s HTTP/1.0", rssurl); - CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf); - sock_puts(sock, buf); - - if (CtdlThreadCheckStop()) - goto shutdown ; - - snprintf(buf, sizeof buf, "Host: %s", rsshost); - CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf); - sock_puts(sock, buf); - - if (CtdlThreadCheckStop()) - goto shutdown ; - - snprintf(buf, sizeof buf, "User-Agent: %s", CITADEL); - CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf); - sock_puts(sock, buf); - - if (CtdlThreadCheckStop()) - goto shutdown ; - - snprintf(buf, sizeof buf, "Accept: */*"); - CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf); - sock_puts(sock, buf); - - if (CtdlThreadCheckStop()) - goto shutdown ; - - sock_puts(sock, ""); - - if (CtdlThreadCheckStop()) - goto shutdown ; - - if (sock_getln(sock, buf, sizeof buf) >= 0) { - CtdlLogPrintf(CTDL_DEBUG, ">%s\n", buf); - remove_token(buf, 0, ' '); - - /* 200 OK */ - if (buf[0] == '2') { - - while (got_bytes = sock_getln(sock, buf, sizeof buf), - (got_bytes >= 0 && (strcmp(buf, "")) && (strcmp(buf, "\r"))) ) { - if (CtdlThreadCheckStop()) - goto shutdown ; - /* discard headers */ - } - - while (got_bytes = sock_read(sock, buf, sizeof buf, 0), - ((got_bytes>=0) && (ri.done_parsing == 0)) ) { - if (CtdlThreadCheckStop()) - goto shutdown ; - XML_Parse(xp, buf, got_bytes, 0); - } - if (ri.done_parsing == 0) XML_Parse(xp, "", 0, 1); - } + FreeStrBuf(&rncptr->Item->guid); + FreeStrBuf(&rncptr->Item->title); + FreeStrBuf(&rncptr->Item->link); + FreeStrBuf(&rncptr->Item->linkTitle); + FreeStrBuf(&rncptr->Item->reLink); + FreeStrBuf(&rncptr->Item->reLinkTitle); + FreeStrBuf(&rncptr->Item->description); + FreeStrBuf(&rncptr->Item->channel_title); + FreeStrBuf(&rncptr->Item->author_or_creator); + FreeStrBuf(&rncptr->Item->author_url); + FreeStrBuf(&rncptr->Item->author_email); + + free(rncptr->Item); + } + free(rncptr); +} - /* 30X redirect */ - else if ( (!strncmp(buf, "30", 2)) && (redirect_count < 16) ) { - while (got_bytes = sock_getln(sock, buf, sizeof buf), - (got_bytes >= 0 && (strcmp(buf, "")) && (strcmp(buf, "\r"))) ) { - if (CtdlThreadCheckStop()) - goto shutdown ; - if (!strncasecmp(buf, "Location:", 9)) { - ++redirect_count; - strcpy(buf, &buf[9]); - striplt(buf); - if (parse_url(buf, rsshost, &rssport, rssurl) == 0) { - sock_close(sock); - goto retry; - } - else { - CtdlLogPrintf(CTDL_ALERT, "Invalid URL: %s\n", buf); - } - } - } - } +eNextState RSSAggregator_Terminate(AsyncIO *IO) +{ + rss_aggregator *rncptr = (rss_aggregator *)IO->Data; - } -shutdown: - sock_close(sock); - } - else { - CtdlLogPrintf(CTDL_ERR, "Could not connect: %s\n", strerror(errno)); - } + EVM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); - XML_ParserFree(xp); - - /* Free the feed item data structure */ - if (ri.guid != NULL) free(ri.guid); - ri.guid = NULL; - if (ri.title != NULL) free(ri.title); - ri.title = NULL; - if (ri.link != NULL) free(ri.link); - ri.link = NULL; - if (ri.description != NULL) free(ri.description); - ri.description = NULL; - if (ri.chardata_len > 0) { - free(ri.chardata); - ri.chardata = 0; - ri.chardata_len = 0; - } + + UnlinkRSSAggregator(rncptr); + return eAbort; } +eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO) +{ + const char *pUrl; + rss_aggregator *rncptr = (rss_aggregator *)IO->Data; + + pUrl = IO->ConnectMe->PlainUrl; + if (pUrl == NULL) + pUrl = ""; + + EV_syslog(LOG_DEBUG, "RSS: Aborting by shutdown: %s.\n", pUrl); + UnlinkRSSAggregator(rncptr); + return eAbort; +} + /* * Scan a room's netconfig to determine whether it is requesting any RSS feeds */ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) { + StrBuf *CfgData=NULL; + StrBuf *CfgType; + StrBuf *Line; + rss_room_counter *Count = NULL; + struct stat statbuf; char filename[PATH_MAX]; - char buf[1024]; - char instr[32]; - FILE *fp; - char feedurl[256]; - struct rssnetcfg *rncptr = NULL; - struct rssnetcfg *use_this_rncptr = NULL; - int len = 0; - char *ptr = NULL; + int fd; + int Done; + rss_aggregator *rncptr = NULL; + rss_aggregator *use_this_rncptr = NULL; + void *vptr; + const char *CfgPtr, *lPtr; + const char *Err; + + pthread_mutex_lock(&RSSQueueMutex); + if (GetHash(RSSQueueRooms, LKEY(qrbuf->QRnumber), &vptr)) + { + syslog(LOG_DEBUG, + "rssclient: [%ld] %s already in progress.\n", + qrbuf->QRnumber, + qrbuf->QRname); + pthread_mutex_unlock(&RSSQueueMutex); + return; + } + pthread_mutex_unlock(&RSSQueueMutex); assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir); - if (CtdlThreadCheckStop()) + if (server_shutting_down) return; - + /* Only do net processing for rooms that have netconfigs */ - fp = fopen(filename, "r"); - if (fp == NULL) { + fd = open(filename, 0); + if (fd <= 0) { + /* syslog(LOG_DEBUG, + "rssclient: %s no config.\n", + qrbuf->QRname); */ return; } - while (fgets(buf, sizeof buf, fp) != NULL && !CtdlThreadCheckStop()) { - buf[strlen(buf)-1] = 0; - - extract_token(instr, buf, 0, '|', sizeof instr); - if (!strcasecmp(instr, "rssclient")) { + if (server_shutting_down) + return; - use_this_rncptr = NULL; + if (fstat(fd, &statbuf) == -1) { + syslog(LOG_DEBUG, + "ERROR: could not stat configfile '%s' - %s\n", + filename, + strerror(errno)); + return; + } - extract_token(feedurl, buf, 1, '|', sizeof feedurl); + if (server_shutting_down) + return; - /* If any other rooms have requested the same feed, then we will just add this - * room to the target list for that client request. - */ - for (rncptr=rnclist; rncptr!=NULL; rncptr=rncptr->next) { - if (!strcmp(rncptr->url, feedurl)) { - use_this_rncptr = rncptr; - } - } + CfgData = NewStrBufPlain(NULL, statbuf.st_size + 1); - /* Otherwise create a new client request */ - if (use_this_rncptr == NULL) { - rncptr = (struct rssnetcfg *) malloc(sizeof(struct rssnetcfg)); - if (rncptr != NULL) { - rncptr->next = rnclist; - safestrncpy(rncptr->url, feedurl, sizeof rncptr->url); - rncptr->rooms = NULL; - rnclist = rncptr; - use_this_rncptr = rncptr; - } - } + if (StrBufReadBLOB(CfgData, &fd, 1, statbuf.st_size, &Err) < 0) { + close(fd); + FreeStrBuf(&CfgData); + syslog(LOG_DEBUG, "ERROR: reading config '%s' - %s
\n", + filename, strerror(errno)); + return; + } + close(fd); + if (server_shutting_down) + return; - /* Add the room name to the request */ - if (use_this_rncptr != NULL) { - if (use_this_rncptr->rooms == NULL) { - rncptr->rooms = strdup(qrbuf->QRname); - } - else { - len = strlen(use_this_rncptr->rooms) + strlen(qrbuf->QRname) + 5; - ptr = realloc(use_this_rncptr->rooms, len); - if (ptr != NULL) { - strcat(ptr, "|"); - strcat(ptr, qrbuf->QRname); - use_this_rncptr->rooms = ptr; - } - } - } + CfgPtr = NULL; + CfgType = NewStrBuf(); + Line = NewStrBufPlain(NULL, StrLength(CfgData)); + Done = 0; + while (!Done) + { + Done = StrBufSipLine(Line, CfgData, &CfgPtr) == 0; + if (StrLength(Line) > 0) + { + lPtr = NULL; + StrBufExtract_NextToken(CfgType, Line, &lPtr, '|'); + if (!strcasecmp("rssclient", ChrPtr(CfgType))) + { + if (Count == NULL) + { + Count = malloc(sizeof(rss_room_counter)); + Count->count = 0; + } + Count->count ++; + rncptr = (rss_aggregator *) malloc(sizeof(rss_aggregator)); + memset (rncptr, 0, sizeof(rss_aggregator)); + rncptr->roomlist_parts = 1; + rncptr->Url = NewStrBuf(); + StrBufExtract_NextToken(rncptr->Url, Line, &lPtr, '|'); + + pthread_mutex_lock(&RSSQueueMutex); + GetHash(RSSFetchUrls, SKEY(rncptr->Url), &vptr); + use_this_rncptr = (rss_aggregator *)vptr; + if (use_this_rncptr != NULL) + { + long *QRnumber; + StrBufAppendBufPlain(use_this_rncptr->rooms, + qrbuf->QRname, + -1, 0); + if (use_this_rncptr->roomlist_parts == 1) + { + use_this_rncptr->OtherQRnumbers = + NewHash(1, lFlathash); + } + QRnumber = (long*)malloc(sizeof(long)); + *QRnumber = qrbuf->QRnumber; + Put(use_this_rncptr->OtherQRnumbers, + LKEY(qrbuf->QRnumber), + QRnumber, + NULL); + use_this_rncptr->roomlist_parts++; + + pthread_mutex_unlock(&RSSQueueMutex); + + FreeStrBuf(&rncptr->Url); + free(rncptr); + rncptr = NULL; + continue; + } + pthread_mutex_unlock(&RSSQueueMutex); + + rncptr->ItemType = RSS_UNSET; + + rncptr->rooms = NewStrBufPlain(qrbuf->QRname, -1); + + pthread_mutex_lock(&RSSQueueMutex); + Put(RSSFetchUrls, SKEY(rncptr->Url), rncptr, DeleteRssCfg); + pthread_mutex_unlock(&RSSQueueMutex); } - + } } - - fclose(fp); - + if (Count != NULL) + { + Count->QRnumber = qrbuf->QRnumber; + pthread_mutex_lock(&RSSQueueMutex); + syslog(LOG_DEBUG, "rssclient: [%ld] %s now starting.\n", + qrbuf->QRnumber, qrbuf->QRname); + Put(RSSQueueRooms, LKEY(qrbuf->QRnumber), Count, NULL); + pthread_mutex_unlock(&RSSQueueMutex); + } + FreeStrBuf(&CfgData); + FreeStrBuf(&CfgType); + FreeStrBuf(&Line); } /* * Scan for rooms that have RSS client requests configured */ -void *rssclient_scan(void *args) { - static time_t last_run = 0L; +void rssclient_scan(void) { static int doing_rssclient = 0; - struct rssnetcfg *rptr = NULL; - struct CitContext rssclientCC; - - /* Give this thread its own private CitContext */ - memset(&rssclientCC, 0, sizeof(struct CitContext)); - rssclientCC.internal_pgm = 1; - rssclientCC.cs_pid = 0; - pthread_setspecific(MyConKey, (void *)&rssclientCC ); - - CtdlThreadAllocTSD(); + rss_aggregator *rptr = NULL; + void *vrptr = NULL; + HashPos *it; + long len; + const char *Key; + + /* Run no more than once every 15 minutes. */ + if ((time(NULL) - last_run) < 900) { + return; + } /* - * This is a simple concurrency check to make sure only one rssclient run - * is done at a time. We could do this with a mutex, but since we + * This is a simple concurrency check to make sure only one rssclient + * run is done at a time. We could do this with a mutex, but since we * don't really require extremely fine granularity here, we'll do it * with a static variable instead. */ - if (doing_rssclient) return NULL; + if (doing_rssclient) return; doing_rssclient = 1; + if ((GetCount(RSSQueueRooms) > 0) || (GetCount(RSSFetchUrls) > 0)) + return; + + become_session(&rss_CC); + syslog(LOG_DEBUG, "rssclient started\n"); + CtdlForEachRoom(rssclient_scan_room, NULL); - CtdlLogPrintf(CTDL_DEBUG, "rssclient started\n"); - ForEachRoom(rssclient_scan_room, NULL); + pthread_mutex_lock(&RSSQueueMutex); - while (rnclist != NULL && !CtdlThreadCheckStop()) { - rss_do_fetching(rnclist->url, rnclist->rooms); - rptr = rnclist; - rnclist = rnclist->next; - if (rptr->rooms != NULL) free(rptr->rooms); - free(rptr); + it = GetNewHashPos(RSSFetchUrls, 0); + while (!server_shutting_down && + GetNextHashPos(RSSFetchUrls, it, &len, &Key, &vrptr) && + (vrptr != NULL)) { + rptr = (rss_aggregator *)vrptr; + if (!rss_do_fetching(rptr)) + UnlinkRSSAggregator(rptr); } + DeleteHashPos(&it); + pthread_mutex_unlock(&RSSQueueMutex); - CtdlLogPrintf(CTDL_DEBUG, "rssclient ended\n"); - last_run = time(NULL); + syslog(LOG_DEBUG, "rssclient ended\n"); doing_rssclient = 0; - if (!CtdlThreadCheckStop()) - CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, last_run + config.c_net_freq); - else - CtdlLogPrintf(CTDL_DEBUG, "rssclient: Task STOPPED.\n"); - return NULL; + return; +} + +void rss_cleanup(void) +{ + /* citthread_mutex_destroy(&RSSQueueMutex); TODO */ + DeleteHash(&RSSFetchUrls); + DeleteHash(&RSSQueueRooms); } @@ -671,8 +734,13 @@ CTDL_MODULE_INIT(rssclient) { if (threading) { - CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, 0); - } - /* return our Subversion id for the Log */ - return "$Id: serv_rssclient.c 5652 2007-10-29 20:14:48Z ajc $"; + CtdlFillSystemContext(&rss_CC, "rssclient"); + pthread_mutex_init(&RSSQueueMutex, NULL); + RSSQueueRooms = NewHash(1, lFlathash); + RSSFetchUrls = NewHash(1, NULL); + syslog(LOG_INFO, "%s\n", curl_version()); + CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER); + CtdlRegisterCleanupHook(rss_cleanup); + } + return "rssclient"; }