From: Art Cancro Date: Sun, 26 Feb 2017 01:58:22 +0000 (-0500) Subject: Parser fix X-Git-Tag: v939~634 X-Git-Url: https://code.citadel.org/?p=citadel.git;a=commitdiff_plain;h=629b5ca03065aad3bf112d7e2d3f09c61997c656 Parser fix --- diff --git a/citadel/modules/rssclient/rss_atom_parser.c b/citadel/modules/rssclient/rss_atom_parser.c deleted file mode 100644 index 7bdb94baf..000000000 --- a/citadel/modules/rssclient/rss_atom_parser.c +++ /dev/null @@ -1,1057 +0,0 @@ -/* - * Bring external RSS feeds into rooms. - * - * Copyright (c) 2007-2015 by the citadel.org team - * - * This program is open source software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 3. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include - -#if TIME_WITH_SYS_TIME -# include -# include -#else -# if HAVE_SYS_TIME_H -# include -# else -# include -# endif -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include "citadel.h" -#include "server.h" -#include "citserver.h" -#include "support.h" -#include "config.h" -#include "threads.h" -#include "ctdl_module.h" -#include "clientsocket.h" -#include "msgbase.h" -#include "parsedate.h" -#include "database.h" -#include "citadel_dirs.h" -#include "md5.h" -#include "context.h" -#include "event_client.h" -#include "rss_atom_parser.h" - -void rss_remember_item(rss_item *ri, rss_aggregator *Cfg); - -int RSSAtomParserDebugEnabled = 0; - -#define N ((rss_aggregator*)IO->Data)->Cfg.QRnumber - -/* - * Convert an RDF/RSS datestamp into a time_t - */ -time_t rdf_parsedate(const char *p) -{ - struct tm tm; - time_t t = 0; - - if (!p) return 0L; - if (strlen(p) < 10) return 0L; - - memset(&tm, 0, sizeof tm); - - /* - * If the timestamp appears to be in W3C datetime format, try to - * parse it. See also: http://www.w3.org/TR/NOTE-datetime - * - * This code, along with parsedate.c, is a potential candidate for - * moving into libcitadel. - */ - if ( (p[4] == '-') && (p[7] == '-') ) { - tm.tm_year = atoi(&p[0]) - 1900; - tm.tm_mon = atoi(&p[5]) - 1; - tm.tm_mday = atoi(&p[8]); - if ( (p[10] == 'T') && (p[13] == ':') ) { - tm.tm_hour = atoi(&p[11]); - tm.tm_min = atoi(&p[14]); - } - return mktime(&tm); - } - - /* hmm... try RFC822 date stamp format */ - - t = parsedate(p); - if (t > 0) return(t); - - /* yeesh. ok, just return the current date and time. */ - return(time(NULL)); -} - -void flush_rss_item(rss_item *ri) -{ - /* Initialize the feed item data structure */ - FreeStrBuf(&ri->guid); - FreeStrBuf(&ri->title); - FreeStrBuf(&ri->link); - FreeStrBuf(&ri->author_or_creator); - FreeStrBuf(&ri->author_email); - FreeStrBuf(&ri->author_url); - FreeStrBuf(&ri->description); - - FreeStrBuf(&ri->linkTitle); - FreeStrBuf(&ri->reLink); - FreeStrBuf(&ri->reLinkTitle); - FreeStrBuf(&ri->channel_title); -} - - -/****************************************************************************** - * XML-Handler * - ******************************************************************************/ - - -void RSS_item_rss_start (StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - syslog(LOG_DEBUG, "RSS: This is an RSS feed."); - RSSAggr->ItemType = RSS_RSS; -} - -void RSS_item_rdf_start(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - syslog(LOG_DEBUG, "RSS: This is an RDF feed."); - RSSAggr->ItemType = RSS_RSS; -} - -void ATOM_item_feed_start(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - syslog(LOG_DEBUG, "RSS: This is an ATOM feed."); - RSSAggr->ItemType = RSS_ATOM; -} - - -void RSS_item_item_start(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - ri->item_tag_nesting ++; - flush_rss_item(ri); -} - -void ATOM_item_entry_start(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ -/* Atom feed... */ - ri->item_tag_nesting ++; - flush_rss_item(ri); -} - -void ATOM_item_link_start (StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - int i; - const char *pHref = NULL; - const char *pType = NULL; - const char *pRel = NULL; - const char *pTitle = NULL; - - for (i = 0; Attr[i] != NULL; i+=2) - { - if (!strcmp(Attr[i], "href")) - { - pHref = Attr[i+1]; - } - else if (!strcmp(Attr[i], "rel")) - { - pRel = Attr[i+1]; - } - else if (!strcmp(Attr[i], "type")) - { - pType = Attr[i+1]; - } - else if (!strcmp(Attr[i], "title")) - { - pTitle = Attr[i+1]; - } - } - if (pHref == NULL) - return; /* WHUT? Pointing... where? */ - if ((pType != NULL) && !strcasecmp(pType, "application/atom+xml")) - return; - /* these just point to other rss resources, - we're not interested in them. */ - if (pRel != NULL) - { - if (!strcasecmp (pRel, "replies")) - { - NewStrBufDupAppendFlush(&ri->reLink, NULL, pHref, -1); - StrBufTrim(ri->link); - NewStrBufDupAppendFlush(&ri->reLinkTitle, - NULL, - pTitle, - -1); - } - else if (!strcasecmp(pRel, "alternate")) - { /* Alternative representation of this Item... */ - NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1); - StrBufTrim(ri->link); - NewStrBufDupAppendFlush(&ri->linkTitle, - NULL, - pTitle, - -1); - - } -#if 0 /* these are also defined, but dunno what to do with them.. */ - else if (!strcasecmp(pRel, "related")) - { - } - else if (!strcasecmp(pRel, "self")) - { - } - else if (!strcasecmp(pRel, "enclosure")) - {/*...reference can get big, and is probably the full article*/ - } - else if (!strcasecmp(pRel, "via")) - {/* this article was provided via... */ - } -#endif - } - else if (StrLength(ri->link) == 0) - { - NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1); - StrBufTrim(ri->link); - NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1); - } -} - - - - -void ATOMRSS_item_title_end(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - if ((ri->item_tag_nesting == 0) && (StrLength(CData) > 0)) { - NewStrBufDupAppendFlush(&ri->channel_title, CData, NULL, 0); - StrBufTrim(ri->channel_title); - } -} - -void RSS_item_guid_end(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0); - } -} - -void ATOM_item_id_end(StrBuf *CData, - rss_item *ri, rss_aggregator *RSSAggr, const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0); - } -} - - -void RSS_item_link_end (StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->link, CData, NULL, 0); - StrBufTrim(ri->link); - } -} -void RSS_item_relink_end(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->reLink, CData, NULL, 0); - StrBufTrim(ri->reLink); - } -} - -void RSSATOM_item_title_end (StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->title, CData, NULL, 0); - StrBufTrim(ri->title); - } -} - -void ATOM_item_content_end (StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - long olen = StrLength (ri->description); - long clen = StrLength (CData); - if (clen > 0) - { - if (olen == 0) { - NewStrBufDupAppendFlush(&ri->description, - CData, - NULL, - 0); - StrBufTrim(ri->description); - } - else if (olen < clen) { - FlushStrBuf(ri->description); - NewStrBufDupAppendFlush(&ri->description, - CData, - NULL, - 0); - - StrBufTrim(ri->description); - } - } -} -void ATOM_item_summary_end (StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - /* - * this can contain an abstract of the article. - * but we don't want to verwrite a full document if we already have it. - */ - if ((StrLength(CData) > 0) && (StrLength(ri->description) == 0)) - { - NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); - StrBufTrim(ri->description); - } -} - -void RSS_item_description_end (StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - long olen = StrLength (ri->description); - long clen = StrLength (CData); - if (clen > 0) - { - if (olen == 0) { - NewStrBufDupAppendFlush(&ri->description, - CData, - NULL, - 0); - StrBufTrim(ri->description); - } - else if (olen < clen) { - FlushStrBuf(ri->description); - NewStrBufDupAppendFlush(&ri->description, - CData, - NULL, - 0); - StrBufTrim(ri->description); - } - } -} - -void ATOM_item_published_end (StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - if (StrLength(CData) > 0) { - StrBufTrim(CData); - ri->pubdate = rdf_parsedate(ChrPtr(CData)); - } -} - -void ATOM_item_updated_end (StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - if (StrLength(CData) > 0) { - StrBufTrim(CData); - ri->pubdate = rdf_parsedate(ChrPtr(CData)); - } -} - -void RSS_item_pubdate_end (StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - if (StrLength(CData) > 0) { - StrBufTrim(CData); - ri->pubdate = rdf_parsedate(ChrPtr(CData)); - } -} - - -void RSS_item_date_end (StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - if (StrLength(CData) > 0) { - StrBufTrim(CData); - ri->pubdate = rdf_parsedate(ChrPtr(CData)); - } -} - - - -void RSS_item_author_end(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0); - StrBufTrim(ri->author_or_creator); - } -} - - -void ATOM_item_name_end(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0); - StrBufTrim(ri->author_or_creator); - } -} - -void ATOM_item_email_end(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->author_email, CData, NULL, 0); - StrBufTrim(ri->author_email); - } -} - -void RSS_item_creator_end(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - if ((StrLength(CData) > 0) && - (StrLength(ri->author_or_creator) == 0)) - { - NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0); - StrBufTrim(ri->author_or_creator); - } -} - - -void ATOM_item_uri_end(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->author_url, CData, NULL, 0); - StrBufTrim(ri->author_url); - } -} - -void RSS_item_item_end(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - --ri->item_tag_nesting; - rss_remember_item(ri, RSSAggr); -} - - -void ATOM_item_entry_end(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - --ri->item_tag_nesting; - rss_remember_item(ri, RSSAggr); -} - -void RSS_item_rss_end(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - syslog(LOG_DEBUG, "End of feed detected. Closing parser."); - ri->done_parsing = 1; -} - -void RSS_item_rdf_end(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ - syslog(LOG_DEBUG, "End of feed detected. Closing parser."); - ri->done_parsing = 1; -} - - -void RSSATOM_item_ignore(StrBuf *CData, - rss_item *ri, - rss_aggregator *RSSAggr, - const char** Attr) -{ -} - - - -/* - * This callback stores up the data which appears in between tags. - */ -void rss_xml_cdata_start(void *data) -{ - rss_aggregator *RSSAggr = (rss_aggregator*) data; - - FlushStrBuf(RSSAggr->CData); -} - -void rss_xml_cdata_end(void *data) -{ -} -void rss_xml_chardata(void *data, const XML_Char *s, int len) -{ - rss_aggregator *RSSAggr = (rss_aggregator*) data; - - StrBufAppendBufPlain (RSSAggr->CData, s, len, 0); -} - - -/****************************************************************************** - * RSS parser logic * - ******************************************************************************/ - -extern pthread_mutex_t RSSQueueMutex; - -HashList *StartHandlers = NULL; -HashList *EndHandlers = NULL; -HashList *KnownNameSpaces = NULL; - -void FreeNetworkSaveMessage (void *vMsg) -{ - networker_save_message *Msg = (networker_save_message *) vMsg; - - CM_FreeContents(&Msg->Msg); - FreeStrBuf(&Msg->Message); - FreeStrBuf(&Msg->MsgGUID); - - FreeStrBuf(&Msg->author_email); - FreeStrBuf(&Msg->author_or_creator); - FreeStrBuf(&Msg->title); - FreeStrBuf(&Msg->description); - - FreeStrBuf(&Msg->link); - FreeStrBuf(&Msg->linkTitle); - - FreeStrBuf(&Msg->reLink); - FreeStrBuf(&Msg->reLinkTitle); - - free(Msg); -} - - -/* - * Commit a fetched and parsed RSS item to disk - */ -void rss_remember_item(rss_item *ri, rss_aggregator *RSSAggr) -{ - networker_save_message *SaveMsg; - struct MD5Context md5context; - u_char rawdigest[MD5_DIGEST_LEN]; - StrBuf *guid; - int n; - - SaveMsg = (networker_save_message *) malloc(sizeof(networker_save_message)); - memset(SaveMsg, 0, sizeof(networker_save_message)); - - /* Construct a GUID to use in the S_USETABLE table. - * If one is not present in the item itself, make one up. - */ - if (ri->guid != NULL) { - StrBufSpaceToBlank(ri->guid); - StrBufTrim(ri->guid); - guid = NewStrBufPlain(HKEY("rss/")); - StrBufAppendBuf(guid, ri->guid, 0); - } - else { - MD5Init(&md5context); - if (ri->title != NULL) { - MD5Update(&md5context, (const unsigned char*)SKEY(ri->title)); - } - if (ri->link != NULL) { - MD5Update(&md5context, (const unsigned char*)SKEY(ri->link)); - } - MD5Final(rawdigest, &md5context); - guid = NewStrBufPlain(NULL, MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/); - StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN); - StrBufAppendBufPlain(guid, HKEY("_rss2ctdl"), 0); - } - - /* translate Item into message. */ - syslog(LOG_DEBUG, "RSS: translating item..."); - if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY("")); - StrBufSpaceToBlank(ri->description); - SaveMsg->Msg.cm_magic = CTDLMESSAGE_MAGIC; - SaveMsg->Msg.cm_anon_type = MES_NORMAL; - SaveMsg->Msg.cm_format_type = FMT_RFC822; - - /* gather the cheaply computed information now... */ - - if (ri->guid != NULL) { - CM_SetField(&SaveMsg->Msg, eExclusiveID, SKEY(ri->guid)); - } - - SaveMsg->MsgGUID = guid; - - if (ri->pubdate <= 0) { - ri->pubdate = time(NULL); - } - CM_SetFieldLONG(&SaveMsg->Msg, eTimestamp, ri->pubdate); - if (ri->channel_title != NULL) { - if (StrLength(ri->channel_title) > 0) { - CM_SetField(&SaveMsg->Msg, eOriginalRoom, SKEY(ri->channel_title)); - } - } - - /* remember the ones for defferred processing to save computing power after we know if we realy need it. */ - - SaveMsg->author_or_creator = ri->author_or_creator; - ri->author_or_creator = NULL; - - SaveMsg->author_email = ri->author_email; - ri->author_email = NULL; - - SaveMsg->title = ri->title; - ri->title = NULL; - - SaveMsg->link = ri->link; - ri->link = NULL; - - SaveMsg->description = ri->description; - ri->description = NULL; - - SaveMsg->linkTitle = ri->linkTitle; - ri->linkTitle = NULL; - - SaveMsg->reLink = ri->reLink; - ri->reLink = NULL; - - SaveMsg->reLinkTitle = ri->reLinkTitle; - ri->reLinkTitle = NULL; - - n = GetCount(RSSAggr->Messages) + 1; - Put(RSSAggr->Messages, IKEY(n), SaveMsg, FreeNetworkSaveMessage); -} - - - -void rss_xml_start(void *data, const char *supplied_el, const char **attr) -{ - rss_xml_handler *h; - rss_aggregator *RSSAggr = (rss_aggregator*) data; - rss_item *ri = RSSAggr->Item; - void *pv; - const char *pel; - char *sep = NULL; - - /* Axe the namespace, we don't care about it */ - /* - syslog(LOG_DEBUG, - "RSS: supplied el %d: %s\n", RSSAggr->RSSAggr->ItemType, supplied_el); - */ - pel = supplied_el; - while (sep = strchr(pel, ':'), sep) { - pel = sep + 1; - } - - if (pel != supplied_el) - { - void *v; - - if (!GetHash(KnownNameSpaces, - supplied_el, - pel - supplied_el - 1, - &v)) - { - syslog(LOG_DEBUG, - "RSS: START ignoring " - "because of wrong namespace [%s]", - supplied_el - ); - return; - } - } - - StrBufPlain(RSSAggr->Key, pel, -1); - StrBufLowerCase(RSSAggr->Key); - if (GetHash(StartHandlers, SKEY(RSSAggr->Key), &pv)) - { - h = (rss_xml_handler*) pv; - - if (((h->Flags & RSS_UNSET) != 0) && - (RSSAggr->ItemType == RSS_UNSET)) - { - h->Handler(RSSAggr->CData, ri, RSSAggr, attr); - } - else if (((h->Flags & RSS_RSS) != 0) && - (RSSAggr->ItemType == RSS_RSS)) - { - h->Handler(RSSAggr->CData, ri, RSSAggr, attr); - } - else if (((h->Flags & RSS_ATOM) != 0) && - (RSSAggr->ItemType == RSS_ATOM)) - { - h->Handler(RSSAggr->CData, - ri, - RSSAggr, - attr); - } - else - syslog(LOG_DEBUG, - "RSS: START unhandled: [%s] [%s]...", - pel, - supplied_el - ); - } - else - syslog(LOG_DEBUG, - "RSS: START unhandled: [%s] [%s]...", - pel, - supplied_el - ); -} - -void rss_xml_end(void *data, const char *supplied_el) -{ - rss_xml_handler *h; - rss_aggregator *RSSAggr = (rss_aggregator*) data; - rss_item *ri = RSSAggr->Item; - const char *pel; - char *sep = NULL; - void *pv; - - /* Axe the namespace, we don't care about it */ - pel = supplied_el; - while (sep = strchr(pel, ':'), sep) { - pel = sep + 1; - } - syslog(LOG_DEBUG, "RSS: END %s...", supplied_el); - if (pel != supplied_el) - { - void *v; - - if (!GetHash(KnownNameSpaces, - supplied_el, - pel - supplied_el - 1, - &v)) - { - syslog(LOG_DEBUG, - "RSS: END ignoring because of wrong namespace" - "[%s] = [%s]", - supplied_el, - ChrPtr(RSSAggr->CData)); - FlushStrBuf(RSSAggr->CData); - return; - } - } - - StrBufPlain(RSSAggr->Key, pel, -1); - StrBufLowerCase(RSSAggr->Key); - if (GetHash(EndHandlers, SKEY(RSSAggr->Key), &pv)) - { - h = (rss_xml_handler*) pv; - - if (((h->Flags & RSS_UNSET) != 0) && - (RSSAggr->ItemType == RSS_UNSET)) - { - h->Handler(RSSAggr->CData, ri, RSSAggr, NULL); - } - else if (((h->Flags & RSS_RSS) != 0) && - (RSSAggr->ItemType == RSS_RSS)) - { - h->Handler(RSSAggr->CData, ri, RSSAggr, NULL); - } - else if (((h->Flags & RSS_ATOM) != 0) && - (RSSAggr->ItemType == RSS_ATOM)) - { - h->Handler(RSSAggr->CData, ri, RSSAggr, NULL); - } - else - syslog(LOG_DEBUG, - "RSS: END unhandled: [%s] [%s] = [%s]...", - pel, - supplied_el, - ChrPtr(RSSAggr->CData)); - } - else - syslog(LOG_DEBUG, - "RSS: END unhandled: [%s] [%s] = [%s]...", - pel, - supplied_el, - ChrPtr(RSSAggr->CData)); - FlushStrBuf(RSSAggr->CData); -} - - - -eNextState RSSAggregator_ParseReply(AsyncIO *IO) -{ - StrBuf *Buf; - rss_aggregator *RSSAggr; - rss_item *ri; - const char *at; - char *ptr; - long len; - const char *Key; - - RSSAggr = IO->Data; - ri = RSSAggr->Item; - RSSAggr->CData = NewStrBufPlain(NULL, SIZ); - RSSAggr->Key = NewStrBuf(); - at = NULL; - StrBufSipLine(RSSAggr->Key, IO->HttpReq.ReplyData, &at); - ptr = NULL; - -#define encoding "encoding=\"" - ptr = strstr(ChrPtr(RSSAggr->Key), encoding); - if (ptr != NULL) - { - char *pche; - - ptr += sizeof (encoding) - 1; - pche = strchr(ptr, '"'); - if (pche != NULL) - StrBufCutAt(RSSAggr->Key, -1, pche); - else - ptr = "UTF-8"; - } - else - ptr = "UTF-8"; - - syslog(LOG_DEBUG, "RSS: Now parsing [%s]", ChrPtr(RSSAggr->Url)); - - RSSAggr->xp = XML_ParserCreateNS(ptr, ':'); - if (!RSSAggr->xp) { - syslog(LOG_ALERT, "Cannot create XML parser!"); - return eAbort; - } - FlushStrBuf(RSSAggr->Key); - - RSSAggr->Messages = NewHash(1, Flathash); - XML_SetElementHandler(RSSAggr->xp, rss_xml_start, rss_xml_end); - XML_SetCharacterDataHandler(RSSAggr->xp, rss_xml_chardata); - XML_SetUserData(RSSAggr->xp, RSSAggr); - XML_SetCdataSectionHandler(RSSAggr->xp, - rss_xml_cdata_start, - rss_xml_cdata_end - ); - - len = StrLength(IO->HttpReq.ReplyData); - ptr = SmashStrBuf(&IO->HttpReq.ReplyData); - XML_Parse(RSSAggr->xp, ptr, len, 0); - free (ptr); - if (ri->done_parsing == 0) { - XML_Parse(RSSAggr->xp, "", 0, 1); - } - - syslog(LOG_DEBUG, "RSS: XML Status [%s]", XML_ErrorString(XML_GetErrorCode(RSSAggr->xp))); - - XML_ParserFree(RSSAggr->xp); - flush_rss_item(ri); - - Buf = NewStrBufDup(RSSAggr->rooms); - RSSAggr->recp.recp_room = SmashStrBuf(&Buf); - RSSAggr->recp.num_room = RSSAggr->roomlist_parts; - RSSAggr->recp.recptypes_magic = RECPTYPES_MAGIC; - - RSSAggr->Pos = GetNewHashPos(RSSAggr->Messages, 1); - -#if 0 -// FIXME ajc - if (GetNextHashPos(RSSAggr->Messages, - RSSAggr->Pos, - &len, - &Key, - (void**) &RSSAggr->ThisMsg)) { - return NextDBOperation(IO, RSS_FetchNetworkUsetableEntry); - } - else { -#endif - return eAbort; -#if 0 - } -#endif -} - - -/****************************************************************************** - * RSS handler registering logic * - ******************************************************************************/ - -void AddRSSStartHandler(rss_handler_func Handler, - int Flags, - const char *key, - long len) -{ - rss_xml_handler *h; - h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler)); - h->Flags = Flags; - h->Handler = Handler; - Put(StartHandlers, key, len, h, NULL); -} - -void AddRSSEndHandler(rss_handler_func Handler, - int Flags, - const char *key, - long len) -{ - rss_xml_handler *h; - h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler)); - h->Flags = Flags; - h->Handler = Handler; - Put(EndHandlers, key, len, h, NULL); -} - -void rss_parser_cleanup(void) -{ - DeleteHash(&StartHandlers); - DeleteHash(&EndHandlers); - DeleteHash(&KnownNameSpaces); -} - -void LogDebugEnableRSSATOMParser(const int n) -{ - RSSAtomParserDebugEnabled = n; -} - -CTDL_MODULE_INIT(rssparser) -{ - if (!threading) - { - StartHandlers = NewHash(1, NULL); - EndHandlers = NewHash(1, NULL); - - AddRSSStartHandler(RSS_item_rss_start, RSS_UNSET, HKEY("rss")); - AddRSSStartHandler(RSS_item_rdf_start, RSS_UNSET, HKEY("rdf")); - AddRSSStartHandler(ATOM_item_feed_start, RSS_UNSET, HKEY("feed")); - AddRSSStartHandler(RSS_item_item_start, RSS_RSS, HKEY("item")); - AddRSSStartHandler(ATOM_item_entry_start, RSS_ATOM, HKEY("entry")); - AddRSSStartHandler(ATOM_item_link_start, RSS_ATOM, HKEY("link")); - - AddRSSEndHandler(ATOMRSS_item_title_end, RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title")); - AddRSSEndHandler(RSS_item_guid_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("guid")); - AddRSSEndHandler(ATOM_item_id_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("id")); - AddRSSEndHandler(RSS_item_link_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("link")); -#if 0 -// hm, rss to the comments of that blog, might be interesting in future, but... - AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("commentrss")); -// comment count... - AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("comments")); -#endif - AddRSSEndHandler(RSSATOM_item_title_end, RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title")); - AddRSSEndHandler(ATOM_item_content_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("content")); - AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_ATOM|RSS_REQUIRE_BUF, HKEY("encoded")); - AddRSSEndHandler(ATOM_item_summary_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("summary")); - AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("description")); - AddRSSEndHandler(ATOM_item_published_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("published")); - AddRSSEndHandler(ATOM_item_updated_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("updated")); - AddRSSEndHandler(RSS_item_pubdate_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("pubdate")); - AddRSSEndHandler(RSS_item_date_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("date")); - AddRSSEndHandler(RSS_item_author_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("author")); - AddRSSEndHandler(RSS_item_creator_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("creator")); -/* */ - AddRSSEndHandler(ATOM_item_email_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("email")); - AddRSSEndHandler(ATOM_item_name_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("name")); - AddRSSEndHandler(ATOM_item_uri_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("uri")); -/* */ - AddRSSEndHandler(RSS_item_item_end, RSS_RSS, HKEY("item")); - AddRSSEndHandler(RSS_item_rss_end, RSS_RSS, HKEY("rss")); - AddRSSEndHandler(RSS_item_rdf_end, RSS_RSS, HKEY("rdf")); - AddRSSEndHandler(ATOM_item_entry_end, RSS_ATOM, HKEY("entry")); - - -/* at the start of atoms:
  • link to resource
  • ignore them. */ - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("seq")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("seq")); - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("li")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("li")); - -/* links to other feed generators... */ - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("feedflare")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("feedflare")); - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("browserfriendly")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("browserfriendly")); - - KnownNameSpaces = NewHash(1, NULL); - Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearch/1.1/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearchrss/1.0/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://backend.userland.com/creativeCommonsRssModule"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/atom/ns#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/dc/elements/1.1/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/content/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/slash/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/syndication/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/syndication/thread/1.0"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://rssnamespace.org/feedburner/ext/1.0"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://schemas.google.com/g/2005"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://webns.net/mvcb/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://web.resource.org/cc/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://wellformedweb.org/CommentAPI/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.georss.org/georss"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/xhtml"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/2003/01/geo/wgs84_pos#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/2005/Atom"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("urn:flickr:"), NULL, reference_free_handler); -#if 0 - /* we don't like these namespaces because of they shadow our usefull parameters. */ - Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler); -#endif - CtdlRegisterDebugFlagHook(HKEY("RSSAtomParser"), LogDebugEnableRSSATOMParser, &RSSAtomParserDebugEnabled); - CtdlRegisterCleanupHook(rss_parser_cleanup); - } - return "rssparser"; -} diff --git a/citadel/modules/rssclient/rss_atom_parser.h b/citadel/modules/rssclient/rss_atom_parser.h deleted file mode 100644 index a9ff561c5..000000000 --- a/citadel/modules/rssclient/rss_atom_parser.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Bring external RSS feeds into rooms. - * - * Copyright (c) 2007-2012 by the citadel.org team - * - * This program is open source software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 3. - * - * - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * - * - * - */ - -#include "internet_addressing.h" - -#define RSS_UNSET (1<<0) -#define RSS_RSS (1<<1) -#define RSS_ATOM (1<<2) -#define RSS_REQUIRE_BUF (1<<3) - -typedef struct rss_aggregator rss_aggregator; -typedef struct rss_item rss_item; -typedef struct rss_room_counter rss_room_counter; - -typedef void (*rss_handler_func)(StrBuf *CData, - rss_item *ri, - rss_aggregator *Cfg, - const char** Attr); - - -typedef struct __rss_xml_handler { - int Flags; - rss_handler_func Handler; -}rss_xml_handler; - -struct rss_item { - int done_parsing; - int item_tag_nesting; - time_t pubdate; - StrBuf *guid; - StrBuf *title; - StrBuf *link; - StrBuf *linkTitle; - StrBuf *reLink; - StrBuf *reLinkTitle; - StrBuf *description; - StrBuf *channel_title; - StrBuf *author_or_creator; - StrBuf *author_url; - StrBuf *author_email; -}; -void flush_rss_item(rss_item *ri); - -struct rss_room_counter { - int count; - long QRnumber; -}; - -typedef struct __networker_save_message { - struct CtdlMessage Msg; - StrBuf *MsgGUID; - StrBuf *Message; - - StrBuf *author_email; - StrBuf *author_or_creator; - StrBuf *title; - StrBuf *description; - - StrBuf *link; - StrBuf *linkTitle; - - StrBuf *reLink; - StrBuf *reLinkTitle; -} networker_save_message; - -typedef struct RSSCfgLine RSSCfgLine; -struct RSSCfgLine { - RSSCfgLine *next; - StrBuf *Url; - time_t last_known_good; -}; - -typedef struct __pRSSConfig { - const RSSCfgLine *pCfg; - long QRnumber; -}pRSSConfig; - -struct rss_aggregator { - AsyncIO IO; - XML_Parser xp; - - int ItemType; - int roomlist_parts; - - time_t last_error_when; - time_t next_poll; - StrBuf *Url; - StrBuf *RedirectUrl; - StrBuf *rooms; - pRSSConfig Cfg; - HashList *OtherQRnumbers; - - StrBuf *CData; - StrBuf *Key; - - rss_item *Item; - recptypes recp; - HashPos *Pos; - HashList *Messages; - networker_save_message *ThisMsg; -}; - - - -eNextState RSSAggregator_ParseReply(AsyncIO *IO); - -eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO); diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index 909587d62..16811389f 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -48,7 +48,6 @@ #include "citadel_dirs.h" #include "md5.h" #include "context.h" -#include "rss_atom_parser.h" struct rssroom { struct rssroom *next; @@ -61,7 +60,6 @@ struct rssurl { struct rssroom *rooms; }; - time_t last_run = 0L; struct CitContext rss_CC; struct rssurl *rsstodo = NULL;