/*
* Bring external RSS feeds into rooms.
*
- * Copyright (c) 2007-2010 by the citadel.org team
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
+ * Copyright (c) 2007-2015 by the citadel.org team
*
+ * This program is open source software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 3.
+ *
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdlib.h>
#include "event_client.h"
#include "rss_atom_parser.h"
-HashList *StartHandlers = NULL;
-HashList *EndHandlers = NULL;
-HashList *KnownNameSpaces = NULL;
-void AddRSSStartHandler(rss_handler_func Handler, int Flags, const char *key, long len)
-{
- rss_xml_handler *h;
- h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
- h->Flags = Flags;
- h->Handler = Handler;
- Put(StartHandlers, key, len, h, NULL);
-}
-void AddRSSEndHandler(rss_handler_func Handler, int Flags, const char *key, long len)
-{
- rss_xml_handler *h;
- h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
- h->Flags = Flags;
- h->Handler = Handler;
- Put(EndHandlers, key, len, h, NULL);
-}
+void rss_remember_item(rss_item *ri, rss_aggregator *Cfg);
+
+int RSSAtomParserDebugEnabled = 0;
+
+/* Cfg.QRnumber of the aggregator hanging off IO->Data; printed in every log prefix below. */
+#define N (((rss_aggregator*)IO->Data)->Cfg.QRnumber)
+
+/*
+ * Log gate: every level except LOG_DEBUG always passes; LOG_DEBUG passes
+ * only while RSSAtomParserDebugEnabled is non-zero.
+ */
+#define DBGLOG(LEVEL) if (((LEVEL) != LOG_DEBUG) || (RSSAtomParserDebugEnabled != 0))
+
+/*
+ * The logging wrappers below need a variable named IO (AsyncIO *) in the
+ * calling scope.  Each one is wrapped in do { } while (0) so that it expands
+ * to exactly one statement: the bare "if" that DBGLOG() produces would
+ * otherwise capture an "else" following the macro call.
+ */
+
+/* Log with client context (CCID) and at least one format argument. */
+#define EVRSSATOM_syslog(LEVEL, FORMAT, ...)				\
+	do {								\
+		DBGLOG(LEVEL) syslog(LEVEL,				\
+				     "%s[%ld]CC[%d][%ld]RSSP" FORMAT,	\
+				     IOSTR, IO->ID, CCID, N, __VA_ARGS__); \
+	} while (0)
+
+/* Log with client context (CCID); plain message without format arguments. */
+#define EVRSSATOMM_syslog(LEVEL, FORMAT)				\
+	do {								\
+		DBGLOG(LEVEL) syslog(LEVEL,				\
+				     "%s[%ld]CC[%d][%ld]RSSP" FORMAT,	\
+				     IOSTR, IO->ID, CCID, N);		\
+	} while (0)
+
+/* Log without client context; at least one format argument. */
+#define EVRSSATOMCS_syslog(LEVEL, FORMAT, ...)				\
+	do {								\
+		DBGLOG(LEVEL) syslog(LEVEL, "%s[%ld][%ld]RSSP" FORMAT,	\
+				     IOSTR, IO->ID, N, __VA_ARGS__);	\
+	} while (0)
+
+/* Log without client context; plain message without format arguments. */
+#define EVRSSATOMSM_syslog(LEVEL, FORMAT)				\
+	do {								\
+		DBGLOG(LEVEL) syslog(LEVEL, "%s[%ld][%ld]RSSP" FORMAT,	\
+				     IOSTR, IO->ID, N);			\
+	} while (0)
/*
* Convert an RDF/RSS datestamp into a time_t
FreeStrBuf(&ri->author_email);
FreeStrBuf(&ri->author_url);
FreeStrBuf(&ri->description);
-}
-
-void rss_xml_start(void *data, const char *supplied_el, const char **attr)
-{
- rss_xml_handler *h;
- rsscollection *rssc = (rsscollection*) data;
- rssnetcfg *Cfg = rssc->Cfg;
- rss_item *ri = rssc->Item;
- void *pv;
- const char *pel;
- char *sep = NULL;
-
- /* Axe the namespace, we don't care about it */
-/// CtdlLogPrintf(0, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el);
- pel = supplied_el;
- while (sep = strchr(pel, ':'), sep) {
- pel = sep + 1;
- }
- if (pel != supplied_el)
- {
- void *v;
-
- if (!GetHash(KnownNameSpaces,
- supplied_el,
- pel - supplied_el - 1,
- &v))
- {
-#ifdef DEBUG_RSS
- CtdlLogPrintf(0, "RSS: START ignoring because of wrong namespace [%s] = [%s]\n",
- supplied_el);
-#endif
- return;
- }
- }
-
- StrBufPlain(rssc->Key, pel, -1);
- StrBufLowerCase(rssc->Key);
- if (GetHash(StartHandlers, SKEY(rssc->Key), &pv))
- {
- rssc->Current = h = (rss_xml_handler*) pv;
-
- if (((h->Flags & RSS_UNSET) != 0) &&
- (Cfg->ItemType == RSS_UNSET))
- {
- h->Handler(rssc->CData, ri, Cfg, attr);
- }
- else if (((h->Flags & RSS_RSS) != 0) &&
- (Cfg->ItemType == RSS_RSS))
- {
- h->Handler(rssc->CData, ri, Cfg, attr);
- }
- else if (((h->Flags & RSS_ATOM) != 0) &&
- (Cfg->ItemType == RSS_ATOM))
- {
- h->Handler(rssc->CData, ri, Cfg, attr);
- }
-#ifdef DEBUG_RSS
- else
- CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el);
-#endif
- }
-#ifdef DEBUG_RSS
- else
- CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el);
-#endif
+ FreeStrBuf(&ri->linkTitle);
+ FreeStrBuf(&ri->reLink);
+ FreeStrBuf(&ri->reLinkTitle);
+ FreeStrBuf(&ri->channel_title);
}
-void rss_xml_end(void *data, const char *supplied_el)
-{
- rss_xml_handler *h;
- rsscollection *rssc = (rsscollection*) data;
- rssnetcfg *Cfg = rssc->Cfg;
- rss_item *ri = rssc->Item;
- const char *pel;
- char *sep = NULL;
- void *pv;
- /* Axe the namespace, we don't care about it */
- pel = supplied_el;
- while (sep = strchr(pel, ':'), sep) {
- pel = sep + 1;
- }
-// CtdlLogPrintf(0, "RSS: END %s...\n", el);
- if (pel != supplied_el)
- {
- void *v;
-
- if (!GetHash(KnownNameSpaces,
- supplied_el,
- pel - supplied_el - 1,
- &v))
- {
-#ifdef DEBUG_RSS
- CtdlLogPrintf(0, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n",
- supplied_el, ChrPtr(rssc->CData));
-#endif
- FlushStrBuf(rssc->CData);
- return;
- }
- }
-
- StrBufPlain(rssc->Key, pel, -1);
- StrBufLowerCase(rssc->Key);
- if (GetHash(EndHandlers, SKEY(rssc->Key), &pv))
- {
- h = (rss_xml_handler*) pv;
-
- if (((h->Flags & RSS_UNSET) != 0) &&
- (Cfg->ItemType == RSS_UNSET))
- {
- h->Handler(rssc->CData, ri, Cfg, NULL);
- }
- else if (((h->Flags & RSS_RSS) != 0) &&
- (Cfg->ItemType == RSS_RSS))
- {
- h->Handler(rssc->CData, ri, Cfg, NULL);
- }
- else if (((h->Flags & RSS_ATOM) != 0) &&
- (Cfg->ItemType == RSS_ATOM))
- {
- h->Handler(rssc->CData, ri, Cfg, NULL);
- }
-#ifdef DEBUG_RSS
- else
- CtdlLogPrintf(0, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData));
-#endif
- }
-#ifdef DEBUG_RSS
- else
- CtdlLogPrintf(0, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData));
-#endif
- FlushStrBuf(rssc->CData);
- rssc->Current = NULL;
-}
+/******************************************************************************
+ * XML-Handler *
+ ******************************************************************************/
-
-
-
-void RSS_item_rss_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_rss_start (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
- CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RSS feed.\n");
- Cfg->ItemType = RSS_RSS;
+ AsyncIO *IO = &RSSAggr->IO;
+ EVRSSATOMM_syslog(LOG_DEBUG, "RSS: This is an RSS feed.\n");
+ RSSAggr->ItemType = RSS_RSS;
}
-void RSS_item_rdf_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_rdf_start(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
- CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RDF feed.\n");
- Cfg->ItemType = RSS_RSS;
+ AsyncIO *IO = &RSSAggr->IO;
+ EVRSSATOMM_syslog(LOG_DEBUG, "RSS: This is an RDF feed.\n");
+ RSSAggr->ItemType = RSS_RSS;
}
-void ATOM_item_feed_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_feed_start(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
- CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an ATOM feed.\n");
- Cfg->ItemType = RSS_ATOM;
+ AsyncIO *IO = &RSSAggr->IO;
+ EVRSSATOMM_syslog(LOG_DEBUG, "RSS: This is an ATOM feed.\n");
+ RSSAggr->ItemType = RSS_ATOM;
}
-void RSS_item_item_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_item_start(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
ri->item_tag_nesting ++;
flush_rss_item(ri);
}
-void ATOM_item_entry_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_entry_start(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
/* Atom feed... */
ri->item_tag_nesting ++;
flush_rss_item(ri);
}
-void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_link_start (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
int i;
const char *pHref = NULL;
if (pHref == NULL)
return; /* WHUT? Pointing... where? */
if ((pType != NULL) && !strcasecmp(pType, "application/atom+xml"))
- return; /* these just point to other rss resources, we're not interested in them. */
+ return;
+ /* these just point to other rss resources,
+ we're not interested in them. */
if (pRel != NULL)
{
if (!strcasecmp (pRel, "replies"))
{
NewStrBufDupAppendFlush(&ri->reLink, NULL, pHref, -1);
StrBufTrim(ri->link);
- NewStrBufDupAppendFlush(&ri->reLinkTitle, NULL, pTitle, -1);
+ NewStrBufDupAppendFlush(&ri->reLinkTitle,
+ NULL,
+ pTitle,
+ -1);
}
- else if (!strcasecmp(pRel, "alternate")) /* Alternative representation of this Item... */
- {
+ else if (!strcasecmp(pRel, "alternate"))
+ { /* Alternative representation of this Item... */
NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1);
StrBufTrim(ri->link);
- NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1);
+ NewStrBufDupAppendFlush(&ri->linkTitle,
+ NULL,
+ pTitle,
+ -1);
}
#if 0 /* these are also defined, but dunno what to do with them.. */
{
}
else if (!strcasecmp(pRel, "enclosure"))
- {/* this reference can get big, and is probably the full article... */
+ {/*...reference can get big, and is probably the full article*/
}
else if (!strcasecmp(pRel, "via"))
{/* this article was provided via... */
-void ATOMRSS_item_title_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOMRSS_item_title_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if ((ri->item_tag_nesting == 0) && (StrLength(CData) > 0)) {
NewStrBufDupAppendFlush(&ri->channel_title, CData, NULL, 0);
}
}
-void RSS_item_guid_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_guid_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
}
}
-void ATOM_item_id_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_id_end(StrBuf *CData,
+ rss_item *ri, rss_aggregator *RSSAggr, const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
}
-void RSS_item_link_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_link_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->link, CData, NULL, 0);
StrBufTrim(ri->link);
}
}
-void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_relink_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->reLink, CData, NULL, 0);
}
}
-void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSSATOM_item_title_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->title, CData, NULL, 0);
}
}
-void ATOM_item_content_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_content_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
long olen = StrLength (ri->description);
long clen = StrLength (CData);
- if (clen > 0)
+ if (clen > 0)
{
if (olen == 0) {
- NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+ NewStrBufDupAppendFlush(&ri->description,
+ CData,
+ NULL,
+ 0);
StrBufTrim(ri->description);
}
else if (olen < clen) {
FlushStrBuf(ri->description);
- NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+ NewStrBufDupAppendFlush(&ri->description,
+ CData,
+ NULL,
+ 0);
+
StrBufTrim(ri->description);
}
}
}
-void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_summary_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
- /* this can contain an abstract of the article. but we don't want to verwrite a full document if we already have it. */
+ /*
+ * this can contain an abstract of the article.
+ * but we don't want to overwrite a full document if we already have it.
+ */
if ((StrLength(CData) > 0) && (StrLength(ri->description) == 0))
{
NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
}
}
-void RSS_item_description_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_description_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
long olen = StrLength (ri->description);
long clen = StrLength (CData);
- if (clen > 0)
+ if (clen > 0)
{
if (olen == 0) {
- NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+ NewStrBufDupAppendFlush(&ri->description,
+ CData,
+ NULL,
+ 0);
StrBufTrim(ri->description);
}
else if (olen < clen) {
FlushStrBuf(ri->description);
- NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+ NewStrBufDupAppendFlush(&ri->description,
+ CData,
+ NULL,
+ 0);
StrBufTrim(ri->description);
}
}
}
-void ATOM_item_published_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
-{
+void ATOM_item_published_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
+{
if (StrLength(CData) > 0) {
StrBufTrim(CData);
ri->pubdate = rdf_parsedate(ChrPtr(CData));
}
}
-void ATOM_item_updated_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_updated_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
StrBufTrim(CData);
}
}
-void RSS_item_pubdate_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_pubdate_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
StrBufTrim(CData);
}
-void RSS_item_date_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_date_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
StrBufTrim(CData);
-void RSS_item_author_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_author_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
}
-void ATOM_item_name_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_name_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
}
}
-void ATOM_item_email_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_email_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->author_email, CData, NULL, 0);
}
}
-void RSS_item_creator_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_creator_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
- if ((StrLength(CData) > 0) &&
+ if ((StrLength(CData) > 0) &&
(StrLength(ri->author_or_creator) == 0))
{
NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
}
-void ATOM_item_uri_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_uri_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->author_url, CData, NULL, 0);
}
}
-void RSS_item_item_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_item_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
--ri->item_tag_nesting;
- rss_save_item(ri);
+ rss_remember_item(ri, RSSAggr);
}
-void ATOM_item_entry_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_entry_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
--ri->item_tag_nesting;
- rss_save_item(ri);
+ rss_remember_item(ri, RSSAggr);
}
-void RSS_item_rss_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_rss_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
-// CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n");
+ AsyncIO *IO = &RSSAggr->IO;
+ EVRSSATOMM_syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n");
ri->done_parsing = 1;
-
}
-void RSS_item_rdf_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+
+void RSS_item_rdf_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
-// CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n");
+ AsyncIO *IO = &RSSAggr->IO;
+ EVRSSATOMM_syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n");
ri->done_parsing = 1;
}
-void RSSATOM_item_ignore(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSSATOM_item_ignore(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
}
/*
* This callback stores up the data which appears in between tags.
*/
-void rss_xml_cdata_start(void *data)
+void rss_xml_cdata_start(void *data)
{
- rsscollection *rssc = (rsscollection*) data;
+ rss_aggregator *RSSAggr = (rss_aggregator*) data;
- FlushStrBuf(rssc->CData);
+ FlushStrBuf(RSSAggr->CData);
}
-void rss_xml_cdata_end(void *data)
+void rss_xml_cdata_end(void *data)
+{
+}
+void rss_xml_chardata(void *data, const XML_Char *s, int len)
{
+ rss_aggregator *RSSAggr = (rss_aggregator*) data;
+
+ StrBufAppendBufPlain (RSSAggr->CData, s, len, 0);
}
-void rss_xml_chardata(void *data, const XML_Char *s, int len)
+
+
+/******************************************************************************
+ * RSS parser logic *
+ ******************************************************************************/
+
+extern pthread_mutex_t RSSQueueMutex;
+
+HashList *StartHandlers = NULL;
+HashList *EndHandlers = NULL;
+HashList *KnownNameSpaces = NULL;
+
+void FreeNetworkSaveMessage (void *vMsg)
{
- rsscollection *rssc = (rsscollection*) data;
+ networker_save_message *Msg = (networker_save_message *) vMsg;
+
+ CM_FreeContents(&Msg->Msg);
+ FreeStrBuf(&Msg->Message);
+ FreeStrBuf(&Msg->MsgGUID);
+
+ FreeStrBuf(&Msg->author_email);
+ FreeStrBuf(&Msg->author_or_creator);
+ FreeStrBuf(&Msg->title);
+ FreeStrBuf(&Msg->description);
+
+ FreeStrBuf(&Msg->link);
+ FreeStrBuf(&Msg->linkTitle);
+
+ FreeStrBuf(&Msg->reLink);
+ FreeStrBuf(&Msg->reLinkTitle);
- StrBufAppendBufPlain (rssc->CData, s, len, 0);
+ free(Msg);
}
+
/*
- * Callback function for passing libcurl's output to expat for parsing
+ * Turn a fetched and parsed RSS item into a pending message and queue it in RSSAggr->Messages
*/
+/*
+ * Build a networker_save_message from the item that was just parsed and
+ * append it to RSSAggr->Messages.
+ *
+ * ri       item whose fields were filled in by the XML end handlers; the
+ *          StrBufs that are handed over to the new message are set to NULL
+ *          in ri, so ri no longer refers to them afterwards.
+ * RSSAggr  aggregator that owns the Messages list (and the AsyncIO used
+ *          for logging).
+ *
+ * If no memory is available for the message, the item is logged as dropped
+ * and ri is left untouched.
+ */
+void rss_remember_item(rss_item *ri, rss_aggregator *RSSAggr)
+{
+	networker_save_message *SaveMsg;
+	struct MD5Context md5context;
+	u_char rawdigest[MD5_DIGEST_LEN];
+	StrBuf *guid;
+	AsyncIO *IO = &RSSAggr->IO;
+	int n;
+
+	SaveMsg = (networker_save_message *) malloc(sizeof(networker_save_message));
+	if (SaveMsg == NULL) {
+		/* nothing has been taken from ri yet, so bailing out here is safe */
+		EVRSSATOMM_syslog(LOG_ALERT, "RSS: out of memory, dropping item\n");
+		return;
+	}
+	memset(SaveMsg, 0, sizeof(networker_save_message));
+
+	/* Construct a GUID to use in the S_USETABLE table.
+	 * If one is not present in the item itself, make one up.
+	 */
+	if (ri->guid != NULL) {
+		StrBufSpaceToBlank(ri->guid);
+		StrBufTrim(ri->guid);
+		guid = NewStrBufPlain(HKEY("rss/"));
+		StrBufAppendBuf(guid, ri->guid, 0);
+	}
+	else {
+		/* no guid in the feed: hash title + link, suffix "_rss2ctdl" */
+		MD5Init(&md5context);
+		if (ri->title != NULL) {
+			MD5Update(&md5context, (const unsigned char*)SKEY(ri->title));
+		}
+		if (ri->link != NULL) {
+			MD5Update(&md5context, (const unsigned char*)SKEY(ri->link));
+		}
+		MD5Final(rawdigest, &md5context);
+		guid = NewStrBufPlain(NULL, MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/);
+		StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN);
+		StrBufAppendBufPlain(guid, HKEY("_rss2ctdl"), 0);
+	}
+
+	/* translate Item into message. */
+	EVRSSATOMM_syslog(LOG_DEBUG, "RSS: translating item...\n");
+	if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY(""));
+	StrBufSpaceToBlank(ri->description);
+	SaveMsg->Msg.cm_magic = CTDLMESSAGE_MAGIC;
+	SaveMsg->Msg.cm_anon_type = MES_NORMAL;
+	SaveMsg->Msg.cm_format_type = FMT_RFC822;
+
+	/* gather the cheaply computed information now... */
+
+	if (ri->guid != NULL) {
+		CM_SetField(&SaveMsg->Msg, eExclusiveID, SKEY(ri->guid));
+	}
+
+	SaveMsg->MsgGUID = guid;
+
+	/* items without a usable publication date are stamped with "now" */
+	if (ri->pubdate <= 0) {
+		ri->pubdate = time(NULL);
+	}
+	CM_SetFieldLONG(&SaveMsg->Msg, eTimestamp, ri->pubdate);
+	if (ri->channel_title != NULL) {
+		if (StrLength(ri->channel_title) > 0) {
+			CM_SetField(&SaveMsg->Msg, eOriginalRoom, SKEY(ri->channel_title));
+		}
+	}
+
+	/*
+	 * Remember the remaining fields for deferred processing, so the
+	 * computing power is only spent once we know we really need them.
+	 * Each buffer moves from ri to SaveMsg; ri's pointer is cleared.
+	 */
+
+	SaveMsg->author_or_creator = ri->author_or_creator;
+	ri->author_or_creator = NULL;
+
+	SaveMsg->author_email = ri->author_email;
+	ri->author_email = NULL;
+
+	SaveMsg->title = ri->title;
+	ri->title = NULL;
+
+	SaveMsg->link = ri->link;
+	ri->link = NULL;
+
+	SaveMsg->description = ri->description;
+	ri->description = NULL;
+
+	SaveMsg->linkTitle = ri->linkTitle;
+	ri->linkTitle = NULL;
+
+	SaveMsg->reLink = ri->reLink;
+	ri->reLink = NULL;
+
+	SaveMsg->reLinkTitle = ri->reLinkTitle;
+	ri->reLinkTitle = NULL;
+
+	/* key the entry by its 1-based position; FreeNetworkSaveMessage is
+	 * handed to Put(), presumably as the entry's destructor */
+	n = GetCount(RSSAggr->Messages) + 1;
+	Put(RSSAggr->Messages, IKEY(n), SaveMsg, FreeNetworkSaveMessage);
+}
+
+
+
+/*
+ * Element-start callback (installed with XML_SetElementHandler()).
+ *
+ * data         the rss_aggregator that was registered as parser user data
+ * supplied_el  element name, possibly prefixed with "namespace:"
+ * attr         attribute array, passed through to the handler unchanged
+ *
+ * Strips the namespace, lowercases the local name into RSSAggr->Key and
+ * runs the matching entry of StartHandlers if its Flags fit the feed type
+ * detected so far.
+ */
+void rss_xml_start(void *data, const char *supplied_el, const char **attr)
+{
+	rss_aggregator *RSSAggr = (rss_aggregator*) data;
+	AsyncIO *IO = &RSSAggr->IO;
+	rss_item *ri = RSSAggr->Item;
+	rss_xml_handler *h;
+	const char *pel;
+	char *sep;
+	void *pv;
+	void *v;
+	int applies = 0;
+
+	/* Axe the namespace: advance pel past every ':' in the name */
+	for (pel = supplied_el; (sep = strchr(pel, ':')) != NULL; pel = sep + 1)
+		;
+
+	/* a prefix was present; it has to be one of the known namespaces */
+	if ((pel != supplied_el) &&
+	    !GetHash(KnownNameSpaces,
+		     supplied_el,
+		     pel - supplied_el - 1,
+		     &v))
+	{
+		EVRSSATOM_syslog(LOG_DEBUG,
+				 "RSS: START ignoring "
+				 "because of wrong namespace [%s]\n",
+				 supplied_el);
+		return;
+	}
+
+	StrBufPlain(RSSAggr->Key, pel, -1);
+	StrBufLowerCase(RSSAggr->Key);
+
+	if (GetHash(StartHandlers, SKEY(RSSAggr->Key), &pv))
+	{
+		h = (rss_xml_handler*) pv;
+
+		/* the handler must be flagged for the current feed type */
+		applies =
+			((((h->Flags & RSS_UNSET) != 0) &&
+			  (RSSAggr->ItemType == RSS_UNSET)) ||
+			 (((h->Flags & RSS_RSS) != 0) &&
+			  (RSSAggr->ItemType == RSS_RSS)) ||
+			 (((h->Flags & RSS_ATOM) != 0) &&
+			  (RSSAggr->ItemType == RSS_ATOM)));
+
+		if (applies) {
+			h->Handler(RSSAggr->CData, ri, RSSAggr, attr);
+		}
+	}
+
+	/* unknown tag, or known tag that does not belong to this feed type */
+	if (!applies) {
+		EVRSSATOM_syslog(LOG_DEBUG,
+				 "RSS: START unhandled: [%s] [%s]...\n",
+				 pel,
+				 supplied_el);
+	}
+}
+
+/*
+ * Element-end callback (installed with XML_SetElementHandler()).
+ *
+ * data         the rss_aggregator that was registered as parser user data
+ * supplied_el  element name, possibly prefixed with "namespace:"
+ *
+ * Runs the matching entry of EndHandlers on the character data collected
+ * in RSSAggr->CData (if its Flags fit the detected feed type), then
+ * empties RSSAggr->CData for the next element.
+ */
+void rss_xml_end(void *data, const char *supplied_el)
+{
+	rss_aggregator *RSSAggr = (rss_aggregator*) data;
+	AsyncIO *IO = &RSSAggr->IO;
+	rss_item *ri = RSSAggr->Item;
+	rss_xml_handler *h;
+	const char *pel;
+	char *sep;
+	void *pv;
+	void *v;
+	int applies = 0;
+
+	/* Axe the namespace: advance pel past every ':' in the name */
+	for (pel = supplied_el; (sep = strchr(pel, ':')) != NULL; pel = sep + 1)
+		;
+
+	EVRSSATOM_syslog(LOG_DEBUG, "RSS: END %s...\n", supplied_el);
+
+	/* a prefix was present; it has to be one of the known namespaces */
+	if ((pel != supplied_el) &&
+	    !GetHash(KnownNameSpaces,
+		     supplied_el,
+		     pel - supplied_el - 1,
+		     &v))
+	{
+		EVRSSATOM_syslog(LOG_DEBUG,
+				 "RSS: END ignoring because of wrong namespace"
+				 "[%s] = [%s]\n",
+				 supplied_el,
+				 ChrPtr(RSSAggr->CData));
+		FlushStrBuf(RSSAggr->CData);
+		return;
+	}
+
+	StrBufPlain(RSSAggr->Key, pel, -1);
+	StrBufLowerCase(RSSAggr->Key);
+
+	if (GetHash(EndHandlers, SKEY(RSSAggr->Key), &pv))
+	{
+		h = (rss_xml_handler*) pv;
+
+		/* the handler must be flagged for the current feed type */
+		applies =
+			((((h->Flags & RSS_UNSET) != 0) &&
+			  (RSSAggr->ItemType == RSS_UNSET)) ||
+			 (((h->Flags & RSS_RSS) != 0) &&
+			  (RSSAggr->ItemType == RSS_RSS)) ||
+			 (((h->Flags & RSS_ATOM) != 0) &&
+			  (RSSAggr->ItemType == RSS_ATOM)));
+
+		if (applies) {
+			/* end handlers get no attributes */
+			h->Handler(RSSAggr->CData, ri, RSSAggr, NULL);
+		}
+	}
+
+	/* unknown tag, or known tag that does not belong to this feed type */
+	if (!applies) {
+		EVRSSATOM_syslog(LOG_DEBUG,
+				 "RSS: END unhandled: [%s] [%s] = [%s]...\n",
+				 pel,
+				 supplied_el,
+				 ChrPtr(RSSAggr->CData));
+	}
+
+	FlushStrBuf(RSSAggr->CData);
+}
+
+
+
+/*
+ * Callback function for passing libcurl's output to expat for parsing
+ * we don't do streamed parsing so expat can handle non-utf8 documents
size_t rss_libcurl_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
XML_Parse((XML_Parser)stream, ptr, (size * nmemb), 0);
return (size*nmemb);
}
+ */
-eNextState ParseRSSReply(AsyncIO *IO)
+eNextState RSSAggregator_ParseReply(AsyncIO *IO)
{
- rsscollection *rssc;
+ StrBuf *Buf;
+ rss_aggregator *RSSAggr;
rss_item *ri;
const char *at;
char *ptr;
long len;
+ const char *Key;
- rssc = IO->Data;
- ri = rssc->Item;
- rssc->CData = NewStrBufPlain(NULL, SIZ);
- rssc->Key = NewStrBuf();
+ RSSAggr = IO->Data;
+ ri = RSSAggr->Item;
+ RSSAggr->CData = NewStrBufPlain(NULL, SIZ);
+ RSSAggr->Key = NewStrBuf();
at = NULL;
- StrBufSipLine(rssc->Key, IO->HttpReq.ReplyData, &at);
+ StrBufSipLine(RSSAggr->Key, IO->HttpReq.ReplyData, &at);
ptr = NULL;
#define encoding "encoding=\""
- ptr = strstr(ChrPtr(rssc->Key), encoding);
+ ptr = strstr(ChrPtr(RSSAggr->Key), encoding);
if (ptr != NULL)
{
char *pche;
ptr += sizeof (encoding) - 1;
pche = strchr(ptr, '"');
if (pche != NULL)
- StrBufCutAt(rssc->Key, -1, pche);
- else
+ StrBufCutAt(RSSAggr->Key, -1, pche);
+ else
ptr = "UTF-8";
}
else
ptr = "UTF-8";
+ EVRSSATOM_syslog(LOG_DEBUG, "RSS: Now parsing [%s] \n", ChrPtr(RSSAggr->Url));
- rssc->xp = XML_ParserCreateNS(ptr, ':');
- if (!rssc->xp) {
- CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n");
- goto shutdown;
+ RSSAggr->xp = XML_ParserCreateNS(ptr, ':');
+ if (!RSSAggr->xp) {
+ EVRSSATOMM_syslog(LOG_ALERT, "Cannot create XML parser!\n");
+ return eAbort;
}
- FlushStrBuf(rssc->Key);
+ FlushStrBuf(RSSAggr->Key);
- XML_SetElementHandler(rssc->xp, rss_xml_start, rss_xml_end);
- XML_SetCharacterDataHandler(rssc->xp, rss_xml_chardata);
- XML_SetUserData(rssc->xp, rssc);
- XML_SetCdataSectionHandler(rssc->xp,
+ RSSAggr->Messages = NewHash(1, Flathash);
+ XML_SetElementHandler(RSSAggr->xp, rss_xml_start, rss_xml_end);
+ XML_SetCharacterDataHandler(RSSAggr->xp, rss_xml_chardata);
+ XML_SetUserData(RSSAggr->xp, RSSAggr);
+ XML_SetCdataSectionHandler(RSSAggr->xp,
rss_xml_cdata_start,
rss_xml_cdata_end);
len = StrLength(IO->HttpReq.ReplyData);
ptr = SmashStrBuf(&IO->HttpReq.ReplyData);
- XML_Parse(rssc->xp, ptr, len, 0);
+ XML_Parse(RSSAggr->xp, ptr, len, 0);
free (ptr);
if (ri->done_parsing == 0)
- XML_Parse(rssc->xp, "", 0, 1);
+ XML_Parse(RSSAggr->xp, "", 0, 1);
- CtdlLogPrintf(CTDL_ALERT, "RSS: XML Status [%s] \n",
- XML_ErrorString(
- XML_GetErrorCode(rssc->xp)));
-
-shutdown:
- XML_ParserFree(rssc->xp);
+ EVRSSATOM_syslog(LOG_DEBUG, "RSS: XML Status [%s] \n",
+ XML_ErrorString(XML_GetErrorCode(RSSAggr->xp)));
+ XML_ParserFree(RSSAggr->xp);
flush_rss_item(ri);
- FreeStrBuf(&rssc->CData);
- FreeStrBuf(&rssc->Key);
- ///Cfg->next_poll = time(NULL) + config.c_net_freq;
+ Buf = NewStrBufDup(RSSAggr->rooms);
+ RSSAggr->recp.recp_room = SmashStrBuf(&Buf);
+ RSSAggr->recp.num_room = RSSAggr->roomlist_parts;
+ RSSAggr->recp.recptypes_magic = RECPTYPES_MAGIC;
+
+ RSSAggr->Pos = GetNewHashPos(RSSAggr->Messages, 1);
+
+ if (GetNextHashPos(RSSAggr->Messages,
+ RSSAggr->Pos,
+ &len,
+ &Key,
+ (void**) &RSSAggr->ThisMsg)) {
+ return NextDBOperation(IO, RSS_FetchNetworkUsetableEntry);
+ }
+ else {
+ return eAbort;
+ }
+}
+
+
+/******************************************************************************
+ * RSS handler registering logic *
+ ******************************************************************************/
- return eTerminateConnection;
+/*
+ * Register Handler in StartHandlers under the tag name key (len bytes).
+ * Flags is stored with the handler; rss_xml_start() compares it against
+ * the feed type before calling Handler.
+ * If the handler record cannot be allocated, the failure is logged and
+ * nothing is registered.
+ */
+void AddRSSStartHandler(rss_handler_func Handler,
+			int Flags,
+			const char *key,
+			long len)
+{
+	rss_xml_handler *h;
+
+	h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
+	if (h == NULL) {
+		syslog(LOG_ALERT, "RSS: out of memory registering start handler\n");
+		return;
+	}
+	h->Flags = Flags;
+	h->Handler = Handler;
+	Put(StartHandlers, key, len, h, NULL);
}
+/*
+ * Register Handler in EndHandlers under the tag name key (len bytes).
+ * Flags is stored with the handler; rss_xml_end() compares it against
+ * the feed type before calling Handler.
+ * If the handler record cannot be allocated, the failure is logged and
+ * nothing is registered.
+ */
+void AddRSSEndHandler(rss_handler_func Handler,
+		      int Flags,
+		      const char *key,
+		      long len)
+{
+	rss_xml_handler *h;
+
+	h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
+	if (h == NULL) {
+		syslog(LOG_ALERT, "RSS: out of memory registering end handler\n");
+		return;
+	}
+	h->Flags = Flags;
+	h->Handler = Handler;
+	Put(EndHandlers, key, len, h, NULL);
+}
-void rss_cleanup(void)
+void rss_parser_cleanup(void)
{
- DeleteHash(&StartHandlers);
- DeleteHash(&EndHandlers);
+ DeleteHash(&StartHandlers);
+ DeleteHash(&EndHandlers);
DeleteHash(&KnownNameSpaces);
}
+/*
+ * Debug-flag callback: stores n in RSSAtomParserDebugEnabled, the switch
+ * that DBGLOG() consults before emitting LOG_DEBUG messages.
+ */
+void LogDebugEnableRSSATOMParser(const int n)
+{
+	const int enabled = n;
+
+	RSSAtomParserDebugEnabled = enabled;
+}
CTDL_MODULE_INIT(rssparser)
{
AddRSSStartHandler(RSS_item_rss_start, RSS_UNSET, HKEY("rss"));
AddRSSStartHandler(RSS_item_rdf_start, RSS_UNSET, HKEY("rdf"));
- AddRSSStartHandler(ATOM_item_feed_start, RSS_UNSET, HKEY("feed"));
+ AddRSSStartHandler(ATOM_item_feed_start, RSS_UNSET, HKEY("feed"));
AddRSSStartHandler(RSS_item_item_start, RSS_RSS, HKEY("item"));
AddRSSStartHandler(ATOM_item_entry_start, RSS_ATOM, HKEY("entry"));
AddRSSStartHandler(ATOM_item_link_start, RSS_ATOM, HKEY("link"));
AddRSSEndHandler(RSS_item_guid_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("guid"));
AddRSSEndHandler(ATOM_item_id_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("id"));
AddRSSEndHandler(RSS_item_link_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("link"));
-#if 0
-// hm, rss to the comments of that blog, might be interesting in future, but...
+#if 0
+// hm, rss to the comments of that blog, might be interesting in future, but...
AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("commentrss"));
// comment count...
AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("comments"));
/* we don't like these namespaces because of they shadow our usefull parameters. */
Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler);
#endif
- CtdlRegisterCleanupHook(rss_cleanup);
+ CtdlRegisterDebugFlagHook(HKEY("RSSAtomParser"), LogDebugEnableRSSATOMParser, &RSSAtomParserDebugEnabled);
+ CtdlRegisterCleanupHook(rss_parser_cleanup);
}
return "rssparser";
}