X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Frss_atom_parser.c;h=16899b88d1d5f2166ee160aa8dc22721630af78f;hb=66f72c07b70ed9500c49e8ff3c3f895e5269d339;hp=bed68587c54d8f905628fabc473be65cf700dd5c;hpb=67d954e97ccee1cd9c9ae8a969eece1383a04d46;p=citadel.git
diff --git a/citadel/modules/rssclient/rss_atom_parser.c b/citadel/modules/rssclient/rss_atom_parser.c
index bed68587c..16899b88d 100644
--- a/citadel/modules/rssclient/rss_atom_parser.c
+++ b/citadel/modules/rssclient/rss_atom_parser.c
@@ -58,26 +58,7 @@
#include "event_client.h"
#include "rss_atom_parser.h"
-HashList *StartHandlers = NULL;
-HashList *EndHandlers = NULL;
-HashList *KnownNameSpaces = NULL;
-void AddRSSStartHandler(rss_handler_func Handler, int Flags, const char *key, long len)
-{
- rss_xml_handler *h;
- h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
- h->Flags = Flags;
- h->Handler = Handler;
- Put(StartHandlers, key, len, h, NULL);
-}
-void AddRSSEndHandler(rss_handler_func Handler, int Flags, const char *key, long len)
-{
- rss_xml_handler *h;
- h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
- h->Flags = Flags;
- h->Handler = Handler;
- Put(EndHandlers, key, len, h, NULL);
-}
-
+void rss_save_item(rss_item *ri, rss_aggregator *Cfg);
/*
@@ -130,177 +111,70 @@ void flush_rss_item(rss_item *ri)
FreeStrBuf(&ri->author_email);
FreeStrBuf(&ri->author_url);
FreeStrBuf(&ri->description);
-}
-
-void rss_xml_start(void *data, const char *supplied_el, const char **attr)
-{
- rss_xml_handler *h;
- rss_aggregator *rssc = (rss_aggregator*) data;
- rss_item *ri = rssc->Item;
- void *pv;
- const char *pel;
- char *sep = NULL;
-
- /* Axe the namespace, we don't care about it */
-/// CtdlLogPrintf(0, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el);
- pel = supplied_el;
- while (sep = strchr(pel, ':'), sep) {
- pel = sep + 1;
- }
-
- if (pel != supplied_el)
- {
- void *v;
-
- if (!GetHash(KnownNameSpaces,
- supplied_el,
- pel - supplied_el - 1,
- &v))
- {
-#ifdef DEBUG_RSS
- CtdlLogPrintf(0, "RSS: START ignoring because of wrong namespace [%s] = [%s]\n",
- supplied_el);
-#endif
- return;
- }
- }
-
- StrBufPlain(rssc->Key, pel, -1);
- StrBufLowerCase(rssc->Key);
- if (GetHash(StartHandlers, SKEY(rssc->Key), &pv))
- {
- rssc->Current = h = (rss_xml_handler*) pv;
- if (((h->Flags & RSS_UNSET) != 0) &&
- (rssc->ItemType == RSS_UNSET))
- {
- h->Handler(rssc->CData, ri, rssc, attr);
- }
- else if (((h->Flags & RSS_RSS) != 0) &&
- (rssc->ItemType == RSS_RSS))
- {
- h->Handler(rssc->CData, ri, rssc, attr);
- }
- else if (((h->Flags & RSS_ATOM) != 0) &&
- (rssc->ItemType == RSS_ATOM))
- {
- h->Handler(rssc->CData, ri, rssc, attr);
- }
-#ifdef DEBUG_RSS
- else
- CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el);
-#endif
- }
-#ifdef DEBUG_RSS
- else
- CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el);
-#endif
+ FreeStrBuf(&ri->linkTitle);
+ FreeStrBuf(&ri->reLink);
+ FreeStrBuf(&ri->reLinkTitle);
+ FreeStrBuf(&ri->channel_title);
}
-void rss_xml_end(void *data, const char *supplied_el)
-{
- rss_xml_handler *h;
- rss_aggregator *rssc = (rss_aggregator*) data;
- rss_item *ri = rssc->Item;
- const char *pel;
- char *sep = NULL;
- void *pv;
- /* Axe the namespace, we don't care about it */
- pel = supplied_el;
- while (sep = strchr(pel, ':'), sep) {
- pel = sep + 1;
- }
-// CtdlLogPrintf(0, "RSS: END %s...\n", el);
- if (pel != supplied_el)
- {
- void *v;
-
- if (!GetHash(KnownNameSpaces,
- supplied_el,
- pel - supplied_el - 1,
- &v))
- {
-#ifdef DEBUG_RSS
- CtdlLogPrintf(0, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n",
- supplied_el, ChrPtr(rssc->CData));
-#endif
- FlushStrBuf(rssc->CData);
- return;
- }
- }
-
- StrBufPlain(rssc->Key, pel, -1);
- StrBufLowerCase(rssc->Key);
- if (GetHash(EndHandlers, SKEY(rssc->Key), &pv))
- {
- h = (rss_xml_handler*) pv;
-
- if (((h->Flags & RSS_UNSET) != 0) &&
- (rssc->ItemType == RSS_UNSET))
- {
- h->Handler(rssc->CData, ri, rssc, NULL);
- }
- else if (((h->Flags & RSS_RSS) != 0) &&
- (rssc->ItemType == RSS_RSS))
- {
- h->Handler(rssc->CData, ri, rssc, NULL);
- }
- else if (((h->Flags & RSS_ATOM) != 0) &&
- (rssc->ItemType == RSS_ATOM))
- {
- h->Handler(rssc->CData, ri, rssc, NULL);
- }
-#ifdef DEBUG_RSS
- else
- CtdlLogPrintf(0, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData));
-#endif
- }
-#ifdef DEBUG_RSS
- else
- CtdlLogPrintf(0, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData));
-#endif
- FlushStrBuf(rssc->CData);
- rssc->Current = NULL;
-}
+/******************************************************************************
+ * XML-Handler *
+ ******************************************************************************/
-
-
-
-void RSS_item_rss_start (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_rss_start (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
- CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RSS feed.\n");
+ syslog(LOG_DEBUG, "RSS: This is an RSS feed.\n");
Cfg->ItemType = RSS_RSS;
}
-void RSS_item_rdf_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_rdf_start(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
- CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RDF feed.\n");
+ syslog(LOG_DEBUG, "RSS: This is an RDF feed.\n");
Cfg->ItemType = RSS_RSS;
}
-void ATOM_item_feed_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void ATOM_item_feed_start(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
- CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an ATOM feed.\n");
+ syslog(LOG_DEBUG, "RSS: This is an ATOM feed.\n");
Cfg->ItemType = RSS_ATOM;
}
-void RSS_item_item_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_item_start(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
ri->item_tag_nesting ++;
flush_rss_item(ri);
}
-void ATOM_item_entry_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void ATOM_item_entry_start(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
/* Atom feed... */
ri->item_tag_nesting ++;
flush_rss_item(ri);
}
-void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void ATOM_item_link_start (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
int i;
const char *pHref = NULL;
@@ -330,20 +204,28 @@ void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, con
if (pHref == NULL)
return; /* WHUT? Pointing... where? */
if ((pType != NULL) && !strcasecmp(pType, "application/atom+xml"))
- return; /* these just point to other rss resources, we're not interested in them. */
+ return;
+ /* these just point to other rss resources,
+ we're not interested in them. */
if (pRel != NULL)
{
if (!strcasecmp (pRel, "replies"))
{
NewStrBufDupAppendFlush(&ri->reLink, NULL, pHref, -1);
StrBufTrim(ri->link);
- NewStrBufDupAppendFlush(&ri->reLinkTitle, NULL, pTitle, -1);
+ NewStrBufDupAppendFlush(&ri->reLinkTitle,
+ NULL,
+ pTitle,
+ -1);
}
- else if (!strcasecmp(pRel, "alternate")) /* Alternative representation of this Item... */
- {
+ else if (!strcasecmp(pRel, "alternate"))
+ { /* Alternative representation of this Item... */
NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1);
StrBufTrim(ri->link);
- NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1);
+ NewStrBufDupAppendFlush(&ri->linkTitle,
+ NULL,
+ pTitle,
+ -1);
}
#if 0 /* these are also defined, but dunno what to do with them.. */
@@ -354,7 +236,7 @@ void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, con
{
}
else if (!strcasecmp(pRel, "enclosure"))
- {/* this reference can get big, and is probably the full article... */
+ {/*...reference can get big, and is probably the full article*/
}
else if (!strcasecmp(pRel, "via"))
{/* this article was provided via... */
@@ -372,7 +254,10 @@ void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, con
-void ATOMRSS_item_title_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void ATOMRSS_item_title_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
if ((ri->item_tag_nesting == 0) && (StrLength(CData) > 0)) {
NewStrBufDupAppendFlush(&ri->channel_title, CData, NULL, 0);
@@ -380,14 +265,18 @@ void ATOMRSS_item_title_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, co
}
}
-void RSS_item_guid_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_guid_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
}
}
-void ATOM_item_id_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void ATOM_item_id_end(StrBuf *CData,
+ rss_item *ri, rss_aggregator *Cfg, const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
@@ -395,14 +284,20 @@ void ATOM_item_id_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const ch
}
-void RSS_item_link_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_link_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->link, CData, NULL, 0);
StrBufTrim(ri->link);
}
}
-void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_relink_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->reLink, CData, NULL, 0);
@@ -410,7 +305,10 @@ void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const
}
}
-void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSSATOM_item_title_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->title, CData, NULL, 0);
@@ -418,26 +316,42 @@ void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, c
}
}
-void ATOM_item_content_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void ATOM_item_content_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
long olen = StrLength (ri->description);
long clen = StrLength (CData);
- if (clen > 0)
+ if (clen > 0)
{
if (olen == 0) {
- NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+ NewStrBufDupAppendFlush(&ri->description,
+ CData,
+ NULL,
+ 0);
StrBufTrim(ri->description);
}
else if (olen < clen) {
FlushStrBuf(ri->description);
- NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+ NewStrBufDupAppendFlush(&ri->description,
+ CData,
+ NULL,
+ 0);
+
StrBufTrim(ri->description);
}
}
}
-void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void ATOM_item_summary_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
- /* this can contain an abstract of the article. but we don't want to verwrite a full document if we already have it. */
+ /*
+ * this can contain an abstract of the article.
+ * but we don't want to overwrite a full document if we already have it.
+ */
if ((StrLength(CData) > 0) && (StrLength(ri->description) == 0))
{
NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
@@ -445,33 +359,48 @@ void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, co
}
}
-void RSS_item_description_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_description_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
long olen = StrLength (ri->description);
long clen = StrLength (CData);
- if (clen > 0)
+ if (clen > 0)
{
if (olen == 0) {
- NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+ NewStrBufDupAppendFlush(&ri->description,
+ CData,
+ NULL,
+ 0);
StrBufTrim(ri->description);
}
else if (olen < clen) {
FlushStrBuf(ri->description);
- NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+ NewStrBufDupAppendFlush(&ri->description,
+ CData,
+ NULL,
+ 0);
StrBufTrim(ri->description);
}
}
}
-void ATOM_item_published_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
-{
+void ATOM_item_published_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
+{
if (StrLength(CData) > 0) {
StrBufTrim(CData);
ri->pubdate = rdf_parsedate(ChrPtr(CData));
}
}
-void ATOM_item_updated_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void ATOM_item_updated_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
StrBufTrim(CData);
@@ -479,7 +408,10 @@ void ATOM_item_updated_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, co
}
}
-void RSS_item_pubdate_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_pubdate_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
StrBufTrim(CData);
@@ -488,7 +420,10 @@ void RSS_item_pubdate_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, con
}
-void RSS_item_date_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_date_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
StrBufTrim(CData);
@@ -498,7 +433,10 @@ void RSS_item_date_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const
-void RSS_item_author_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_author_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
@@ -507,7 +445,10 @@ void RSS_item_author_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const
}
-void ATOM_item_name_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void ATOM_item_name_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
@@ -515,7 +456,10 @@ void ATOM_item_name_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const
}
}
-void ATOM_item_email_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void ATOM_item_email_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->author_email, CData, NULL, 0);
@@ -523,9 +467,12 @@ void ATOM_item_email_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const
}
}
-void RSS_item_creator_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_creator_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
- if ((StrLength(CData) > 0) &&
+ if ((StrLength(CData) > 0) &&
(StrLength(ri->author_or_creator) == 0))
{
NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
@@ -534,7 +481,10 @@ void RSS_item_creator_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, cons
}
-void ATOM_item_uri_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void ATOM_item_uri_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->author_url, CData, NULL, 0);
@@ -542,33 +492,48 @@ void ATOM_item_uri_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const c
}
}
-void RSS_item_item_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_item_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
--ri->item_tag_nesting;
rss_save_item(ri, Cfg);
}
-void ATOM_item_entry_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void ATOM_item_entry_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
--ri->item_tag_nesting;
rss_save_item(ri, Cfg);
}
-void RSS_item_rss_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_rss_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
-// CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n");
+// syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n");
ri->done_parsing = 1;
-
}
-void RSS_item_rdf_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+
+void RSS_item_rdf_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
-// CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n");
+// syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n");
ri->done_parsing = 1;
}
-void RSSATOM_item_ignore(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSSATOM_item_ignore(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *Cfg,
+ const char** Attr)
{
}
@@ -577,52 +542,432 @@ void RSSATOM_item_ignore(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const
/*
* This callback stores up the data which appears in between tags.
*/
-void rss_xml_cdata_start(void *data)
+void rss_xml_cdata_start(void *data)
{
- rss_aggregator *rssc = (rss_aggregator*) data;
+ rss_aggregator *RSSAggr = (rss_aggregator*) data;
- FlushStrBuf(rssc->CData);
+ FlushStrBuf(RSSAggr->CData);
}
-void rss_xml_cdata_end(void *data)
+void rss_xml_cdata_end(void *data)
{
}
-void rss_xml_chardata(void *data, const XML_Char *s, int len)
+void rss_xml_chardata(void *data, const XML_Char *s, int len)
{
- rss_aggregator *rssc = (rss_aggregator*) data;
+ rss_aggregator *RSSAggr = (rss_aggregator*) data;
- StrBufAppendBufPlain (rssc->CData, s, len, 0);
+ StrBufAppendBufPlain (RSSAggr->CData, s, len, 0);
+}
+
+
+/******************************************************************************
+ * RSS parser logic *
+ ******************************************************************************/
+
+extern pthread_mutex_t RSSQueueMutex;
+
+HashList *StartHandlers = NULL;
+HashList *EndHandlers = NULL;
+HashList *KnownNameSpaces = NULL;
+
+void FreeNetworkSaveMessage (void *vMsg)
+{
+ networker_save_message *Msg = (networker_save_message *) vMsg;
+
+ CtdlFreeMessageContents(&Msg->Msg);
+ FreeStrBuf(&Msg->Message);
+ FreeStrBuf(&Msg->MsgGUID);
+ free(Msg);
+}
+
+
+/*
+ * Append an HTML anchor pointing at `link` to Message, followed by a
+ * line break. The anchor text is LinkTitle when it is non-empty,
+ * otherwise Title when it is a non-NULL, non-empty string, otherwise
+ * the link itself. Nothing is appended when link is empty.
+ *
+ * NOTE(review): the anchor markup in the string literals was restored
+ * after being stripped from this copy of the patch; confirm the exact
+ * bytes against the upstream commit.
+ */
+void AppendLink(StrBuf *Message,
+		StrBuf *link,
+		StrBuf *LinkTitle,
+		const char *Title)
+{
+	if (StrLength(link) > 0)
+	{
+		StrBufAppendBufPlain(Message, HKEY("<a href=\""), 0);
+		StrBufAppendBuf(Message, link, 0);
+		StrBufAppendBufPlain(Message, HKEY("\">"), 0);
+		if (StrLength(LinkTitle) > 0)
+			StrBufAppendBuf(Message, LinkTitle, 0);
+		else if ((Title != NULL) && !IsEmptyStr(Title))
+			StrBufAppendBufPlain(Message, Title, -1, 0);
+		else
+			StrBufAppendBuf(Message, link, 0);
+		StrBufAppendBufPlain(Message, HKEY("</a><br>\n"), 0);
+	}
}
/*
- * Callback function for passing libcurl's output to expat for parsing
+ * Commit a fetched and parsed RSS item to disk
*/
+void rss_save_item(rss_item *ri, rss_aggregator *Cfg)
+{
+ networker_save_message *SaveMsg;
+ struct MD5Context md5context;
+ u_char rawdigest[MD5_DIGEST_LEN];
+ int msglen = 0;
+ StrBuf *Message;
+ StrBuf *guid;
+ AsyncIO *IO = &Cfg->IO;
+ int n;
+
+
+ SaveMsg = (networker_save_message *) malloc(
+ sizeof(networker_save_message));
+ memset(SaveMsg, 0, sizeof(networker_save_message));
+
+ /* Construct a GUID to use in the S_USETABLE table.
+ * If one is not present in the item itself, make one up.
+ */
+ if (ri->guid != NULL) {
+ StrBufSpaceToBlank(ri->guid);
+ StrBufTrim(ri->guid);
+ guid = NewStrBufPlain(HKEY("rss/"));
+ StrBufAppendBuf(guid, ri->guid, 0);
+ }
+ else {
+ MD5Init(&md5context);
+ if (ri->title != NULL) {
+ MD5Update(&md5context,
+ (const unsigned char*)SKEY(ri->title));
+ }
+ if (ri->link != NULL) {
+ MD5Update(&md5context,
+ (const unsigned char*)SKEY(ri->link));
+ }
+ MD5Final(rawdigest, &md5context);
+ guid = NewStrBufPlain(NULL,
+ MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/);
+ StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN);
+ StrBufAppendBufPlain(guid, HKEY("_rss2ctdl"), 0);
+ }
+
+ /* translate Item into message. */
+ EVM_syslog(LOG_DEBUG, "RSS: translating item...\n");
+ if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY(""));
+ StrBufSpaceToBlank(ri->description);
+ SaveMsg->Msg.cm_magic = CTDLMESSAGE_MAGIC;
+ SaveMsg->Msg.cm_anon_type = MES_NORMAL;
+ SaveMsg->Msg.cm_format_type = FMT_RFC822;
+
+ if (ri->guid != NULL) {
+ SaveMsg->Msg.cm_fields['E'] = strdup(ChrPtr(ri->guid));
+ }
+
+ if (ri->author_or_creator != NULL) {
+ char *From;
+ StrBuf *Encoded = NULL;
+ int FromAt;
+
+ From = html_to_ascii(ChrPtr(ri->author_or_creator),
+ StrLength(ri->author_or_creator),
+ 512, 0);
+ StrBufPlain(ri->author_or_creator, From, -1);
+ StrBufTrim(ri->author_or_creator);
+ free(From);
+
+ FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL;
+ if (!FromAt && StrLength (ri->author_email) > 0)
+ {
+ StrBufRFC2047encode(&Encoded, ri->author_or_creator);
+ SaveMsg->Msg.cm_fields['A'] = SmashStrBuf(&Encoded);
+ SaveMsg->Msg.cm_fields['P'] =
+ SmashStrBuf(&ri->author_email);
+ }
+ else
+ {
+ if (FromAt)
+ {
+ SaveMsg->Msg.cm_fields['A'] =
+ SmashStrBuf(&ri->author_or_creator);
+ SaveMsg->Msg.cm_fields['P'] =
+ strdup(SaveMsg->Msg.cm_fields['A']);
+ }
+ else
+ {
+ StrBufRFC2047encode(&Encoded,
+ ri->author_or_creator);
+ SaveMsg->Msg.cm_fields['A'] =
+ SmashStrBuf(&Encoded);
+ SaveMsg->Msg.cm_fields['P'] =
+ strdup("rss@localhost");
+
+ }
+ if (ri->pubdate <= 0) {
+ ri->pubdate = time(NULL);
+ }
+ }
+ }
+ else {
+ SaveMsg->Msg.cm_fields['A'] = strdup("rss");
+ }
+
+ SaveMsg->Msg.cm_fields['N'] = strdup(NODENAME);
+ if (ri->title != NULL) {
+ long len;
+ char *Sbj;
+ StrBuf *Encoded, *QPEncoded;
+
+ QPEncoded = NULL;
+ StrBufSpaceToBlank(ri->title);
+ len = StrLength(ri->title);
+ Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0);
+ len = strlen(Sbj);
+ if (Sbj[len - 1] == '\n')
+ {
+ len --;
+ Sbj[len] = '\0';
+ }
+ Encoded = NewStrBufPlain(Sbj, len);
+ free(Sbj);
+
+ StrBufTrim(Encoded);
+ StrBufRFC2047encode(&QPEncoded, Encoded);
+
+ SaveMsg->Msg.cm_fields['U'] = SmashStrBuf(&QPEncoded);
+ FreeStrBuf(&Encoded);
+ }
+ SaveMsg->Msg.cm_fields['T'] = malloc(64);
+ snprintf(SaveMsg->Msg.cm_fields['T'], 64, "%ld", ri->pubdate);
+ if (ri->channel_title != NULL) {
+ if (StrLength(ri->channel_title) > 0) {
+ SaveMsg->Msg.cm_fields['O'] =
+ strdup(ChrPtr(ri->channel_title));
+ }
+ }
+ if (ri->link == NULL)
+ ri->link = NewStrBufPlain(HKEY(""));
+
+#if 0 /* temporarily disable shorter urls. */
+ SaveMsg->Msg.cm_fields[TMP_SHORTER_URLS] =
+ GetShorterUrls(ri->description);
+#endif
+
+ msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ;
+
+ Message = NewStrBufPlain(NULL, StrLength(ri->description));
+
+ StrBufPlain(Message, HKEY(
+ "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n"
+ "