Move back to single threaded structure for rss feed puller
[citadel.git] / citadel / modules / rssclient / rss_atom_parser.c
index 9bd7cf362470c3829d6464dec89e2f2f707aa501..7bdb94bafe59969ae87d70cb644d82e7e7f504a9 100644 (file)
@@ -1,21 +1,15 @@
 /*
  * Bring external RSS feeds into rooms.
  *
- * Copyright (c) 2007-2010 by the citadel.org team
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
+ * Copyright (c) 2007-2015 by the citadel.org team
  *
+ * This program is open source software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 3.
+ * 
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 #include <stdlib.h>
 #include "event_client.h"
 #include "rss_atom_parser.h"
 
-HashList *StartHandlers = NULL;
-HashList *EndHandlers = NULL;
-HashList *KnownNameSpaces = NULL;
-void AddRSSStartHandler(rss_handler_func Handler, int Flags, const char *key, long len)
-{
-       rss_xml_handler *h;
-       h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
-       h->Flags = Flags;
-       h->Handler = Handler;
-       Put(StartHandlers, key, len, h, NULL);
-}
-void AddRSSEndHandler(rss_handler_func Handler, int Flags, const char *key, long len)
-{
-       rss_xml_handler *h;
-       h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
-       h->Flags = Flags;
-       h->Handler = Handler;
-       Put(EndHandlers, key, len, h, NULL);
-}
+void rss_remember_item(rss_item *ri, rss_aggregator *Cfg);
 
+int RSSAtomParserDebugEnabled = 0;
 
+#define N ((rss_aggregator*)IO->Data)->Cfg.QRnumber
 
 /*
  * Convert an RDF/RSS datestamp into a time_t
@@ -130,179 +108,70 @@ void flush_rss_item(rss_item *ri)
        FreeStrBuf(&ri->author_email);
        FreeStrBuf(&ri->author_url);
        FreeStrBuf(&ri->description);
-}
-
-void rss_xml_start(void *data, const char *supplied_el, const char **attr)
-{
-       rss_xml_handler *h;
-       rsscollection   *rssc = (rsscollection*) data;
-       rssnetcfg       *Cfg = rssc->Cfg;
-       rss_item        *ri = rssc->Item;
-       void            *pv;
-       const char      *pel;
-       char            *sep = NULL;
-
-       /* Axe the namespace, we don't care about it */
-///    CtdlLogPrintf(0, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el);
-       pel = supplied_el;
-       while (sep = strchr(pel, ':'), sep) {
-               pel = sep + 1;
-       }
-
-       if (pel != supplied_el)
-       {
-               void *v;
-               
-               if (!GetHash(KnownNameSpaces, 
-                            supplied_el, 
-                            pel - supplied_el - 1,
-                            &v))
-               {
-#ifdef DEBUG_RSS
-                       CtdlLogPrintf(0, "RSS: START ignoring because of wrong namespace [%s] = [%s]\n", 
-                                     supplied_el);
-#endif
-                       return;
-               }
-       }
 
-       StrBufPlain(rssc->Key, pel, -1);
-       StrBufLowerCase(rssc->Key);
-       if (GetHash(StartHandlers, SKEY(rssc->Key), &pv))
-       {
-               rssc->Current = h = (rss_xml_handler*) pv;
-
-               if (((h->Flags & RSS_UNSET) != 0) && 
-                   (Cfg->ItemType == RSS_UNSET))
-               {
-                       h->Handler(rssc->CData, ri, Cfg, attr);
-               }
-               else if (((h->Flags & RSS_RSS) != 0) &&
-                   (Cfg->ItemType == RSS_RSS))
-               {
-                       h->Handler(rssc->CData, ri, Cfg, attr);
-               }
-               else if (((h->Flags & RSS_ATOM) != 0) &&
-                        (Cfg->ItemType == RSS_ATOM))
-               {
-                       h->Handler(rssc->CData, ri, Cfg, attr);                 
-               }
-#ifdef DEBUG_RSS
-               else 
-                       CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el);
-#endif
-       }
-#ifdef DEBUG_RSS
-       else 
-               CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel,  supplied_el);
-#endif
-}
-
-void rss_xml_end(void *data, const char *supplied_el)
-{
-       rss_xml_handler *h;
-       rsscollection   *rssc = (rsscollection*) data;
-       rssnetcfg       *Cfg = rssc->Cfg;
-       rss_item        *ri = rssc->Item;
-       const char      *pel;
-       char            *sep = NULL;
-       void            *pv;
-
-       /* Axe the namespace, we don't care about it */
-       pel = supplied_el;
-       while (sep = strchr(pel, ':'), sep) {
-               pel = sep + 1;
-       }
-//     CtdlLogPrintf(0, "RSS: END %s...\n", el);
-       if (pel != supplied_el)
-       {
-               void *v;
-               
-               if (!GetHash(KnownNameSpaces, 
-                            supplied_el, 
-                            pel - supplied_el - 1,
-                            &v))
-               {
-#ifdef DEBUG_RSS
-                       CtdlLogPrintf(0, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n", 
-                                     supplied_el, ChrPtr(rssc->CData));
-#endif
-                       FlushStrBuf(rssc->CData);
-                       return;
-               }
-       }
-
-       StrBufPlain(rssc->Key, pel, -1);
-       StrBufLowerCase(rssc->Key);
-       if (GetHash(EndHandlers, SKEY(rssc->Key), &pv))
-       {
-               h = (rss_xml_handler*) pv;
-
-               if (((h->Flags & RSS_UNSET) != 0) && 
-                   (Cfg->ItemType == RSS_UNSET))
-               {
-                       h->Handler(rssc->CData, ri, Cfg, NULL);
-               }
-               else if (((h->Flags & RSS_RSS) != 0) &&
-                   (Cfg->ItemType == RSS_RSS))
-               {
-                       h->Handler(rssc->CData, ri, Cfg, NULL);
-               }
-               else if (((h->Flags & RSS_ATOM) != 0) &&
-                        (Cfg->ItemType == RSS_ATOM))
-               {
-                       h->Handler(rssc->CData, ri, Cfg, NULL);
-               }
-#ifdef DEBUG_RSS
-               else 
-                       CtdlLogPrintf(0, "RSS: END   unhandled: [%s]  [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData));
-#endif
-       }
-#ifdef DEBUG_RSS
-       else 
-               CtdlLogPrintf(0, "RSS: END   unhandled: [%s]  [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData));
-#endif
-       FlushStrBuf(rssc->CData);
-       rssc->Current = NULL;
+       FreeStrBuf(&ri->linkTitle);
+       FreeStrBuf(&ri->reLink);
+       FreeStrBuf(&ri->reLinkTitle);
+       FreeStrBuf(&ri->channel_title);
 }
 
 
+/******************************************************************************
+ *                              XML-Handler                                   *
+ ******************************************************************************/
 
 
-
-void RSS_item_rss_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_rss_start (StrBuf *CData,
+                        rss_item *ri,
+                        rss_aggregator *RSSAggr,
+                        const char** Attr)
 {
-       CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RSS feed.\n");
-       Cfg->ItemType = RSS_RSS;
+       syslog(LOG_DEBUG, "RSS: This is an RSS feed.");
+       RSSAggr->ItemType = RSS_RSS;
 }
 
-void RSS_item_rdf_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_rdf_start(StrBuf *CData,
+                       rss_item *ri,
+                       rss_aggregator *RSSAggr,
+                       const char** Attr)
 {
-       CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RDF feed.\n");
-       Cfg->ItemType = RSS_RSS;
+       syslog(LOG_DEBUG, "RSS: This is an RDF feed.");
+       RSSAggr->ItemType = RSS_RSS;
 }
 
-void ATOM_item_feed_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_feed_start(StrBuf *CData,
+                         rss_item *ri,
+                         rss_aggregator *RSSAggr,
+                         const char** Attr)
 {
-       CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an ATOM feed.\n");
-       Cfg->ItemType = RSS_ATOM;
+       syslog(LOG_DEBUG, "RSS: This is an ATOM feed.");
+       RSSAggr->ItemType = RSS_ATOM;
 }
 
 
-void RSS_item_item_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_item_start(StrBuf *CData,
+                        rss_item *ri,
+                        rss_aggregator *RSSAggr,
+                        const char** Attr)
 {
        ri->item_tag_nesting ++;
        flush_rss_item(ri);
 }
 
-void ATOM_item_entry_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_entry_start(StrBuf *CData,
+                          rss_item *ri,
+                          rss_aggregator *RSSAggr,
+                          const char** Attr)
 {
 /* Atom feed... */
        ri->item_tag_nesting ++;
        flush_rss_item(ri);
 }
 
-void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_link_start (StrBuf *CData,
+                          rss_item *ri,
+                          rss_aggregator *RSSAggr,
+                          const char** Attr)
 {
        int i;
        const char *pHref = NULL;
@@ -332,20 +201,28 @@ void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const ch
        if (pHref == NULL)
                return; /* WHUT? Pointing... where? */
        if ((pType != NULL) && !strcasecmp(pType, "application/atom+xml"))
-               return; /* these just point to other rss resources, we're not interested in them. */
+               return;
+       /* these just point to other rss resources,
+          we're not interested in them. */
        if (pRel != NULL)
        {
                if (!strcasecmp (pRel, "replies"))
                {
                        NewStrBufDupAppendFlush(&ri->reLink, NULL, pHref, -1);
                        StrBufTrim(ri->link);
-                       NewStrBufDupAppendFlush(&ri->reLinkTitle, NULL, pTitle, -1);
+                       NewStrBufDupAppendFlush(&ri->reLinkTitle,
+                                               NULL,
+                                               pTitle,
+                                               -1);
                }
-               else if (!strcasecmp(pRel, "alternate")) /* Alternative representation of this Item... */
-               {
+               else if (!strcasecmp(pRel, "alternate"))
+               { /* Alternative representation of this Item... */
                        NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1);
                        StrBufTrim(ri->link);
-                       NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1);
+                       NewStrBufDupAppendFlush(&ri->linkTitle,
+                                               NULL,
+                                               pTitle,
+                                               -1);
 
                }
 #if 0 /* these are also defined, but dunno what to do with them.. */
@@ -356,7 +233,7 @@ void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const ch
                {
                }
                else if (!strcasecmp(pRel, "enclosure"))
-               {/* this reference can get big, and is probably the full article... */
+               {/*...reference can get big, and is probably the full article*/
                }
                else if (!strcasecmp(pRel, "via"))
                {/* this article was provided via... */
@@ -374,7 +251,10 @@ void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const ch
 
 
 
-void ATOMRSS_item_title_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOMRSS_item_title_end(StrBuf *CData,
+                           rss_item *ri,
+                           rss_aggregator *RSSAggr,
+                           const char** Attr)
 {
        if ((ri->item_tag_nesting == 0) && (StrLength(CData) > 0)) {
                NewStrBufDupAppendFlush(&ri->channel_title, CData, NULL, 0);
@@ -382,14 +262,18 @@ void ATOMRSS_item_title_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const c
        }
 }
 
-void RSS_item_guid_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_guid_end(StrBuf *CData,
+                      rss_item *ri,
+                      rss_aggregator *RSSAggr,
+                      const char** Attr)
 {
        if (StrLength(CData) > 0) {
                NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
        }
 }
 
-void ATOM_item_id_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_id_end(StrBuf *CData,
+                     rss_item *ri, rss_aggregator *RSSAggr, const char** Attr)
 {
        if (StrLength(CData) > 0) {
                NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
@@ -397,14 +281,20 @@ void ATOM_item_id_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char**
 }
 
 
-void RSS_item_link_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_link_end (StrBuf *CData,
+                       rss_item *ri,
+                       rss_aggregator *RSSAggr,
+                       const char** Attr)
 {
        if (StrLength(CData) > 0) {
                NewStrBufDupAppendFlush(&ri->link, CData, NULL, 0);
                StrBufTrim(ri->link);
        }
 }
-void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_relink_end(StrBuf *CData,
+                        rss_item *ri,
+                        rss_aggregator *RSSAggr,
+                        const char** Attr)
 {
        if (StrLength(CData) > 0) {
                NewStrBufDupAppendFlush(&ri->reLink, CData, NULL, 0);
@@ -412,7 +302,10 @@ void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char
        }
 }
 
-void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSSATOM_item_title_end (StrBuf *CData,
+                            rss_item *ri,
+                            rss_aggregator *RSSAggr,
+                            const char** Attr)
 {
        if (StrLength(CData) > 0) {
                NewStrBufDupAppendFlush(&ri->title, CData, NULL, 0);
@@ -420,26 +313,42 @@ void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const
        }
 }
 
-void ATOM_item_content_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_content_end (StrBuf *CData,
+                           rss_item *ri,
+                           rss_aggregator *RSSAggr,
+                           const char** Attr)
 {
        long olen = StrLength (ri->description);
        long clen = StrLength (CData);
-       if (clen > 0) 
+       if (clen > 0)
        {
                if (olen == 0) {
-                       NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+                       NewStrBufDupAppendFlush(&ri->description,
+                                               CData,
+                                               NULL,
+                                               0);
                        StrBufTrim(ri->description);
                }
                else if (olen < clen) {
                        FlushStrBuf(ri->description);
-                       NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+                       NewStrBufDupAppendFlush(&ri->description,
+                                               CData,
+                                               NULL,
+                                               0);
+
                        StrBufTrim(ri->description);
                }
        }
 }
-void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_summary_end (StrBuf *CData,
+                           rss_item *ri,
+                           rss_aggregator *RSSAggr,
+                           const char** Attr)
 {
-       /* this can contain an abstract of the article. but we don't want to verwrite a full document if we already have it. */
+       /*
+        * this can contain an abstract of the article.
+        * but we don't want to overwrite a full document if we already have it.
+        */
        if ((StrLength(CData) > 0) && (StrLength(ri->description) == 0))
        {
                NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
@@ -447,33 +356,48 @@ void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const c
        }
 }
 
-void RSS_item_description_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_description_end (StrBuf *CData,
+                              rss_item *ri,
+                              rss_aggregator *RSSAggr,
+                              const char** Attr)
 {
        long olen = StrLength (ri->description);
        long clen = StrLength (CData);
-       if (clen > 0) 
+       if (clen > 0)
        {
                if (olen == 0) {
-                       NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+                       NewStrBufDupAppendFlush(&ri->description,
+                                               CData,
+                                               NULL,
+                                               0);
                        StrBufTrim(ri->description);
                }
                else if (olen < clen) {
                        FlushStrBuf(ri->description);
-                       NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+                       NewStrBufDupAppendFlush(&ri->description,
+                                               CData,
+                                               NULL,
+                                               0);
                        StrBufTrim(ri->description);
                }
        }
 }
 
-void ATOM_item_published_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
-{                
+void ATOM_item_published_end (StrBuf *CData,
+                             rss_item *ri,
+                             rss_aggregator *RSSAggr,
+                             const char** Attr)
+{
        if (StrLength(CData) > 0) {
                StrBufTrim(CData);
                ri->pubdate = rdf_parsedate(ChrPtr(CData));
        }
 }
 
-void ATOM_item_updated_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_updated_end (StrBuf *CData,
+                           rss_item *ri,
+                           rss_aggregator *RSSAggr,
+                           const char** Attr)
 {
        if (StrLength(CData) > 0) {
                StrBufTrim(CData);
@@ -481,7 +405,10 @@ void ATOM_item_updated_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const c
        }
 }
 
-void RSS_item_pubdate_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_pubdate_end (StrBuf *CData,
+                          rss_item *ri,
+                          rss_aggregator *RSSAggr,
+                          const char** Attr)
 {
        if (StrLength(CData) > 0) {
                StrBufTrim(CData);
@@ -490,7 +417,10 @@ void RSS_item_pubdate_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const ch
 }
 
 
-void RSS_item_date_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_date_end (StrBuf *CData,
+                       rss_item *ri,
+                       rss_aggregator *RSSAggr,
+                       const char** Attr)
 {
        if (StrLength(CData) > 0) {
                StrBufTrim(CData);
@@ -500,7 +430,10 @@ void RSS_item_date_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char*
 
 
 
-void RSS_item_author_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_author_end(StrBuf *CData,
+                        rss_item *ri,
+                        rss_aggregator *RSSAggr,
+                        const char** Attr)
 {
        if (StrLength(CData) > 0) {
                NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
@@ -509,7 +442,10 @@ void RSS_item_author_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char
 }
 
 
-void ATOM_item_name_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_name_end(StrBuf *CData,
+                       rss_item *ri,
+                       rss_aggregator *RSSAggr,
+                       const char** Attr)
 {
        if (StrLength(CData) > 0) {
                NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
@@ -517,7 +453,10 @@ void ATOM_item_name_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char*
        }
 }
 
-void ATOM_item_email_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_email_end(StrBuf *CData,
+                        rss_item *ri,
+                        rss_aggregator *RSSAggr,
+                        const char** Attr)
 {
        if (StrLength(CData) > 0) {
                NewStrBufDupAppendFlush(&ri->author_email, CData, NULL, 0);
@@ -525,9 +464,12 @@ void ATOM_item_email_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char
        }
 }
 
-void RSS_item_creator_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_creator_end(StrBuf *CData,
+                         rss_item *ri,
+                         rss_aggregator *RSSAggr,
+                         const char** Attr)
 {
-       if ((StrLength(CData) > 0) && 
+       if ((StrLength(CData) > 0) &&
            (StrLength(ri->author_or_creator) == 0))
        {
                NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
@@ -536,7 +478,10 @@ void RSS_item_creator_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const cha
 }
 
 
-void ATOM_item_uri_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_uri_end(StrBuf *CData,
+                      rss_item *ri,
+                      rss_aggregator *RSSAggr,
+                      const char** Attr)
 {
        if (StrLength(CData) > 0) {
                NewStrBufDupAppendFlush(&ri->author_url, CData, NULL, 0);
@@ -544,33 +489,48 @@ void ATOM_item_uri_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char**
        }
 }
 
-void RSS_item_item_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_item_end(StrBuf *CData,
+                      rss_item *ri,
+                      rss_aggregator *RSSAggr,
+                      const char** Attr)
 {
        --ri->item_tag_nesting;
-       rss_save_item(ri);
+       rss_remember_item(ri, RSSAggr);
 }
 
 
-void ATOM_item_entry_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void ATOM_item_entry_end(StrBuf *CData,
+                        rss_item *ri,
+                        rss_aggregator *RSSAggr,
+                        const char** Attr)
 {
        --ri->item_tag_nesting;
-       rss_save_item(ri);
+       rss_remember_item(ri, RSSAggr);
 }
 
-void RSS_item_rss_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSS_item_rss_end(StrBuf *CData,
+                     rss_item *ri,
+                     rss_aggregator *RSSAggr,
+                     const char** Attr)
 {
-//             CtdlLogPrintf(CTDL_DEBUG, "End of feed detected.  Closing parser.\n");
+       syslog(LOG_DEBUG, "End of feed detected.  Closing parser.");
        ri->done_parsing = 1;
-       
 }
-void RSS_item_rdf_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+
+void RSS_item_rdf_end(StrBuf *CData,
+                     rss_item *ri,
+                     rss_aggregator *RSSAggr,
+                     const char** Attr)
 {
-//             CtdlLogPrintf(CTDL_DEBUG, "End of feed detected.  Closing parser.\n");
+       syslog(LOG_DEBUG, "End of feed detected.  Closing parser.");
        ri->done_parsing = 1;
 }
 
 
-void RSSATOM_item_ignore(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
+void RSSATOM_item_ignore(StrBuf *CData,
+                        rss_item *ri,
+                        rss_aggregator *RSSAggr,
+                        const char** Attr)
 {
 }
 
@@ -579,54 +539,321 @@ void RSSATOM_item_ignore(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char
 /*
  * This callback stores up the data which appears in between tags.
  */
-void rss_xml_cdata_start(void *data) 
+void rss_xml_cdata_start(void *data)
 {
-       rsscollection *rssc = (rsscollection*) data;
+       rss_aggregator *RSSAggr = (rss_aggregator*) data;
 
-       FlushStrBuf(rssc->CData);
+       FlushStrBuf(RSSAggr->CData);
 }
 
-void rss_xml_cdata_end(void *data) 
+void rss_xml_cdata_end(void *data)
 {
 }
-void rss_xml_chardata(void *data, const XML_Char *s, int len) 
+void rss_xml_chardata(void *data, const XML_Char *s, int len)
 {
-       rsscollection *rssc = (rsscollection*) data;
+       rss_aggregator *RSSAggr = (rss_aggregator*) data;
 
-       StrBufAppendBufPlain (rssc->CData, s, len, 0);
+       StrBufAppendBufPlain (RSSAggr->CData, s, len, 0);
 }
 
+
+/******************************************************************************
+ *                            RSS parser logic                                *
+ ******************************************************************************/
+
+extern pthread_mutex_t RSSQueueMutex;
+
+HashList *StartHandlers = NULL;
+HashList *EndHandlers = NULL;
+HashList *KnownNameSpaces = NULL;
+
+void FreeNetworkSaveMessage (void *vMsg)
+{      /* Free one networker_save_message together with every buffer it holds. */
+       networker_save_message *Msg = (networker_save_message *) vMsg;
+       /* NOTE(review): vMsg is presumably void* so this fits the callback argument of Put(), see rss_remember_item. */
+       CM_FreeContents(&Msg->Msg); /* Msg->Msg is embedded in the struct, so only its contents are released here */
+       FreeStrBuf(&Msg->Message);
+       FreeStrBuf(&Msg->MsgGUID);
+       /* the remaining buffers are the ones rss_remember_item() moves over from the parsed rss_item */
+       FreeStrBuf(&Msg->author_email);
+       FreeStrBuf(&Msg->author_or_creator);
+       FreeStrBuf(&Msg->title);
+       FreeStrBuf(&Msg->description);
+
+       FreeStrBuf(&Msg->link);
+       FreeStrBuf(&Msg->linkTitle);
+
+       FreeStrBuf(&Msg->reLink);
+       FreeStrBuf(&Msg->reLinkTitle);
+
+       free(Msg); /* finally the container itself */
+}
+
+
 /*
- * Callback function for passing libcurl's output to expat for parsing
+ * Remember a fetched and parsed RSS item by queueing it in RSSAggr->Messages
  */
-size_t rss_libcurl_callback(void *ptr, size_t size, size_t nmemb, void *stream)
+void rss_remember_item(rss_item *ri, rss_aggregator *RSSAggr)
+{      /* Turn the parsed item ri into a networker_save_message and append it to RSSAggr->Messages. */
+       networker_save_message *SaveMsg = calloc(1, sizeof(*SaveMsg)); /* every field starts out zero/NULL */
+       struct MD5Context md5context;
+       u_char rawdigest[MD5_DIGEST_LEN];
+       StrBuf *guid;
+       int n;
+       if (SaveMsg == NULL) {
+               syslog(LOG_ERR, "RSS: out of memory, dropping item");
+               return;
+       }
+       /* Construct a GUID to use in the S_USETABLE table.
+        * If one is not present in the item itself, make one up.
+        */
+       if (ri->guid != NULL) {
+               StrBufSpaceToBlank(ri->guid);
+               StrBufTrim(ri->guid);
+               guid = NewStrBufPlain(HKEY("rss/"));
+               StrBufAppendBuf(guid, ri->guid, 0);
+       }
+       else {
+               MD5Init(&md5context);
+               if (ri->title != NULL) {
+                       MD5Update(&md5context, (const unsigned char*)SKEY(ri->title));
+               }
+               if (ri->link != NULL) {
+                       MD5Update(&md5context, (const unsigned char*)SKEY(ri->link));
+               }
+               MD5Final(rawdigest, &md5context);
+               guid = NewStrBufPlain(NULL, MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/);
+               StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN);
+               StrBufAppendBufPlain(guid, HKEY("_rss2ctdl"), 0);
+       }
+
+       /* translate Item into message. */
+       syslog(LOG_DEBUG, "RSS: translating item...");
+       if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY(""));
+       StrBufSpaceToBlank(ri->description);
+       SaveMsg->Msg.cm_magic = CTDLMESSAGE_MAGIC;
+       SaveMsg->Msg.cm_anon_type = MES_NORMAL;
+       SaveMsg->Msg.cm_format_type = FMT_RFC822;
+
+       /* gather the cheaply computed information now... */
+
+       if (ri->guid != NULL) {
+               CM_SetField(&SaveMsg->Msg, eExclusiveID, SKEY(ri->guid));
+       }
+
+       SaveMsg->MsgGUID = guid;
+
+       if (ri->pubdate <= 0) {
+               ri->pubdate = time(NULL);
+       }
+       CM_SetFieldLONG(&SaveMsg->Msg, eTimestamp, ri->pubdate);
+       if (ri->channel_title != NULL) {
+               if (StrLength(ri->channel_title) > 0) {
+                       CM_SetField(&SaveMsg->Msg, eOriginalRoom, SKEY(ri->channel_title));
+               }
+       }
+
+       /* remember the ones for deferred processing to save computing power after we know if we really need it. */
+       /* Pointers are moved, not copied: each ri field is reset to NULL once SaveMsg holds the buffer. */
+       SaveMsg->author_or_creator = ri->author_or_creator;
+       ri->author_or_creator = NULL;
+
+       SaveMsg->author_email = ri->author_email;
+       ri->author_email = NULL;
+
+       SaveMsg->title = ri->title;
+       ri->title = NULL;
+
+       SaveMsg->link = ri->link;
+       ri->link = NULL;
+
+       SaveMsg->description = ri->description;
+       ri->description = NULL;
+
+       SaveMsg->linkTitle = ri->linkTitle;
+       ri->linkTitle = NULL;
+
+       SaveMsg->reLink = ri->reLink;
+       ri->reLink = NULL;
+
+       SaveMsg->reLinkTitle = ri->reLinkTitle;
+       ri->reLinkTitle = NULL;
+       /* Store under the next sequential key; FreeNetworkSaveMessage is handed to Put() with the entry. */
+       n = GetCount(RSSAggr->Messages) + 1;
+       Put(RSSAggr->Messages, IKEY(n), SaveMsg, FreeNetworkSaveMessage);
+}
+
+
+
+/*
+ * expat start-tag callback.
+ *
+ * data        - the rss_aggregator that was registered as expat user data
+ * supplied_el - element name as delivered by expat; the parser is created
+ *               with ':' as namespace separator, so this may be
+ *               "<namespace>:<local name>"
+ * attr        - attribute list, handed through to the handler untouched
+ *
+ * Elements carrying a namespace that is not in KnownNameSpaces are ignored.
+ * Otherwise the lower-cased local name is looked up in StartHandlers and the
+ * handler is called if its Flags include the feed type currently stored in
+ * RSSAggr->ItemType.
+ */
+void rss_xml_start(void *data, const char *supplied_el, const char **attr)
 {
-       XML_Parse((XML_Parser)stream, ptr, (size * nmemb), 0);
-       return (size*nmemb);
+       rss_aggregator  *RSSAggr = (rss_aggregator*) data;
+       rss_item        *ri = RSSAggr->Item;
+       rss_xml_handler *handler;
+       const char      *last_colon;
+       const char      *local_name;
+       void            *found = NULL;
+
+       /* The local name is whatever follows the last ':' (if there is one) */
+       last_colon = strrchr(supplied_el, ':');
+       local_name = (last_colon != NULL) ? last_colon + 1 : supplied_el;
+
+       /* A namespace prefix is present: accept it only if we know it */
+       if (local_name != supplied_el)
+       {
+               void *ns;
+
+               if (!GetHash(KnownNameSpaces,
+                            supplied_el,
+                            local_name - supplied_el - 1,
+                            &ns))
+               {
+                       syslog(LOG_DEBUG,
+                              "RSS: START ignoring because of wrong namespace [%s]",
+                              supplied_el);
+                       return;
+               }
+       }
+
+       /* Handlers are keyed by the lower-cased local name */
+       StrBufPlain(RSSAggr->Key, local_name, -1);
+       StrBufLowerCase(RSSAggr->Key);
+
+       if (GetHash(StartHandlers, SKEY(RSSAggr->Key), &found))
+       {
+               handler = (rss_xml_handler*) found;
+
+               /* Fire only when the handler is enabled for the current feed type */
+               if ((((handler->Flags & RSS_UNSET) != 0) && (RSSAggr->ItemType == RSS_UNSET)) ||
+                   (((handler->Flags & RSS_RSS)   != 0) && (RSSAggr->ItemType == RSS_RSS))   ||
+                   (((handler->Flags & RSS_ATOM)  != 0) && (RSSAggr->ItemType == RSS_ATOM)))
+               {
+                       handler->Handler(RSSAggr->CData, ri, RSSAggr, attr);
+                       return;
+               }
+       }
+
+       /* No handler registered, or none that applies to this feed type */
+       syslog(LOG_DEBUG,
+              "RSS: START unhandled: [%s] [%s]...",
+              local_name,
+              supplied_el);
 }
 
+/*
+ * expat end-tag callback.
+ *
+ * data        - the rss_aggregator that was registered as expat user data
+ * supplied_el - element name as delivered by expat; the parser is created
+ *               with ':' as namespace separator, so this may be
+ *               "<namespace>:<local name>"
+ *
+ * Elements carrying a namespace that is not in KnownNameSpaces are ignored.
+ * Otherwise the lower-cased local name is looked up in EndHandlers and the
+ * handler is called (with a NULL attribute list) if its Flags include the
+ * feed type currently stored in RSSAggr->ItemType.  The handler receives
+ * RSSAggr->CData; CData is flushed on every path before returning.
+ */
+void rss_xml_end(void *data, const char *supplied_el)
+{
+       rss_aggregator  *RSSAggr = (rss_aggregator*) data;
+       rss_item        *ri = RSSAggr->Item;
+       rss_xml_handler *h;
+       const char      *last_colon;
+       const char      *pel;
+       void            *pv = NULL;
+       int              handled = 0;
+
+       /* The local name is whatever follows the last ':' (if there is one) */
+       last_colon = strrchr(supplied_el, ':');
+       pel = (last_colon != NULL) ? last_colon + 1 : supplied_el;
+
+       syslog(LOG_DEBUG, "RSS: END %s...", supplied_el);
+
+       /* A namespace prefix is present: accept it only if we know it */
+       if (pel != supplied_el)
+       {
+               void *v;
+
+               if (!GetHash(KnownNameSpaces,
+                            supplied_el,
+                            pel - supplied_el - 1,
+                            &v))
+               {
+                       /* Keep a blank between "namespace" and the element name */
+                       syslog(LOG_DEBUG,
+                              "RSS: END ignoring because of wrong namespace "
+                              "[%s] = [%s]",
+                              supplied_el,
+                              ChrPtr(RSSAggr->CData));
+                       FlushStrBuf(RSSAggr->CData);
+                       return;
+               }
+       }
+
+       /* Handlers are keyed by the lower-cased local name */
+       StrBufPlain(RSSAggr->Key, pel, -1);
+       StrBufLowerCase(RSSAggr->Key);
+
+       if (GetHash(EndHandlers, SKEY(RSSAggr->Key), &pv))
+       {
+               h = (rss_xml_handler*) pv;
+
+               /* Fire only when the handler is enabled for the current feed type */
+               if ((((h->Flags & RSS_UNSET) != 0) && (RSSAggr->ItemType == RSS_UNSET)) ||
+                   (((h->Flags & RSS_RSS)   != 0) && (RSSAggr->ItemType == RSS_RSS))   ||
+                   (((h->Flags & RSS_ATOM)  != 0) && (RSSAggr->ItemType == RSS_ATOM)))
+               {
+                       h->Handler(RSSAggr->CData, ri, RSSAggr, NULL);
+                       handled = 1;
+               }
+       }
+
+       if (!handled)
+       {
+               /* No handler registered, or none that applies to this feed type */
+               syslog(LOG_DEBUG,
+                      "RSS: END   unhandled: [%s]  [%s] = [%s]...",
+                      pel,
+                      supplied_el,
+                      ChrPtr(RSSAggr->CData));
+       }
+
+       /* The collected character data belongs to this element only */
+       FlushStrBuf(RSSAggr->CData);
+}
 
-eNextState ParseRSSReply(AsyncIO *IO)
+
+/*
+ * Parse the HTTP reply held in IO->HttpReq.ReplyData as an RSS/Atom document
+ * with expat.
+ *
+ * IO->Data is taken to be the rss_aggregator for the feed.  The character
+ * encoding handed to expat comes from an encoding="..." attribute found in
+ * the text read by StrBufSipLine(), falling back to "UTF-8".  The element
+ * callbacks rss_xml_start / rss_xml_end run with the aggregator as user
+ * data.  After parsing, RSSAggr->recp is filled from the aggregator's room
+ * list and an iterator over RSSAggr->Messages is stored in RSSAggr->Pos.
+ *
+ * Returns eAbort on every path visible here; the follow-up database step is
+ * compiled out with #if 0.
+ *
+ * NOTE(review): this rendering elides a few lines of the function (see the
+ * @@ marker inside the body), so the description covers the visible part only.
+ */
+eNextState RSSAggregator_ParseReply(AsyncIO *IO)
 {
-       rsscollection *rssc;
+       StrBuf *Buf;
+       rss_aggregator *RSSAggr;
        rss_item *ri;
        const char *at;
        char *ptr;
        long len;
+       const char *Key; /* referenced only inside the #if 0 block near the end */
 
-       rssc = IO->Data;
-       ri = rssc->Item;
-       ri->roomlist_parts = rssc->Cfg->roomlist_parts;
-       ri->roomlist = rssc->Cfg->rooms;
-       rssc->CData = NewStrBufPlain(NULL, SIZ);
-       rssc->Key = NewStrBuf();
+       RSSAggr = IO->Data;
+       ri = RSSAggr->Item;
+       RSSAggr->CData = NewStrBufPlain(NULL, SIZ);
+       RSSAggr->Key = NewStrBuf(); /* NOTE(review): CData and Key are not freed in the visible part of this function - confirm the owner releases them */
        at = NULL;
-       StrBufSipLine(rssc->Key, IO->HttpReq.ReplyData, &at);
+       StrBufSipLine(RSSAggr->Key, IO->HttpReq.ReplyData, &at); /* presumably the first line of the reply; searched for encoding= below */
        ptr = NULL;
 
 #define encoding "encoding=\""
-       ptr = strstr(ChrPtr(rssc->Key), encoding);
+       ptr = strstr(ChrPtr(RSSAggr->Key), encoding);
        if (ptr != NULL)
        {
                char *pche;
@@ -634,62 +861,108 @@ eNextState ParseRSSReply(AsyncIO *IO)
                ptr += sizeof (encoding) - 1;
                pche = strchr(ptr, '"');
                if (pche != NULL)
-                       StrBufCutAt(rssc->Key, -1, pche);
-               else 
+                       StrBufCutAt(RSSAggr->Key, -1, pche); /* cut Key at the closing quote so ptr ends after the encoding name */
+               else
                        ptr = "UTF-8";
        }
        else
                ptr = "UTF-8";
 
-       CtdlLogPrintf(CTDL_ALERT, "RSS: Now parsing [%s] \n", ChrPtr(rssc->Cfg->Url));
+       syslog(LOG_DEBUG, "RSS: Now parsing [%s]", ChrPtr(RSSAggr->Url));
 
-       rssc->xp = XML_ParserCreateNS(ptr, ':');
-       if (!rssc->xp) {
-               CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n");
-               goto shutdown;
+       RSSAggr->xp = XML_ParserCreateNS(ptr, ':'); /* ':' separates namespace and local name in element names */
+       if (!RSSAggr->xp) {
+               syslog(LOG_ALERT, "Cannot create XML parser!");
+               return eAbort; /* NOTE(review): CData and Key stay allocated on this path */
        }
-       FlushStrBuf(rssc->Key);
+       FlushStrBuf(RSSAggr->Key); /* ptr pointed into Key until here; rss_xml_start/rss_xml_end reuse Key as scratch space */
 
-       XML_SetElementHandler(rssc->xp, rss_xml_start, rss_xml_end);
-       XML_SetCharacterDataHandler(rssc->xp, rss_xml_chardata);
-       XML_SetUserData(rssc->xp, rssc);
-       XML_SetCdataSectionHandler(rssc->xp,
+       RSSAggr->Messages = NewHash(1, Flathash);
+       XML_SetElementHandler(RSSAggr->xp, rss_xml_start, rss_xml_end);
+       XML_SetCharacterDataHandler(RSSAggr->xp, rss_xml_chardata);
+       XML_SetUserData(RSSAggr->xp, RSSAggr); /* the callbacks receive RSSAggr as their data argument */
+       XML_SetCdataSectionHandler(RSSAggr->xp,
                                   rss_xml_cdata_start,
-                                  rss_xml_cdata_end);
-
+                                  rss_xml_cdata_end
+       );
 
        len = StrLength(IO->HttpReq.ReplyData);
        ptr = SmashStrBuf(&IO->HttpReq.ReplyData);
-       XML_Parse(rssc->xp, ptr, len, 0);
+       XML_Parse(RSSAggr->xp, ptr, len, 0); /* isFinal = 0; the return value is not checked */
        free (ptr);
-       if (ri->done_parsing == 0)
-               XML_Parse(rssc->xp, "", 0, 1);
+       if (ri->done_parsing == 0) {
+               XML_Parse(RSSAggr->xp, "", 0, 1); /* isFinal = 1: tell expat the document is complete */
+       }
 
+       syslog(LOG_DEBUG, "RSS: XML Status [%s]", XML_ErrorString(XML_GetErrorCode(RSSAggr->xp)));
 
-       CtdlLogPrintf(CTDL_ALERT, "RSS: XML Status [%s] \n", 
-                     XML_ErrorString(
-                             XML_GetErrorCode(rssc->xp)));
+       XML_ParserFree(RSSAggr->xp);
+       flush_rss_item(ri);
 
-shutdown:
-       XML_ParserFree(rssc->xp);
+       Buf = NewStrBufDup(RSSAggr->rooms); /* recp_room gets its own copy of the room list */
+       RSSAggr->recp.recp_room = SmashStrBuf(&Buf);
+       RSSAggr->recp.num_room = RSSAggr->roomlist_parts;
+       RSSAggr->recp.recptypes_magic = RECPTYPES_MAGIC;
 
-       flush_rss_item(ri);
-       FreeStrBuf(&rssc->CData);
-       FreeStrBuf(&rssc->Key);
+       RSSAggr->Pos = GetNewHashPos(RSSAggr->Messages, 1);
+/* The iteration / database step below is compiled out; as written the function always returns eAbort from here. */
+#if 0
+// FIXME ajc
+       if (GetNextHashPos(RSSAggr->Messages,
+                          RSSAggr->Pos,
+                          &len,
+                          &Key,
+                          (void**) &RSSAggr->ThisMsg)) {
+               return NextDBOperation(IO, RSS_FetchNetworkUsetableEntry);
+       }
+       else {
+#endif
+               return eAbort;
+#if 0
+       }
+#endif
+}
 
-        ///Cfg->next_poll = time(NULL) + config.c_net_freq; 
 
-       return eTerminateConnection;
+/******************************************************************************
+ *                    RSS handler registering logic                           *
+ ******************************************************************************/
+
+/*
+ * Register a handler for an opening tag.
+ *
+ * Handler - callback to invoke
+ * Flags   - bit mask tested by rss_xml_start() against the current feed
+ *           type (RSS_UNSET / RSS_RSS / RSS_ATOM) before Handler is called
+ * key,len - element name and its length; rss_xml_start() lower-cases the
+ *           element name before the lookup, so key should be lower case
+ *
+ * A heap-allocated rss_xml_handler is stored in StartHandlers under key.
+ * If the allocation fails the problem is logged and nothing is registered.
+ */
+void AddRSSStartHandler(rss_handler_func Handler,
+                       int Flags,
+                       const char *key,
+                       long len)
+{
+       rss_xml_handler *h;
+
+       h = malloc(sizeof *h);
+       if (h == NULL) {
+               /* Without this check the assignments below would write through NULL */
+               syslog(LOG_ALERT,
+                      "RSS: out of memory registering start handler [%.*s]",
+                      (int) len,
+                      key);
+               return;
+       }
+       h->Flags = Flags;
+       h->Handler = Handler;
+       Put(StartHandlers, key, len, h, NULL);
 }
 
+/*
+ * Register a handler for a closing tag.
+ *
+ * Handler - callback to invoke
+ * Flags   - bit mask tested by rss_xml_end() against the current feed
+ *           type (RSS_UNSET / RSS_RSS / RSS_ATOM) before Handler is called
+ * key,len - element name and its length; rss_xml_end() lower-cases the
+ *           element name before the lookup, so key should be lower case
+ *
+ * A heap-allocated rss_xml_handler is stored in EndHandlers under key.
+ * If the allocation fails the problem is logged and nothing is registered.
+ */
+void AddRSSEndHandler(rss_handler_func Handler,
+                     int Flags,
+                     const char *key,
+                     long len)
+{
+       rss_xml_handler *h;
+
+       h = malloc(sizeof *h);
+       if (h == NULL) {
+               /* Without this check the assignments below would write through NULL */
+               syslog(LOG_ALERT,
+                      "RSS: out of memory registering end handler [%.*s]",
+                      (int) len,
+                      key);
+               return;
+       }
+       h->Flags = Flags;
+       h->Handler = Handler;
+       Put(EndHandlers, key, len, h, NULL);
+}
 
-void rss_cleanup(void)
+void rss_parser_cleanup(void)
 {
-        DeleteHash(&StartHandlers);
-        DeleteHash(&EndHandlers);
+       DeleteHash(&StartHandlers);
+       DeleteHash(&EndHandlers);
        DeleteHash(&KnownNameSpaces);
 }
 
+void LogDebugEnableRSSATOMParser(const int n)
+{
+       RSSAtomParserDebugEnabled = n;
+}
 
 CTDL_MODULE_INIT(rssparser)
 {
@@ -700,7 +973,7 @@ CTDL_MODULE_INIT(rssparser)
 
                AddRSSStartHandler(RSS_item_rss_start,     RSS_UNSET, HKEY("rss"));
                AddRSSStartHandler(RSS_item_rdf_start,     RSS_UNSET, HKEY("rdf"));
-               AddRSSStartHandler(ATOM_item_feed_start,    RSS_UNSET, HKEY("feed"));
+               AddRSSStartHandler(ATOM_item_feed_start,   RSS_UNSET, HKEY("feed"));
                AddRSSStartHandler(RSS_item_item_start,    RSS_RSS, HKEY("item"));
                AddRSSStartHandler(ATOM_item_entry_start,  RSS_ATOM, HKEY("entry"));
                AddRSSStartHandler(ATOM_item_link_start,   RSS_ATOM, HKEY("link"));
@@ -709,8 +982,8 @@ CTDL_MODULE_INIT(rssparser)
                AddRSSEndHandler(RSS_item_guid_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("guid"));
                AddRSSEndHandler(ATOM_item_id_end,         RSS_ATOM|RSS_REQUIRE_BUF, HKEY("id"));
                AddRSSEndHandler(RSS_item_link_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("link"));
-#if 0 
-// hm, rss to the comments of that blog, might be interesting in future, but... 
+#if 0
+// hm, rss to the comments of that blog, might be interesting in future, but...
                AddRSSEndHandler(RSS_item_relink_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("commentrss"));
 // comment count...
                AddRSSEndHandler(RSS_item_relink_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("comments"));
@@ -777,7 +1050,8 @@ CTDL_MODULE_INIT(rssparser)
                /* we don't like these namespaces because of they shadow our usefull parameters. */
                Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler);
 #endif
-                CtdlRegisterCleanupHook(rss_cleanup);
+               CtdlRegisterDebugFlagHook(HKEY("RSSAtomParser"), LogDebugEnableRSSATOMParser, &RSSAtomParserDebugEnabled);
+               CtdlRegisterCleanupHook(rss_parser_cleanup);
        }
        return "rssparser";
 }