/*
* Bring external RSS feeds into rooms.
*
- * Copyright (c) 2007-2010 by the citadel.org team
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
+ * Copyright (c) 2007-2015 by the citadel.org team
*
+ * This program is open source software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 3.
+ *
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdlib.h>
#include "event_client.h"
#include "rss_atom_parser.h"
-void rss_save_item(rss_item *ri, rss_aggregator *Cfg);
+void rss_remember_item(rss_item *ri, rss_aggregator *Cfg);
+
+int RSSAtomParserDebugEnabled = 0;
+
+/*
+ * Log tag: the Cfg.QRnumber of the rss_aggregator hanging off the AsyncIO
+ * pointer named IO that must be in scope wherever the macros below are used.
+ * Printed with %ld.
+ */
+#define N (((rss_aggregator*)IO->Data)->Cfg.QRnumber)
+
+/*
+ * Guard prefix for exactly one following statement: the statement runs unless
+ * LEVEL is LOG_DEBUG while RSSAtomParserDebugEnabled is 0.
+ * Because this expands to a bare `if`, it is only used inside the
+ * do { } while (0) wrappers below, so a caller's `else` can never attach to it.
+ */
+#define DBGLOG(LEVEL) if (((LEVEL) != LOG_DEBUG) || (RSSAtomParserDebugEnabled != 0))
+
+/*
+ * Logging wrappers around syslog(). All of them need a local `AsyncIO *IO`;
+ * IOSTR and CCID are supplied from outside this section of the file.
+ * The variants without a trailing "..." exist because __VA_ARGS__ must not
+ * be empty. Each expands to a single statement; terminate it with ';'.
+ */
+#define EVRSSATOM_syslog(LEVEL, FORMAT, ...) \
+	do { \
+		DBGLOG(LEVEL) syslog(LEVEL, \
+			"%s[%ld]CC[%d][%ld]RSSP" FORMAT, \
+			IOSTR, IO->ID, CCID, N, __VA_ARGS__); \
+	} while (0)
+
+#define EVRSSATOMM_syslog(LEVEL, FORMAT) \
+	do { \
+		DBGLOG(LEVEL) syslog(LEVEL, \
+			"%s[%ld]CC[%d][%ld]RSSP" FORMAT, \
+			IOSTR, IO->ID, CCID, N); \
+	} while (0)
+
+#define EVRSSATOMCS_syslog(LEVEL, FORMAT, ...) \
+	do { \
+		DBGLOG(LEVEL) syslog(LEVEL, "%s[%ld][%ld]RSSP" FORMAT, \
+			IOSTR, IO->ID, N, __VA_ARGS__); \
+	} while (0)
+
+#define EVRSSATOMSM_syslog(LEVEL, FORMAT) \
+	do { \
+		DBGLOG(LEVEL) syslog(LEVEL, "%s[%ld][%ld]RSSP" FORMAT, \
+			IOSTR, IO->ID, N); \
+	} while (0)
/*
* Convert an RDF/RSS datestamp into a time_t
}
-/*******************************************************************************
- * XML-Handler *
- *******************************************************************************/
+/******************************************************************************
+ * XML-Handler *
+ ******************************************************************************/
-void RSS_item_rss_start (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * Start handler: logs "This is an RSS feed" (debug level) and records
+ * RSS_RSS as the aggregator's ItemType. CData, ri and Attr are not used.
+ */
+void RSS_item_rss_start (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
- syslog(LOG_DEBUG, "RSS: This is an RSS feed.\n");
- Cfg->ItemType = RSS_RSS;
+ /* IO is referenced implicitly by the EVRSSATOMM_syslog macro */
+ AsyncIO *IO = &RSSAggr->IO;
+ EVRSSATOMM_syslog(LOG_DEBUG, "RSS: This is an RSS feed.\n");
+ RSSAggr->ItemType = RSS_RSS;
}
-void RSS_item_rdf_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * Start handler: logs "This is an RDF feed" (debug level). RDF feeds are
+ * recorded with the same ItemType as plain RSS (RSS_RSS).
+ * CData, ri and Attr are not used.
+ */
+void RSS_item_rdf_start(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
- syslog(LOG_DEBUG, "RSS: This is an RDF feed.\n");
- Cfg->ItemType = RSS_RSS;
+ /* IO is referenced implicitly by the EVRSSATOMM_syslog macro */
+ AsyncIO *IO = &RSSAggr->IO;
+ EVRSSATOMM_syslog(LOG_DEBUG, "RSS: This is an RDF feed.\n");
+ RSSAggr->ItemType = RSS_RSS;
}
-void ATOM_item_feed_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * Start handler: logs "This is an ATOM feed" (debug level) and records
+ * RSS_ATOM as the aggregator's ItemType. CData, ri and Attr are not used.
+ */
+void ATOM_item_feed_start(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
- syslog(LOG_DEBUG, "RSS: This is an ATOM feed.\n");
- Cfg->ItemType = RSS_ATOM;
+ /* IO is referenced implicitly by the EVRSSATOMM_syslog macro */
+ AsyncIO *IO = &RSSAggr->IO;
+ EVRSSATOMM_syslog(LOG_DEBUG, "RSS: This is an ATOM feed.\n");
+ RSSAggr->ItemType = RSS_ATOM;
}
-void RSS_item_item_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * Start handler for an item: increments ri->item_tag_nesting and calls
+ * flush_rss_item(ri) (presumably clearing fields left over from a previous
+ * item -- confirm against flush_rss_item). CData, RSSAggr and Attr are unused.
+ */
+void RSS_item_item_start(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
ri->item_tag_nesting ++;
flush_rss_item(ri);
}
-void ATOM_item_entry_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * Atom counterpart of RSS_item_item_start: increments ri->item_tag_nesting
+ * and calls flush_rss_item(ri). CData, RSSAggr and Attr are unused.
+ */
+void ATOM_item_entry_start(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
/* Atom feed... */
ri->item_tag_nesting ++;
flush_rss_item(ri);
}
-void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void ATOM_item_link_start (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
int i;
const char *pHref = NULL;
if (pHref == NULL)
return; /* WHUT? Pointing... where? */
if ((pType != NULL) && !strcasecmp(pType, "application/atom+xml"))
- return; /* these just point to other rss resources, we're not interested in them. */
+ return;
+ /* these just point to other rss resources,
+ we're not interested in them. */
if (pRel != NULL)
{
if (!strcasecmp (pRel, "replies"))
{
NewStrBufDupAppendFlush(&ri->reLink, NULL, pHref, -1);
StrBufTrim(ri->link);
- NewStrBufDupAppendFlush(&ri->reLinkTitle, NULL, pTitle, -1);
+ NewStrBufDupAppendFlush(&ri->reLinkTitle,
+ NULL,
+ pTitle,
+ -1);
}
- else if (!strcasecmp(pRel, "alternate")) /* Alternative representation of this Item... */
- {
+ else if (!strcasecmp(pRel, "alternate"))
+ { /* Alternative representation of this Item... */
NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1);
StrBufTrim(ri->link);
- NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1);
+ NewStrBufDupAppendFlush(&ri->linkTitle,
+ NULL,
+ pTitle,
+ -1);
}
#if 0 /* these are also defined, but dunno what to do with them.. */
{
}
else if (!strcasecmp(pRel, "enclosure"))
- {/* this reference can get big, and is probably the full article... */
+ {/*...reference can get big, and is probably the full article*/
}
else if (!strcasecmp(pRel, "via"))
{/* this article was provided via... */
-void ATOMRSS_item_title_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * End handler for a title seen outside of any item (item_tag_nesting == 0):
+ * a non-empty CData is passed into ri->channel_title via
+ * NewStrBufDupAppendFlush(). Titles inside an item are left alone here.
+ */
+void ATOMRSS_item_title_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if ((ri->item_tag_nesting == 0) && (StrLength(CData) > 0)) {
NewStrBufDupAppendFlush(&ri->channel_title, CData, NULL, 0);
}
}
-void RSS_item_guid_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * End handler: a non-empty CData is passed into ri->guid via
+ * NewStrBufDupAppendFlush(); empty CData is ignored.
+ */
+void RSS_item_guid_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
}
}
-void ATOM_item_id_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void ATOM_item_id_end(StrBuf *CData,
+ rss_item *ri, rss_aggregator *RSSAggr, const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
}
-void RSS_item_link_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * End handler: a non-empty CData is passed into ri->link via
+ * NewStrBufDupAppendFlush() and the link is then trimmed with StrBufTrim().
+ * Empty CData is ignored.
+ */
+void RSS_item_link_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->link, CData, NULL, 0);
StrBufTrim(ri->link);
}
}
-void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * End handler: a non-empty CData is passed into ri->reLink via
+ * NewStrBufDupAppendFlush(); empty CData is ignored.
+ */
+void RSS_item_relink_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->reLink, CData, NULL, 0);
}
}
-void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * End handler: a non-empty CData is passed into ri->title via
+ * NewStrBufDupAppendFlush(); empty CData is ignored.
+ */
+void RSSATOM_item_title_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->title, CData, NULL, 0);
}
}
-void ATOM_item_content_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * End handler: uses CData as the item description when no description is
+ * stored yet, or when CData is longer than the stored one (the longer text
+ * wins). The stored description is trimmed afterwards. Empty CData, or CData
+ * that is not longer than the current description, changes nothing.
+ */
+void ATOM_item_content_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
long olen = StrLength (ri->description);
long clen = StrLength (CData);
- if (clen > 0)
+ if (clen > 0)
{
if (olen == 0) {
- NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+ NewStrBufDupAppendFlush(&ri->description,
+ CData,
+ NULL,
+ 0);
StrBufTrim(ri->description);
}
else if (olen < clen) {
FlushStrBuf(ri->description);
- NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+ NewStrBufDupAppendFlush(&ri->description,
+ CData,
+ NULL,
+ 0);
+
StrBufTrim(ri->description);
}
}
}
-void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * End handler: a non-empty CData becomes the description only while
+ * ri->description is still empty.
+ */
+void ATOM_item_summary_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
- /* this can contain an abstract of the article. but we don't want to verwrite a full document if we already have it. */
+ /*
+ * This can contain an abstract of the article,
+ * but we don't want to overwrite a full document if we already have it.
+ */
if ((StrLength(CData) > 0) && (StrLength(ri->description) == 0))
{
NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
}
}
-void RSS_item_description_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * End handler: uses CData as the item description when no description is
+ * stored yet, or when CData is longer than the stored one (the longer text
+ * wins). The stored description is trimmed afterwards. Same logic as
+ * ATOM_item_content_end.
+ */
+void RSS_item_description_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
long olen = StrLength (ri->description);
long clen = StrLength (CData);
- if (clen > 0)
+ if (clen > 0)
{
if (olen == 0) {
- NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+ NewStrBufDupAppendFlush(&ri->description,
+ CData,
+ NULL,
+ 0);
StrBufTrim(ri->description);
}
else if (olen < clen) {
FlushStrBuf(ri->description);
- NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
+ NewStrBufDupAppendFlush(&ri->description,
+ CData,
+ NULL,
+ 0);
StrBufTrim(ri->description);
}
}
}
-void ATOM_item_published_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
-{
+/*
+ * End handler: trims a non-empty CData in place and stores the result of
+ * rdf_parsedate() on it in ri->pubdate. Empty CData leaves pubdate untouched.
+ */
+void ATOM_item_published_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
+{
if (StrLength(CData) > 0) {
StrBufTrim(CData);
ri->pubdate = rdf_parsedate(ChrPtr(CData));
}
}
-void ATOM_item_updated_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void ATOM_item_updated_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
StrBufTrim(CData);
}
}
-void RSS_item_pubdate_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_pubdate_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
StrBufTrim(CData);
}
-void RSS_item_date_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_date_end (StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
StrBufTrim(CData);
-void RSS_item_author_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_author_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
}
-void ATOM_item_name_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * End handler: a non-empty CData is passed into ri->author_or_creator via
+ * NewStrBufDupAppendFlush(); empty CData is ignored.
+ */
+void ATOM_item_name_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
}
}
-void ATOM_item_email_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * End handler: a non-empty CData is passed into ri->author_email via
+ * NewStrBufDupAppendFlush(); empty CData is ignored.
+ */
+void ATOM_item_email_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->author_email, CData, NULL, 0);
}
}
-void RSS_item_creator_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+void RSS_item_creator_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
- if ((StrLength(CData) > 0) &&
+ if ((StrLength(CData) > 0) &&
(StrLength(ri->author_or_creator) == 0))
{
NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
}
-void ATOM_item_uri_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * End handler: a non-empty CData is passed into ri->author_url via
+ * NewStrBufDupAppendFlush(); empty CData is ignored.
+ */
+void ATOM_item_uri_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
if (StrLength(CData) > 0) {
NewStrBufDupAppendFlush(&ri->author_url, CData, NULL, 0);
}
}
-void RSS_item_item_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * End handler for an item: decrements ri->item_tag_nesting and hands the
+ * collected item to rss_remember_item(). CData and Attr are unused.
+ */
+void RSS_item_item_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
--ri->item_tag_nesting;
- rss_save_item(ri, Cfg);
+ rss_remember_item(ri, RSSAggr);
}
-void ATOM_item_entry_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * Atom counterpart of RSS_item_item_end: decrements ri->item_tag_nesting
+ * and hands the collected item to rss_remember_item().
+ * CData and Attr are unused.
+ */
+void ATOM_item_entry_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
--ri->item_tag_nesting;
- rss_save_item(ri, Cfg);
+ rss_remember_item(ri, RSSAggr);
}
-void RSS_item_rss_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * End handler: logs "End of feed detected" (debug level) and sets
+ * ri->done_parsing to 1. CData and Attr are unused.
+ */
+void RSS_item_rss_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
-// syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n");
+ /* IO is referenced implicitly by the EVRSSATOMM_syslog macro */
+ AsyncIO *IO = &RSSAggr->IO;
+ EVRSSATOMM_syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n");
ri->done_parsing = 1;
-
}
-void RSS_item_rdf_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+
+/*
+ * End handler, identical in effect to RSS_item_rss_end: logs
+ * "End of feed detected" (debug level) and sets ri->done_parsing to 1.
+ */
+void RSS_item_rdf_end(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
-// syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n");
+ /* IO is referenced implicitly by the EVRSSATOMM_syslog macro */
+ AsyncIO *IO = &RSSAggr->IO;
+ EVRSSATOMM_syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n");
ri->done_parsing = 1;
}
-void RSSATOM_item_ignore(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
+/*
+ * Deliberate no-op with the common handler signature; none of the
+ * arguments are touched.
+ */
+void RSSATOM_item_ignore(StrBuf *CData,
+ rss_item *ri,
+ rss_aggregator *RSSAggr,
+ const char** Attr)
{
}
/*
* This callback stores up the data which appears in between tags.
*/
-void rss_xml_cdata_start(void *data)
+/*
+ * CDATA section begins: data is the rss_aggregator; its CData buffer is
+ * flushed with FlushStrBuf() before the section's text arrives.
+ */
+void rss_xml_cdata_start(void *data)
{
rss_aggregator *RSSAggr = (rss_aggregator*) data;
FlushStrBuf(RSSAggr->CData);
}
-void rss_xml_cdata_end(void *data)
+/* CDATA section ends: intentionally nothing to do. */
+void rss_xml_cdata_end(void *data)
{
}
-void rss_xml_chardata(void *data, const XML_Char *s, int len)
+void rss_xml_chardata(void *data, const XML_Char *s, int len)
{
rss_aggregator *RSSAggr = (rss_aggregator*) data;
}
-/*******************************************************************************
- * RSS parser logic *
- *******************************************************************************/
+/******************************************************************************
+ * RSS parser logic *
+ ******************************************************************************/
extern pthread_mutex_t RSSQueueMutex;
{
networker_save_message *Msg = (networker_save_message *) vMsg;
- CtdlFreeMessageContents(&Msg->Msg);
+ CM_FreeContents(&Msg->Msg);
FreeStrBuf(&Msg->Message);
FreeStrBuf(&Msg->MsgGUID);
- free(Msg);
-}
+ FreeStrBuf(&Msg->author_email);
+ FreeStrBuf(&Msg->author_or_creator);
+ FreeStrBuf(&Msg->title);
+ FreeStrBuf(&Msg->description);
-void AppendLink(StrBuf *Message,
- StrBuf *link,
- StrBuf *LinkTitle,
- const char *Title)
-{
- if (StrLength(link) > 0)
- {
- StrBufAppendBufPlain(Message, HKEY("<a href=\""), 0);
- StrBufAppendBuf(Message, link, 0);
- StrBufAppendBufPlain(Message, HKEY("\">"), 0);
- if (StrLength(LinkTitle) > 0)
- StrBufAppendBuf(Message, LinkTitle, 0);
- else if ((Title != NULL) && !IsEmptyStr(Title))
- StrBufAppendBufPlain(Message, Title, -1, 0);
- else
- StrBufAppendBuf(Message, link, 0);
- StrBufAppendBufPlain(Message, HKEY("</a><br>\n"), 0);
- }
+ FreeStrBuf(&Msg->link);
+ FreeStrBuf(&Msg->linkTitle);
+
+ FreeStrBuf(&Msg->reLink);
+ FreeStrBuf(&Msg->reLinkTitle);
+
+ free(Msg);
}
+
/*
* Commit a fetched and parsed RSS item to disk
*/
-void rss_save_item(rss_item *ri, rss_aggregator *Cfg)
+void rss_remember_item(rss_item *ri, rss_aggregator *RSSAggr)
{
networker_save_message *SaveMsg;
struct MD5Context md5context;
u_char rawdigest[MD5_DIGEST_LEN];
- int msglen = 0;
- StrBuf *Message;
StrBuf *guid;
- AsyncIO *IO = &Cfg->IO;
+ AsyncIO *IO = &RSSAggr->IO;
int n;
-
- SaveMsg = (networker_save_message *) malloc(
- sizeof(networker_save_message));
+ SaveMsg = (networker_save_message *) malloc(sizeof(networker_save_message));
memset(SaveMsg, 0, sizeof(networker_save_message));
/* Construct a GUID to use in the S_USETABLE table.
else {
MD5Init(&md5context);
if (ri->title != NULL) {
- MD5Update(&md5context,
- (const unsigned char*)SKEY(ri->title));
+ MD5Update(&md5context, (const unsigned char*)SKEY(ri->title));
}
if (ri->link != NULL) {
- MD5Update(&md5context,
- (const unsigned char*)SKEY(ri->link));
+ MD5Update(&md5context, (const unsigned char*)SKEY(ri->link));
}
MD5Final(rawdigest, &md5context);
- guid = NewStrBufPlain(NULL,
- MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/);
+ guid = NewStrBufPlain(NULL, MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/);
StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN);
StrBufAppendBufPlain(guid, HKEY("_rss2ctdl"), 0);
}
/* translate Item into message. */
- EVM_syslog(LOG_DEBUG, "RSS: translating item...\n");
+ EVRSSATOMM_syslog(LOG_DEBUG, "RSS: translating item...\n");
if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY(""));
StrBufSpaceToBlank(ri->description);
SaveMsg->Msg.cm_magic = CTDLMESSAGE_MAGIC;
SaveMsg->Msg.cm_anon_type = MES_NORMAL;
SaveMsg->Msg.cm_format_type = FMT_RFC822;
- if (ri->guid != NULL) {
- SaveMsg->Msg.cm_fields['E'] = strdup(ChrPtr(ri->guid));
- }
-
- if (ri->author_or_creator != NULL) {
- char *From;
- StrBuf *Encoded = NULL;
- int FromAt;
+ /* gather the cheaply computed information now... */
- From = html_to_ascii(ChrPtr(ri->author_or_creator),
- StrLength(ri->author_or_creator),
- 512, 0);
- StrBufPlain(ri->author_or_creator, From, -1);
- StrBufTrim(ri->author_or_creator);
- free(From);
-
- FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL;
- if (!FromAt && StrLength (ri->author_email) > 0)
- {
- StrBufRFC2047encode(&Encoded, ri->author_or_creator);
- SaveMsg->Msg.cm_fields['A'] = SmashStrBuf(&Encoded);
- SaveMsg->Msg.cm_fields['P'] =
- SmashStrBuf(&ri->author_email);
- }
- else
- {
- if (FromAt)
- {
- SaveMsg->Msg.cm_fields['A'] =
- SmashStrBuf(&ri->author_or_creator);
- SaveMsg->Msg.cm_fields['P'] =
- strdup(SaveMsg->Msg.cm_fields['A']);
- }
- else
- {
- StrBufRFC2047encode(&Encoded,
- ri->author_or_creator);
- SaveMsg->Msg.cm_fields['A'] =
- SmashStrBuf(&Encoded);
- SaveMsg->Msg.cm_fields['P'] =
- strdup("rss@localhost");
-
- }
- if (ri->pubdate <= 0) {
- ri->pubdate = time(NULL);
- }
- }
- }
- else {
- SaveMsg->Msg.cm_fields['A'] = strdup("rss");
+ if (ri->guid != NULL) {
+ CM_SetField(&SaveMsg->Msg, eExclusiveID, SKEY(ri->guid));
}
- SaveMsg->Msg.cm_fields['N'] = strdup(NODENAME);
- if (ri->title != NULL) {
- long len;
- char *Sbj;
- StrBuf *Encoded, *QPEncoded;
-
- QPEncoded = NULL;
- StrBufSpaceToBlank(ri->title);
- len = StrLength(ri->title);
- Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0);
- len = strlen(Sbj);
- if (Sbj[len - 1] == '\n')
- {
- len --;
- Sbj[len] = '\0';
- }
- Encoded = NewStrBufPlain(Sbj, len);
- free(Sbj);
-
- StrBufTrim(Encoded);
- StrBufRFC2047encode(&QPEncoded, Encoded);
+ SaveMsg->MsgGUID = guid;
- SaveMsg->Msg.cm_fields['U'] = SmashStrBuf(&QPEncoded);
- FreeStrBuf(&Encoded);
+ if (ri->pubdate <= 0) {
+ ri->pubdate = time(NULL);
}
- SaveMsg->Msg.cm_fields['T'] = malloc(64);
- snprintf(SaveMsg->Msg.cm_fields['T'], 64, "%ld", ri->pubdate);
+ CM_SetFieldLONG(&SaveMsg->Msg, eTimestamp, ri->pubdate);
if (ri->channel_title != NULL) {
if (StrLength(ri->channel_title) > 0) {
- SaveMsg->Msg.cm_fields['O'] =
- strdup(ChrPtr(ri->channel_title));
+ CM_SetField(&SaveMsg->Msg, eOriginalRoom, SKEY(ri->channel_title));
}
}
- if (ri->link == NULL)
- ri->link = NewStrBufPlain(HKEY(""));
-#if 0 /* temporarily disable shorter urls. */
- SaveMsg->Msg.cm_fields[TMP_SHORTER_URLS] =
- GetShorterUrls(ri->description);
-#endif
+ /* remember these for deferred processing; the expensive work is only done once we know we really need it. */
- msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ;
+ SaveMsg->author_or_creator = ri->author_or_creator;
+ ri->author_or_creator = NULL;
- Message = NewStrBufPlain(NULL, StrLength(ri->description));
+ SaveMsg->author_email = ri->author_email;
+ ri->author_email = NULL;
- StrBufPlain(Message, HKEY(
- "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n"
- "<html><body>\n"));
-#if 0 /* disable shorter url for now. */
- SaveMsg->Msg.cm_fields[TMP_SHORTER_URL_OFFSET] = StrLength(Message);
-#endif
- StrBufAppendBuf(Message, ri->description, 0);
- StrBufAppendBufPlain(Message, HKEY("<br><br>\n"), 0);
+ SaveMsg->title = ri->title;
+ ri->title = NULL;
- AppendLink(Message, ri->link, ri->linkTitle, NULL);
- AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this");
- StrBufAppendBufPlain(Message, HKEY("</body></html>\n"), 0);
+ SaveMsg->link = ri->link;
+ ri->link = NULL;
- SaveMsg->MsgGUID = guid;
- SaveMsg->Message = Message;
+ SaveMsg->description = ri->description;
+ ri->description = NULL;
+
+ SaveMsg->linkTitle = ri->linkTitle;
+ ri->linkTitle = NULL;
- n = GetCount(Cfg->Messages) + 1;
- Put(Cfg->Messages, IKEY(n), SaveMsg, FreeNetworkSaveMessage);
+ SaveMsg->reLink = ri->reLink;
+ ri->reLink = NULL;
+
+ SaveMsg->reLinkTitle = ri->reLinkTitle;
+ ri->reLinkTitle = NULL;
+
+ n = GetCount(RSSAggr->Messages) + 1;
+ Put(RSSAggr->Messages, IKEY(n), SaveMsg, FreeNetworkSaveMessage);
}
+
void rss_xml_start(void *data, const char *supplied_el, const char **attr)
{
rss_xml_handler *h;
rss_aggregator *RSSAggr = (rss_aggregator*) data;
+ AsyncIO *IO = &RSSAggr->IO;
rss_item *ri = RSSAggr->Item;
void *pv;
const char *pel;
char *sep = NULL;
/* Axe the namespace, we don't care about it */
-/// syslog(LOG_DEBUG, "RSS: supplied el %d: %s...\n", RSSAggr->Cfg->ItemType, supplied_el);
+ /*
+ syslog(LOG_DEBUG,
+ "RSS: supplied el %d: %s\n", RSSAggr->ItemType, supplied_el);
+ */
pel = supplied_el;
while (sep = strchr(pel, ':'), sep) {
pel = sep + 1;
if (pel != supplied_el)
{
void *v;
-
- if (!GetHash(KnownNameSpaces,
- supplied_el,
+
+ if (!GetHash(KnownNameSpaces,
+ supplied_el,
pel - supplied_el - 1,
&v))
{
-#ifdef DEBUG_RSS
- syslog(LOG_DEBUG, "RSS: START ignoring because of wrong namespace [%s]\n",
- supplied_el);
-#endif
+ EVRSSATOM_syslog(LOG_DEBUG,
+ "RSS: START ignoring "
+ "because of wrong namespace [%s]\n",
+ supplied_el);
return;
}
}
{
h = (rss_xml_handler*) pv;
- if (((h->Flags & RSS_UNSET) != 0) &&
+ if (((h->Flags & RSS_UNSET) != 0) &&
(RSSAggr->ItemType == RSS_UNSET))
{
h->Handler(RSSAggr->CData, ri, RSSAggr, attr);
else if (((h->Flags & RSS_ATOM) != 0) &&
(RSSAggr->ItemType == RSS_ATOM))
{
- h->Handler(RSSAggr->CData, ri, RSSAggr, attr);
+ h->Handler(RSSAggr->CData,
+ ri,
+ RSSAggr,
+ attr);
}
-#ifdef DEBUG_RSS
- else
- syslog(LOG_DEBUG, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el);
-#endif
+ else
+ EVRSSATOM_syslog(LOG_DEBUG,
+ "RSS: START unhandled: [%s] [%s]...\n",
+ pel,
+ supplied_el);
}
-#ifdef DEBUG_RSS
- else
- syslog(LOG_DEBUG, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el);
-#endif
+ else
+ EVRSSATOM_syslog(LOG_DEBUG,
+ "RSS: START unhandled: [%s] [%s]...\n",
+ pel,
+ supplied_el);
}
+/*
+ * Called for a closing tag (supplied_el); data is the rss_aggregator.
+ * - pel is advanced past every ':' so it points at the bare element name.
+ * - If a namespace prefix was present and it is not found in
+ *   KnownNameSpaces, the element is ignored and CData is flushed.
+ * - Otherwise the handler whose Flags match RSSAggr->ItemType is invoked
+ *   with the collected CData (Attr is always NULL for end tags).
+ * - CData is flushed before returning so the next element starts clean.
+ */
void rss_xml_end(void *data, const char *supplied_el)
{
rss_xml_handler *h;
rss_aggregator *RSSAggr = (rss_aggregator*) data;
+ AsyncIO *IO = &RSSAggr->IO;
rss_item *ri = RSSAggr->Item;
const char *pel;
char *sep = NULL;
while (sep = strchr(pel, ':'), sep) {
pel = sep + 1;
}
-// syslog(LOG_DEBUG, "RSS: END %s...\n", el);
+ EVRSSATOM_syslog(LOG_DEBUG, "RSS: END %s...\n", supplied_el);
if (pel != supplied_el)
{
void *v;
-
- if (!GetHash(KnownNameSpaces,
- supplied_el,
+
+ if (!GetHash(KnownNameSpaces,
+ supplied_el,
pel - supplied_el - 1,
&v))
{
-#ifdef DEBUG_RSS
- syslog(LOG_DEBUG, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n",
- supplied_el, ChrPtr(RSSAggr->CData));
-#endif
+ /* adjacent literals are concatenated: keep the trailing space
+ * so the message reads "...namespace [%s]" as before */
+ EVRSSATOM_syslog(LOG_DEBUG,
+ "RSS: END ignoring because of wrong namespace "
+ "[%s] = [%s]\n",
+ supplied_el,
+ ChrPtr(RSSAggr->CData));
FlushStrBuf(RSSAggr->CData);
return;
}
{
h = (rss_xml_handler*) pv;
- if (((h->Flags & RSS_UNSET) != 0) &&
+ if (((h->Flags & RSS_UNSET) != 0) &&
(RSSAggr->ItemType == RSS_UNSET))
{
h->Handler(RSSAggr->CData, ri, RSSAggr, NULL);
{
h->Handler(RSSAggr->CData, ri, RSSAggr, NULL);
}
-#ifdef DEBUG_RSS
- else
- syslog(LOG_DEBUG, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(RSSAggr->CData));
-#endif
+ else
+ EVRSSATOM_syslog(LOG_DEBUG,
+ "RSS: END unhandled: [%s] [%s] = [%s]...\n",
+ pel,
+ supplied_el,
+ ChrPtr(RSSAggr->CData));
}
-#ifdef DEBUG_RSS
- else
- syslog(LOG_DEBUG, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(RSSAggr->CData));
-#endif
+ else
+ EVRSSATOM_syslog(LOG_DEBUG,
+ "RSS: END unhandled: [%s] [%s] = [%s]...\n",
+ pel,
+ supplied_el,
+ ChrPtr(RSSAggr->CData));
FlushStrBuf(RSSAggr->CData);
}
+
+
/*
* Callback function for passing libcurl's output to expat for parsing
* we don't do streamed parsing so expat can handle non-utf8 documents
}
*/
+
+
eNextState RSSAggregator_ParseReply(AsyncIO *IO)
{
StrBuf *Buf;
long len;
const char *Key;
-
- if (IO->HttpReq.httpcode != 200)
- {
-
- EV_syslog(LOG_DEBUG, "need a 200, got a %ld !\n",
- IO->HttpReq.httpcode);
-// TODO: aide error message with rate limit
- return eAbort;
- }
-
RSSAggr = IO->Data;
ri = RSSAggr->Item;
RSSAggr->CData = NewStrBufPlain(NULL, SIZ);
pche = strchr(ptr, '"');
if (pche != NULL)
StrBufCutAt(RSSAggr->Key, -1, pche);
- else
+ else
ptr = "UTF-8";
}
else
ptr = "UTF-8";
- syslog(LOG_DEBUG, "RSS: Now parsing [%s] \n", ChrPtr(RSSAggr->Url));
+ EVRSSATOM_syslog(LOG_DEBUG, "RSS: Now parsing [%s] \n", ChrPtr(RSSAggr->Url));
RSSAggr->xp = XML_ParserCreateNS(ptr, ':');
if (!RSSAggr->xp) {
- syslog(LOG_DEBUG, "Cannot create XML parser!\n");
+ EVRSSATOMM_syslog(LOG_ALERT, "Cannot create XML parser!\n");
return eAbort;
}
FlushStrBuf(RSSAggr->Key);
XML_Parse(RSSAggr->xp, "", 0, 1);
- syslog(LOG_DEBUG, "RSS: XML Status [%s] \n",
- XML_ErrorString(
- XML_GetErrorCode(RSSAggr->xp)));
+ EVRSSATOM_syslog(LOG_DEBUG, "RSS: XML Status [%s] \n",
+ XML_ErrorString(XML_GetErrorCode(RSSAggr->xp)));
XML_ParserFree(RSSAggr->xp);
flush_rss_item(ri);
RSSAggr->Pos = GetNewHashPos(RSSAggr->Messages, 1);
- ///Cfg->next_poll = time(NULL) + config.c_net_freq;
- if (GetNextHashPos(RSSAggr->Messages, RSSAggr->Pos, &len, &Key, (void**) &RSSAggr->ThisMsg))
- return QueueDBOperation(IO, RSS_FetchNetworkUsetableEntry);
- else
+ if (GetNextHashPos(RSSAggr->Messages,
+ RSSAggr->Pos,
+ &len,
+ &Key,
+ (void**) &RSSAggr->ThisMsg)) {
+ return NextDBOperation(IO, RSS_FetchNetworkUsetableEntry);
+ }
+ else {
return eAbort;
+ }
}
-/*******************************************************************************
- * RSS handler registering logic *
- *******************************************************************************/
+/******************************************************************************
+ * RSS handler registering logic *
+ ******************************************************************************/
-void AddRSSStartHandler(rss_handler_func Handler, int Flags, const char *key, long len)
+void AddRSSStartHandler(rss_handler_func Handler,
+ int Flags,
+ const char *key,
+ long len)
{
rss_xml_handler *h;
h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
h->Handler = Handler;
Put(StartHandlers, key, len, h, NULL);
}
-void AddRSSEndHandler(rss_handler_func Handler, int Flags, const char *key, long len)
+
+void AddRSSEndHandler(rss_handler_func Handler,
+ int Flags,
+ const char *key,
+ long len)
{
rss_xml_handler *h;
h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
+/*
+ * Cleanup hook (registered through CtdlRegisterCleanupHook in the module
+ * init): calls DeleteHash() on StartHandlers, EndHandlers and
+ * KnownNameSpaces.
+ */
void rss_parser_cleanup(void)
{
- DeleteHash(&StartHandlers);
- DeleteHash(&EndHandlers);
+ DeleteHash(&StartHandlers);
+ DeleteHash(&EndHandlers);
DeleteHash(&KnownNameSpaces);
}
+/*
+ * Debug-flag hook (registered under "RSSAtomParser" via
+ * CtdlRegisterDebugFlagHook): stores n in RSSAtomParserDebugEnabled, the
+ * switch DBGLOG() checks before emitting LOG_DEBUG messages.
+ */
+void LogDebugEnableRSSATOMParser(const int n)
+{
+ RSSAtomParserDebugEnabled = n;
+}
CTDL_MODULE_INIT(rssparser)
{
AddRSSEndHandler(RSS_item_guid_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("guid"));
AddRSSEndHandler(ATOM_item_id_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("id"));
AddRSSEndHandler(RSS_item_link_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("link"));
-#if 0
-// hm, rss to the comments of that blog, might be interesting in future, but...
+#if 0
+// hm, rss to the comments of that blog, might be interesting in future, but...
AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("commentrss"));
// comment count...
AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("comments"));
/* we don't like these namespaces because of they shadow our usefull parameters. */
Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler);
#endif
- CtdlRegisterCleanupHook(rss_parser_cleanup);
+ CtdlRegisterDebugFlagHook(HKEY("RSSAtomParser"), LogDebugEnableRSSATOMParser, &RSSAtomParserDebugEnabled);
+ CtdlRegisterCleanupHook(rss_parser_cleanup);
}
return "rssparser";
}