/*
* Bring external RSS feeds into rooms.
*
- * Copyright (c) 2007-2012 by the citadel.org team
+ * Copyright (c) 2007-2015 by the citadel.org team
*
* This program is open source software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3.
*
- *
- *
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- *
- *
- *
*/
#include <stdlib.h>
#include "event_client.h"
#include "rss_atom_parser.h"
-void rss_save_item(rss_item *ri, rss_aggregator *Cfg);
+void rss_remember_item(rss_item *ri, rss_aggregator *Cfg);
int RSSAtomParserDebugEnabled = 0;
-#define N ((rss_aggregator*)IO->Data)->QRnumber
+#define N ((rss_aggregator*)IO->Data)->Cfg.QRnumber
#define DBGLOG(LEVEL) if ((LEVEL != LOG_DEBUG) || (RSSAtomParserDebugEnabled != 0))
#define EVRSSATOM_syslog(LEVEL, FORMAT, ...) \
DBGLOG(LEVEL) syslog(LEVEL, \
- "IO[%ld]CC[%d][%ld]RSSP" FORMAT, \
- IO->ID, CCID, N, __VA_ARGS__)
+ "%s[%ld]CC[%d][%ld]RSSP" FORMAT, \
+ IOSTR, IO->ID, CCID, N, __VA_ARGS__)
#define EVRSSATOMM_syslog(LEVEL, FORMAT) \
DBGLOG(LEVEL) syslog(LEVEL, \
- "IO[%ld]CC[%d][%ld]RSSP" FORMAT, \
- IO->ID, CCID, N)
+ "%s[%ld]CC[%d][%ld]RSSP" FORMAT, \
+ IOSTR, IO->ID, CCID, N)
#define EVRSSATOMCS_syslog(LEVEL, FORMAT, ...) \
- DBGLOG(LEVEL) syslog(LEVEL, "IO[%ld][%ld]RSSP" FORMAT, \
- IO->ID, N, __VA_ARGS__)
+ DBGLOG(LEVEL) syslog(LEVEL, "%s[%ld][%ld]RSSP" FORMAT, \
+ IOSTR, IO->ID, N, __VA_ARGS__)
#define EVRSSATOMSM_syslog(LEVEL, FORMAT) \
- DBGLOG(LEVEL) syslog(LEVEL, "IO[%ld][%ld]RSSP" FORMAT, \
- IO->ID, N)
+ DBGLOG(LEVEL) syslog(LEVEL, "%s[%ld][%ld]RSSP" FORMAT, \
+ IOSTR, IO->ID, N)
/*
* Convert an RDF/RSS datestamp into a time_t
const char** Attr)
{
--ri->item_tag_nesting;
- rss_save_item(ri, RSSAggr);
+ rss_remember_item(ri, RSSAggr);
}
const char** Attr)
{
--ri->item_tag_nesting;
- rss_save_item(ri, RSSAggr);
+ rss_remember_item(ri, RSSAggr);
}
void RSS_item_rss_end(StrBuf *CData,
{
networker_save_message *Msg = (networker_save_message *) vMsg;
- CtdlFreeMessageContents(&Msg->Msg);
+ CM_FreeContents(&Msg->Msg);
FreeStrBuf(&Msg->Message);
FreeStrBuf(&Msg->MsgGUID);
- free(Msg);
-}
+ FreeStrBuf(&Msg->author_email);
+ FreeStrBuf(&Msg->author_or_creator);
+ FreeStrBuf(&Msg->title);
+ FreeStrBuf(&Msg->description);
-void AppendLink(StrBuf *Message,
- StrBuf *link,
- StrBuf *LinkTitle,
- const char *Title)
-{
- if (StrLength(link) > 0)
- {
- StrBufAppendBufPlain(Message, HKEY("<a href=\""), 0);
- StrBufAppendBuf(Message, link, 0);
- StrBufAppendBufPlain(Message, HKEY("\">"), 0);
- if (StrLength(LinkTitle) > 0)
- StrBufAppendBuf(Message, LinkTitle, 0);
- else if ((Title != NULL) && !IsEmptyStr(Title))
- StrBufAppendBufPlain(Message, Title, -1, 0);
- else
- StrBufAppendBuf(Message, link, 0);
- StrBufAppendBufPlain(Message, HKEY("</a><br>\n"), 0);
- }
+ FreeStrBuf(&Msg->link);
+ FreeStrBuf(&Msg->linkTitle);
+
+ FreeStrBuf(&Msg->reLink);
+ FreeStrBuf(&Msg->reLinkTitle);
+
+ free(Msg);
}
+
/*
* Commit a fetched and parsed RSS item to disk
*/
-void rss_save_item(rss_item *ri, rss_aggregator *RSSAggr)
+void rss_remember_item(rss_item *ri, rss_aggregator *RSSAggr)
{
networker_save_message *SaveMsg;
struct MD5Context md5context;
u_char rawdigest[MD5_DIGEST_LEN];
- int msglen = 0;
- StrBuf *Message;
StrBuf *guid;
AsyncIO *IO = &RSSAggr->IO;
int n;
-
- SaveMsg = (networker_save_message *) malloc(
- sizeof(networker_save_message));
+ SaveMsg = (networker_save_message *) malloc(sizeof(networker_save_message));
memset(SaveMsg, 0, sizeof(networker_save_message));
/* Construct a GUID to use in the S_USETABLE table.
else {
MD5Init(&md5context);
if (ri->title != NULL) {
- MD5Update(&md5context,
- (const unsigned char*)SKEY(ri->title));
+ MD5Update(&md5context, (const unsigned char*)SKEY(ri->title));
}
if (ri->link != NULL) {
- MD5Update(&md5context,
- (const unsigned char*)SKEY(ri->link));
+ MD5Update(&md5context, (const unsigned char*)SKEY(ri->link));
}
MD5Final(rawdigest, &md5context);
- guid = NewStrBufPlain(NULL,
- MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/);
+ guid = NewStrBufPlain(NULL, MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/);
StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN);
StrBufAppendBufPlain(guid, HKEY("_rss2ctdl"), 0);
}
SaveMsg->Msg.cm_anon_type = MES_NORMAL;
SaveMsg->Msg.cm_format_type = FMT_RFC822;
- if (ri->guid != NULL) {
- SaveMsg->Msg.cm_fields['E'] = strdup(ChrPtr(ri->guid));
- }
-
- if (ri->author_or_creator != NULL) {
- char *From;
- StrBuf *Encoded = NULL;
- int FromAt;
-
- From = html_to_ascii(ChrPtr(ri->author_or_creator),
- StrLength(ri->author_or_creator),
- 512, 0);
- StrBufPlain(ri->author_or_creator, From, -1);
- StrBufTrim(ri->author_or_creator);
- free(From);
+ /* gather the cheaply computed information now... */
- FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL;
- if (!FromAt && StrLength (ri->author_email) > 0)
- {
- StrBufRFC2047encode(&Encoded, ri->author_or_creator);
- SaveMsg->Msg.cm_fields['A'] = SmashStrBuf(&Encoded);
- SaveMsg->Msg.cm_fields['P'] =
- SmashStrBuf(&ri->author_email);
- }
- else
- {
- if (FromAt)
- {
- SaveMsg->Msg.cm_fields['A'] =
- SmashStrBuf(&ri->author_or_creator);
- SaveMsg->Msg.cm_fields['P'] =
- strdup(SaveMsg->Msg.cm_fields['A']);
- }
- else
- {
- StrBufRFC2047encode(&Encoded,
- ri->author_or_creator);
- SaveMsg->Msg.cm_fields['A'] =
- SmashStrBuf(&Encoded);
- SaveMsg->Msg.cm_fields['P'] =
- strdup("rss@localhost");
-
- }
- if (ri->pubdate <= 0) {
- ri->pubdate = time(NULL);
- }
- }
- }
- else {
- SaveMsg->Msg.cm_fields['A'] = strdup("rss");
+ if (ri->guid != NULL) {
+ CM_SetField(&SaveMsg->Msg, eExclusiveID, SKEY(ri->guid));
}
- SaveMsg->Msg.cm_fields['N'] = strdup(NODENAME);
- if (ri->title != NULL) {
- long len;
- char *Sbj;
- StrBuf *Encoded, *QPEncoded;
-
- QPEncoded = NULL;
- StrBufSpaceToBlank(ri->title);
- len = StrLength(ri->title);
- Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0);
- len = strlen(Sbj);
- if (Sbj[len - 1] == '\n')
- {
- len --;
- Sbj[len] = '\0';
- }
- Encoded = NewStrBufPlain(Sbj, len);
- free(Sbj);
-
- StrBufTrim(Encoded);
- StrBufRFC2047encode(&QPEncoded, Encoded);
+ SaveMsg->MsgGUID = guid;
- SaveMsg->Msg.cm_fields['U'] = SmashStrBuf(&QPEncoded);
- FreeStrBuf(&Encoded);
+ if (ri->pubdate <= 0) {
+ ri->pubdate = time(NULL);
}
- SaveMsg->Msg.cm_fields['T'] = malloc(64);
- snprintf(SaveMsg->Msg.cm_fields['T'], 64, "%ld", ri->pubdate);
+ CM_SetFieldLONG(&SaveMsg->Msg, eTimestamp, ri->pubdate);
if (ri->channel_title != NULL) {
if (StrLength(ri->channel_title) > 0) {
- SaveMsg->Msg.cm_fields['O'] =
- strdup(ChrPtr(ri->channel_title));
+ CM_SetField(&SaveMsg->Msg, eOriginalRoom, SKEY(ri->channel_title));
}
}
- if (ri->link == NULL)
- ri->link = NewStrBufPlain(HKEY(""));
-#if 0 /* temporarily disable shorter urls. */
- SaveMsg->Msg.cm_fields[TMP_SHORTER_URLS] =
- GetShorterUrls(ri->description);
-#endif
+ /* remember these fields for deferred processing, to save computing power until we know whether we really need them. */
- msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ;
+ SaveMsg->author_or_creator = ri->author_or_creator;
+ ri->author_or_creator = NULL;
- Message = NewStrBufPlain(NULL, StrLength(ri->description));
+ SaveMsg->author_email = ri->author_email;
+ ri->author_email = NULL;
- StrBufPlain(Message, HKEY(
- "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n"
- "<html><body>\n"));
-#if 0 /* disable shorter url for now. */
- SaveMsg->Msg.cm_fields[TMP_SHORTER_URL_OFFSET] = StrLength(Message);
-#endif
- StrBufAppendBuf(Message, ri->description, 0);
- StrBufAppendBufPlain(Message, HKEY("<br><br>\n"), 0);
+ SaveMsg->title = ri->title;
+ ri->title = NULL;
- AppendLink(Message, ri->link, ri->linkTitle, NULL);
- AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this");
- StrBufAppendBufPlain(Message, HKEY("</body></html>\n"), 0);
+ SaveMsg->link = ri->link;
+ ri->link = NULL;
- SaveMsg->MsgGUID = guid;
- SaveMsg->Message = Message;
+ SaveMsg->description = ri->description;
+ ri->description = NULL;
+
+ SaveMsg->linkTitle = ri->linkTitle;
+ ri->linkTitle = NULL;
+
+ SaveMsg->reLink = ri->reLink;
+ ri->reLink = NULL;
+
+ SaveMsg->reLinkTitle = ri->reLinkTitle;
+ ri->reLinkTitle = NULL;
n = GetCount(RSSAggr->Messages) + 1;
Put(RSSAggr->Messages, IKEY(n), SaveMsg, FreeNetworkSaveMessage);
}
+
void rss_xml_start(void *data, const char *supplied_el, const char **attr)
{
rss_xml_handler *h;
FlushStrBuf(RSSAggr->CData);
}
+
+
/*
* Callback function for passing libcurl's output to expat for parsing
* we don't do streamed parsing so expat can handle non-utf8 documents
}
*/
+
+
eNextState RSSAggregator_ParseReply(AsyncIO *IO)
{
StrBuf *Buf;
long len;
const char *Key;
-
- if (IO->HttpReq.httpcode != 200)
- {
-
- EVRSSATOM_syslog(LOG_ALERT, "need a 200, got a %ld !\n",
- IO->HttpReq.httpcode);
-// TODO: aide error message with rate limit
- return eAbort;
- }
-
RSSAggr = IO->Data;
ri = RSSAggr->Item;
RSSAggr->CData = NewStrBufPlain(NULL, SIZ);
RSSAggr->Pos = GetNewHashPos(RSSAggr->Messages, 1);
-//RSSAggr->next_poll = time(NULL) + config.c_net_freq;
if (GetNextHashPos(RSSAggr->Messages,
RSSAggr->Pos,
&len,
&Key,
- (void**) &RSSAggr->ThisMsg))
- return QueueDBOperation(IO, RSS_FetchNetworkUsetableEntry);
- else
+ (void**) &RSSAggr->ThisMsg)) {
+ return NextDBOperation(IO, RSS_FetchNetworkUsetableEntry);
+ }
+ else {
return eAbort;
+ }
}
DeleteHash(&KnownNameSpaces);
}
-void LogDebugEnableRSSATOMParser(void)
+void LogDebugEnableRSSATOMParser(const int n)
{
- RSSAtomParserDebugEnabled = 1;
+ RSSAtomParserDebugEnabled = n;
}
CTDL_MODULE_INIT(rssparser)
/* we don't like these namespaces because of they shadow our usefull parameters. */
Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler);
#endif
- CtdlRegisterDebugFlagHook(HKEY("RSSAtomParser"), LogDebugEnableRSSATOMParser);
+ CtdlRegisterDebugFlagHook(HKEY("RSSAtomParser"), LogDebugEnableRSSATOMParser, &RSSAtomParserDebugEnabled);
CtdlRegisterCleanupHook(rss_parser_cleanup);
}
return "rssparser";