/*
- * $Id: serv_rssclient.c 5652 2007-10-29 20:14:48Z ajc $
- *
* Bring external RSS feeds into rooms.
*
+ * Copyright (c) 2007-2010 by the citadel.org team
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdlib.h>
#include "support.h"
#include "config.h"
#include "threads.h"
-#include "room_ops.h"
#include "ctdl_module.h"
-#include "clientsocket.h"
#include "msgbase.h"
#include "parsedate.h"
#include "database.h"
#include "citadel_dirs.h"
#include "md5.h"
+#include "context.h"
+#include "event_client.h"
+#include "rss_atom_parser.h"
-struct rssnetcfg {
- struct rssnetcfg *next;
- char url[256];
- char *rooms;
-};
-
-struct rss_item {
- char *chardata;
- int chardata_len;
- char *roomlist;
- int done_parsing;
- char *guid;
- char *title;
- char *link;
- char *description;
- time_t pubdate;
- char channel_title[256];
- int item_tag_nesting;
-};
-
struct rssnetcfg *rnclist = NULL;
-
-
+/*
+ * Append an HTML anchor pointing at `link` to Message, followed by "<br>\n".
+ * Does nothing when `link` is empty.
+ *
+ * The anchor text is chosen in this order: LinkTitle if it is non-empty,
+ * otherwise the plain C string Title if it is non-NULL and non-empty,
+ * otherwise the link itself.
+ *
+ * NOTE(review): link and LinkTitle are appended verbatim, without HTML
+ * escaping -- confirm that callers only pass already-sanitized text.
+ */
+void AppendLink(StrBuf *Message, StrBuf *link, StrBuf *LinkTitle, const char *Title)
+{
+	/* Without a target URL there is no anchor to emit. */
+	if (StrLength(link) <= 0) {
+		return;
+	}
+
+	StrBufAppendBufPlain(Message, HKEY("<a href=\""), 0);
+	StrBufAppendBuf(Message, link, 0);
+	StrBufAppendBufPlain(Message, HKEY("\">"), 0);
+
+	/* Pick the visible text of the anchor. */
+	if (StrLength(LinkTitle) > 0) {
+		StrBufAppendBuf(Message, LinkTitle, 0);
+	}
+	else if ((Title == NULL) || IsEmptyStr(Title)) {
+		/* No usable title at all: show the URL itself. */
+		StrBufAppendBuf(Message, link, 0);
+	}
+	else {
+		StrBufAppendBufPlain(Message, Title, -1, 0);
+	}
+
+	StrBufAppendBufPlain(Message, HKEY("</a><br>\n"), 0);
+}
/*
* Commit a fetched and parsed RSS item to disk
*/
-void rss_save_item(struct rss_item *ri) {
+void rss_save_item(rss_item *ri)
+{
struct MD5Context md5context;
u_char rawdigest[MD5_DIGEST_LEN];
struct CtdlMessage *msg;
struct recptypes *recp = NULL;
int msglen = 0;
+ StrBuf *Message;
recp = (struct recptypes *) malloc(sizeof(struct recptypes));
if (recp == NULL) return;
memset(recp, 0, sizeof(struct recptypes));
+ memset(&ut, 0, sizeof(struct UseTable));
recp->recp_room = strdup(ri->roomlist);
recp->num_room = num_tokens(ri->roomlist, '|');
recp->recptypes_magic = RECPTYPES_MAGIC;
* If one is not present in the item itself, make one up.
*/
if (ri->guid != NULL) {
- snprintf(utmsgid, sizeof utmsgid, "rss/%s", ri->guid);
+ StrBufSpaceToBlank(ri->guid);
+ StrBufTrim(ri->guid);
+ snprintf(utmsgid, sizeof utmsgid, "rss/%s", ChrPtr(ri->guid));
}
else {
MD5Init(&md5context);
if (ri->title != NULL) {
- MD5Update(&md5context, (unsigned char*)ri->title, strlen(ri->title));
+ MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->title), StrLength(ri->title));
}
if (ri->link != NULL) {
- MD5Update(&md5context, (unsigned char*)ri->link, strlen(ri->link));
+ MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->link), StrLength(ri->link));
}
MD5Final(rawdigest, &md5context);
for (i=0; i<MD5_DIGEST_LEN; i++) {
}
/* Find out if we've already seen this item */
+
cdbut = cdb_fetch(CDB_USETABLE, utmsgid, strlen(utmsgid));
+#ifndef DEBUG_RSS
if (cdbut != NULL) {
/* Item has already been seen */
CtdlLogPrintf(CTDL_DEBUG, "%s has already been seen\n", utmsgid);
ut.ut_timestamp = time(NULL);
cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) );
}
- else {
+ else
+#endif
+{
/* Item has not been seen, so save it. */
-
- if (ri->description == NULL) ri->description = strdup("");
- for (i=strlen(ri->description); i>=0; --i) {
- if (isspace(ri->description[i])) {
- ri->description[i] = ' ';
- }
- }
-
+ CtdlLogPrintf(CTDL_DEBUG, "RSS: saving item...\n");
+ if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY(""));
+ StrBufSpaceToBlank(ri->description);
msg = malloc(sizeof(struct CtdlMessage));
memset(msg, 0, sizeof(struct CtdlMessage));
msg->cm_magic = CTDLMESSAGE_MAGIC;
msg->cm_anon_type = MES_NORMAL;
msg->cm_format_type = FMT_RFC822;
- msg->cm_fields['A'] = strdup("rss");
- msg->cm_fields['N'] = strdup(NODENAME);
- msg->cm_fields['U'] = strdup(ri->title);
- msg->cm_fields['T'] = malloc(64);
- snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate);
- if (!IsEmptyStr(ri->channel_title)) {
- msg->cm_fields['O'] = strdup(ri->channel_title);
- }
-
- msglen = 1024 + strlen(ri->link) + strlen(ri->description) ;
- msg->cm_fields['M'] = malloc(msglen);
- snprintf(msg->cm_fields['M'], msglen,
- "Content-type: text/html\r\n\r\n"
- "<html><body>\n"
- "%s<br><br>\n"
- "<a href=\"%s\">%s</a>\n"
- "</body></html>\n"
- ,
- ri->description,
- ri->link, ri->link
- );
- CtdlSubmitMsg(msg, recp, NULL, 0);
- CtdlFreeMessage(msg);
-
- /* write the uidl to the use table so we don't store this item again */
- strcpy(ut.ut_msgid, utmsgid);
- ut.ut_timestamp = time(NULL);
- cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) );
- }
- free_recipients(recp);
-}
-
-
-
-/*
- * Convert an RDF/RSS datestamp into a time_t
- */
-time_t rdf_parsedate(char *p)
-{
- struct tm tm;
- time_t t = 0;
-
- if (!p) return 0L;
- if (strlen(p) < 10) return 0L;
-
- memset(&tm, 0, sizeof tm);
-
- /* YYYY-MM-DDTHH:MM format...
- */
- if ( (p[4] == '-') && (p[7] == '-') ) {
- tm.tm_year = atoi(&p[0]) - 1900;
- tm.tm_mon = atoi(&p[5]) - 1;
- tm.tm_mday = atoi(&p[8]);
- if ( (p[10] == 'T') && (p[13] == ':') ) {
- tm.tm_hour = atoi(&p[11]);
- tm.tm_min = atoi(&p[14]);
+ if (ri->guid != NULL) {
+ msg->cm_fields['E'] = strdup(ChrPtr(ri->guid));
}
- return mktime(&tm);
- }
-
- /* hmm... try RFC822 date stamp format */
-
- t = parsedate(p);
- if (t > 0) return(t);
-
- /* yeesh. ok, just return the current date and time. */
- return(time(NULL));
-}
-
-
-
-void rss_xml_start(void *data, const char *supplied_el, const char **attr) {
- struct rss_item *ri = (struct rss_item *) data;
- char el[256];
- char *sep = NULL;
- /* Axe the namespace, we don't care about it */
- safestrncpy(el, supplied_el, sizeof el);
- while (sep = strchr(el, ':'), sep) {
- strcpy(el, ++sep);
- }
-
- if (!strcasecmp(el, "item")) {
- ++ri->item_tag_nesting;
-
- /* Initialize the feed item data structure */
- if (ri->guid != NULL) free(ri->guid);
- ri->guid = NULL;
- if (ri->title != NULL) free(ri->title);
- ri->title = NULL;
- if (ri->link != NULL) free(ri->link);
- ri->link = NULL;
- if (ri->description != NULL) free(ri->description);
- ri->description = NULL;
-
- /* Throw away any existing character data */
- if (ri->chardata_len > 0) {
- free(ri->chardata);
- ri->chardata = 0;
- ri->chardata_len = 0;
+ if (ri->author_or_creator != NULL) {
+ char *From;
+ StrBuf *Encoded = NULL;
+ int FromAt;
+
+ From = html_to_ascii(ChrPtr(ri->author_or_creator),
+ StrLength(ri->author_or_creator),
+ 512, 0);
+ StrBufPlain(ri->author_or_creator, From, -1);
+ StrBufTrim(ri->author_or_creator);
+ free(From);
+
+ FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL;
+ if (!FromAt && StrLength (ri->author_email) > 0)
+ {
+ StrBufRFC2047encode(&Encoded, ri->author_or_creator);
+ msg->cm_fields['A'] = SmashStrBuf(&Encoded);
+ msg->cm_fields['P'] = SmashStrBuf(&ri->author_email);
+ }
+ else
+ {
+ if (FromAt)
+ msg->cm_fields['P'] = SmashStrBuf(&ri->author_or_creator);
+ else
+ {
+ StrBufRFC2047encode(&Encoded, ri->author_or_creator);
+ msg->cm_fields['A'] = SmashStrBuf(&Encoded);
+ msg->cm_fields['P'] = strdup("rss@localhost");
+ }
+ }
+ }
+ else {
+ msg->cm_fields['A'] = strdup("rss");
}
- }
-
-
-
-}
-
-void rss_xml_end(void *data, const char *supplied_el) {
- struct rss_item *ri = (struct rss_item *) data;
- char el[256];
- char *sep = NULL;
-
- /* Axe the namespace, we don't care about it */
- safestrncpy(el, supplied_el, sizeof el);
- while (sep = strchr(el, ':'), sep) {
- strcpy(el, ++sep);
- }
-
- if ( (!strcasecmp(el, "title")) && (ri->item_tag_nesting == 0) && (ri->chardata != NULL) ) {
- safestrncpy(ri->channel_title, ri->chardata, sizeof ri->channel_title);
- striplt(ri->channel_title);
- }
-
- if ( (!strcasecmp(el, "guid")) && (ri->chardata != NULL) ) {
- if (ri->guid != NULL) free(ri->guid);
- striplt(ri->chardata);
- ri->guid = strdup(ri->chardata);
- }
- if ( (!strcasecmp(el, "title")) && (ri->chardata != NULL) ) {
- if (ri->title != NULL) free(ri->title);
- striplt(ri->chardata);
- ri->title = strdup(ri->chardata);
- }
+ msg->cm_fields['N'] = strdup(NODENAME);
+ if (ri->title != NULL) {
+ long len;
+ char *Sbj;
+ StrBuf *Encoded, *QPEncoded;
+
+ QPEncoded = NULL;
+ StrBufSpaceToBlank(ri->title);
+ len = StrLength(ri->title);
+ Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0);
+ len = strlen(Sbj);
+ if (Sbj[len - 1] == '\n')
+ {
+ len --;
+ Sbj[len] = '\0';
+ }
+ Encoded = NewStrBufPlain(Sbj, len);
+ free(Sbj);
- if ( (!strcasecmp(el, "link")) && (ri->chardata != NULL) ) {
- if (ri->link != NULL) free(ri->link);
- striplt(ri->chardata);
- ri->link = strdup(ri->chardata);
- }
+ StrBufTrim(Encoded);
+ StrBufRFC2047encode(&QPEncoded, Encoded);
- if ( (!strcasecmp(el, "description")) && (ri->chardata != NULL) ) {
- if (ri->description != NULL) free(ri->description);
- ri->description = strdup(ri->chardata);
- }
+ msg->cm_fields['U'] = SmashStrBuf(&QPEncoded);
+ FreeStrBuf(&Encoded);
+ }
+ msg->cm_fields['T'] = malloc(64);
+ snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate);
+ if (ri->channel_title != NULL) {
+ if (StrLength(ri->channel_title) > 0) {
+ msg->cm_fields['O'] = strdup(ChrPtr(ri->channel_title));
+ }
+ }
+ if (ri->link == NULL)
+ ri->link = NewStrBufPlain(HKEY(""));
+ // TODO: reenable me ExpandShortUrls(ri->description);
+ msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ;
- if ( ((!strcasecmp(el, "pubdate")) || (!strcasecmp(el, "date"))) && (ri->chardata != NULL) ) {
- striplt(ri->chardata);
- ri->pubdate = rdf_parsedate(ri->chardata);
- }
+ Message = NewStrBufPlain(NULL, StrLength(ri->description));
- if (!strcasecmp(el, "item")) {
- --ri->item_tag_nesting;
- rss_save_item(ri);
- }
+ StrBufPlain(Message, HKEY(
+ "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n"
+ "<html><body>\n"));
- if ( (!strcasecmp(el, "rss")) || (!strcasecmp(el, "rdf")) ) {
- CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n");
- ri->done_parsing = 1;
- }
+ StrBufAppendBuf(Message, ri->description, 0);
+ StrBufAppendBufPlain(Message, HKEY("<br><br>\n"), 0);
- if (ri->chardata_len > 0) {
- free(ri->chardata);
- ri->chardata = 0;
- ri->chardata_len = 0;
- }
+ AppendLink(Message, ri->link, ri->linkTitle, NULL);
+ AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this");
+ StrBufAppendBufPlain(Message, HKEY("</body></html>\n"), 0);
-}
+ msg->cm_fields['M'] = SmashStrBuf(&Message);
+ CtdlSubmitMsg(msg, recp, NULL, 0);
+ CtdlFreeMessage(msg);
-/*
- * This callback stores up the data which appears in between tags.
- */
-void rss_xml_chardata(void *data, const XML_Char *s, int len) {
- struct rss_item *ri = (struct rss_item *) data;
- int old_len;
- int new_len;
- char *new_buffer;
-
- old_len = ri->chardata_len;
- new_len = old_len + len;
- new_buffer = realloc(ri->chardata, new_len + 1);
- if (new_buffer != NULL) {
- memcpy(&new_buffer[old_len], s, len);
- new_buffer[new_len] = 0;
- ri->chardata = new_buffer;
- ri->chardata_len = new_len;
+ /* write the uidl to the use table so we don't store this item again */
+ strcpy(ut.ut_msgid, utmsgid);
+ ut.ut_timestamp = time(NULL);
+ cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) );
}
+ free_recipients(recp);
}
-/*
- * Callback function for passing libcurl's output to expat for parsing
- */
-size_t rss_libcurl_callback(void *ptr, size_t size, size_t nmemb, void *stream)
-{
- XML_Parse((XML_Parser)stream, ptr, (size * nmemb), 0);
- return (size*nmemb);
-}
-
/*
* Begin a feed parse
*/
-void rss_do_fetching(char *url, char *rooms) {
- struct rss_item ri;
- XML_Parser xp;
+/*
+ * Start a fetch of the feed described by Cfg, unless Cfg->next_poll is set
+ * and has not been reached yet.
+ *
+ * Allocates one rss_item and one rsscollection for the request, points the
+ * item's roomlist at Cfg->rooms (borrowed, not copied), and hands the
+ * AsyncIO embedded in the rsscollection to evcurl_init() and
+ * evcurl_handle_start(), with ParseRSSReply as the callback.
+ *
+ * Returns without starting a request when memory cannot be allocated or
+ * when evcurl_init() reports failure.
+ *
+ * NOTE(review): rssc->Cfg and ri->roomlist alias memory owned by the
+ * caller's rssnetcfg; if the request outlives this call, that memory must
+ * not be freed first -- confirm against rssclient_scan().
+ */
+void rss_do_fetching(rssnetcfg *Cfg) {
+	rsscollection *rssc;
+	rss_item *ri;
+
+	time_t now;
- CURL *curl;
- CURLcode res;
- char errmsg[1024] = "";
+	AsyncIO *IO;
- CtdlLogPrintf(CTDL_DEBUG, "Fetching RSS feed <%s>\n", url);
+	now = time(NULL);
- curl = curl_easy_init();
- if (!curl) {
- CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n");
+	/* Not due yet: a next_poll of 0 means "poll immediately". */
+	if ((Cfg->next_poll != 0) && (now < Cfg->next_poll))
return;
- }
- xp = XML_ParserCreateNS("UTF-8", ':');
- if (!xp) {
- CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n");
- curl_easy_cleanup(curl);
- return;
- }
- curl_easy_setopt(curl, CURLOPT_URL, url);
- curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
- curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
- curl_easy_setopt(curl, CURLOPT_WRITEDATA, xp);
- curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_libcurl_callback);
- curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errmsg);
- curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
- curl_easy_setopt(curl, CURLOPT_USERAGENT, CITADEL);
- if (!IsEmptyStr(config.c_ip_addr)) {
- curl_easy_setopt(curl, CURLOPT_INTERFACE, config.c_ip_addr);
- }
+	/* Zero-initialized request state; bail out cleanly if either allocation fails. */
+	ri = calloc(1, sizeof *ri);
+	rssc = calloc(1, sizeof *rssc);
+	if ((ri == NULL) || (rssc == NULL)) {
+		CtdlLogPrintf(CTDL_ALERT, "rssclient: out of memory, cannot fetch feed\n");
+		free(ri);
+		free(rssc);
+		return;
+	}
+	rssc->Item = ri;
+	rssc->Cfg = Cfg;
+	IO = &rssc->IO;
+	IO->CitContext = CloneContext(CC);
+	IO->Data = rssc;
+	ri->roomlist = Cfg->rooms;
- memset(&ri, 0, sizeof(struct rss_item));
- ri.roomlist = rooms;
- XML_SetElementHandler(xp, rss_xml_start, rss_xml_end);
- XML_SetCharacterDataHandler(xp, rss_xml_chardata);
- XML_SetUserData(xp, &ri);
- if (CtdlThreadCheckStop())
- {
- XML_ParserFree(xp);
- curl_easy_cleanup(curl);
- return;
- }
-
- if (CtdlThreadCheckStop())
- goto shutdown ;
+	CtdlLogPrintf(CTDL_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(Cfg->Url));
+	ParseURL(&IO->ConnectMe, Cfg->Url, 80);
+	CurlPrepareURL(IO->ConnectMe);
- res = curl_easy_perform(curl);
- if (res) {
- CtdlLogPrintf(CTDL_ALERT, "libcurl error %d: %s\n", res, errmsg);
+	if (! evcurl_init(IO,
+			  NULL,
+			  "Citadel RSS Client",
+			  ParseRSSReply))
+	{
+		CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n");
+		/*
+		 * Do not start a request whose setup failed.
+		 * NOTE(review): ri, rssc, the cloned context and the parsed URL
+		 * are not released here because their release routines are not
+		 * visible in this file -- confirm and add the cleanup.
+		 */
+		return;
}
- if (CtdlThreadCheckStop())
- goto shutdown ;
-
-
- if (ri.done_parsing == 0) XML_Parse(xp, "", 0, 1);
-
-
-shutdown:
- curl_easy_cleanup(curl);
- XML_ParserFree(xp);
-
- /* Free the feed item data structure */
- if (ri.guid != NULL) free(ri.guid);
- ri.guid = NULL;
- if (ri.title != NULL) free(ri.title);
- ri.title = NULL;
- if (ri.link != NULL) free(ri.link);
- ri.link = NULL;
- if (ri.description != NULL) free(ri.description);
- ri.description = NULL;
- if (ri.chardata_len > 0) {
- free(ri.chardata);
- ri.chardata = 0;
- ri.chardata_len = 0;
- }
+	evcurl_handle_start(IO);
}
+
+
/*
* Scan a room's netconfig to determine whether it is requesting any RSS feeds
*/
char instr[32];
FILE *fp;
char feedurl[256];
- struct rssnetcfg *rncptr = NULL;
- struct rssnetcfg *use_this_rncptr = NULL;
+ rssnetcfg *rncptr = NULL;
+ rssnetcfg *use_this_rncptr = NULL;
int len = 0;
char *ptr = NULL;
* room to the target list for that client request.
*/
for (rncptr=rnclist; rncptr!=NULL; rncptr=rncptr->next) {
- if (!strcmp(rncptr->url, feedurl)) {
+ if (!strcmp(ChrPtr(rncptr->Url), feedurl)) {
use_this_rncptr = rncptr;
}
}
/* Otherwise create a new client request */
if (use_this_rncptr == NULL) {
- rncptr = (struct rssnetcfg *) malloc(sizeof(struct rssnetcfg));
+ rncptr = (rssnetcfg *) malloc(sizeof(rssnetcfg));
+ memset(rncptr, 0, sizeof(rssnetcfg));
+ rncptr->ItemType = RSS_UNSET;
if (rncptr != NULL) {
rncptr->next = rnclist;
- safestrncpy(rncptr->url, feedurl, sizeof rncptr->url);
+ rncptr->Url = NewStrBufPlain(feedurl, -1);
rncptr->rooms = NULL;
rnclist = rncptr;
use_this_rncptr = rncptr;
/*
* Scan for rooms that have RSS client requests configured
*/
-void *rssclient_scan(void *args) {
+void rssclient_scan(void) {
static time_t last_run = 0L;
static int doing_rssclient = 0;
- struct rssnetcfg *rptr = NULL;
- struct CitContext rssclientCC;
-
- /* Give this thread its own private CitContext */
- CtdlFillPrivateContext(&rssclientCC, "rssclient");
- citthread_setspecific(MyConKey, (void *)&rssclientCC );
-
- CtdlThreadAllocTSD();
+ rssnetcfg *rptr = NULL;
/*
* This is a simple concurrency check to make sure only one rssclient run
* don't really require extremely fine granularity here, we'll do it
* with a static variable instead.
*/
- if (doing_rssclient) return NULL;
+ if (doing_rssclient) return;
doing_rssclient = 1;
CtdlLogPrintf(CTDL_DEBUG, "rssclient started\n");
- ForEachRoom(rssclient_scan_room, NULL);
+ CtdlForEachRoom(rssclient_scan_room, NULL);
while (rnclist != NULL && !CtdlThreadCheckStop()) {
- rss_do_fetching(rnclist->url, rnclist->rooms);
+ rss_do_fetching(rnclist);
rptr = rnclist;
rnclist = rnclist->next;
if (rptr->rooms != NULL) free(rptr->rooms);
CtdlLogPrintf(CTDL_DEBUG, "rssclient ended\n");
last_run = time(NULL);
doing_rssclient = 0;
- if (!CtdlThreadCheckStop())
- CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, last_run + config.c_net_freq);
- else
- CtdlLogPrintf(CTDL_DEBUG, "rssclient: Task STOPPED.\n");
- return NULL;
+ return;
}
if (threading)
{
CtdlLogPrintf(CTDL_INFO, "%s\n", curl_version());
- CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, 0);
+ CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER);
}
- /* return our Subversion id for the Log */
- return "$Id: serv_rssclient.c 5652 2007-10-29 20:14:48Z ajc $";
+ return "rssclient";
}