#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <expat.h>
+#include <libcitadel.h>
#include "citadel.h"
#include "server.h"
#include "citserver.h"
#include "support.h"
#include "config.h"
-#include "tools.h"
+#include "threads.h"
#include "room_ops.h"
#include "ctdl_module.h"
#include "clientsocket.h"
#include "msgbase.h"
+#include "parsedate.h"
#include "database.h"
#include "citadel_dirs.h"
#include "md5.h"
-#ifdef HAVE_EXPAT
-#include <expat.h>
-
struct rssnetcfg {
struct rssnetcfg *next;
char *link;
char *description;
time_t pubdate;
+ char channel_title[256];
+ int item_tag_nesting;
};
struct rssnetcfg *rnclist = NULL;
cdbut = cdb_fetch(CDB_USETABLE, utmsgid, strlen(utmsgid));
if (cdbut != NULL) {
/* Item has already been seen */
- lprintf(CTDL_DEBUG, "%s has already been seen\n", utmsgid);
+ CtdlLogPrintf(CTDL_DEBUG, "%s has already been seen\n", utmsgid);
cdb_free(cdbut);
/* rewrite the record anyway, to update the timestamp */
else {
/* Item has not been seen, so save it. */
+ if (ri->description == NULL) ri->description = strdup("");
for (i=strlen(ri->description); i>=0; --i) {
if (isspace(ri->description[i])) {
ri->description[i] = ' ';
msg->cm_fields['U'] = strdup(ri->title);
msg->cm_fields['T'] = malloc(64);
snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate);
+ if (!IsEmptyStr(ri->channel_title)) {
+ msg->cm_fields['O'] = strdup(ri->channel_title);
+ }
msglen = 1024 + strlen(ri->link) + strlen(ri->description) ;
msg->cm_fields['M'] = malloc(msglen);
CtdlSubmitMsg(msg, recp, NULL);
CtdlFreeMessage(msg);
- free_recipients(recp);
/* write the uidl to the use table so we don't store this item again */
strcpy(ut.ut_msgid, utmsgid);
ut.ut_timestamp = time(NULL);
cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) );
}
+ free_recipients(recp);
}
time_t rdf_parsedate(char *p)
{
struct tm tm;
+ time_t t = 0;
if (!p) return 0L;
if (strlen(p) < 10) return 0L;
tm.tm_hour = atoi(&p[11]);
tm.tm_min = atoi(&p[14]);
}
+ return mktime(&tm);
}
- else {
- /* FIXME try an imap timestamp conversion */
- }
+ /* hmm... try RFC822 date stamp format */
- return mktime(&tm);
+ t = parsedate(p);
+ if (t > 0) return(t);
+
+ /* yeesh. ok, just return the current date and time. */
+ return(time(NULL));
}
-void rss_xml_start(void *data, const char *el, const char **attr) {
+void rss_xml_start(void *data, const char *supplied_el, const char **attr) {
struct rss_item *ri = (struct rss_item *) data;
+ char el[256];
+ char *sep = NULL;
+
+ /* Axe the namespace, we don't care about it */
+ safestrncpy(el, supplied_el, sizeof el);
+ while (sep = strchr(el, ':'), sep) {
+ strcpy(el, ++sep);
+ }
if (!strcasecmp(el, "item")) {
+ ++ri->item_tag_nesting;
/* Initialize the feed item data structure */
if (ri->guid != NULL) free(ri->guid);
char el[256];
char *sep = NULL;
-
/* Axe the namespace, we don't care about it */
-
safestrncpy(el, supplied_el, sizeof el);
while (sep = strchr(el, ':'), sep) {
strcpy(el, ++sep);
}
- if (!strcasecmp(el, "guid")) {
+ if ( (!strcasecmp(el, "title")) && (ri->item_tag_nesting == 0) && (ri->chardata != NULL) ) {
+ safestrncpy(ri->channel_title, ri->chardata, sizeof ri->channel_title);
+ striplt(ri->channel_title);
+ }
+
+ if ( (!strcasecmp(el, "guid")) && (ri->chardata != NULL) ) {
if (ri->guid != NULL) free(ri->guid);
striplt(ri->chardata);
ri->guid = strdup(ri->chardata);
}
- if (!strcasecmp(el, "title")) {
+ if ( (!strcasecmp(el, "title")) && (ri->chardata != NULL) ) {
if (ri->title != NULL) free(ri->title);
striplt(ri->chardata);
ri->title = strdup(ri->chardata);
}
- if (!strcasecmp(el, "link")) {
+ if ( (!strcasecmp(el, "link")) && (ri->chardata != NULL) ) {
if (ri->link != NULL) free(ri->link);
striplt(ri->chardata);
ri->link = strdup(ri->chardata);
}
- if (!strcasecmp(el, "description")) {
+ if ( (!strcasecmp(el, "description")) && (ri->chardata != NULL) ) {
if (ri->description != NULL) free(ri->description);
ri->description = strdup(ri->chardata);
}
- if ( (!strcasecmp(el, "pubdate")) || (!strcasecmp(el, "date")) ) {
+ if ( ((!strcasecmp(el, "pubdate")) || (!strcasecmp(el, "date"))) && (ri->chardata != NULL) ) {
striplt(ri->chardata);
ri->pubdate = rdf_parsedate(ri->chardata);
}
if (!strcasecmp(el, "item")) {
+ --ri->item_tag_nesting;
rss_save_item(ri);
}
if ( (!strcasecmp(el, "rss")) || (!strcasecmp(el, "rdf")) ) {
- lprintf(CTDL_DEBUG, "KILL THE PARSER, KILL THE PARSER, KILL THE PARSER!\n");
+ CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n");
ri->done_parsing = 1;
}
/*
- * Parses an url into hostname, port number and resource identifier.
+ * Parse a URL into host, port number, and resource identifier.
*/
int parse_url(char *url, char *hostname, int *port, char *identifier)
{
- char protocol[1024], scratch[1024];
+ char protocol[1024];
+ char scratch[1024];
char *ptr = NULL;
char *nptr = NULL;
if (!ptr) {
return(1); /* no protocol specified */
}
+
strcpy(ptr, "");
strcpy(protocol, scratch);
if (strcmp(protocol, "http")) {
}
-
-
-
-
/*
* Begin a feed parse
*/
XML_Parser xp;
int sock = (-1);
int got_bytes = (-1);
+ int redirect_count = 0;
+ /* Parse the URL */
+ if (parse_url(url, rsshost, &rssport, rssurl) != 0) {
+ CtdlLogPrintf(CTDL_ALERT, "Invalid URL: %s\n", url);
+ }
+
+ if (CtdlThreadCheckStop())
+ return;
xp = XML_ParserCreateNS("UTF-8", ':');
if (!xp) {
- lprintf(CTDL_ALERT, "Cannot create XML parser!\n");
+ CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n");
return;
}
XML_SetCharacterDataHandler(xp, rss_xml_chardata);
XML_SetUserData(xp, &ri);
- /* Parse the URL */
-
-retry: sock = (-1);
- if (parse_url(url, rsshost, &rssport, rssurl) != 0) {
- lprintf(CTDL_ALERT, "Invalid URL: %s\n", url);
- }
- else {
- lprintf(CTDL_NOTICE, "Connecting to <%s>\n", rsshost);
- sprintf(buf, "%d", rssport);
- sock = sock_connect(rsshost, buf, "tcp");
+ if (CtdlThreadCheckStop())
+ {
+ XML_ParserFree(xp);
+ return;
}
-
+
+retry: CtdlLogPrintf(CTDL_NOTICE, "Connecting to <%s>\n", rsshost);
+ sprintf(buf, "%d", rssport);
+ sock = sock_connect(rsshost, buf, "tcp");
if (sock >= 0) {
- lprintf(CTDL_DEBUG, "Connected!\n");
+ CtdlLogPrintf(CTDL_DEBUG, "Connected!\n");
+
+ if (CtdlThreadCheckStop())
+ goto shutdown ;
+
+ snprintf(buf, sizeof buf, "GET %s HTTP/1.0", rssurl);
+ CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf);
+ sock_puts(sock, buf);
+
+ if (CtdlThreadCheckStop())
+ goto shutdown ;
+
+ snprintf(buf, sizeof buf, "Host: %s", rsshost);
+ CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf);
+ sock_puts(sock, buf);
- snprintf(buf, sizeof buf, "GET %s HTTP/1.0\r", rssurl);
- lprintf(CTDL_DEBUG, "<%s\n", buf);
+ if (CtdlThreadCheckStop())
+ goto shutdown ;
+
+ snprintf(buf, sizeof buf, "User-Agent: %s", CITADEL);
+ CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf);
sock_puts(sock, buf);
- snprintf(buf, sizeof buf, "Server: %s\r", rsshost);
- lprintf(CTDL_DEBUG, "<%s\n", buf);
+ if (CtdlThreadCheckStop())
+ goto shutdown ;
+
+ snprintf(buf, sizeof buf, "Accept: */*");
+ CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf);
sock_puts(sock, buf);
- sock_puts(sock, "\r");
+ if (CtdlThreadCheckStop())
+ goto shutdown ;
+
+ sock_puts(sock, "");
+ if (CtdlThreadCheckStop())
+ goto shutdown ;
+
if (sock_getln(sock, buf, sizeof buf) >= 0) {
- lprintf(CTDL_DEBUG, ">%s\n", buf);
+ CtdlLogPrintf(CTDL_DEBUG, ">%s\n", buf);
remove_token(buf, 0, ' ');
+
+ /* 200 OK */
if (buf[0] == '2') {
- while (got_bytes = sock_getln(sock, buf, sizeof buf),
+ while (got_bytes = sock_getln(sock, buf, sizeof buf),
(got_bytes >= 0 && (strcmp(buf, "")) && (strcmp(buf, "\r"))) ) {
- /* FIXME handle 302 redirects!! */
+ if (CtdlThreadCheckStop())
+ goto shutdown ;
+ /* discard headers */
}
-
while (got_bytes = sock_read(sock, buf, sizeof buf, 0),
((got_bytes>=0) && (ri.done_parsing == 0)) ) {
+ if (CtdlThreadCheckStop())
+ goto shutdown ;
XML_Parse(xp, buf, got_bytes, 0);
}
if (ri.done_parsing == 0) XML_Parse(xp, "", 0, 1);
}
+
+ /* 30X redirect */
+ else if ( (!strncmp(buf, "30", 2)) && (redirect_count < 16) ) {
+ while (got_bytes = sock_getln(sock, buf, sizeof buf),
+ (got_bytes >= 0 && (strcmp(buf, "")) && (strcmp(buf, "\r"))) ) {
+ if (CtdlThreadCheckStop())
+ goto shutdown ;
+ if (!strncasecmp(buf, "Location:", 9)) {
+ ++redirect_count;
+ strcpy(buf, &buf[9]);
+ striplt(buf);
+ if (parse_url(buf, rsshost, &rssport, rssurl) == 0) {
+ sock_close(sock);
+ goto retry;
+ }
+ else {
+ CtdlLogPrintf(CTDL_ALERT, "Invalid URL: %s\n", buf);
+ }
+ }
+ }
+ }
+
}
+shutdown:
sock_close(sock);
}
else {
- lprintf(CTDL_ERR, "Could not connect: %s\n", strerror(errno));
+ CtdlLogPrintf(CTDL_ERR, "Could not connect: %s\n", strerror(errno));
}
XML_ParserFree(xp);
assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir);
+ if (CtdlThreadCheckStop())
+ return;
+
/* Only do net processing for rooms that have netconfigs */
fp = fopen(filename, "r");
if (fp == NULL) {
return;
}
- while (fgets(buf, sizeof buf, fp) != NULL) {
+ while (fgets(buf, sizeof buf, fp) != NULL && !CtdlThreadCheckStop()) {
buf[strlen(buf)-1] = 0;
extract_token(instr, buf, 0, '|', sizeof instr);
if (!strcasecmp(instr, "rssclient")) {
+
+ use_this_rncptr = NULL;
+
extract_token(feedurl, buf, 1, '|', sizeof feedurl);
/* If any other rooms have requested the same feed, then we will just add this
/*
 * rssclient_scan() -- entry point for one RSS client pass.
 *
 * Scan for rooms that have RSS client requests configured, fetch every
 * feed queued on the global rnclist, and then schedule the next pass.
 *
 * args:    not referenced anywhere in the body shown here.
 * returns: NULL on every path of the pointer-returning signature.
 *
 * NOTE(review): the early "already running" return below happens before
 * the CtdlThreadSchedule() call at the bottom, so that invocation does not
 * re-arm the task -- confirm this is intended.
 */
-void rssclient_scan(void) {
+void *rssclient_scan(void *args) {
/* Function-scope statics: their values persist from one call to the next. */
static time_t last_run = 0L;
static int doing_rssclient = 0;
struct rssnetcfg *rptr = NULL;
+ struct CitContext rssclientCC;
- /*
- * Run RSS client no more frequently than once every n seconds
- */
- if ( (time(NULL) - last_run) < config.c_net_freq ) {
- return;
- }
+ /* Give this thread its own private CitContext */
+ memset(&rssclientCC, 0, sizeof(struct CitContext));
+ rssclientCC.internal_pgm = 1;
+ rssclientCC.cs_pid = 0;
/*
 * NOTE(review): rssclientCC is a local variable and its address is what
 * gets stored under MyConKey -- presumably nothing reads that key for this
 * thread after the function returns; confirm.
 */
+ pthread_setspecific(MyConKey, (void *)&rssclientCC );
+
+ CtdlThreadAllocTSD();
/*
 * This is a simple concurrency check to make sure only one rssclient run
 * is in progress at a time.  We don't really require extremely fine
 * granularity here, so we just use a static flag (doing_rssclient).
 */
- if (doing_rssclient) return;
+ if (doing_rssclient) return NULL;
doing_rssclient = 1;
- lprintf(CTDL_DEBUG, "rssclient started\n");
+ CtdlLogPrintf(CTDL_DEBUG, "rssclient started\n");
/*
 * Visit every room with the rssclient_scan_room callback; presumably that
 * callback is what fills rnclist -- verify against its definition.
 */
ForEachRoom(rssclient_scan_room, NULL);
/*
 * Drain the list head-first: fetch the feed, unlink the node, free it.
 * The stop check ends the loop early.
 * NOTE(review): on an early stop the nodes still on rnclist are not freed
 * here, and free(rptr) releases only the node itself -- confirm whether
 * url/rooms need separate cleanup.
 */
- while (rnclist != NULL) {
+ while (rnclist != NULL && !CtdlThreadCheckStop()) {
rss_do_fetching(rnclist->url, rnclist->rooms);
rptr = rnclist;
rnclist = rnclist->next;
free(rptr);
}
- lprintf(CTDL_DEBUG, "rssclient ended\n");
+ CtdlLogPrintf(CTDL_DEBUG, "rssclient ended\n");
/* Record the completion time and release the run flag. */
last_run = time(NULL);
doing_rssclient = 0;
/*
 * Unless a stop was requested, schedule this same function again with
 * last_run + config.c_net_freq as the final argument (presumably the
 * earliest start time of the next run -- confirm against
 * CtdlThreadSchedule).
 */
+ if (!CtdlThreadCheckStop())
+ CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, last_run + config.c_net_freq);
+ else
+ CtdlLogPrintf(CTDL_DEBUG, "rssclient: Task STOPPED.\n");
+ return NULL;
}
-#endif /* HAVE_EXPAT */
-
/*
 * Module initialization for the RSS client.
 *
 * When `threading` is nonzero, schedules rssclient_scan under the name
 * "RSS Client" with the CTDLTHREAD_BIGSTACK flag and a final argument of 0
 * (presumably "start as soon as possible" -- confirm against
 * CtdlThreadSchedule).  When `threading` is zero nothing is scheduled.
 * NOTE(review): `threading` is defined outside this view; presumably it
 * marks the init pass in which threads may be created -- confirm.
 *
 * Returns the version-control id string of this source file.
 */
CTDL_MODULE_INIT(rssclient)
{
-#ifdef HAVE_EXPAT
- CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER);
-#else
- lprintf(CTDL_INFO, "This server is missing the Expat XML parser. RSS client will be disabled.\n");
-#endif
+ if (threading)
+ {
+ CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, 0);
+ }
/* return our Subversion id for the Log */
return "$Id: serv_rssclient.c 5652 2007-10-29 20:14:48Z ajc $";
}