/*
- * $Id: serv_rssclient.c 5652 2007-10-29 20:14:48Z ajc $
+ * $Id$
*
* Bring external RSS feeds into rooms.
*
+ * Copyright (c) 2007-2009 by the citadel.org team
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <expat.h>
+#include <curl/curl.h>
#include <libcitadel.h>
#include "citadel.h"
#include "server.h"
time_t pubdate;
char channel_title[256];
int item_tag_nesting;
+ char *author_or_creator;
};
struct rssnetcfg *rnclist = NULL;
else {
MD5Init(&md5context);
if (ri->title != NULL) {
- MD5Update(&md5context, ri->title, strlen(ri->title));
+ MD5Update(&md5context, (unsigned char*)ri->title, strlen(ri->title));
}
if (ri->link != NULL) {
- MD5Update(&md5context, ri->link, strlen(ri->link));
+ MD5Update(&md5context, (unsigned char*)ri->link, strlen(ri->link));
}
MD5Final(rawdigest, &md5context);
for (i=0; i<MD5_DIGEST_LEN; i++) {
msg->cm_magic = CTDLMESSAGE_MAGIC;
msg->cm_anon_type = MES_NORMAL;
msg->cm_format_type = FMT_RFC822;
- msg->cm_fields['A'] = strdup("rss");
+
+ if (ri->author_or_creator != NULL) {
+ msg->cm_fields['A'] = html_to_ascii(ri->author_or_creator,
+ strlen(ri->author_or_creator), 512, 0);
+ striplt(msg->cm_fields['A']);
+ }
+ else {
+ msg->cm_fields['A'] = strdup("rss");
+ }
+
msg->cm_fields['N'] = strdup(NODENAME);
- msg->cm_fields['U'] = strdup(ri->title);
+ if (ri->title != NULL) {
+ msg->cm_fields['U'] = html_to_ascii(ri->title, strlen(ri->title), 512, 0);
+ striplt(msg->cm_fields['U']);
+ }
msg->cm_fields['T'] = malloc(64);
snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate);
- if (!IsEmptyStr(ri->channel_title)) {
- msg->cm_fields['O'] = strdup(ri->channel_title);
+ if (ri->channel_title != NULL) {
+ if (!IsEmptyStr(ri->channel_title)) {
+ msg->cm_fields['O'] = strdup(ri->channel_title);
+ }
}
-
- msglen = 1024 + strlen(ri->link) + strlen(ri->description) ;
+ if (ri->link == NULL)
+ ri->link = strdup("");
+ msglen += 1024 + strlen(ri->link) + strlen(ri->description) ;
msg->cm_fields['M'] = malloc(msglen);
snprintf(msg->cm_fields['M'], msglen,
- "Content-type: text/html\r\n\r\n"
+ "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n"
"<html><body>\n"
"%s<br><br>\n"
"<a href=\"%s\">%s</a>\n"
ri->link, ri->link
);
- CtdlSubmitMsg(msg, recp, NULL);
+ CtdlSubmitMsg(msg, recp, NULL, 0);
CtdlFreeMessage(msg);
/* write the uidl to the use table so we don't store this item again */
ri->title = NULL;
if (ri->link != NULL) free(ri->link);
ri->link = NULL;
+ if (ri->author_or_creator != NULL) free(ri->author_or_creator);
+ ri->author_or_creator = NULL;
if (ri->description != NULL) free(ri->description);
ri->description = NULL;
ri->pubdate = rdf_parsedate(ri->chardata);
}
+ if ( ((!strcasecmp(el, "author")) || (!strcasecmp(el, "creator"))) && (ri->chardata != NULL) ) {
+ if (ri->author_or_creator != NULL) free(ri->author_or_creator);
+ striplt(ri->chardata);
+ ri->author_or_creator = strdup(ri->chardata);
+ }
+
if (!strcasecmp(el, "item")) {
--ri->item_tag_nesting;
rss_save_item(ri);
-/*
- * Parse a URL into host, port number, and resource identifier.
+/*
+ * Callback function for passing libcurl's output to expat for parsing.
+ *
+ * ptr    - chunk of data handed over by libcurl; size * nmemb bytes are used
+ * stream - the expat XML_Parser, passed as void * and cast back here;
+ *          rss_do_fetching() registers the parser via CURLOPT_WRITEDATA
+ *
+ * Each chunk is fed to XML_Parse() with its last argument set to 0; the
+ * closing XML_Parse() call with that argument set to 1 is made by
+ * rss_do_fetching() after the transfer.
+ *
+ * Always returns size * nmemb, i.e. every byte is reported as consumed.
+ * NOTE(review): the XML_Parse() status is discarded, so a parse error does
+ * not stop the transfer -- confirm that this is intended.
*/
-int parse_url(char *url, char *hostname, int *port, char *identifier)
+size_t rss_libcurl_callback(void *ptr, size_t size, size_t nmemb, void *stream)
{
- char protocol[1024];
- char scratch[1024];
- char *ptr = NULL;
- char *nptr = NULL;
-
- strcpy(scratch, url);
- ptr = (char *)strchr(scratch, ':');
- if (!ptr) {
- return(1); /* no protocol specified */
- }
-
- strcpy(ptr, "");
- strcpy(protocol, scratch);
- if (strcmp(protocol, "http")) {
- return(2); /* not HTTP */
- }
-
- strcpy(scratch, url);
- ptr = (char *) strstr(scratch, "//");
- if (!ptr) {
- return(3); /* no server specified */
- }
- ptr += 2;
-
- strcpy(hostname, ptr);
- nptr = (char *)strchr(ptr, ':');
- if (!nptr) {
- *port = 80; /* default */
- nptr = (char *)strchr(hostname, '/');
- }
- else {
- sscanf(nptr, ":%d", port);
- nptr = (char *)strchr(hostname, ':');
- }
-
- if (nptr) {
- *nptr = '\0';
- }
-
- nptr = (char *)strchr(ptr, '/');
-
- if (!nptr) {
- return(4); /* no url specified */
- }
-
- strcpy(identifier, nptr);
- return(0);
+ XML_Parse((XML_Parser)stream, ptr, (size * nmemb), 0);
+ return (size*nmemb);
}
+
/*
* Begin a feed parse
*/
void rss_do_fetching(char *url, char *rooms) {
- char buf[1024];
- char rsshost[1024];
- int rssport = 80;
- char rssurl[1024];
struct rss_item ri;
XML_Parser xp;
- int sock = (-1);
- int got_bytes = (-1);
- int redirect_count = 0;
- /* Parse the URL */
- if (parse_url(url, rsshost, &rssport, rssurl) != 0) {
- CtdlLogPrintf(CTDL_ALERT, "Invalid URL: %s\n", url);
- }
-
- if (CtdlThreadCheckStop())
+ CURL *curl;
+ CURLcode res;
+ char errmsg[1024] = "";
+
+ CtdlLogPrintf(CTDL_DEBUG, "Fetching RSS feed <%s>\n", url);
+
+ curl = curl_easy_init();
+ if (!curl) {
+ CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n");
return;
+ }
xp = XML_ParserCreateNS("UTF-8", ':');
if (!xp) {
CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n");
+ curl_easy_cleanup(curl);
return;
}
+ curl_easy_setopt(curl, CURLOPT_URL, url);
+ curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
+ curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
+ curl_easy_setopt(curl, CURLOPT_WRITEDATA, xp);
+ curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_libcurl_callback);
+ curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errmsg);
+ curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
+#ifdef CURLOPT_HTTP_CONTENT_DECODING
+ curl_easy_setopt(curl, CURLOPT_HTTP_CONTENT_DECODING, 1);
+ curl_easy_setopt(curl, CURLOPT_ENCODING, "");
+#endif
+ curl_easy_setopt(curl, CURLOPT_USERAGENT, CITADEL);
+ curl_easy_setopt(curl, CURLOPT_TIMEOUT, 180); /* die after 180 seconds */
+ if (!IsEmptyStr(config.c_ip_addr)) {
+ curl_easy_setopt(curl, CURLOPT_INTERFACE, config.c_ip_addr);
+ }
+
memset(&ri, 0, sizeof(struct rss_item));
ri.roomlist = rooms;
XML_SetElementHandler(xp, rss_xml_start, rss_xml_end);
if (CtdlThreadCheckStop())
{
XML_ParserFree(xp);
+ curl_easy_cleanup(curl);
return;
}
-retry: CtdlLogPrintf(CTDL_NOTICE, "Connecting to <%s>\n", rsshost);
- sprintf(buf, "%d", rssport);
- sock = sock_connect(rsshost, buf, "tcp");
- if (sock >= 0) {
- CtdlLogPrintf(CTDL_DEBUG, "Connected!\n");
-
- if (CtdlThreadCheckStop())
- goto shutdown ;
-
- snprintf(buf, sizeof buf, "GET %s HTTP/1.0", rssurl);
- CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf);
- sock_puts(sock, buf);
-
- if (CtdlThreadCheckStop())
- goto shutdown ;
-
- snprintf(buf, sizeof buf, "Host: %s", rsshost);
- CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf);
- sock_puts(sock, buf);
-
- if (CtdlThreadCheckStop())
- goto shutdown ;
-
- snprintf(buf, sizeof buf, "User-Agent: %s", CITADEL);
- CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf);
- sock_puts(sock, buf);
-
- if (CtdlThreadCheckStop())
- goto shutdown ;
-
- snprintf(buf, sizeof buf, "Accept: */*");
- CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf);
- sock_puts(sock, buf);
-
- if (CtdlThreadCheckStop())
- goto shutdown ;
-
- sock_puts(sock, "");
-
- if (CtdlThreadCheckStop())
- goto shutdown ;
-
- if (sock_getln(sock, buf, sizeof buf) >= 0) {
- CtdlLogPrintf(CTDL_DEBUG, ">%s\n", buf);
- remove_token(buf, 0, ' ');
-
- /* 200 OK */
- if (buf[0] == '2') {
-
- while (got_bytes = sock_getln(sock, buf, sizeof buf),
- (got_bytes >= 0 && (strcmp(buf, "")) && (strcmp(buf, "\r"))) ) {
- if (CtdlThreadCheckStop())
- goto shutdown ;
- /* discard headers */
- }
+ if (CtdlThreadCheckStop())
+ goto shutdown ;
- while (got_bytes = sock_read(sock, buf, sizeof buf, 0),
- ((got_bytes>=0) && (ri.done_parsing == 0)) ) {
- if (CtdlThreadCheckStop())
- goto shutdown ;
- XML_Parse(xp, buf, got_bytes, 0);
- }
- if (ri.done_parsing == 0) XML_Parse(xp, "", 0, 1);
- }
+ res = curl_easy_perform(curl);
+ if (res) {
+ CtdlLogPrintf(CTDL_ALERT, "libcurl error %d: %s\n", res, errmsg);
+ }
- /* 30X redirect */
- else if ( (!strncmp(buf, "30", 2)) && (redirect_count < 16) ) {
- while (got_bytes = sock_getln(sock, buf, sizeof buf),
- (got_bytes >= 0 && (strcmp(buf, "")) && (strcmp(buf, "\r"))) ) {
- if (CtdlThreadCheckStop())
- goto shutdown ;
- if (!strncasecmp(buf, "Location:", 9)) {
- ++redirect_count;
- strcpy(buf, &buf[9]);
- striplt(buf);
- if (parse_url(buf, rsshost, &rssport, rssurl) == 0) {
- sock_close(sock);
- goto retry;
- }
- else {
- CtdlLogPrintf(CTDL_ALERT, "Invalid URL: %s\n", buf);
- }
- }
- }
- }
+ if (CtdlThreadCheckStop())
+ goto shutdown ;
+
+ if (ri.done_parsing == 0) XML_Parse(xp, "", 0, 1);
- }
-shutdown:
- sock_close(sock);
- }
- else {
- CtdlLogPrintf(CTDL_ERR, "Could not connect: %s\n", strerror(errno));
- }
+shutdown:
+ curl_easy_cleanup(curl);
XML_ParserFree(xp);
/* Free the feed item data structure */
ri.title = NULL;
if (ri.link != NULL) free(ri.link);
ri.link = NULL;
+ if (ri.author_or_creator != NULL) free(ri.author_or_creator);
+ ri.author_or_creator = NULL;
if (ri.description != NULL) free(ri.description);
ri.description = NULL;
if (ri.chardata_len > 0) {
struct CitContext rssclientCC;
/* Give this thread its own private CitContext */
- memset(&rssclientCC, 0, sizeof(struct CitContext));
- rssclientCC.internal_pgm = 1;
- rssclientCC.cs_pid = 0;
- pthread_setspecific(MyConKey, (void *)&rssclientCC );
+ CtdlFillSystemContext(&rssclientCC, "rssclient");
+ citthread_setspecific(MyConKey, (void *)&rssclientCC );
CtdlThreadAllocTSD();
{
if (threading)
{
+ CtdlLogPrintf(CTDL_INFO, "%s\n", curl_version());
CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, 0);
}
/* return our Subversion id for the Log */
- return "$Id: serv_rssclient.c 5652 2007-10-29 20:14:48Z ajc $";
+ return "$Id$";
}