X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Fserv_rssclient.c;h=47ef682073391851d5941330ac685298f0a58446;hb=8c47559cb5ae97ec0fa35660ee16fd61a9451c72;hp=b3ff2b7a4fcde65f5db7800a3c4ad498b25afa25;hpb=6c27cc8c0a96c5670c590b830f901b22bd257ae9;p=citadel.git diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index b3ff2b7a4..47ef68207 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -1,8 +1,23 @@ /* - * $Id: serv_rssclient.c 5652 2007-10-29 20:14:48Z ajc $ + * $Id$ * * Bring external RSS feeds into rooms. * + * Copyright (c) 2007-2009 by the citadel.org team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include @@ -25,12 +40,15 @@ #include #include #include +#include +#include #include #include "citadel.h" #include "server.h" #include "citserver.h" #include "support.h" #include "config.h" +#include "threads.h" #include "room_ops.h" #include "ctdl_module.h" #include "clientsocket.h" @@ -40,9 +58,6 @@ #include "citadel_dirs.h" #include "md5.h" -#ifdef HAVE_EXPAT -#include - struct rssnetcfg { struct rssnetcfg *next; @@ -62,6 +77,7 @@ struct rss_item { time_t pubdate; char channel_title[256]; int item_tag_nesting; + char *author_or_creator; }; struct rssnetcfg *rnclist = NULL; @@ -98,10 +114,10 @@ void rss_save_item(struct rss_item *ri) { else { MD5Init(&md5context); if (ri->title != NULL) { - MD5Update(&md5context, ri->title, strlen(ri->title)); + MD5Update(&md5context, (unsigned char*)ri->title, strlen(ri->title)); } if (ri->link != NULL) { - MD5Update(&md5context, ri->link, strlen(ri->link)); + MD5Update(&md5context, (unsigned char*)ri->link, strlen(ri->link)); } MD5Final(rawdigest, &md5context); for (i=0; icm_magic = CTDLMESSAGE_MAGIC; msg->cm_anon_type = MES_NORMAL; msg->cm_format_type = FMT_RFC822; - msg->cm_fields['A'] = strdup("rss"); + + if (ri->author_or_creator != NULL) { + msg->cm_fields['A'] = html_to_ascii(ri->author_or_creator, + strlen(ri->author_or_creator), 512, 0); + striplt(msg->cm_fields['A']); + } + else { + msg->cm_fields['A'] = strdup("rss"); + } + msg->cm_fields['N'] = strdup(NODENAME); - msg->cm_fields['U'] = strdup(ri->title); + if (ri->title != NULL) { + msg->cm_fields['U'] = html_to_ascii(ri->title, strlen(ri->title), 512, 0); + striplt(msg->cm_fields['U']); + } msg->cm_fields['T'] = malloc(64); snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate); - if (!IsEmptyStr(ri->channel_title)) { - msg->cm_fields['O'] = strdup(ri->channel_title); + if (ri->channel_title != NULL) { + if (!IsEmptyStr(ri->channel_title)) { + msg->cm_fields['O'] = strdup(ri->channel_title); + } } - - msglen = 1024 + strlen(ri->link) + strlen(ri->description) ; + if (ri->link == NULL) + ri->link = strdup(""); + msglen += 1024 + strlen(ri->link) + strlen(ri->description) ; msg->cm_fields['M'] = malloc(msglen); snprintf(msg->cm_fields['M'], msglen, - "Content-type: text/html\r\n\r\n" + "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n" "\n" "%s

\n" "%s\n" @@ -161,7 +192,7 @@ void rss_save_item(struct rss_item *ri) { ri->link, ri->link ); - CtdlSubmitMsg(msg, recp, NULL); + CtdlSubmitMsg(msg, recp, NULL, 0); CtdlFreeMessage(msg); /* write the uidl to the use table so we don't store this item again */ @@ -232,6 +263,8 @@ void rss_xml_start(void *data, const char *supplied_el, const char **attr) { ri->title = NULL; if (ri->link != NULL) free(ri->link); ri->link = NULL; + if (ri->author_or_creator != NULL) free(ri->author_or_creator); + ri->author_or_creator = NULL; if (ri->description != NULL) free(ri->description); ri->description = NULL; @@ -291,13 +324,19 @@ void rss_xml_end(void *data, const char *supplied_el) { ri->pubdate = rdf_parsedate(ri->chardata); } + if ( ((!strcasecmp(el, "author")) || (!strcasecmp(el, "creator"))) && (ri->chardata != NULL) ) { + if (ri->author_or_creator != NULL) free(ri->author_or_creator); + striplt(ri->chardata); + ri->author_or_creator = strdup(ri->chardata); + } + if (!strcasecmp(el, "item")) { --ri->item_tag_nesting; rss_save_item(ri); } if ( (!strcasecmp(el, "rss")) || (!strcasecmp(el, "rdf")) ) { - lprintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n"); + CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n"); ri->done_parsing = 1; } @@ -332,160 +371,89 @@ void rss_xml_chardata(void *data, const XML_Char *s, int len) { -/* - * Parse a URL into host, port number, and resource identifier. +/* + * Callback function for passing libcurl's output to expat for parsing */ -int parse_url(char *url, char *hostname, int *port, char *identifier) +size_t rss_libcurl_callback(void *ptr, size_t size, size_t nmemb, void *stream) { - char protocol[1024]; - char scratch[1024]; - char *ptr = NULL; - char *nptr = NULL; - - strcpy(scratch, url); - ptr = (char *)strchr(scratch, ':'); - if (!ptr) { - return(1); /* no protocol specified */ - } - - strcpy(ptr, ""); - strcpy(protocol, scratch); - if (strcmp(protocol, "http")) { - return(2); /* not HTTP */ - } - - strcpy(scratch, url); - ptr = (char *) strstr(scratch, "//"); - if (!ptr) { - return(3); /* no server specified */ - } - ptr += 2; - - strcpy(hostname, ptr); - nptr = (char *)strchr(ptr, ':'); - if (!nptr) { - *port = 80; /* default */ - nptr = (char *)strchr(hostname, '/'); - } - else { - sscanf(nptr, ":%d", port); - nptr = (char *)strchr(hostname, ':'); - } - - if (nptr) { - *nptr = '\0'; - } - - nptr = (char *)strchr(ptr, '/'); - - if (!nptr) { - return(4); /* no url specified */ - } - - strcpy(identifier, nptr); - return(0); + XML_Parse((XML_Parser)stream, ptr, (size * nmemb), 0); + return (size*nmemb); } + /* * Begin a feed parse */ void rss_do_fetching(char *url, char *rooms) { - char buf[1024]; - char rsshost[1024]; - int rssport = 80; - char rssurl[1024]; struct rss_item ri; XML_Parser xp; - int sock = (-1); - int got_bytes = (-1); - int redirect_count = 0; - /* Parse the URL */ - if (parse_url(url, rsshost, &rssport, rssurl) != 0) { - lprintf(CTDL_ALERT, "Invalid URL: %s\n", url); + CURL *curl; + CURLcode res; + char errmsg[1024] = ""; + + CtdlLogPrintf(CTDL_DEBUG, "Fetching RSS feed <%s>\n", url); + + curl = curl_easy_init(); + if (!curl) { + CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n"); + return; } xp = XML_ParserCreateNS("UTF-8", ':'); if (!xp) { - lprintf(CTDL_ALERT, "Cannot create XML parser!\n"); + CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n"); + curl_easy_cleanup(curl); return; } + curl_easy_setopt(curl, CURLOPT_URL, url); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, xp); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_libcurl_callback); + curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errmsg); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); +#ifdef CURLOPT_HTTP_CONTENT_DECODING + curl_easy_setopt(curl, CURLOPT_HTTP_CONTENT_DECODING, 1); + curl_easy_setopt(curl, CURLOPT_ENCODING, ""); +#endif + curl_easy_setopt(curl, CURLOPT_USERAGENT, CITADEL); + curl_easy_setopt(curl, CURLOPT_TIMEOUT, 180); /* die after 180 seconds */ + if (!IsEmptyStr(config.c_ip_addr)) { + curl_easy_setopt(curl, CURLOPT_INTERFACE, config.c_ip_addr); + } + memset(&ri, 0, sizeof(struct rss_item)); ri.roomlist = rooms; XML_SetElementHandler(xp, rss_xml_start, rss_xml_end); XML_SetCharacterDataHandler(xp, rss_xml_chardata); XML_SetUserData(xp, &ri); -retry: lprintf(CTDL_NOTICE, "Connecting to <%s>\n", rsshost); - sprintf(buf, "%d", rssport); - sock = sock_connect(rsshost, buf, "tcp"); - if (sock >= 0) { - lprintf(CTDL_DEBUG, "Connected!\n"); - - snprintf(buf, sizeof buf, "GET %s HTTP/1.0", rssurl); - lprintf(CTDL_DEBUG, "<%s\n", buf); - sock_puts(sock, buf); - - snprintf(buf, sizeof buf, "Host: %s", rsshost); - lprintf(CTDL_DEBUG, "<%s\n", buf); - sock_puts(sock, buf); - - snprintf(buf, sizeof buf, "User-Agent: %s", CITADEL); - lprintf(CTDL_DEBUG, "<%s\n", buf); - sock_puts(sock, buf); - - snprintf(buf, sizeof buf, "Accept: */*"); - lprintf(CTDL_DEBUG, "<%s\n", buf); - sock_puts(sock, buf); - - sock_puts(sock, ""); - - if (sock_getln(sock, buf, sizeof buf) >= 0) { - lprintf(CTDL_DEBUG, ">%s\n", buf); - remove_token(buf, 0, ' '); - - /* 200 OK */ - if (buf[0] == '2') { + if (CtdlThreadCheckStop()) + { + XML_ParserFree(xp); + curl_easy_cleanup(curl); + return; + } + + if (CtdlThreadCheckStop()) + goto shutdown ; - while (got_bytes = sock_getln(sock, buf, sizeof buf), - (got_bytes >= 0 && (strcmp(buf, "")) && (strcmp(buf, "\r"))) ) { - /* discard headers */ - } + res = curl_easy_perform(curl); + if (res) { + CtdlLogPrintf(CTDL_ALERT, "libcurl error %d: %s\n", res, errmsg); + } - while (got_bytes = sock_read(sock, buf, sizeof buf, 0), - ((got_bytes>=0) && (ri.done_parsing == 0)) ) { - XML_Parse(xp, buf, got_bytes, 0); - } - if (ri.done_parsing == 0) XML_Parse(xp, "", 0, 1); - } + if (CtdlThreadCheckStop()) + goto shutdown ; - /* 30X redirect */ - else if ( (!strncmp(buf, "30", 2)) && (redirect_count < 16) ) { - while (got_bytes = sock_getln(sock, buf, sizeof buf), - (got_bytes >= 0 && (strcmp(buf, "")) && (strcmp(buf, "\r"))) ) { - if (!strncasecmp(buf, "Location:", 9)) { - ++redirect_count; - strcpy(buf, &buf[9]); - striplt(buf); - if (parse_url(buf, rsshost, &rssport, rssurl) == 0) { - goto retry; - } - else { - lprintf(CTDL_ALERT, "Invalid URL: %s\n", buf); - } - } - } - } + if (ri.done_parsing == 0) XML_Parse(xp, "", 0, 1); - } - sock_close(sock); - } - else { - lprintf(CTDL_ERR, "Could not connect: %s\n", strerror(errno)); - } +shutdown: + curl_easy_cleanup(curl); XML_ParserFree(xp); /* Free the feed item data structure */ @@ -495,6 +463,8 @@ retry: lprintf(CTDL_NOTICE, "Connecting to <%s>\n", rsshost); ri.title = NULL; if (ri.link != NULL) free(ri.link); ri.link = NULL; + if (ri.author_or_creator != NULL) free(ri.author_or_creator); + ri.author_or_creator = NULL; if (ri.description != NULL) free(ri.description); ri.description = NULL; if (ri.chardata_len > 0) { @@ -522,13 +492,16 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir); + if (CtdlThreadCheckStop()) + return; + /* Only do net processing for rooms that have netconfigs */ fp = fopen(filename, "r"); if (fp == NULL) { return; } - while (fgets(buf, sizeof buf, fp) != NULL) { + while (fgets(buf, sizeof buf, fp) != NULL && !CtdlThreadCheckStop()) { buf[strlen(buf)-1] = 0; extract_token(instr, buf, 0, '|', sizeof instr); @@ -589,14 +562,13 @@ void *rssclient_scan(void *args) { static time_t last_run = 0L; static int doing_rssclient = 0; struct rssnetcfg *rptr = NULL; + struct CitContext rssclientCC; + + /* Give this thread its own private CitContext */ + CtdlFillSystemContext(&rssclientCC, "rssclient"); + citthread_setspecific(MyConKey, (void *)&rssclientCC ); CtdlThreadAllocTSD(); - /* - * Run RSS client no more frequently than once every n seconds - */ -// if ( (time(NULL) - last_run) < config.c_net_freq ) { -// return; -// } /* * This is a simple concurrency check to make sure only one rssclient run @@ -607,10 +579,10 @@ void *rssclient_scan(void *args) { if (doing_rssclient) return NULL; doing_rssclient = 1; - lprintf(CTDL_DEBUG, "rssclient started\n"); + CtdlLogPrintf(CTDL_DEBUG, "rssclient started\n"); ForEachRoom(rssclient_scan_room, NULL); - while (rnclist != NULL) { + while (rnclist != NULL && !CtdlThreadCheckStop()) { rss_do_fetching(rnclist->url, rnclist->rooms); rptr = rnclist; rnclist = rnclist->next; @@ -618,28 +590,24 @@ void *rssclient_scan(void *args) { free(rptr); } - lprintf(CTDL_DEBUG, "rssclient ended\n"); + CtdlLogPrintf(CTDL_DEBUG, "rssclient ended\n"); last_run = time(NULL); doing_rssclient = 0; - CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, last_run + config.c_net_freq); + if (!CtdlThreadCheckStop()) + CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, last_run + config.c_net_freq); + else + CtdlLogPrintf(CTDL_DEBUG, "rssclient: Task STOPPED.\n"); return NULL; } -#endif /* HAVE_EXPAT */ - CTDL_MODULE_INIT(rssclient) { if (threading) { -#ifdef HAVE_EXPAT -// CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER); + CtdlLogPrintf(CTDL_INFO, "%s\n", curl_version()); CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, 0); -#else - lprintf(CTDL_INFO, "This server is missing the Expat XML parser. RSS client will be disabled.\n"); -#endif } - /* return our Subversion id for the Log */ - return "$Id: serv_rssclient.c 5652 2007-10-29 20:14:48Z ajc $"; + return "$Id$"; }