From ddc8b9b2eb5b2247dd03dc05836000f411374048 Mon Sep 17 00:00:00 2001 From: Art Cancro Date: Mon, 19 May 2008 01:36:58 +0000 Subject: [PATCH] RSS client now uses libcurl instead of the crappy built-in HTTP client I wrote for this purpose. Not only is it more robust, but it should be able to handle HTTPS as well. Please note that because of this change, libcurl is now a dependency. --- citadel/configure.ac | 27 ++++ citadel/modules/rssclient/serv_rssclient.c | 142 ++++++--------------- 2 files changed, 66 insertions(+), 103 deletions(-) diff --git a/citadel/configure.ac b/citadel/configure.ac index b11c1175a..c36130b04 100644 --- a/citadel/configure.ac +++ b/citadel/configure.ac @@ -617,6 +617,33 @@ AC_CHECK_HEADER(expat.h, ) + + + + + +dnl Checks for libcurl. +AC_CHECK_HEADER(curl/curl.h, + [AC_CHECK_LIB(curl, curl_easy_init, + [ + SERVER_LIBS="-lcurl $SERVER_LIBS" + ], + [ + AC_MSG_ERROR(libcurl was not found and is required. More info: http://www.citadel.org/doku.php/installation:start) + ] + , + )], + [ + AC_MSG_ERROR(curl/curl.h was not found and is required. More info: http://www.citadel.org/doku.php/installation:start) + ] +) + + + + + + + dnl Checks for the libical iCalendar library. AC_CHECK_HEADER(ical.h, [AC_CHECK_LIB(ical, icalcomponent_new, diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index 2aaa140ef..fdf509f2e 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "citadel.h" #include "server.h" @@ -331,34 +332,46 @@ void rss_xml_chardata(void *data, const XML_Char *s, int len) { +/* + * Callback function for passing libcurl's output to expat for parsing + */ +size_t rss_libcurl_callback(void *ptr, size_t size, size_t nmemb, void *stream) +{ + XML_Parse((XML_Parser)stream, ptr, (size * nmemb), 0); + return (size*nmemb); +} + + + /* * Begin a feed parse */ void rss_do_fetching(char *url, char *rooms) { - char buf[1024]; - char rsshost[1024]; - int rssport = 80; - char rssurl[1024]; struct rss_item ri; XML_Parser xp; - int sock = (-1); - int got_bytes = (-1); - int redirect_count = 0; - /* Parse the URL */ - if (parse_url(url, rsshost, &rssport, rssurl) != 0) { - CtdlLogPrintf(CTDL_ALERT, "Invalid URL: %s\n", url); - } - - if (CtdlThreadCheckStop()) + CURL *curl; + CURLcode res; + + curl = curl_easy_init(); + if (!curl) { + CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n"); return; + } xp = XML_ParserCreateNS("UTF-8", ':'); if (!xp) { CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n"); + curl_easy_cleanup(curl); return; } + curl_easy_setopt(curl, CURLOPT_URL, url); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, xp); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_libcurl_callback); + memset(&ri, 0, sizeof(struct rss_item)); ri.roomlist = rooms; XML_SetElementHandler(xp, rss_xml_start, rss_xml_end); @@ -368,103 +381,26 @@ void rss_do_fetching(char *url, char *rooms) { if (CtdlThreadCheckStop()) { XML_ParserFree(xp); + curl_easy_cleanup(curl); return; } -retry: CtdlLogPrintf(CTDL_NOTICE, "Connecting to <%s>\n", rsshost); - sprintf(buf, "%d", rssport); - sock = sock_connect(rsshost, buf, "tcp"); - if (sock >= 0) { - CtdlLogPrintf(CTDL_DEBUG, "Connected!\n"); - - if (CtdlThreadCheckStop()) - goto shutdown ; - - snprintf(buf, sizeof buf, "GET %s HTTP/1.0", rssurl); - CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf); - sock_puts(sock, buf); - - if (CtdlThreadCheckStop()) - goto shutdown ; - - snprintf(buf, sizeof buf, "Host: %s", rsshost); - CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf); - sock_puts(sock, buf); - - if (CtdlThreadCheckStop()) - goto shutdown ; - - snprintf(buf, sizeof buf, "User-Agent: %s", CITADEL); - CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf); - sock_puts(sock, buf); - - if (CtdlThreadCheckStop()) - goto shutdown ; - - snprintf(buf, sizeof buf, "Accept: */*"); - CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf); - sock_puts(sock, buf); - - if (CtdlThreadCheckStop()) - goto shutdown ; - - sock_puts(sock, ""); - - if (CtdlThreadCheckStop()) - goto shutdown ; - - if (sock_getln(sock, buf, sizeof buf) >= 0) { - CtdlLogPrintf(CTDL_DEBUG, ">%s\n", buf); - remove_token(buf, 0, ' '); - - /* 200 OK */ - if (buf[0] == '2') { - - while (got_bytes = sock_getln(sock, buf, sizeof buf), - (got_bytes >= 0 && (strcmp(buf, "")) && (strcmp(buf, "\r"))) ) { - if (CtdlThreadCheckStop()) - goto shutdown ; - /* discard headers */ - } + if (CtdlThreadCheckStop()) + goto shutdown ; - while (got_bytes = sock_read(sock, buf, sizeof buf, 0), - ((got_bytes>=0) && (ri.done_parsing == 0)) ) { - if (CtdlThreadCheckStop()) - goto shutdown ; - XML_Parse(xp, buf, got_bytes, 0); - } - if (ri.done_parsing == 0) XML_Parse(xp, "", 0, 1); - } - /* 30X redirect */ - else if ( (!strncmp(buf, "30", 2)) && (redirect_count < 16) ) { - while (got_bytes = sock_getln(sock, buf, sizeof buf), - (got_bytes >= 0 && (strcmp(buf, "")) && (strcmp(buf, "\r"))) ) { - if (CtdlThreadCheckStop()) - goto shutdown ; - if (!strncasecmp(buf, "Location:", 9)) { - ++redirect_count; - strcpy(buf, &buf[9]); - striplt(buf); - if (parse_url(buf, rsshost, &rssport, rssurl) == 0) { - sock_close(sock); - goto retry; - } - else { - CtdlLogPrintf(CTDL_ALERT, "Invalid URL: %s\n", buf); - } - } - } - } + res = curl_easy_perform(curl); + //while got bytes + //XML_Parse(xp, buf, got_bytes, 0); - } -shutdown: - sock_close(sock); - } - else { - CtdlLogPrintf(CTDL_ERR, "Could not connect: %s\n", strerror(errno)); - } + if (CtdlThreadCheckStop()) + goto shutdown ; + if (ri.done_parsing == 0) XML_Parse(xp, "", 0, 1); + + +shutdown: + curl_easy_cleanup(curl); XML_ParserFree(xp); /* Free the feed item data structure */ -- 2.39.2