X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Fserv_rssclient.c;h=605b0772d70524977cae7784f783954ecf8784ea;hb=a2fda4eafb51bbf58c04471522aa2d0f116c797e;hp=424aec4894448cd44a8b9771adb65d9524ba34dc;hpb=b4a9bcece3412f41b82bda83f98e1b23dee27a82;p=citadel.git diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index 424aec489..605b0772d 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -25,23 +25,23 @@ #include #include #include +#include +#include #include "citadel.h" #include "server.h" #include "citserver.h" #include "support.h" #include "config.h" -#include "tools.h" +#include "threads.h" #include "room_ops.h" #include "ctdl_module.h" #include "clientsocket.h" #include "msgbase.h" +#include "parsedate.h" #include "database.h" #include "citadel_dirs.h" #include "md5.h" -#ifdef HAVE_EXPAT -#include - struct rssnetcfg { struct rssnetcfg *next; @@ -59,6 +59,8 @@ struct rss_item { char *link; char *description; time_t pubdate; + char channel_title[256]; + int item_tag_nesting; }; struct rssnetcfg *rnclist = NULL; @@ -113,7 +115,7 @@ void rss_save_item(struct rss_item *ri) { cdbut = cdb_fetch(CDB_USETABLE, utmsgid, strlen(utmsgid)); if (cdbut != NULL) { /* Item has already been seen */ - lprintf(CTDL_DEBUG, "%s has already been seen\n", utmsgid); + CtdlLogPrintf(CTDL_DEBUG, "%s has already been seen\n", utmsgid); cdb_free(cdbut); /* rewrite the record anyway, to update the timestamp */ @@ -124,6 +126,7 @@ void rss_save_item(struct rss_item *ri) { else { /* Item has not been seen, so save it. */ + if (ri->description == NULL) ri->description = strdup(""); for (i=strlen(ri->description); i>=0; --i) { if (isspace(ri->description[i])) { ri->description[i] = ' '; @@ -140,6 +143,9 @@ void rss_save_item(struct rss_item *ri) { msg->cm_fields['U'] = strdup(ri->title); msg->cm_fields['T'] = malloc(64); snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate); + if (!IsEmptyStr(ri->channel_title)) { + msg->cm_fields['O'] = strdup(ri->channel_title); + } msglen = 1024 + strlen(ri->link) + strlen(ri->description) ; msg->cm_fields['M'] = malloc(msglen); @@ -156,13 +162,13 @@ void rss_save_item(struct rss_item *ri) { CtdlSubmitMsg(msg, recp, NULL); CtdlFreeMessage(msg); - free_recipients(recp); /* write the uidl to the use table so we don't store this item again */ strcpy(ut.ut_msgid, utmsgid); ut.ut_timestamp = time(NULL); cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) ); } + free_recipients(recp); } @@ -173,6 +179,7 @@ void rss_save_item(struct rss_item *ri) { time_t rdf_parsedate(char *p) { struct tm tm; + time_t t = 0; if (!p) return 0L; if (strlen(p) < 10) return 0L; @@ -189,21 +196,33 @@ time_t rdf_parsedate(char *p) tm.tm_hour = atoi(&p[11]); tm.tm_min = atoi(&p[14]); } + return mktime(&tm); } - else { - /* FIXME try an imap timestamp conversion */ - } + /* hmm... try RFC822 date stamp format */ - return mktime(&tm); + t = parsedate(p); + if (t > 0) return(t); + + /* yeesh. ok, just return the current date and time. */ + return(time(NULL)); } -void rss_xml_start(void *data, const char *el, const char **attr) { +void rss_xml_start(void *data, const char *supplied_el, const char **attr) { struct rss_item *ri = (struct rss_item *) data; + char el[256]; + char *sep = NULL; + + /* Axe the namespace, we don't care about it */ + safestrncpy(el, supplied_el, sizeof el); + while (sep = strchr(el, ':'), sep) { + strcpy(el, ++sep); + } if (!strcasecmp(el, "item")) { + ++ri->item_tag_nesting; /* Initialize the feed item data structure */ if (ri->guid != NULL) free(ri->guid); @@ -232,48 +251,52 @@ void rss_xml_end(void *data, const char *supplied_el) { char el[256]; char *sep = NULL; - /* Axe the namespace, we don't care about it */ - safestrncpy(el, supplied_el, sizeof el); while (sep = strchr(el, ':'), sep) { strcpy(el, ++sep); } - if (!strcasecmp(el, "guid")) { + if ( (!strcasecmp(el, "title")) && (ri->item_tag_nesting == 0) && (ri->chardata != NULL) ) { + safestrncpy(ri->channel_title, ri->chardata, sizeof ri->channel_title); + striplt(ri->channel_title); + } + + if ( (!strcasecmp(el, "guid")) && (ri->chardata != NULL) ) { if (ri->guid != NULL) free(ri->guid); striplt(ri->chardata); ri->guid = strdup(ri->chardata); } - if (!strcasecmp(el, "title")) { + if ( (!strcasecmp(el, "title")) && (ri->chardata != NULL) ) { if (ri->title != NULL) free(ri->title); striplt(ri->chardata); ri->title = strdup(ri->chardata); } - if (!strcasecmp(el, "link")) { + if ( (!strcasecmp(el, "link")) && (ri->chardata != NULL) ) { if (ri->link != NULL) free(ri->link); striplt(ri->chardata); ri->link = strdup(ri->chardata); } - if (!strcasecmp(el, "description")) { + if ( (!strcasecmp(el, "description")) && (ri->chardata != NULL) ) { if (ri->description != NULL) free(ri->description); ri->description = strdup(ri->chardata); } - if ( (!strcasecmp(el, "pubdate")) || (!strcasecmp(el, "date")) ) { + if ( ((!strcasecmp(el, "pubdate")) || (!strcasecmp(el, "date"))) && (ri->chardata != NULL) ) { striplt(ri->chardata); ri->pubdate = rdf_parsedate(ri->chardata); } if (!strcasecmp(el, "item")) { + --ri->item_tag_nesting; rss_save_item(ri); } if ( (!strcasecmp(el, "rss")) || (!strcasecmp(el, "rdf")) ) { - lprintf(CTDL_DEBUG, "KILL THE PARSER, KILL THE PARSER, KILL THE PARSER!\n"); + CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n"); ri->done_parsing = 1; } @@ -309,11 +332,12 @@ void rss_xml_chardata(void *data, const XML_Char *s, int len) { /* - * Parses an url into hostname, port number and resource identifier. + * Parse a URL into host, port number, and resource identifier. */ int parse_url(char *url, char *hostname, int *port, char *identifier) { - char protocol[1024], scratch[1024]; + char protocol[1024]; + char scratch[1024]; char *ptr = NULL; char *nptr = NULL; @@ -322,6 +346,7 @@ int parse_url(char *url, char *hostname, int *port, char *identifier) if (!ptr) { return(1); /* no protocol specified */ } + strcpy(ptr, ""); strcpy(protocol, scratch); if (strcmp(protocol, "http")) { @@ -361,10 +386,6 @@ int parse_url(char *url, char *hostname, int *port, char *identifier) } - - - - /* * Begin a feed parse */ @@ -377,11 +398,19 @@ void rss_do_fetching(char *url, char *rooms) { XML_Parser xp; int sock = (-1); int got_bytes = (-1); + int redirect_count = 0; + /* Parse the URL */ + if (parse_url(url, rsshost, &rssport, rssurl) != 0) { + CtdlLogPrintf(CTDL_ALERT, "Invalid URL: %s\n", url); + } + + if (CtdlThreadCheckStop()) + return; xp = XML_ParserCreateNS("UTF-8", ':'); if (!xp) { - lprintf(CTDL_ALERT, "Cannot create XML parser!\n"); + CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n"); return; } @@ -391,53 +420,104 @@ void rss_do_fetching(char *url, char *rooms) { XML_SetCharacterDataHandler(xp, rss_xml_chardata); XML_SetUserData(xp, &ri); - /* Parse the URL */ - -retry: sock = (-1); - if (parse_url(url, rsshost, &rssport, rssurl) != 0) { - lprintf(CTDL_ALERT, "Invalid URL: %s\n", url); - } - else { - lprintf(CTDL_NOTICE, "Connecting to <%s>\n", rsshost); - sprintf(buf, "%d", rssport); - sock = sock_connect(rsshost, buf, "tcp"); + if (CtdlThreadCheckStop()) + { + XML_ParserFree(xp); + return; } - + +retry: CtdlLogPrintf(CTDL_NOTICE, "Connecting to <%s>\n", rsshost); + sprintf(buf, "%d", rssport); + sock = sock_connect(rsshost, buf, "tcp"); if (sock >= 0) { - lprintf(CTDL_DEBUG, "Connected!\n"); + CtdlLogPrintf(CTDL_DEBUG, "Connected!\n"); + + if (CtdlThreadCheckStop()) + goto shutdown ; + + snprintf(buf, sizeof buf, "GET %s HTTP/1.0", rssurl); + CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf); + sock_puts(sock, buf); + + if (CtdlThreadCheckStop()) + goto shutdown ; + + snprintf(buf, sizeof buf, "Host: %s", rsshost); + CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf); + sock_puts(sock, buf); - snprintf(buf, sizeof buf, "GET %s HTTP/1.0\r", rssurl); - lprintf(CTDL_DEBUG, "<%s\n", buf); + if (CtdlThreadCheckStop()) + goto shutdown ; + + snprintf(buf, sizeof buf, "User-Agent: %s", CITADEL); + CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf); sock_puts(sock, buf); - snprintf(buf, sizeof buf, "Server: %s\r", rsshost); - lprintf(CTDL_DEBUG, "<%s\n", buf); + if (CtdlThreadCheckStop()) + goto shutdown ; + + snprintf(buf, sizeof buf, "Accept: */*"); + CtdlLogPrintf(CTDL_DEBUG, "<%s\n", buf); sock_puts(sock, buf); - sock_puts(sock, "\r"); + if (CtdlThreadCheckStop()) + goto shutdown ; + + sock_puts(sock, ""); + if (CtdlThreadCheckStop()) + goto shutdown ; + if (sock_getln(sock, buf, sizeof buf) >= 0) { - lprintf(CTDL_DEBUG, ">%s\n", buf); + CtdlLogPrintf(CTDL_DEBUG, ">%s\n", buf); remove_token(buf, 0, ' '); + + /* 200 OK */ if (buf[0] == '2') { - while (got_bytes = sock_getln(sock, buf, sizeof buf), + while (got_bytes = sock_getln(sock, buf, sizeof buf), (got_bytes >= 0 && (strcmp(buf, "")) && (strcmp(buf, "\r"))) ) { - /* FIXME handle 302 redirects!! */ + if (CtdlThreadCheckStop()) + goto shutdown ; + /* discard headers */ } - while (got_bytes = sock_read(sock, buf, sizeof buf, 0), ((got_bytes>=0) && (ri.done_parsing == 0)) ) { + if (CtdlThreadCheckStop()) + goto shutdown ; XML_Parse(xp, buf, got_bytes, 0); } if (ri.done_parsing == 0) XML_Parse(xp, "", 0, 1); } + + /* 30X redirect */ + else if ( (!strncmp(buf, "30", 2)) && (redirect_count < 16) ) { + while (got_bytes = sock_getln(sock, buf, sizeof buf), + (got_bytes >= 0 && (strcmp(buf, "")) && (strcmp(buf, "\r"))) ) { + if (CtdlThreadCheckStop()) + goto shutdown ; + if (!strncasecmp(buf, "Location:", 9)) { + ++redirect_count; + strcpy(buf, &buf[9]); + striplt(buf); + if (parse_url(buf, rsshost, &rssport, rssurl) == 0) { + sock_close(sock); + goto retry; + } + else { + CtdlLogPrintf(CTDL_ALERT, "Invalid URL: %s\n", buf); + } + } + } + } + } +shutdown: sock_close(sock); } else { - lprintf(CTDL_ERR, "Could not connect: %s\n", strerror(errno)); + CtdlLogPrintf(CTDL_ERR, "Could not connect: %s\n", strerror(errno)); } XML_ParserFree(xp); @@ -476,17 +556,23 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir); + if (CtdlThreadCheckStop()) + return; + /* Only do net processing for rooms that have netconfigs */ fp = fopen(filename, "r"); if (fp == NULL) { return; } - while (fgets(buf, sizeof buf, fp) != NULL) { + while (fgets(buf, sizeof buf, fp) != NULL && !CtdlThreadCheckStop()) { buf[strlen(buf)-1] = 0; extract_token(instr, buf, 0, '|', sizeof instr); if (!strcasecmp(instr, "rssclient")) { + + use_this_rncptr = NULL; + extract_token(feedurl, buf, 1, '|', sizeof feedurl); /* If any other rooms have requested the same feed, then we will just add this @@ -536,17 +622,19 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) /* * Scan for rooms that have RSS client requests configured */ -void rssclient_scan(void) { +void *rssclient_scan(void *args) { static time_t last_run = 0L; static int doing_rssclient = 0; struct rssnetcfg *rptr = NULL; + struct CitContext rssclientCC; - /* - * Run RSS client no more frequently than once every n seconds - */ - if ( (time(NULL) - last_run) < config.c_net_freq ) { - return; - } + /* Give this thread its own private CitContext */ + memset(&rssclientCC, 0, sizeof(struct CitContext)); + rssclientCC.internal_pgm = 1; + rssclientCC.cs_pid = 0; + pthread_setspecific(MyConKey, (void *)&rssclientCC ); + + CtdlThreadAllocTSD(); /* * This is a simple concurrency check to make sure only one rssclient run @@ -554,13 +642,13 @@ void rssclient_scan(void) { * don't really require extremely fine granularity here, we'll do it * with a static variable instead. */ - if (doing_rssclient) return; + if (doing_rssclient) return NULL; doing_rssclient = 1; - lprintf(CTDL_DEBUG, "rssclient started\n"); + CtdlLogPrintf(CTDL_DEBUG, "rssclient started\n"); ForEachRoom(rssclient_scan_room, NULL); - while (rnclist != NULL) { + while (rnclist != NULL && !CtdlThreadCheckStop()) { rss_do_fetching(rnclist->url, rnclist->rooms); rptr = rnclist; rnclist = rnclist->next; @@ -568,21 +656,23 @@ void rssclient_scan(void) { free(rptr); } - lprintf(CTDL_DEBUG, "rssclient ended\n"); + CtdlLogPrintf(CTDL_DEBUG, "rssclient ended\n"); last_run = time(NULL); doing_rssclient = 0; + if (!CtdlThreadCheckStop()) + CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, last_run + config.c_net_freq); + else + CtdlLogPrintf(CTDL_DEBUG, "rssclient: Task STOPPED.\n"); + return NULL; } -#endif /* HAVE_EXPAT */ - CTDL_MODULE_INIT(rssclient) { -#ifdef HAVE_EXPAT - CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER); -#else - lprintf(CTDL_INFO, "This server is missing the Expat XML parser. RSS client will be disabled.\n"); -#endif + if (threading) + { + CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, 0); + } /* return our Subversion id for the Log */ return "$Id: serv_rssclient.c 5652 2007-10-29 20:14:48Z ajc $"; }