X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Fserv_rssclient.c;h=b991b4df13b2c372e794d36fcd98378d3af3ffb5;hb=4a8abab15d6662f2bca504e496013a42d7a65efd;hp=39fdd7bb39d91d1436f1c332e8ed3f4a13757d68;hpb=496debf89657e8b2d09b60b8595d7046f494ad62;p=citadel.git diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index 39fdd7bb3..b991b4df1 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -1,23 +1,21 @@ /* - * $Id$ - * * Bring external RSS feeds into rooms. * - * Copyright (c) 2007-2009 by the citadel.org team + * Copyright (c) 2007-2010 by the citadel.org team * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include @@ -58,49 +56,86 @@ #include "md5.h" #include "context.h" -typedef struct rssnetcfg rssnetcfg; +typedef struct rssnetcfg rssnetcfg; struct rssnetcfg { rssnetcfg *next; char url[256]; char *rooms; time_t last_error_when; int ItemType; + time_t next_poll; }; -#define RSS_UNSET 0 -#define RSS_RSS 1 -#define RSS_ATOM 2 +#define RSS_UNSET (1<<0) +#define RSS_RSS (1<<1) +#define RSS_ATOM (1<<2) +#define RSS_REQUIRE_BUF (1<<3) typedef struct _rss_item { - char *chardata; - int chardata_len; char *roomlist; int done_parsing; - char *guid; - char *title; - char *link; - char *description; + StrBuf *guid; + StrBuf *title; + StrBuf *link; + StrBuf *linkTitle; + StrBuf *reLink; + StrBuf *reLinkTitle; + StrBuf *description; time_t pubdate; - char channel_title[256]; + StrBuf *channel_title; int item_tag_nesting; - char *author_or_creator; - char *author_url; + StrBuf *author_or_creator; + StrBuf *author_url; + StrBuf *author_email; }rss_item; +typedef void (*rss_handler_func)(StrBuf *CData, + rss_item *ri, + rssnetcfg *Cfg, + const char** Attr); + +typedef struct __rss_xml_handler { + int Flags; + rss_handler_func Handler; +}rss_xml_handler; + + typedef struct _rsscollection { + StrBuf *CData; + StrBuf *Key; + rss_item *Item; rssnetcfg *Cfg; - + rss_xml_handler *Current; } rsscollection; struct rssnetcfg *rnclist = NULL; - +HashList *StartHandlers = NULL; +HashList *EndHandlers = NULL; +HashList *KnownNameSpaces = NULL; +HashList *UrlShorteners = NULL; +void AddRSSStartHandler(rss_handler_func Handler, int Flags, const char *key, long len) +{ + rss_xml_handler *h; + h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler)); + h->Flags = Flags; + h->Handler = Handler; + Put(StartHandlers, key, len, h, NULL); +} +void AddRSSEndHandler(rss_handler_func Handler, int Flags, const char *key, long len) +{ + rss_xml_handler *h; + h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler)); + h->Flags = Flags; + h->Handler = Handler; + Put(EndHandlers, key, len, h, NULL); +} #if 0 -#ifdef HAVE_ICONV +//#ifdef HAVE_ICONV #include @@ -123,7 +158,7 @@ fill_encoding_info (const char *charset, XML_Encoding * info) { iconv_t cd = (iconv_t)(-1); int flag; - CtdlLogPrintf(0, "RSS: fill encoding info ...\n"); + syslog(LOG_EMERG, "RSS: fill encoding info ...\n"); #if G_BYTE_ORDER == G_LITTLE_ENDIAN cd = iconv_open ("UCS-2LE", charset); @@ -137,10 +172,10 @@ fill_encoding_info (const char *charset, XML_Encoding * info) } { - unsigned short out; + unsigned short out = 0; unsigned char buf[4]; unsigned int i0, i1, i2; - int result; + int result = 0; flag = 0; for (i0 = 0; i0 < 0x100; i0++) { @@ -201,7 +236,7 @@ iconv_convertor (void *data, const char *s) { XML_Encoding *info = data; int res; - CtdlLogPrintf(0, "RSS: Converting ...\n"); + syslog(LOG_EMERG, "RSS: Converting ...\n"); if (s == NULL) return -1; @@ -234,7 +269,7 @@ handle_unknown_xml_encoding (void *encodingHandleData, XML_Encoding * info) { int result; - CtdlLogPrintf(0, "RSS: unknown encoding ...\n"); + syslog(LOG_EMERG, "RSS: unknown encoding ...\n"); result = fill_encoding_info (name, info); if (result >= 0) { @@ -270,13 +305,236 @@ handle_unknown_xml_encoding (void *encodingHandleData, return 0; } +///#endif #endif +size_t GetLocationString( void *ptr, size_t size, size_t nmemb, void *userdata) +{ +#define LOCATION "location" + if (strncasecmp((char*)ptr, LOCATION, sizeof(LOCATION) - 1) == 0) + { + StrBuf *pURL = (StrBuf*) userdata; + char *pch = (char*) ptr; + char *pche; + + pche = pch + (size * nmemb); + pch += sizeof(LOCATION); + + while (isspace(*pch) || (*pch == ':')) + pch ++; + + while (isspace(*pche) || (*pche == '\0')) + pche--; + + FlushStrBuf(pURL); + StrBufPlain(pURL, pch, pche - pch + 1); + } + return size * nmemb; +} + +int LookupUrl(StrBuf *ShorterUrlStr) +{ + CURL *curl; + char errmsg[1024] = ""; + StrBuf *Answer; + int rc = 0; + + curl = curl_easy_init(); + if (!curl) { + syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); + return 0; + } + Answer = NewStrBufPlain(NULL, SIZ); + + curl_easy_setopt(curl, CURLOPT_URL, ChrPtr(ShorterUrlStr)); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, Answer); +// curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_libcurl_callback); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, CurlFillStrBuf_callback); + curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errmsg); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); +#ifdef CURLOPT_HTTP_CONTENT_DECODING + curl_easy_setopt(curl, CURLOPT_HTTP_CONTENT_DECODING, 1); + curl_easy_setopt(curl, CURLOPT_ENCODING, ""); #endif + curl_easy_setopt(curl, CURLOPT_USERAGENT, CITADEL); + curl_easy_setopt(curl, CURLOPT_TIMEOUT, 180); /* die after 180 seconds */ + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 0); + + curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION , GetLocationString); + curl_easy_setopt(curl, CURLOPT_WRITEHEADER, ShorterUrlStr); + + + if ( + (!IsEmptyStr(config.c_ip_addr)) + && (strcmp(config.c_ip_addr, "*")) + && (strcmp(config.c_ip_addr, "::")) + && (strcmp(config.c_ip_addr, "0.0.0.0")) + ) { + curl_easy_setopt(curl, CURLOPT_INTERFACE, config.c_ip_addr); + } + + if (server_shutting_down) + goto shutdown ; + + rc = curl_easy_perform(curl); + if (rc) { + syslog(LOG_ALERT, "libcurl error %d: %s\n", rc, errmsg); + rc = 0; + } + else + rc = 1; + +shutdown: + FreeStrBuf(&Answer); + curl_easy_cleanup(curl); + + return rc; + +} + + + +void CrawlMessageForShorterUrls(HashList *pUrls, StrBuf *Message) +{ + int nHits = 0; + void *pv; + int nShorter = 0; + const char *pch; + const char *pUrl; + ConstStr *pCUrl; + + while (GetHash(UrlShorteners, IKEY(nShorter), &pv)) + { + nShorter++; + pch = ChrPtr(Message); + pUrl = strstr(pch, ChrPtr((StrBuf*)pv)); + while ((pUrl != NULL) && (nHits < 99)) + { + pCUrl = malloc(sizeof(ConstStr)); + + pCUrl->Key = pUrl; + pch = pUrl + StrLength((StrBuf*)pv); + while (isalnum(*pch)||(*pch == '-')||(*pch == '/')) + pch++; + pCUrl->len = pch - pCUrl->Key; + + Put(pUrls, IKEY(nHits), pCUrl, NULL); + nHits ++; + pUrl = strstr(pch, ChrPtr((StrBuf*)pv)); + } + } +} + +int SortConstStrByPosition(const void *Item1, const void *Item2) +{ + const ConstStr *p1, *p2; + p1 = (const ConstStr*) Item1; + p2 = (const ConstStr*) Item2; + if (p1->Key == p2->Key) + return 0; + if (p1->Key > p2->Key) + return 1; + return -1; +} + +void ExpandShortUrls(StrBuf *Message) +{ + StrBuf *Shadow; + HashList *pUrls; + ConstStr *pCUrl; + const char *pch; + const char *pche; + + /* we just suspect URL shorteners to be inside of feeds from twitter + * or other short content messages, so don't crawl through real blogs. + */ + if (StrLength(Message) > 500) + return; + + pUrls = NewHash(1, Flathash); + CrawlMessageForShorterUrls(pUrls, Message); + + if (GetCount(pUrls) > 0) + { + StrBuf *ShorterUrlStr; + HashPos *Pos; + const char *RetrKey; + void *pv; + long len; + + Shadow = NewStrBufPlain(NULL, StrLength(Message)); + SortByPayload (pUrls, SortConstStrByPosition); + + ShorterUrlStr = NewStrBufPlain(NULL, StrLength(Message)); + + pch = ChrPtr(Message); + pche = pch + StrLength(Message); + Pos = GetNewHashPos(pUrls, 1); + while (GetNextHashPos(pUrls, Pos, &len, &RetrKey, &pv)) + { + pCUrl = (ConstStr*) pv; + + if (pch != pCUrl->Key) + StrBufAppendBufPlain(Shadow, pch, pCUrl->Key - pch, 0); + + StrBufPlain(ShorterUrlStr, CKEY(*pCUrl)); + if (LookupUrl(ShorterUrlStr)) + { + StrBufAppendBufPlain(Shadow, HKEY(""), 0); + StrBufAppendBuf(Shadow, ShorterUrlStr, 0); + StrBufAppendBufPlain(Shadow, HKEY("["), 0); + StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0); + StrBufAppendBufPlain(Shadow, HKEY("]"), 0); + } + else + { + StrBufAppendBufPlain(Shadow, HKEY("Key, pCUrl->len, 0); + StrBufAppendBufPlain(Shadow, HKEY("\">"), 0); + StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0); + StrBufAppendBufPlain(Shadow, HKEY(""), 0); + } + pch = pCUrl->Key + pCUrl->len + 1; + + } + if (pch < pche) + StrBufAppendBufPlain(Shadow, pch, pche - pch, 0); + FlushStrBuf(Message); + StrBufAppendBuf(Message, Shadow, 0); + + FreeStrBuf(&ShorterUrlStr); + FreeStrBuf(&Shadow); + DeleteHashPos(&Pos); + } + + DeleteHash(&pUrls); +} + +void AppendLink(StrBuf *Message, StrBuf *link, StrBuf *LinkTitle, const char *Title) +{ + if (StrLength(link) > 0) + { + StrBufAppendBufPlain(Message, HKEY(""), 0); + if (StrLength(LinkTitle) > 0) + StrBufAppendBuf(Message, LinkTitle, 0); + else if ((Title != NULL) && !IsEmptyStr(Title)) + StrBufAppendBufPlain(Message, Title, -1, 0); + else + StrBufAppendBuf(Message, link, 0); + StrBufAppendBufPlain(Message, HKEY("
\n"), 0); + } +} /* * Commit a fetched and parsed RSS item to disk */ -void rss_save_item(rsscollection *rssc) { +void rss_save_item(rss_item *ri) +{ struct MD5Context md5context; u_char rawdigest[MD5_DIGEST_LEN]; @@ -287,7 +545,7 @@ void rss_save_item(rsscollection *rssc) { struct CtdlMessage *msg; struct recptypes *recp = NULL; int msglen = 0; - rss_item *ri = rssc->Item; + StrBuf *Message; recp = (struct recptypes *) malloc(sizeof(struct recptypes)); if (recp == NULL) return; @@ -301,15 +559,17 @@ void rss_save_item(rsscollection *rssc) { * If one is not present in the item itself, make one up. */ if (ri->guid != NULL) { - snprintf(utmsgid, sizeof utmsgid, "rss/%s", ri->guid); + StrBufSpaceToBlank(ri->guid); + StrBufTrim(ri->guid); + snprintf(utmsgid, sizeof utmsgid, "rss/%s", ChrPtr(ri->guid)); } else { MD5Init(&md5context); if (ri->title != NULL) { - MD5Update(&md5context, (unsigned char*)ri->title, strlen(ri->title)); + MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->title), StrLength(ri->title)); } if (ri->link != NULL) { - MD5Update(&md5context, (unsigned char*)ri->link, strlen(ri->link)); + MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->link), StrLength(ri->link)); } MD5Final(rawdigest, &md5context); for (i=0; idescription == NULL) ri->description = strdup(""); - for (i=strlen(ri->description); i>=0; --i) { - if (isspace(ri->description[i])) { - ri->description[i] = ' '; - } - } - + syslog(LOG_DEBUG, "RSS: saving item...\n"); + if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY("")); + StrBufSpaceToBlank(ri->description); msg = malloc(sizeof(struct CtdlMessage)); memset(msg, 0, sizeof(struct CtdlMessage)); msg->cm_magic = CTDLMESSAGE_MAGIC; @@ -349,13 +608,42 @@ void rss_save_item(rsscollection *rssc) { msg->cm_format_type = FMT_RFC822; if (ri->guid != NULL) { - msg->cm_fields['E'] = strdup(ri->guid); + msg->cm_fields['E'] = strdup(ChrPtr(ri->guid)); } if (ri->author_or_creator != NULL) { - msg->cm_fields['A'] = html_to_ascii(ri->author_or_creator, - strlen(ri->author_or_creator), 512, 0); - striplt(msg->cm_fields['A']); + char *From; + StrBuf *Encoded = NULL; + int FromAt; + + From = html_to_ascii(ChrPtr(ri->author_or_creator), + StrLength(ri->author_or_creator), + 512, 0); + StrBufPlain(ri->author_or_creator, From, -1); + StrBufTrim(ri->author_or_creator); + free(From); + + FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL; + if (!FromAt && StrLength (ri->author_email) > 0) + { + StrBufRFC2047encode(&Encoded, ri->author_or_creator); + msg->cm_fields['A'] = SmashStrBuf(&Encoded); + msg->cm_fields['P'] = SmashStrBuf(&ri->author_email); + } + else + { + if (FromAt) + { + msg->cm_fields['A'] = SmashStrBuf(&ri->author_or_creator); + msg->cm_fields['P'] = strdup(msg->cm_fields['A']); + } + else + { + StrBufRFC2047encode(&Encoded, ri->author_or_creator); + msg->cm_fields['A'] = SmashStrBuf(&Encoded); + msg->cm_fields['P'] = strdup("rss@localhost"); + } + } } else { msg->cm_fields['A'] = strdup("rss"); @@ -363,30 +651,63 @@ void rss_save_item(rsscollection *rssc) { msg->cm_fields['N'] = strdup(NODENAME); if (ri->title != NULL) { - msg->cm_fields['U'] = html_to_ascii(ri->title, strlen(ri->title), 512, 0); - striplt(msg->cm_fields['U']); + long len; + char *Sbj; + StrBuf *Encoded, *QPEncoded; + + QPEncoded = NULL; + StrBufSpaceToBlank(ri->title); + len = StrLength(ri->title); + Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0); + len = strlen(Sbj); + if (Sbj[len - 1] == '\n') + { + len --; + Sbj[len] = '\0'; + } + Encoded = NewStrBufPlain(Sbj, len); + free(Sbj); + + StrBufTrim(Encoded); + StrBufRFC2047encode(&QPEncoded, Encoded); + + msg->cm_fields['U'] = SmashStrBuf(&QPEncoded); + FreeStrBuf(&Encoded); + } + + if (ri->pubdate <= 0) { + ri->pubdate = time(NULL); } msg->cm_fields['T'] = malloc(64); snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate); + if (ri->channel_title != NULL) { - if (!IsEmptyStr(ri->channel_title)) { - msg->cm_fields['O'] = strdup(ri->channel_title); + if (StrLength(ri->channel_title) > 0) { + msg->cm_fields['O'] = strdup(ChrPtr(ri->channel_title)); } } if (ri->link == NULL) - ri->link = strdup(""); - msglen += 1024 + strlen(ri->link) + strlen(ri->description) ; - msg->cm_fields['M'] = malloc(msglen); - snprintf(msg->cm_fields['M'], msglen, - "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n" - "\n" - "%s

\n" - "%s\n" - "\n" - , - ri->description, - ri->link, ri->link - ); + ri->link = NewStrBufPlain(HKEY("")); +#ifdef EXPERIMENTAL_SHORTER_URLS +/* its rather hard to implement this libevent compatible, so we don't ship it. */ + ExpandShortUrls(ri->description); +#endif + msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ; + + Message = NewStrBufPlain(NULL, StrLength(ri->description)); + + StrBufPlain(Message, HKEY( + "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n" + "\n")); + + StrBufAppendBuf(Message, ri->description, 0); + StrBufAppendBufPlain(Message, HKEY("

\n"), 0); + + AppendLink(Message, ri->link, ri->linkTitle, NULL); + AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this"); + StrBufAppendBufPlain(Message, HKEY("\n"), 0); + + msg->cm_fields['M'] = SmashStrBuf(&Message); CtdlSubmitMsg(msg, recp, NULL, 0); CtdlFreeMessage(msg); @@ -404,7 +725,7 @@ void rss_save_item(rsscollection *rssc) { /* * Convert an RDF/RSS datestamp into a time_t */ -time_t rdf_parsedate(char *p) +time_t rdf_parsedate(const char *p) { struct tm tm; time_t t = 0; @@ -441,240 +762,485 @@ time_t rdf_parsedate(char *p) return(time(NULL)); } -#define RSS_UNSET 0 -#define RSS_RSS 1 -#define RSS_ATOM 2 - -void flush_rss_ite(rss_item *ri) +void flush_rss_item(rss_item *ri) { /* Initialize the feed item data structure */ - if (ri->guid != NULL) free(ri->guid); - ri->guid = NULL; - if (ri->title != NULL) free(ri->title); - ri->title = NULL; - if (ri->link != NULL) free(ri->link); - ri->link = NULL; - if (ri->author_or_creator != NULL) free(ri->author_or_creator); - ri->author_or_creator = NULL; - if (ri->author_url != NULL) free(ri->author_url); - ri->author_url = NULL; - if (ri->description != NULL) free(ri->description); - ri->description = NULL; - /* Throw away any existing character data */ - if (ri->chardata_len > 0) { - free(ri->chardata); - ri->chardata = 0; - ri->chardata_len = 0; - } -} - -void rss_xml_start(void *data, const char *supplied_el, const char **attr) { - rsscollection *rssc = (rsscollection*) data; - rss_item *ri = rssc->Item; - char el[256]; - char *sep = NULL; + FreeStrBuf(&ri->guid); + FreeStrBuf(&ri->title); + FreeStrBuf(&ri->link); + FreeStrBuf(&ri->linkTitle); + FreeStrBuf(&ri->reLink); + FreeStrBuf(&ri->reLinkTitle); + FreeStrBuf(&ri->description); + FreeStrBuf(&ri->channel_title); + FreeStrBuf(&ri->author_or_creator); + FreeStrBuf(&ri->author_url); + FreeStrBuf(&ri->author_email); +} + +void rss_xml_start(void *data, const char *supplied_el, const char **attr) +{ + rss_xml_handler *h; + rsscollection *rssc = (rsscollection*) data; + rssnetcfg *Cfg = rssc->Cfg; + rss_item *ri = rssc->Item; + void *pv; + const char *pel; + char *sep = NULL; /* Axe the namespace, we don't care about it */ -/// CtdlLogPrintf(0, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el); - safestrncpy(el, supplied_el, sizeof el); - while (sep = strchr(el, ':'), sep) { - strcpy(el, ++sep); +/// syslog(LOG_EMERG, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el); + pel = supplied_el; + while (sep = strchr(pel, ':'), sep) { + pel = sep + 1; } - if ((rssc->Cfg->ItemType == RSS_UNSET) && !strcasecmp(el, "rss")) + if (pel != supplied_el) { - CtdlLogPrintf(9, "RSS: This is an RSS feed.\n"); - rssc->Cfg->ItemType = RSS_RSS; + void *v; + + if (!GetHash(KnownNameSpaces, + supplied_el, + pel - supplied_el - 1, + &v)) + { +#ifdef DEBUG_RSS + syslog(LOG_EMERG, "RSS: START ignoring because of wrong namespace [%s] = [%s]\n", + supplied_el); +#endif + return; + } } - if ((rssc->Cfg->ItemType == RSS_UNSET) && !strcasecmp(el, "rdf")) + + StrBufPlain(rssc->Key, pel, -1); + StrBufLowerCase(rssc->Key); + if (GetHash(StartHandlers, SKEY(rssc->Key), &pv)) { - CtdlLogPrintf(9, "RSS: This is an RDF feed.\n"); - rssc->Cfg->ItemType = RSS_RSS; + rssc->Current = h = (rss_xml_handler*) pv; + + if (((h->Flags & RSS_UNSET) != 0) && + (Cfg->ItemType == RSS_UNSET)) + { + h->Handler(rssc->CData, ri, Cfg, attr); + } + else if (((h->Flags & RSS_RSS) != 0) && + (Cfg->ItemType == RSS_RSS)) + { + h->Handler(rssc->CData, ri, Cfg, attr); + } + else if (((h->Flags & RSS_ATOM) != 0) && + (Cfg->ItemType == RSS_ATOM)) + { + h->Handler(rssc->CData, ri, Cfg, attr); + } +#ifdef DEBUG_RSS + else + syslog(LOG_EMERG, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); +#endif } - else if ((rssc->Cfg->ItemType == RSS_UNSET) && !strcasecmp(el, "feed")) - { - CtdlLogPrintf(9, "RSS: This is an ATOM feed.\n"); - rssc->Cfg->ItemType = RSS_ATOM; +#ifdef DEBUG_RSS + else + syslog(LOG_EMERG, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); +#endif +} + +void rss_xml_end(void *data, const char *supplied_el) +{ + rss_xml_handler *h; + rsscollection *rssc = (rsscollection*) data; + rssnetcfg *Cfg = rssc->Cfg; + rss_item *ri = rssc->Item; + const char *pel; + char *sep = NULL; + void *pv; + + /* Axe the namespace, we don't care about it */ + pel = supplied_el; + while (sep = strchr(pel, ':'), sep) { + pel = sep + 1; } - else if ((rssc->Cfg->ItemType == RSS_RSS) && - !strcasecmp(el, "item")) +// syslog(LOG_EMERG, "RSS: END %s...\n", el); + if (pel != supplied_el) { - ri->item_tag_nesting ++ ; - flush_rss_ite(ri); - } - else if ( (rssc->Cfg->ItemType == RSS_ATOM) && - !strcasecmp(el, "entry")) - { /* Atom feed... */ - ++ri->item_tag_nesting; - flush_rss_ite(ri); + void *v; + + if (!GetHash(KnownNameSpaces, + supplied_el, + pel - supplied_el - 1, + &v)) + { +#ifdef DEBUG_RSS + syslog(LOG_EMERG, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n", + supplied_el, ChrPtr(rssc->CData)); +#endif + FlushStrBuf(rssc->CData); + return; + } } - else if ((rssc->Cfg->ItemType == RSS_ATOM) && - !strcasecmp(el, "link")) + + StrBufPlain(rssc->Key, pel, -1); + StrBufLowerCase(rssc->Key); + if (GetHash(EndHandlers, SKEY(rssc->Key), &pv)) { - int found ; - int i; + h = (rss_xml_handler*) pv; - for (found = 0, i = 0;!found && attr[i] != NULL; i+=2) + if (((h->Flags & RSS_UNSET) != 0) && + (Cfg->ItemType == RSS_UNSET)) { - if (!strcmp(attr[i], "href")) - { - found = 1; - if (ri->link != NULL) - free(ri->link); - ri->link = strdup(attr[i+1]); - striplt(ri->link); - } + h->Handler(rssc->CData, ri, Cfg, NULL); } - + else if (((h->Flags & RSS_RSS) != 0) && + (Cfg->ItemType == RSS_RSS)) + { + h->Handler(rssc->CData, ri, Cfg, NULL); + } + else if (((h->Flags & RSS_ATOM) != 0) && + (Cfg->ItemType == RSS_ATOM)) + { + h->Handler(rssc->CData, ri, Cfg, NULL); + } +#ifdef DEBUG_RSS + else + syslog(LOG_EMERG, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); +#endif } +#ifdef DEBUG_RSS + else + syslog(LOG_EMERG, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); +#endif + FlushStrBuf(rssc->CData); + rssc->Current = NULL; +} + + + + +void RSS_item_rss_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + syslog(LOG_DEBUG, "RSS: This is an RSS feed.\n"); + Cfg->ItemType = RSS_RSS; } -void rss_xml_end(void *data, const char *supplied_el) { - rsscollection *rssc = (rsscollection*) data; - rss_item *ri = rssc->Item; - char el[256]; - char *sep = NULL; +void RSS_item_rdf_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + syslog(LOG_DEBUG, "RSS: This is an RDF feed.\n"); + Cfg->ItemType = RSS_RSS; +} - /* Axe the namespace, we don't care about it */ - safestrncpy(el, supplied_el, sizeof el); - while (sep = strchr(el, ':'), sep) { - strcpy(el, ++sep); +void ATOM_item_feed_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + syslog(LOG_DEBUG, "RSS: This is an ATOM feed.\n"); + Cfg->ItemType = RSS_ATOM; +} + + +void RSS_item_item_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + ri->item_tag_nesting ++; + flush_rss_item(ri); +} + +void ATOM_item_entry_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ +/* Atom feed... */ + ri->item_tag_nesting ++; + flush_rss_item(ri); +} + +void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + int i; + const char *pHref = NULL; + const char *pType = NULL; + const char *pRel = NULL; + const char *pTitle = NULL; + + for (i = 0; Attr[i] != NULL; i+=2) + { + if (!strcmp(Attr[i], "href")) + { + pHref = Attr[i+1]; + } + else if (!strcmp(Attr[i], "rel")) + { + pRel = Attr[i+1]; + } + else if (!strcmp(Attr[i], "type")) + { + pType = Attr[i+1]; + } + else if (!strcmp(Attr[i], "title")) + { + pTitle = Attr[i+1]; + } } -// CtdlLogPrintf(0, "RSS: END %s...\n", el); + if (pHref == NULL) + return; /* WHUT? Pointing... where? */ + if ((pType != NULL) && !strcasecmp(pType, "application/atom+xml")) + return; /* these just point to other rss resources, we're not interested in them. */ + if (pRel != NULL) + { + if (!strcasecmp (pRel, "replies")) + { + NewStrBufDupAppendFlush(&ri->reLink, NULL, pHref, -1); + StrBufTrim(ri->link); + NewStrBufDupAppendFlush(&ri->reLinkTitle, NULL, pTitle, -1); + } + else if (!strcasecmp(pRel, "alternate")) /* Alternative representation of this Item... */ + { + NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1); + StrBufTrim(ri->link); + NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1); - if ( (!strcasecmp(el, "title")) && (ri->item_tag_nesting == 0) && (ri->chardata != NULL) ) { - safestrncpy(ri->channel_title, ri->chardata, sizeof ri->channel_title); - striplt(ri->channel_title); + } +#if 0 /* these are also defined, but dunno what to do with them.. */ + else if (!strcasecmp(pRel, "related")) + { + } + else if (!strcasecmp(pRel, "self")) + { + } + else if (!strcasecmp(pRel, "enclosure")) + {/* this reference can get big, and is probably the full article... */ + } + else if (!strcasecmp(pRel, "via")) + {/* this article was provided via... */ + } +#endif } + else if (StrLength(ri->link) == 0) + { + NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1); + StrBufTrim(ri->link); + NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1); + } +} + + + - if ( (rssc->Cfg->ItemType == RSS_RSS) && - (!strcasecmp(el, "guid")) && (ri->chardata != NULL) ) { - if (ri->guid != NULL) free(ri->guid); - striplt(ri->chardata); - ri->guid = strdup(ri->chardata); +void ATOMRSS_item_title_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if ((ri->item_tag_nesting == 0) && (StrLength(CData) > 0)) { + NewStrBufDupAppendFlush(&ri->channel_title, CData, NULL, 0); + StrBufTrim(ri->channel_title); } - else if ( (rssc->Cfg->ItemType == RSS_ATOM) && - (!strcasecmp(el, "id")) && (ri->chardata != NULL) ) { - if (ri->guid != NULL) free(ri->guid); - striplt(ri->chardata); - ri->guid = strdup(ri->chardata); +} + +void RSS_item_guid_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if (StrLength(CData) > 0) { + NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0); } +} - else if ( (rssc->Cfg->ItemType == RSS_RSS) && (!strcasecmp(el, "link")) && (ri->chardata != NULL) ) { - if (ri->link != NULL) free(ri->link); - striplt(ri->chardata); - ri->link = strdup(ri->chardata); +void ATOM_item_id_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if (StrLength(CData) > 0) { + NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0); } +} + - else if ( (!strcasecmp(el, "title")) && (ri->chardata != NULL) ) { - if (ri->title != NULL) free(ri->title); - striplt(ri->chardata); - ri->title = strdup(ri->chardata); +void RSS_item_link_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if (StrLength(CData) > 0) { + NewStrBufDupAppendFlush(&ri->link, CData, NULL, 0); + StrBufTrim(ri->link); } +} +void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if (StrLength(CData) > 0) { + NewStrBufDupAppendFlush(&ri->reLink, CData, NULL, 0); + StrBufTrim(ri->reLink); + } +} - else if ((rssc->Cfg->ItemType == RSS_ATOM) && - (!strcasecmp(el, "content")) && - (ri->chardata != NULL) ) { - if (ri->description != NULL) free(ri->description); - ri->description = strdup(ri->chardata); +void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if (StrLength(CData) > 0) { + NewStrBufDupAppendFlush(&ri->title, CData, NULL, 0); + StrBufTrim(ri->title); } - else if ( (rssc->Cfg->ItemType == RSS_RSS) && - (!strcasecmp(el, "description")) && - (ri->chardata != NULL) ) { - if (ri->description != NULL) free(ri->description); - ri->description = strdup(ri->chardata); +} + +void ATOM_item_content_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + long olen = StrLength (ri->description); + long clen = StrLength (CData); + if (clen > 0) + { + if (olen == 0) { + NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); + StrBufTrim(ri->description); + } + else if (olen < clen) { + FlushStrBuf(ri->description); + NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); + StrBufTrim(ri->description); + } + } +} +void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + /* this can contain an abstract of the article. but we don't want to verwrite a full document if we already have it. */ + if ((StrLength(CData) > 0) && (StrLength(ri->description) == 0)) + { + NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); + StrBufTrim(ri->description); } - - else if ((rssc->Cfg->ItemType == RSS_ATOM) && - ((!strcasecmp(el, "published")) || - (!strcasecmp(el, "updated"))) && - (ri->chardata != NULL) ) { - striplt(ri->chardata); - ri->pubdate = rdf_parsedate(ri->chardata); +} + +void RSS_item_description_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + long olen = StrLength (ri->description); + long clen = StrLength (CData); + if (clen > 0) + { + if (olen == 0) { + NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); + StrBufTrim(ri->description); + } + else if (olen < clen) { + FlushStrBuf(ri->description); + NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); + StrBufTrim(ri->description); + } } +} +void ATOM_item_published_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if (StrLength(CData) > 0) { + StrBufTrim(CData); + ri->pubdate = rdf_parsedate(ChrPtr(CData)); + } +} - else if ((rssc->Cfg->ItemType == RSS_RSS) && - ((!strcasecmp(el, "pubdate")) || - (!strcasecmp(el, "date"))) && - (ri->chardata != NULL) ) { - striplt(ri->chardata); - ri->pubdate = rdf_parsedate(ri->chardata); +void ATOM_item_updated_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if (StrLength(CData) > 0) { + StrBufTrim(CData); + ri->pubdate = rdf_parsedate(ChrPtr(CData)); } +} - else if ((rssc->Cfg->ItemType == RSS_RSS) && - ((!strcasecmp(el, "author")) || - (!strcasecmp(el, "creator"))) && - (ri->chardata != NULL) ) { - if (ri->author_or_creator != NULL) free(ri->author_or_creator); - striplt(ri->chardata); - ri->author_or_creator = strdup(ri->chardata); +void RSS_item_pubdate_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if (StrLength(CData) > 0) { + StrBufTrim(CData); + ri->pubdate = rdf_parsedate(ChrPtr(CData)); } +} + - else if ((rssc->Cfg->ItemType == RSS_ATOM) && - (!strcasecmp(el, "name")) && - (ri->chardata != NULL) ) { - if (ri->author_or_creator != NULL) free(ri->author_or_creator); - striplt(ri->chardata); - ri->author_or_creator = strdup(ri->chardata); +void RSS_item_date_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if (StrLength(CData) > 0) { + StrBufTrim(CData); + ri->pubdate = rdf_parsedate(ChrPtr(CData)); } - else if ((rssc->Cfg->ItemType == RSS_ATOM) && - (!strcasecmp(el, "uri")) && - (ri->chardata != NULL) ) { - if (ri->author_url != NULL) free(ri->author_url); - striplt(ri->chardata); - ri->author_url = strdup(ri->chardata); +} + + + +void RSS_item_author_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if (StrLength(CData) > 0) { + NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0); + StrBufTrim(ri->author_or_creator); } +} + - else if ((rssc->Cfg->ItemType == RSS_RSS) && - !strcasecmp(el, "item")) { - --ri->item_tag_nesting; - rss_save_item(rssc); +void ATOM_item_name_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if (StrLength(CData) > 0) { + NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0); + StrBufTrim(ri->author_or_creator); } - else if ((rssc->Cfg->ItemType == RSS_ATOM) && - !strcasecmp(el, "entry")) { - --ri->item_tag_nesting; - rss_save_item(rssc); +} + +void ATOM_item_email_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if (StrLength(CData) > 0) { + NewStrBufDupAppendFlush(&ri->author_email, CData, NULL, 0); + StrBufTrim(ri->author_email); } +} - else if ( (!strcasecmp(el, "rss")) || - (!strcasecmp(el, "rdf")) ) { -// CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n"); - ri->done_parsing = 1; +void RSS_item_creator_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if ((StrLength(CData) > 0) && + (StrLength(ri->author_or_creator) == 0)) + { + NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0); + StrBufTrim(ri->author_or_creator); } +} + - if (ri->chardata_len > 0) { - free(ri->chardata); - ri->chardata = 0; - ri->chardata_len = 0; +void ATOM_item_uri_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if (StrLength(CData) > 0) { + NewStrBufDupAppendFlush(&ri->author_url, CData, NULL, 0); + StrBufTrim(ri->author_url); } +} + +void RSS_item_item_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + --ri->item_tag_nesting; + rss_save_item(ri); +} + +void ATOM_item_entry_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + --ri->item_tag_nesting; + rss_save_item(ri); } +void RSS_item_rss_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ +// syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n"); + ri->done_parsing = 1; + +} +void RSS_item_rdf_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ +// syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n"); + ri->done_parsing = 1; +} + + +void RSSATOM_item_ignore(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ +} + + /* * This callback stores up the data which appears in between tags. */ -void rss_xml_chardata(void *data, const XML_Char *s, int len) { +void rss_xml_cdata_start(void *data) +{ rsscollection *rssc = (rsscollection*) data; - rss_item *ri = rssc->Item; - int old_len; - int new_len; - char *new_buffer; - old_len = ri->chardata_len; - new_len = old_len + len; - new_buffer = realloc(ri->chardata, new_len + 1); - if (new_buffer != NULL) { - memcpy(&new_buffer[old_len], s, len); - new_buffer[new_len] = 0; - ri->chardata = new_buffer; - ri->chardata_len = new_len; - } + FlushStrBuf(rssc->CData); } +void rss_xml_cdata_end(void *data) +{ +} +void rss_xml_chardata(void *data, const XML_Char *s, int len) +{ + rsscollection *rssc = (rsscollection*) data; + StrBufAppendBufPlain (rssc->CData, s, len, 0); +} /* * Callback function for passing libcurl's output to expat for parsing @@ -693,35 +1259,41 @@ size_t rss_libcurl_callback(void *ptr, size_t size, size_t nmemb, void *stream) void rss_do_fetching(rssnetcfg *Cfg) { rsscollection rssc; rss_item ri; - XML_Parser xp; + XML_Parser xp = NULL; + StrBuf *Answer; CURL *curl; CURLcode res; char errmsg[1024] = ""; + char *ptr; + const char *at; + long len; + + time_t now; + now = time(NULL); + + if ((Cfg->next_poll != 0) && (now < Cfg->next_poll)) + return; + memset(&ri, 0, sizeof(rss_item)); rssc.Item = &ri; rssc.Cfg = Cfg; - CtdlLogPrintf(CTDL_DEBUG, "Fetching RSS feed <%s>\n", Cfg->url); + syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", Cfg->url); curl = curl_easy_init(); if (!curl) { - CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n"); - return; - } - - xp = XML_ParserCreateNS("UTF-8", ':'); - if (!xp) { - CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n"); - curl_easy_cleanup(curl); + syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); return; } + Answer = NewStrBufPlain(NULL, SIZ); curl_easy_setopt(curl, CURLOPT_URL, Cfg->url); curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0); curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, xp); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_libcurl_callback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, Answer); +// curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_libcurl_callback); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, CurlFillStrBuf_callback); curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errmsg); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); #ifdef CURLOPT_HTTP_CONTENT_DECODING @@ -730,53 +1302,103 @@ void rss_do_fetching(rssnetcfg *Cfg) { #endif curl_easy_setopt(curl, CURLOPT_USERAGENT, CITADEL); curl_easy_setopt(curl, CURLOPT_TIMEOUT, 180); /* die after 180 seconds */ - if (!IsEmptyStr(config.c_ip_addr)) { + if ( + (!IsEmptyStr(config.c_ip_addr)) + && (strcmp(config.c_ip_addr, "*")) + && (strcmp(config.c_ip_addr, "::")) + && (strcmp(config.c_ip_addr, "0.0.0.0")) + ) { curl_easy_setopt(curl, CURLOPT_INTERFACE, config.c_ip_addr); } - memset(&ri, 0, sizeof(rss_item)); - ri.roomlist = Cfg->rooms; -#ifdef HAVE_ICONV -#if 0 - XML_SetUnknownEncodingHandler(xp, - handle_unknown_xml_encoding, - NULL); -#endif -#endif - XML_SetElementHandler(xp, rss_xml_start, rss_xml_end); - XML_SetCharacterDataHandler(xp, rss_xml_chardata); - XML_SetUserData(xp, &rssc); - if (CtdlThreadCheckStop()) + if (server_shutting_down) { - XML_ParserFree(xp); curl_easy_cleanup(curl); return; } - if (CtdlThreadCheckStop()) + if (server_shutting_down) goto shutdown ; res = curl_easy_perform(curl); if (res) { - CtdlLogPrintf(CTDL_ALERT, "libcurl error %d: %s\n", res, errmsg); + syslog(LOG_ALERT, "libcurl error %d: %s\n", res, errmsg); } - if (CtdlThreadCheckStop()) + if (server_shutting_down) goto shutdown ; + + + + memset(&ri, 0, sizeof(rss_item)); + ri.roomlist = Cfg->rooms; + rssc.CData = NewStrBufPlain(NULL, SIZ); + rssc.Key = NewStrBuf(); + at = NULL; + StrBufSipLine(rssc.Key, Answer, &at); + ptr = NULL; + +#define encoding "encoding=\"" + ptr = strstr(ChrPtr(rssc.Key), encoding); + if (ptr != NULL) + { + char *pche; + + ptr += sizeof (encoding) - 1; + pche = strchr(ptr, '"'); + if (pche != NULL) + StrBufCutAt(rssc.Key, -1, pche); + else + ptr = "UTF-8"; + } + else + ptr = "UTF-8"; + + + xp = XML_ParserCreateNS(ptr, ':'); + if (!xp) { + syslog(LOG_ALERT, "Cannot create XML parser!\n"); + goto shutdown; + } + FlushStrBuf(rssc.Key); +//#ifdef HAVE_ICONV +#if 0 + XML_SetUnknownEncodingHandler(xp, + handle_unknown_xml_encoding, + &rssc); +#endif +//#endif + XML_SetElementHandler(xp, rss_xml_start, rss_xml_end); + XML_SetCharacterDataHandler(xp, rss_xml_chardata); + XML_SetUserData(xp, &rssc); + XML_SetCdataSectionHandler(xp, + rss_xml_cdata_start, + rss_xml_cdata_end); + + + len = StrLength(Answer); + ptr = SmashStrBuf(&Answer); + XML_Parse(xp, ptr, len, 0); + free (ptr); if (ri.done_parsing == 0) XML_Parse(xp, "", 0, 1); - CtdlLogPrintf(CTDL_ALERT, "RSS: XML Status [%s] \n", + syslog(LOG_ALERT, "RSS: XML Status [%s] \n", XML_ErrorString( XML_GetErrorCode(xp))); shutdown: + FreeStrBuf(&Answer); curl_easy_cleanup(curl); XML_ParserFree(xp); - flush_rss_ite(&ri); + flush_rss_item(&ri); + FreeStrBuf(&rssc.CData); + FreeStrBuf(&rssc.Key); + + Cfg->next_poll = time(NULL) + config.c_net_freq; } @@ -797,7 +1419,7 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir); - if (CtdlThreadCheckStop()) + if (server_shutting_down) return; /* Only do net processing for rooms that have netconfigs */ @@ -806,7 +1428,7 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) return; } - while (fgets(buf, sizeof buf, fp) != NULL && !CtdlThreadCheckStop()) { + while (fgets(buf, sizeof buf, fp) != NULL && !server_shutting_down) { buf[strlen(buf)-1] = 0; extract_token(instr, buf, 0, '|', sizeof instr); @@ -828,6 +1450,7 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) /* Otherwise create a new client request */ if (use_this_rncptr == NULL) { rncptr = (rssnetcfg *) malloc(sizeof(rssnetcfg)); + memset(rncptr, 0, sizeof(rssnetcfg)); rncptr->ItemType = RSS_UNSET; if (rncptr != NULL) { rncptr->next = rnclist; @@ -864,15 +1487,15 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) /* * Scan for rooms that have RSS client requests configured */ -void *rssclient_scan(void *args) { +void rssclient_scan(void) { static time_t last_run = 0L; static int doing_rssclient = 0; rssnetcfg *rptr = NULL; - CitContext rssclientCC; - /* Give this thread its own private CitContext */ - CtdlFillSystemContext(&rssclientCC, "rssclient"); - citthread_setspecific(MyConKey, (void *)&rssclientCC ); + /* Run no more than once every 15 minutes. */ + if ((time(NULL) - last_run) < 900) { + return; + } /* * This is a simple concurrency check to make sure only one rssclient run @@ -880,13 +1503,13 @@ void *rssclient_scan(void *args) { * don't really require extremely fine granularity here, we'll do it * with a static variable instead. */ - if (doing_rssclient) return NULL; + if (doing_rssclient) return; doing_rssclient = 1; - CtdlLogPrintf(CTDL_DEBUG, "rssclient started\n"); + syslog(LOG_DEBUG, "rssclient started\n"); CtdlForEachRoom(rssclient_scan_room, NULL); - while (rnclist != NULL && !CtdlThreadCheckStop()) { + while (rnclist != NULL && !server_shutting_down) { rss_do_fetching(rnclist); rptr = rnclist; rnclist = rnclist->next; @@ -894,25 +1517,152 @@ void *rssclient_scan(void *args) { free(rptr); } - CtdlLogPrintf(CTDL_DEBUG, "rssclient ended\n"); + syslog(LOG_DEBUG, "rssclient ended\n"); last_run = time(NULL); doing_rssclient = 0; - if (!CtdlThreadCheckStop()) - CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, last_run + config.c_net_freq); - else - CtdlLogPrintf(CTDL_DEBUG, "rssclient: Task STOPPED.\n"); - CtdlClearSystemContext(); - return NULL; + return; +} + +void LoadUrlShorteners(void) +{ + int i = 0; + int fd; + const char *POS = NULL; + const char *Err = NULL; + StrBuf *Content, *Line; + + + UrlShorteners = NewHash(0, Flathash); + + fd = open(file_citadel_urlshorteners, 0); + + if (fd != 0) + { + Content = NewStrBufPlain(NULL, SIZ); + Line = NewStrBuf(); + while (POS != StrBufNOTNULL) + { + StrBufTCP_read_buffered_line_fast (Line, Content, &POS, &fd, 1, 1, &Err); + StrBufTrim(Line); + if ((*ChrPtr(Line) != '#') && (StrLength(Line) > 0)) + { + Put(UrlShorteners, IKEY(i), Line, HFreeStrBuf); + i++; + Line = NewStrBuf(); + } + else + FlushStrBuf(Line); + if (POS == NULL) + POS = StrBufNOTNULL; + } + FreeStrBuf(&Line); + FreeStrBuf(&Content); + } + close(fd); } +void rss_cleanup(void) +{ + DeleteHash(&StartHandlers); + DeleteHash(&EndHandlers); + DeleteHash(&UrlShorteners); + DeleteHash(&KnownNameSpaces); +} CTDL_MODULE_INIT(rssclient) { if (threading) { - CtdlLogPrintf(CTDL_INFO, "%s\n", curl_version()); - CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, 0); + syslog(LOG_INFO, "%s\n", curl_version()); + CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER); + } + else + { + LoadUrlShorteners (); + + StartHandlers = NewHash(1, NULL); + EndHandlers = NewHash(1, NULL); + + AddRSSStartHandler(RSS_item_rss_start, RSS_UNSET, HKEY("rss")); + AddRSSStartHandler(RSS_item_rdf_start, RSS_UNSET, HKEY("rdf")); + AddRSSStartHandler(ATOM_item_feed_start, RSS_UNSET, HKEY("feed")); + AddRSSStartHandler(RSS_item_item_start, RSS_RSS, HKEY("item")); + AddRSSStartHandler(ATOM_item_entry_start, RSS_ATOM, HKEY("entry")); + AddRSSStartHandler(ATOM_item_link_start, RSS_ATOM, HKEY("link")); + + AddRSSEndHandler(ATOMRSS_item_title_end, RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title")); + AddRSSEndHandler(RSS_item_guid_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("guid")); + AddRSSEndHandler(ATOM_item_id_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("id")); + AddRSSEndHandler(RSS_item_link_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("link")); +#if 0 +// hm, rss to the comments of that blog, might be interesting in future, but... + AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("commentrss")); +// comment count... + AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("comments")); +#endif + AddRSSEndHandler(RSSATOM_item_title_end, RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title")); + AddRSSEndHandler(ATOM_item_content_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("content")); + AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_ATOM|RSS_REQUIRE_BUF, HKEY("encoded")); + AddRSSEndHandler(ATOM_item_summary_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("summary")); + AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("description")); + AddRSSEndHandler(ATOM_item_published_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("published")); + AddRSSEndHandler(ATOM_item_updated_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("updated")); + AddRSSEndHandler(RSS_item_pubdate_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("pubdate")); + AddRSSEndHandler(RSS_item_date_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("date")); + AddRSSEndHandler(RSS_item_author_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("author")); + AddRSSEndHandler(RSS_item_creator_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("creator")); +/* */ + AddRSSEndHandler(ATOM_item_email_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("email")); + AddRSSEndHandler(ATOM_item_name_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("name")); + AddRSSEndHandler(ATOM_item_uri_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("uri")); +/* */ + AddRSSEndHandler(RSS_item_item_end, RSS_RSS, HKEY("item")); + AddRSSEndHandler(RSS_item_rss_end, RSS_RSS, HKEY("rss")); + AddRSSEndHandler(RSS_item_rdf_end, RSS_RSS, HKEY("rdf")); + AddRSSEndHandler(ATOM_item_entry_end, RSS_ATOM, HKEY("entry")); + + +/* at the start of atoms:
  • link to resource
  • ignore them. */ + AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("seq")); + AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("seq")); + AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("li")); + AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("li")); + +/* links to other feed generators... */ + AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("feedflare")); + AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("feedflare")); + AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("browserfriendly")); + AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("browserfriendly")); + + KnownNameSpaces = NewHash(1, NULL); + Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearch/1.1/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearchrss/1.0/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://backend.userland.com/creativeCommonsRssModule"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/atom/ns#"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/dc/elements/1.1/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/content/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/slash/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/syndication/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/syndication/thread/1.0"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://rssnamespace.org/feedburner/ext/1.0"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://schemas.google.com/g/2005"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://webns.net/mvcb/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://web.resource.org/cc/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://wellformedweb.org/CommentAPI/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.georss.org/georss"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/xhtml"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.w3.org/2003/01/geo/wgs84_pos#"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.w3.org/2005/Atom"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("urn:flickr:"), NULL, reference_free_handler); +#if 0 + /* we don't like these namespaces because of they shadow our usefull parameters. */ + Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler); +#endif + CtdlRegisterCleanupHook(rss_cleanup); } - /* return our Subversion id for the Log */ - return "$Id$"; + return "rssclient"; }