X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Fserv_rssclient.c;h=b991b4df13b2c372e794d36fcd98378d3af3ffb5;hb=4a8abab15d6662f2bca504e496013a42d7a65efd;hp=6d6deb610a3c79d93132d7059e6620a890c33e21;hpb=ad5e4c9b8532acf5de9ea425d1ecd725c5aa8271;p=citadel.git diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index 6d6deb610..b991b4df1 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -56,6 +56,7 @@ #include "md5.h" #include "context.h" + typedef struct rssnetcfg rssnetcfg; struct rssnetcfg { rssnetcfg *next; @@ -63,6 +64,7 @@ struct rssnetcfg { char *rooms; time_t last_error_when; int ItemType; + time_t next_poll; }; #define RSS_UNSET (1<<0) @@ -111,9 +113,10 @@ typedef struct _rsscollection { } rsscollection; struct rssnetcfg *rnclist = NULL; -HashList *StartHandlers; -HashList *EndHandlers; -HashList *KnownNameSpaces; +HashList *StartHandlers = NULL; +HashList *EndHandlers = NULL; +HashList *KnownNameSpaces = NULL; +HashList *UrlShorteners = NULL; void AddRSSStartHandler(rss_handler_func Handler, int Flags, const char *key, long len) { rss_xml_handler *h; @@ -131,7 +134,7 @@ void AddRSSEndHandler(rss_handler_func Handler, int Flags, const char *key, long Put(EndHandlers, key, len, h, NULL); } -///#if 0 +#if 0 //#ifdef HAVE_ICONV #include @@ -155,7 +158,7 @@ fill_encoding_info (const char *charset, XML_Encoding * info) { iconv_t cd = (iconv_t)(-1); int flag; - CtdlLogPrintf(0, "RSS: fill encoding info ...\n"); + syslog(LOG_EMERG, "RSS: fill encoding info ...\n"); #if G_BYTE_ORDER == G_LITTLE_ENDIAN cd = iconv_open ("UCS-2LE", charset); @@ -169,10 +172,10 @@ fill_encoding_info (const char *charset, XML_Encoding * info) } { - unsigned short out; + unsigned short out = 0; unsigned char buf[4]; unsigned int i0, i1, i2; - int result; + int result = 0; flag = 0; for (i0 = 0; i0 < 0x100; i0++) { @@ -233,7 +236,7 @@ iconv_convertor (void *data, const char *s) { XML_Encoding *info = data; int res; - CtdlLogPrintf(0, "RSS: Converting ...\n"); + syslog(LOG_EMERG, "RSS: Converting ...\n"); if (s == NULL) return -1; @@ -266,7 +269,7 @@ handle_unknown_xml_encoding (void *encodingHandleData, XML_Encoding * info) { int result; - CtdlLogPrintf(0, "RSS: unknown encoding ...\n"); + syslog(LOG_EMERG, "RSS: unknown encoding ...\n"); result = fill_encoding_info (name, info); if (result >= 0) { @@ -303,7 +306,213 @@ handle_unknown_xml_encoding (void *encodingHandleData, } ///#endif -//#endif +#endif +size_t GetLocationString( void *ptr, size_t size, size_t nmemb, void *userdata) +{ +#define LOCATION "location" + if (strncasecmp((char*)ptr, LOCATION, sizeof(LOCATION) - 1) == 0) + { + StrBuf *pURL = (StrBuf*) userdata; + char *pch = (char*) ptr; + char *pche; + + pche = pch + (size * nmemb); + pch += sizeof(LOCATION); + + while (isspace(*pch) || (*pch == ':')) + pch ++; + + while (isspace(*pche) || (*pche == '\0')) + pche--; + + FlushStrBuf(pURL); + StrBufPlain(pURL, pch, pche - pch + 1); + } + return size * nmemb; +} + +int LookupUrl(StrBuf *ShorterUrlStr) +{ + CURL *curl; + char errmsg[1024] = ""; + StrBuf *Answer; + int rc = 0; + + curl = curl_easy_init(); + if (!curl) { + syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); + return 0; + } + Answer = NewStrBufPlain(NULL, SIZ); + + curl_easy_setopt(curl, CURLOPT_URL, ChrPtr(ShorterUrlStr)); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, Answer); +// curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_libcurl_callback); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, CurlFillStrBuf_callback); + curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errmsg); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); +#ifdef CURLOPT_HTTP_CONTENT_DECODING + curl_easy_setopt(curl, CURLOPT_HTTP_CONTENT_DECODING, 1); + curl_easy_setopt(curl, CURLOPT_ENCODING, ""); +#endif + curl_easy_setopt(curl, CURLOPT_USERAGENT, CITADEL); + curl_easy_setopt(curl, CURLOPT_TIMEOUT, 180); /* die after 180 seconds */ + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 0); + + curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION , GetLocationString); + curl_easy_setopt(curl, CURLOPT_WRITEHEADER, ShorterUrlStr); + + + if ( + (!IsEmptyStr(config.c_ip_addr)) + && (strcmp(config.c_ip_addr, "*")) + && (strcmp(config.c_ip_addr, "::")) + && (strcmp(config.c_ip_addr, "0.0.0.0")) + ) { + curl_easy_setopt(curl, CURLOPT_INTERFACE, config.c_ip_addr); + } + + if (server_shutting_down) + goto shutdown ; + + rc = curl_easy_perform(curl); + if (rc) { + syslog(LOG_ALERT, "libcurl error %d: %s\n", rc, errmsg); + rc = 0; + } + else + rc = 1; + +shutdown: + FreeStrBuf(&Answer); + curl_easy_cleanup(curl); + + return rc; + +} + + + +void CrawlMessageForShorterUrls(HashList *pUrls, StrBuf *Message) +{ + int nHits = 0; + void *pv; + int nShorter = 0; + const char *pch; + const char *pUrl; + ConstStr *pCUrl; + + while (GetHash(UrlShorteners, IKEY(nShorter), &pv)) + { + nShorter++; + pch = ChrPtr(Message); + pUrl = strstr(pch, ChrPtr((StrBuf*)pv)); + while ((pUrl != NULL) && (nHits < 99)) + { + pCUrl = malloc(sizeof(ConstStr)); + + pCUrl->Key = pUrl; + pch = pUrl + StrLength((StrBuf*)pv); + while (isalnum(*pch)||(*pch == '-')||(*pch == '/')) + pch++; + pCUrl->len = pch - pCUrl->Key; + + Put(pUrls, IKEY(nHits), pCUrl, NULL); + nHits ++; + pUrl = strstr(pch, ChrPtr((StrBuf*)pv)); + } + } +} + +int SortConstStrByPosition(const void *Item1, const void *Item2) +{ + const ConstStr *p1, *p2; + p1 = (const ConstStr*) Item1; + p2 = (const ConstStr*) Item2; + if (p1->Key == p2->Key) + return 0; + if (p1->Key > p2->Key) + return 1; + return -1; +} + +void ExpandShortUrls(StrBuf *Message) +{ + StrBuf *Shadow; + HashList *pUrls; + ConstStr *pCUrl; + const char *pch; + const char *pche; + + /* we just suspect URL shorteners to be inside of feeds from twitter + * or other short content messages, so don't crawl through real blogs. + */ + if (StrLength(Message) > 500) + return; + + pUrls = NewHash(1, Flathash); + CrawlMessageForShorterUrls(pUrls, Message); + + if (GetCount(pUrls) > 0) + { + StrBuf *ShorterUrlStr; + HashPos *Pos; + const char *RetrKey; + void *pv; + long len; + + Shadow = NewStrBufPlain(NULL, StrLength(Message)); + SortByPayload (pUrls, SortConstStrByPosition); + + ShorterUrlStr = NewStrBufPlain(NULL, StrLength(Message)); + + pch = ChrPtr(Message); + pche = pch + StrLength(Message); + Pos = GetNewHashPos(pUrls, 1); + while (GetNextHashPos(pUrls, Pos, &len, &RetrKey, &pv)) + { + pCUrl = (ConstStr*) pv; + + if (pch != pCUrl->Key) + StrBufAppendBufPlain(Shadow, pch, pCUrl->Key - pch, 0); + + StrBufPlain(ShorterUrlStr, CKEY(*pCUrl)); + if (LookupUrl(ShorterUrlStr)) + { + StrBufAppendBufPlain(Shadow, HKEY(""), 0); + StrBufAppendBuf(Shadow, ShorterUrlStr, 0); + StrBufAppendBufPlain(Shadow, HKEY("["), 0); + StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0); + StrBufAppendBufPlain(Shadow, HKEY("]"), 0); + } + else + { + StrBufAppendBufPlain(Shadow, HKEY("Key, pCUrl->len, 0); + StrBufAppendBufPlain(Shadow, HKEY("\">"), 0); + StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0); + StrBufAppendBufPlain(Shadow, HKEY(""), 0); + } + pch = pCUrl->Key + pCUrl->len + 1; + + } + if (pch < pche) + StrBufAppendBufPlain(Shadow, pch, pche - pch, 0); + FlushStrBuf(Message); + StrBufAppendBuf(Message, Shadow, 0); + + FreeStrBuf(&ShorterUrlStr); + FreeStrBuf(&Shadow); + DeleteHashPos(&Pos); + } + + DeleteHash(&pUrls); +} + void AppendLink(StrBuf *Message, StrBuf *link, StrBuf *LinkTitle, const char *Title) { @@ -374,10 +583,10 @@ void rss_save_item(rss_item *ri) /* Find out if we've already seen this item */ cdbut = cdb_fetch(CDB_USETABLE, utmsgid, strlen(utmsgid)); -#ifdef DEBUG_RSS/////TODO ifndef +#ifndef DEBUG_RSS if (cdbut != NULL) { /* Item has already been seen */ - CtdlLogPrintf(CTDL_DEBUG, "%s has already been seen\n", utmsgid); + syslog(LOG_DEBUG, "%s has already been seen\n", utmsgid); cdb_free(cdbut); /* rewrite the record anyway, to update the timestamp */ @@ -385,11 +594,11 @@ void rss_save_item(rss_item *ri) ut.ut_timestamp = time(NULL); cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) ); } - else + else #endif { /* Item has not been seen, so save it. */ - CtdlLogPrintf(CTDL_DEBUG, "RSS: saving item...\n"); + syslog(LOG_DEBUG, "RSS: saving item...\n"); if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY("")); StrBufSpaceToBlank(ri->description); msg = malloc(sizeof(struct CtdlMessage)); @@ -404,60 +613,37 @@ void rss_save_item(rss_item *ri) if (ri->author_or_creator != NULL) { char *From; - StrBuf *Encoded, *QPEncoded; - StrBuf *UserName; - StrBuf *EmailAddress; - StrBuf *EncBuf; + StrBuf *Encoded = NULL; int FromAt; - UserName = NewStrBuf(); - EmailAddress = NewStrBuf(); - EncBuf = NewStrBuf(); -////TODO! From = html_to_ascii(ChrPtr(ri->author_or_creator), StrLength(ri->author_or_creator), 512, 0); - FromAt = strchr(From, '@') != NULL; + StrBufPlain(ri->author_or_creator, From, -1); + StrBufTrim(ri->author_or_creator); + free(From); + + FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL; if (!FromAt && StrLength (ri->author_email) > 0) { - Encoded = NewStrBuf(); - if (!IsEmptyStr(From)) - { - StrBufPrintf(Encoded, - "\"%s\" <%s>", - From, - ChrPtr(ri->author_email)); - } - else - { - StrBufPrintf(Encoded, - "<%s>", - ChrPtr(ri->author_email)); - } + StrBufRFC2047encode(&Encoded, ri->author_or_creator); + msg->cm_fields['A'] = SmashStrBuf(&Encoded); + msg->cm_fields['P'] = SmashStrBuf(&ri->author_email); } else { if (FromAt) - Encoded = NewStrBufPlain(From, -1); + { + msg->cm_fields['A'] = SmashStrBuf(&ri->author_or_creator); + msg->cm_fields['P'] = strdup(msg->cm_fields['A']); + } else { - Encoded = NewStrBuf(); - StrBufPrintf(Encoded, - "\"%s\" <%s>", - From, - "rss@localhost"); /// TODO: get hostname? + StrBufRFC2047encode(&Encoded, ri->author_or_creator); + msg->cm_fields['A'] = SmashStrBuf(&Encoded); + msg->cm_fields['P'] = strdup("rss@localhost"); } } - free(From); - StrBufTrim(Encoded); - QPEncoded = StrBufSanitizeEmailRecipientVector(Encoded, UserName, EmailAddress, EncBuf); - msg->cm_fields['A'] = SmashStrBuf(&QPEncoded); - - FreeStrBuf(&Encoded); - FreeStrBuf(&UserName); - FreeStrBuf(&EmailAddress); - FreeStrBuf(&EncBuf); - } else { msg->cm_fields['A'] = strdup("rss"); @@ -473,7 +659,13 @@ void rss_save_item(rss_item *ri) StrBufSpaceToBlank(ri->title); len = StrLength(ri->title); Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0); - Encoded = NewStrBufPlain(Sbj, -1); + len = strlen(Sbj); + if (Sbj[len - 1] == '\n') + { + len --; + Sbj[len] = '\0'; + } + Encoded = NewStrBufPlain(Sbj, len); free(Sbj); StrBufTrim(Encoded); @@ -482,8 +674,13 @@ void rss_save_item(rss_item *ri) msg->cm_fields['U'] = SmashStrBuf(&QPEncoded); FreeStrBuf(&Encoded); } + + if (ri->pubdate <= 0) { + ri->pubdate = time(NULL); + } msg->cm_fields['T'] = malloc(64); snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate); + if (ri->channel_title != NULL) { if (StrLength(ri->channel_title) > 0) { msg->cm_fields['O'] = strdup(ChrPtr(ri->channel_title)); @@ -491,6 +688,10 @@ void rss_save_item(rss_item *ri) } if (ri->link == NULL) ri->link = NewStrBufPlain(HKEY("")); +#ifdef EXPERIMENTAL_SHORTER_URLS +/* its rather hard to implement this libevent compatible, so we don't ship it. */ + ExpandShortUrls(ri->description); +#endif msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ; Message = NewStrBufPlain(NULL, StrLength(ri->description)); @@ -567,10 +768,14 @@ void flush_rss_item(rss_item *ri) FreeStrBuf(&ri->guid); FreeStrBuf(&ri->title); FreeStrBuf(&ri->link); + FreeStrBuf(&ri->linkTitle); + FreeStrBuf(&ri->reLink); + FreeStrBuf(&ri->reLinkTitle); + FreeStrBuf(&ri->description); + FreeStrBuf(&ri->channel_title); FreeStrBuf(&ri->author_or_creator); - FreeStrBuf(&ri->author_email); FreeStrBuf(&ri->author_url); - FreeStrBuf(&ri->description); + FreeStrBuf(&ri->author_email); } void rss_xml_start(void *data, const char *supplied_el, const char **attr) @@ -584,7 +789,7 @@ void rss_xml_start(void *data, const char *supplied_el, const char **attr) char *sep = NULL; /* Axe the namespace, we don't care about it */ -/// CtdlLogPrintf(0, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el); +/// syslog(LOG_EMERG, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el); pel = supplied_el; while (sep = strchr(pel, ':'), sep) { pel = sep + 1; @@ -600,7 +805,7 @@ void rss_xml_start(void *data, const char *supplied_el, const char **attr) &v)) { #ifdef DEBUG_RSS - CtdlLogPrintf(0, "RSS: START ignoring because of wrong namespace [%s] = [%s]\n", + syslog(LOG_EMERG, "RSS: START ignoring because of wrong namespace [%s] = [%s]\n", supplied_el); #endif return; @@ -630,12 +835,12 @@ void rss_xml_start(void *data, const char *supplied_el, const char **attr) } #ifdef DEBUG_RSS else - CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); + syslog(LOG_EMERG, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); #endif } #ifdef DEBUG_RSS else - CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); + syslog(LOG_EMERG, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); #endif } @@ -654,7 +859,7 @@ void rss_xml_end(void *data, const char *supplied_el) while (sep = strchr(pel, ':'), sep) { pel = sep + 1; } -// CtdlLogPrintf(0, "RSS: END %s...\n", el); +// syslog(LOG_EMERG, "RSS: END %s...\n", el); if (pel != supplied_el) { void *v; @@ -665,7 +870,7 @@ void rss_xml_end(void *data, const char *supplied_el) &v)) { #ifdef DEBUG_RSS - CtdlLogPrintf(0, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n", + syslog(LOG_EMERG, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n", supplied_el, ChrPtr(rssc->CData)); #endif FlushStrBuf(rssc->CData); @@ -696,12 +901,12 @@ void rss_xml_end(void *data, const char *supplied_el) } #ifdef DEBUG_RSS else - CtdlLogPrintf(0, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); + syslog(LOG_EMERG, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); #endif } #ifdef DEBUG_RSS else - CtdlLogPrintf(0, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); + syslog(LOG_EMERG, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); #endif FlushStrBuf(rssc->CData); rssc->Current = NULL; @@ -713,19 +918,19 @@ void rss_xml_end(void *data, const char *supplied_el) void RSS_item_rss_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) { - CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RSS feed.\n"); + syslog(LOG_DEBUG, "RSS: This is an RSS feed.\n"); Cfg->ItemType = RSS_RSS; } void RSS_item_rdf_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) { - CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RDF feed.\n"); + syslog(LOG_DEBUG, "RSS: This is an RDF feed.\n"); Cfg->ItemType = RSS_RSS; } void ATOM_item_feed_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) { - CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an ATOM feed.\n"); + syslog(LOG_DEBUG, "RSS: This is an ATOM feed.\n"); Cfg->ItemType = RSS_ATOM; } @@ -845,6 +1050,13 @@ void RSS_item_link_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char* StrBufTrim(ri->link); } } +void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +{ + if (StrLength(CData) > 0) { + NewStrBufDupAppendFlush(&ri->reLink, CData, NULL, 0); + StrBufTrim(ri->reLink); + } +} void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) { @@ -856,9 +1068,19 @@ void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const void ATOM_item_content_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) { - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); - StrBufTrim(ri->description); + long olen = StrLength (ri->description); + long clen = StrLength (CData); + if (clen > 0) + { + if (olen == 0) { + NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); + StrBufTrim(ri->description); + } + else if (olen < clen) { + FlushStrBuf(ri->description); + NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); + StrBufTrim(ri->description); + } } } void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) @@ -873,8 +1095,19 @@ void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const c void RSS_item_description_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) { - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); + long olen = StrLength (ri->description); + long clen = StrLength (CData); + if (clen > 0) + { + if (olen == 0) { + NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); + StrBufTrim(ri->description); + } + else if (olen < clen) { + FlushStrBuf(ri->description); + NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); + StrBufTrim(ri->description); + } } } @@ -972,13 +1205,13 @@ void ATOM_item_entry_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char void RSS_item_rss_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) { -// CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n"); +// syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n"); ri->done_parsing = 1; } void RSS_item_rdf_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) { -// CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n"); +// syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n"); ri->done_parsing = 1; } @@ -1026,7 +1259,7 @@ size_t rss_libcurl_callback(void *ptr, size_t size, size_t nmemb, void *stream) void rss_do_fetching(rssnetcfg *Cfg) { rsscollection rssc; rss_item ri; - XML_Parser xp; + XML_Parser xp = NULL; StrBuf *Answer; CURL *curl; @@ -1036,15 +1269,21 @@ void rss_do_fetching(rssnetcfg *Cfg) { const char *at; long len; + time_t now; + + now = time(NULL); + + if ((Cfg->next_poll != 0) && (now < Cfg->next_poll)) + return; memset(&ri, 0, sizeof(rss_item)); rssc.Item = &ri; rssc.Cfg = Cfg; - CtdlLogPrintf(CTDL_DEBUG, "Fetching RSS feed <%s>\n", Cfg->url); + syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", Cfg->url); curl = curl_easy_init(); if (!curl) { - CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n"); + syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); return; } Answer = NewStrBufPlain(NULL, SIZ); @@ -1072,21 +1311,21 @@ void rss_do_fetching(rssnetcfg *Cfg) { curl_easy_setopt(curl, CURLOPT_INTERFACE, config.c_ip_addr); } - if (CtdlThreadCheckStop()) + if (server_shutting_down) { curl_easy_cleanup(curl); return; } - if (CtdlThreadCheckStop()) + if (server_shutting_down) goto shutdown ; res = curl_easy_perform(curl); if (res) { - CtdlLogPrintf(CTDL_ALERT, "libcurl error %d: %s\n", res, errmsg); + syslog(LOG_ALERT, "libcurl error %d: %s\n", res, errmsg); } - if (CtdlThreadCheckStop()) + if (server_shutting_down) goto shutdown ; @@ -1119,7 +1358,7 @@ void rss_do_fetching(rssnetcfg *Cfg) { xp = XML_ParserCreateNS(ptr, ':'); if (!xp) { - CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n"); + syslog(LOG_ALERT, "Cannot create XML parser!\n"); goto shutdown; } FlushStrBuf(rssc.Key); @@ -1146,17 +1385,20 @@ void rss_do_fetching(rssnetcfg *Cfg) { XML_Parse(xp, "", 0, 1); - CtdlLogPrintf(CTDL_ALERT, "RSS: XML Status [%s] \n", + syslog(LOG_ALERT, "RSS: XML Status [%s] \n", XML_ErrorString( XML_GetErrorCode(xp))); shutdown: + FreeStrBuf(&Answer); curl_easy_cleanup(curl); XML_ParserFree(xp); flush_rss_item(&ri); FreeStrBuf(&rssc.CData); FreeStrBuf(&rssc.Key); + + Cfg->next_poll = time(NULL) + config.c_net_freq; } @@ -1177,7 +1419,7 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir); - if (CtdlThreadCheckStop()) + if (server_shutting_down) return; /* Only do net processing for rooms that have netconfigs */ @@ -1186,7 +1428,7 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) return; } - while (fgets(buf, sizeof buf, fp) != NULL && !CtdlThreadCheckStop()) { + while (fgets(buf, sizeof buf, fp) != NULL && !server_shutting_down) { buf[strlen(buf)-1] = 0; extract_token(instr, buf, 0, '|', sizeof instr); @@ -1208,6 +1450,7 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) /* Otherwise create a new client request */ if (use_this_rncptr == NULL) { rncptr = (rssnetcfg *) malloc(sizeof(rssnetcfg)); + memset(rncptr, 0, sizeof(rssnetcfg)); rncptr->ItemType = RSS_UNSET; if (rncptr != NULL) { rncptr->next = rnclist; @@ -1244,15 +1487,15 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) /* * Scan for rooms that have RSS client requests configured */ -void *rssclient_scan(void *args) { +void rssclient_scan(void) { static time_t last_run = 0L; static int doing_rssclient = 0; rssnetcfg *rptr = NULL; - CitContext rssclientCC; - /* Give this thread its own private CitContext */ - CtdlFillSystemContext(&rssclientCC, "rssclient"); - citthread_setspecific(MyConKey, (void *)&rssclientCC ); + /* Run no more than once every 15 minutes. */ + if ((time(NULL) - last_run) < 900) { + return; + } /* * This is a simple concurrency check to make sure only one rssclient run @@ -1260,13 +1503,13 @@ void *rssclient_scan(void *args) { * don't really require extremely fine granularity here, we'll do it * with a static variable instead. */ - if (doing_rssclient) return NULL; + if (doing_rssclient) return; doing_rssclient = 1; - CtdlLogPrintf(CTDL_DEBUG, "rssclient started\n"); + syslog(LOG_DEBUG, "rssclient started\n"); CtdlForEachRoom(rssclient_scan_room, NULL); - while (rnclist != NULL && !CtdlThreadCheckStop()) { + while (rnclist != NULL && !server_shutting_down) { rss_do_fetching(rnclist); rptr = rnclist; rnclist = rnclist->next; @@ -1274,101 +1517,152 @@ void *rssclient_scan(void *args) { free(rptr); } - CtdlLogPrintf(CTDL_DEBUG, "rssclient ended\n"); + syslog(LOG_DEBUG, "rssclient ended\n"); last_run = time(NULL); doing_rssclient = 0; - if (!CtdlThreadCheckStop()) - CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, last_run + config.c_net_freq); - else - CtdlLogPrintf(CTDL_DEBUG, "rssclient: Task STOPPED.\n"); - CtdlClearSystemContext(); - return NULL; + return; } +void LoadUrlShorteners(void) +{ + int i = 0; + int fd; + const char *POS = NULL; + const char *Err = NULL; + StrBuf *Content, *Line; + + + UrlShorteners = NewHash(0, Flathash); + + fd = open(file_citadel_urlshorteners, 0); + + if (fd != 0) + { + Content = NewStrBufPlain(NULL, SIZ); + Line = NewStrBuf(); + while (POS != StrBufNOTNULL) + { + StrBufTCP_read_buffered_line_fast (Line, Content, &POS, &fd, 1, 1, &Err); + StrBufTrim(Line); + if ((*ChrPtr(Line) != '#') && (StrLength(Line) > 0)) + { + Put(UrlShorteners, IKEY(i), Line, HFreeStrBuf); + i++; + Line = NewStrBuf(); + } + else + FlushStrBuf(Line); + if (POS == NULL) + POS = StrBufNOTNULL; + } + FreeStrBuf(&Line); + FreeStrBuf(&Content); + } + close(fd); +} + +void rss_cleanup(void) +{ + DeleteHash(&StartHandlers); + DeleteHash(&EndHandlers); + DeleteHash(&UrlShorteners); + DeleteHash(&KnownNameSpaces); +} CTDL_MODULE_INIT(rssclient) { if (threading) { - CtdlLogPrintf(CTDL_INFO, "%s\n", curl_version()); - CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, 0); + syslog(LOG_INFO, "%s\n", curl_version()); + CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER); } - - StartHandlers = NewHash(1, NULL); - EndHandlers = NewHash(1, NULL); - - AddRSSStartHandler(RSS_item_rss_start, RSS_UNSET, HKEY("rss")); - AddRSSStartHandler(RSS_item_rdf_start, RSS_UNSET, HKEY("rdf")); - AddRSSStartHandler(ATOM_item_feed_start, RSS_UNSET, HKEY("feed")); - AddRSSStartHandler(RSS_item_item_start, RSS_RSS, HKEY("item")); - AddRSSStartHandler(ATOM_item_entry_start, RSS_ATOM, HKEY("entry")); - AddRSSStartHandler(ATOM_item_link_start, RSS_ATOM, HKEY("link")); - - AddRSSEndHandler(ATOMRSS_item_title_end, RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title")); - AddRSSEndHandler(RSS_item_guid_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("guid")); - AddRSSEndHandler(ATOM_item_id_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("id")); - AddRSSEndHandler(RSS_item_link_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("link")); - AddRSSEndHandler(RSSATOM_item_title_end, RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title")); - AddRSSEndHandler(ATOM_item_content_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("content")); - AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("encoded")); - AddRSSEndHandler(ATOM_item_summary_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("summary")); - AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("description")); - AddRSSEndHandler(ATOM_item_published_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("published")); - AddRSSEndHandler(ATOM_item_updated_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("updated")); - AddRSSEndHandler(RSS_item_pubdate_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("pubdate")); - AddRSSEndHandler(RSS_item_date_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("date")); - AddRSSEndHandler(RSS_item_author_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("author")); - AddRSSEndHandler(RSS_item_creator_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("creator")); + else + { + LoadUrlShorteners (); + + StartHandlers = NewHash(1, NULL); + EndHandlers = NewHash(1, NULL); + + AddRSSStartHandler(RSS_item_rss_start, RSS_UNSET, HKEY("rss")); + AddRSSStartHandler(RSS_item_rdf_start, RSS_UNSET, HKEY("rdf")); + AddRSSStartHandler(ATOM_item_feed_start, RSS_UNSET, HKEY("feed")); + AddRSSStartHandler(RSS_item_item_start, RSS_RSS, HKEY("item")); + AddRSSStartHandler(ATOM_item_entry_start, RSS_ATOM, HKEY("entry")); + AddRSSStartHandler(ATOM_item_link_start, RSS_ATOM, HKEY("link")); + + AddRSSEndHandler(ATOMRSS_item_title_end, RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title")); + AddRSSEndHandler(RSS_item_guid_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("guid")); + AddRSSEndHandler(ATOM_item_id_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("id")); + AddRSSEndHandler(RSS_item_link_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("link")); +#if 0 +// hm, rss to the comments of that blog, might be interesting in future, but... + AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("commentrss")); +// comment count... + AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("comments")); +#endif + AddRSSEndHandler(RSSATOM_item_title_end, RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title")); + AddRSSEndHandler(ATOM_item_content_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("content")); + AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_ATOM|RSS_REQUIRE_BUF, HKEY("encoded")); + AddRSSEndHandler(ATOM_item_summary_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("summary")); + AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("description")); + AddRSSEndHandler(ATOM_item_published_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("published")); + AddRSSEndHandler(ATOM_item_updated_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("updated")); + AddRSSEndHandler(RSS_item_pubdate_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("pubdate")); + AddRSSEndHandler(RSS_item_date_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("date")); + AddRSSEndHandler(RSS_item_author_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("author")); + AddRSSEndHandler(RSS_item_creator_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("creator")); /* */ - AddRSSEndHandler(ATOM_item_email_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("email")); - AddRSSEndHandler(ATOM_item_name_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("name")); - AddRSSEndHandler(ATOM_item_uri_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("uri")); + AddRSSEndHandler(ATOM_item_email_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("email")); + AddRSSEndHandler(ATOM_item_name_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("name")); + AddRSSEndHandler(ATOM_item_uri_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("uri")); /* */ - AddRSSEndHandler(RSS_item_item_end, RSS_RSS, HKEY("item")); - AddRSSEndHandler(RSS_item_rss_end, RSS_RSS, HKEY("rss")); - AddRSSEndHandler(RSS_item_rdf_end, RSS_RSS, HKEY("rdf")); - AddRSSEndHandler(ATOM_item_entry_end, RSS_ATOM, HKEY("entry")); + AddRSSEndHandler(RSS_item_item_end, RSS_RSS, HKEY("item")); + AddRSSEndHandler(RSS_item_rss_end, RSS_RSS, HKEY("rss")); + AddRSSEndHandler(RSS_item_rdf_end, RSS_RSS, HKEY("rdf")); + AddRSSEndHandler(ATOM_item_entry_end, RSS_ATOM, HKEY("entry")); /* at the start of atoms:
  • link to resource
  • ignore them. */ - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("seq")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("seq")); - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("li")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("li")); + AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("seq")); + AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("seq")); + AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("li")); + AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("li")); /* links to other feed generators... */ - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("feedflare")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("feedflare")); - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("browserfriendly")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("browserfriendly")); - - KnownNameSpaces = NewHash(1, NULL); - Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearch/1.1/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearchrss/1.0/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://backend.userland.com/creativeCommonsRssModule"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/atom/ns#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/dc/elements/1.1/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/content/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/slash/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/syndication/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/syndication/thread/1.0"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://rssnamespace.org/feedburner/ext/1.0"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://schemas.google.com/g/2005"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://webns.net/mvcb/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://web.resource.org/cc/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://wellformedweb.org/CommentAPI/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.georss.org/georss"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/xhtml"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/2003/01/geo/wgs84_pos#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/2005/Atom"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("urn:flickr:"), NULL, reference_free_handler); + AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("feedflare")); + AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("feedflare")); + AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("browserfriendly")); + AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("browserfriendly")); + + KnownNameSpaces = NewHash(1, NULL); + Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearch/1.1/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearchrss/1.0/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://backend.userland.com/creativeCommonsRssModule"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/atom/ns#"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/dc/elements/1.1/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/content/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/slash/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/syndication/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://purl.org/syndication/thread/1.0"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://rssnamespace.org/feedburner/ext/1.0"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://schemas.google.com/g/2005"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://webns.net/mvcb/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://web.resource.org/cc/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://wellformedweb.org/CommentAPI/"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.georss.org/georss"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/xhtml"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.w3.org/2003/01/geo/wgs84_pos#"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("http://www.w3.org/2005/Atom"), NULL, reference_free_handler); + Put(KnownNameSpaces, HKEY("urn:flickr:"), NULL, reference_free_handler); #if 0 - /* we don't like these namespaces because of they shadow our usefull parameters. */ - Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler); + /* we don't like these namespaces because of they shadow our usefull parameters. */ + Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler); #endif + CtdlRegisterCleanupHook(rss_cleanup); + } return "rssclient"; }