X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Fserv_rssclient.c;h=b991b4df13b2c372e794d36fcd98378d3af3ffb5;hb=4a8abab15d6662f2bca504e496013a42d7a65efd;hp=341e8b02597b5cff03d7654d29b878b2fa7b7d12;hpb=ae60aa1708e79de5ad979c427d06f431ee1f6d0d;p=citadel.git diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index 341e8b025..b991b4df1 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -64,6 +64,7 @@ struct rssnetcfg { char *rooms; time_t last_error_when; int ItemType; + time_t next_poll; }; #define RSS_UNSET (1<<0) @@ -157,7 +158,7 @@ fill_encoding_info (const char *charset, XML_Encoding * info) { iconv_t cd = (iconv_t)(-1); int flag; - CtdlLogPrintf(0, "RSS: fill encoding info ...\n"); + syslog(LOG_EMERG, "RSS: fill encoding info ...\n"); #if G_BYTE_ORDER == G_LITTLE_ENDIAN cd = iconv_open ("UCS-2LE", charset); @@ -235,7 +236,7 @@ iconv_convertor (void *data, const char *s) { XML_Encoding *info = data; int res; - CtdlLogPrintf(0, "RSS: Converting ...\n"); + syslog(LOG_EMERG, "RSS: Converting ...\n"); if (s == NULL) return -1; @@ -268,7 +269,7 @@ handle_unknown_xml_encoding (void *encodingHandleData, XML_Encoding * info) { int result; - CtdlLogPrintf(0, "RSS: unknown encoding ...\n"); + syslog(LOG_EMERG, "RSS: unknown encoding ...\n"); result = fill_encoding_info (name, info); if (result >= 0) { @@ -339,7 +340,7 @@ int LookupUrl(StrBuf *ShorterUrlStr) curl = curl_easy_init(); if (!curl) { - CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n"); + syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); return 0; } Answer = NewStrBufPlain(NULL, SIZ); @@ -373,18 +374,19 @@ int LookupUrl(StrBuf *ShorterUrlStr) curl_easy_setopt(curl, CURLOPT_INTERFACE, config.c_ip_addr); } - if (CtdlThreadCheckStop()) + if (server_shutting_down) goto shutdown ; rc = curl_easy_perform(curl); if (rc) { - CtdlLogPrintf(CTDL_ALERT, "libcurl error %d: %s\n", rc, errmsg); + syslog(LOG_ALERT, "libcurl error %d: %s\n", rc, errmsg); rc = 0; } else rc = 1; shutdown: + FreeStrBuf(&Answer); curl_easy_cleanup(curl); return rc; @@ -457,7 +459,7 @@ void ExpandShortUrls(StrBuf *Message) { StrBuf *ShorterUrlStr; HashPos *Pos; - const char *Key; + const char *RetrKey; void *pv; long len; @@ -469,7 +471,7 @@ void ExpandShortUrls(StrBuf *Message) pch = ChrPtr(Message); pche = pch + StrLength(Message); Pos = GetNewHashPos(pUrls, 1); - while (GetNextHashPos(pUrls, Pos, &len, &Key, &pv)) + while (GetNextHashPos(pUrls, Pos, &len, &RetrKey, &pv)) { pCUrl = (ConstStr*) pv; @@ -584,7 +586,7 @@ void rss_save_item(rss_item *ri) #ifndef DEBUG_RSS if (cdbut != NULL) { /* Item has already been seen */ - CtdlLogPrintf(CTDL_DEBUG, "%s has already been seen\n", utmsgid); + syslog(LOG_DEBUG, "%s has already been seen\n", utmsgid); cdb_free(cdbut); /* rewrite the record anyway, to update the timestamp */ @@ -596,7 +598,7 @@ void rss_save_item(rss_item *ri) #endif { /* Item has not been seen, so save it. */ - CtdlLogPrintf(CTDL_DEBUG, "RSS: saving item...\n"); + syslog(LOG_DEBUG, "RSS: saving item...\n"); if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY("")); StrBufSpaceToBlank(ri->description); msg = malloc(sizeof(struct CtdlMessage)); @@ -631,7 +633,10 @@ void rss_save_item(rss_item *ri) else { if (FromAt) - msg->cm_fields['P'] = SmashStrBuf(&ri->author_or_creator); + { + msg->cm_fields['A'] = SmashStrBuf(&ri->author_or_creator); + msg->cm_fields['P'] = strdup(msg->cm_fields['A']); + } else { StrBufRFC2047encode(&Encoded, ri->author_or_creator); @@ -669,8 +674,13 @@ void rss_save_item(rss_item *ri) msg->cm_fields['U'] = SmashStrBuf(&QPEncoded); FreeStrBuf(&Encoded); } + + if (ri->pubdate <= 0) { + ri->pubdate = time(NULL); + } msg->cm_fields['T'] = malloc(64); snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate); + if (ri->channel_title != NULL) { if (StrLength(ri->channel_title) > 0) { msg->cm_fields['O'] = strdup(ChrPtr(ri->channel_title)); @@ -678,7 +688,10 @@ void rss_save_item(rss_item *ri) } if (ri->link == NULL) ri->link = NewStrBufPlain(HKEY("")); +#ifdef EXPERIMENTAL_SHORTER_URLS +/* its rather hard to implement this libevent compatible, so we don't ship it. */ ExpandShortUrls(ri->description); +#endif msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ; Message = NewStrBufPlain(NULL, StrLength(ri->description)); @@ -755,10 +768,14 @@ void flush_rss_item(rss_item *ri) FreeStrBuf(&ri->guid); FreeStrBuf(&ri->title); FreeStrBuf(&ri->link); + FreeStrBuf(&ri->linkTitle); + FreeStrBuf(&ri->reLink); + FreeStrBuf(&ri->reLinkTitle); + FreeStrBuf(&ri->description); + FreeStrBuf(&ri->channel_title); FreeStrBuf(&ri->author_or_creator); - FreeStrBuf(&ri->author_email); FreeStrBuf(&ri->author_url); - FreeStrBuf(&ri->description); + FreeStrBuf(&ri->author_email); } void rss_xml_start(void *data, const char *supplied_el, const char **attr) @@ -772,7 +789,7 @@ void rss_xml_start(void *data, const char *supplied_el, const char **attr) char *sep = NULL; /* Axe the namespace, we don't care about it */ -/// CtdlLogPrintf(0, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el); +/// syslog(LOG_EMERG, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el); pel = supplied_el; while (sep = strchr(pel, ':'), sep) { pel = sep + 1; @@ -788,7 +805,7 @@ void rss_xml_start(void *data, const char *supplied_el, const char **attr) &v)) { #ifdef DEBUG_RSS - CtdlLogPrintf(0, "RSS: START ignoring because of wrong namespace [%s] = [%s]\n", + syslog(LOG_EMERG, "RSS: START ignoring because of wrong namespace [%s] = [%s]\n", supplied_el); #endif return; @@ -818,12 +835,12 @@ void rss_xml_start(void *data, const char *supplied_el, const char **attr) } #ifdef DEBUG_RSS else - CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); + syslog(LOG_EMERG, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); #endif } #ifdef DEBUG_RSS else - CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); + syslog(LOG_EMERG, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); #endif } @@ -842,7 +859,7 @@ void rss_xml_end(void *data, const char *supplied_el) while (sep = strchr(pel, ':'), sep) { pel = sep + 1; } -// CtdlLogPrintf(0, "RSS: END %s...\n", el); +// syslog(LOG_EMERG, "RSS: END %s...\n", el); if (pel != supplied_el) { void *v; @@ -853,7 +870,7 @@ void rss_xml_end(void *data, const char *supplied_el) &v)) { #ifdef DEBUG_RSS - CtdlLogPrintf(0, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n", + syslog(LOG_EMERG, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n", supplied_el, ChrPtr(rssc->CData)); #endif FlushStrBuf(rssc->CData); @@ -884,12 +901,12 @@ void rss_xml_end(void *data, const char *supplied_el) } #ifdef DEBUG_RSS else - CtdlLogPrintf(0, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); + syslog(LOG_EMERG, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); #endif } #ifdef DEBUG_RSS else - CtdlLogPrintf(0, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); + syslog(LOG_EMERG, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); #endif FlushStrBuf(rssc->CData); rssc->Current = NULL; @@ -901,19 +918,19 @@ void rss_xml_end(void *data, const char *supplied_el) void RSS_item_rss_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) { - CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RSS feed.\n"); + syslog(LOG_DEBUG, "RSS: This is an RSS feed.\n"); Cfg->ItemType = RSS_RSS; } void RSS_item_rdf_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) { - CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RDF feed.\n"); + syslog(LOG_DEBUG, "RSS: This is an RDF feed.\n"); Cfg->ItemType = RSS_RSS; } void ATOM_item_feed_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) { - CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an ATOM feed.\n"); + syslog(LOG_DEBUG, "RSS: This is an ATOM feed.\n"); Cfg->ItemType = RSS_ATOM; } @@ -1188,13 +1205,13 @@ void ATOM_item_entry_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char void RSS_item_rss_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) { -// CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n"); +// syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n"); ri->done_parsing = 1; } void RSS_item_rdf_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) { -// CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n"); +// syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n"); ri->done_parsing = 1; } @@ -1252,15 +1269,21 @@ void rss_do_fetching(rssnetcfg *Cfg) { const char *at; long len; + time_t now; + + now = time(NULL); + + if ((Cfg->next_poll != 0) && (now < Cfg->next_poll)) + return; memset(&ri, 0, sizeof(rss_item)); rssc.Item = &ri; rssc.Cfg = Cfg; - CtdlLogPrintf(CTDL_DEBUG, "Fetching RSS feed <%s>\n", Cfg->url); + syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", Cfg->url); curl = curl_easy_init(); if (!curl) { - CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n"); + syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); return; } Answer = NewStrBufPlain(NULL, SIZ); @@ -1288,21 +1311,21 @@ void rss_do_fetching(rssnetcfg *Cfg) { curl_easy_setopt(curl, CURLOPT_INTERFACE, config.c_ip_addr); } - if (CtdlThreadCheckStop()) + if (server_shutting_down) { curl_easy_cleanup(curl); return; } - if (CtdlThreadCheckStop()) + if (server_shutting_down) goto shutdown ; res = curl_easy_perform(curl); if (res) { - CtdlLogPrintf(CTDL_ALERT, "libcurl error %d: %s\n", res, errmsg); + syslog(LOG_ALERT, "libcurl error %d: %s\n", res, errmsg); } - if (CtdlThreadCheckStop()) + if (server_shutting_down) goto shutdown ; @@ -1335,7 +1358,7 @@ void rss_do_fetching(rssnetcfg *Cfg) { xp = XML_ParserCreateNS(ptr, ':'); if (!xp) { - CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n"); + syslog(LOG_ALERT, "Cannot create XML parser!\n"); goto shutdown; } FlushStrBuf(rssc.Key); @@ -1362,17 +1385,20 @@ void rss_do_fetching(rssnetcfg *Cfg) { XML_Parse(xp, "", 0, 1); - CtdlLogPrintf(CTDL_ALERT, "RSS: XML Status [%s] \n", + syslog(LOG_ALERT, "RSS: XML Status [%s] \n", XML_ErrorString( XML_GetErrorCode(xp))); shutdown: + FreeStrBuf(&Answer); curl_easy_cleanup(curl); XML_ParserFree(xp); flush_rss_item(&ri); FreeStrBuf(&rssc.CData); FreeStrBuf(&rssc.Key); + + Cfg->next_poll = time(NULL) + config.c_net_freq; } @@ -1393,7 +1419,7 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir); - if (CtdlThreadCheckStop()) + if (server_shutting_down) return; /* Only do net processing for rooms that have netconfigs */ @@ -1402,7 +1428,7 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) return; } - while (fgets(buf, sizeof buf, fp) != NULL && !CtdlThreadCheckStop()) { + while (fgets(buf, sizeof buf, fp) != NULL && !server_shutting_down) { buf[strlen(buf)-1] = 0; extract_token(instr, buf, 0, '|', sizeof instr); @@ -1424,6 +1450,7 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) /* Otherwise create a new client request */ if (use_this_rncptr == NULL) { rncptr = (rssnetcfg *) malloc(sizeof(rssnetcfg)); + memset(rncptr, 0, sizeof(rssnetcfg)); rncptr->ItemType = RSS_UNSET; if (rncptr != NULL) { rncptr->next = rnclist; @@ -1465,6 +1492,11 @@ void rssclient_scan(void) { static int doing_rssclient = 0; rssnetcfg *rptr = NULL; + /* Run no more than once every 15 minutes. */ + if ((time(NULL) - last_run) < 900) { + return; + } + /* * This is a simple concurrency check to make sure only one rssclient run * is done at a time. We could do this with a mutex, but since we @@ -1474,10 +1506,10 @@ void rssclient_scan(void) { if (doing_rssclient) return; doing_rssclient = 1; - CtdlLogPrintf(CTDL_DEBUG, "rssclient started\n"); + syslog(LOG_DEBUG, "rssclient started\n"); CtdlForEachRoom(rssclient_scan_room, NULL); - while (rnclist != NULL && !CtdlThreadCheckStop()) { + while (rnclist != NULL && !server_shutting_down) { rss_do_fetching(rnclist); rptr = rnclist; rnclist = rnclist->next; @@ -1485,7 +1517,7 @@ void rssclient_scan(void) { free(rptr); } - CtdlLogPrintf(CTDL_DEBUG, "rssclient ended\n"); + syslog(LOG_DEBUG, "rssclient ended\n"); last_run = time(NULL); doing_rssclient = 0; return; @@ -1529,11 +1561,19 @@ void LoadUrlShorteners(void) close(fd); } +void rss_cleanup(void) +{ + DeleteHash(&StartHandlers); + DeleteHash(&EndHandlers); + DeleteHash(&UrlShorteners); + DeleteHash(&KnownNameSpaces); +} + CTDL_MODULE_INIT(rssclient) { if (threading) { - CtdlLogPrintf(CTDL_INFO, "%s\n", curl_version()); + syslog(LOG_INFO, "%s\n", curl_version()); CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER); } else @@ -1622,6 +1662,7 @@ CTDL_MODULE_INIT(rssclient) /* we don't like these namespaces because of they shadow our usefull parameters. */ Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler); #endif + CtdlRegisterCleanupHook(rss_cleanup); } return "rssclient"; }