X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Fserv_rssclient.c;h=c2dfa85e1ad1149aff91919df492ff68edade82a;hb=03cd7985fad9d51530f613304869f3b7960b9f81;hp=5f1357603df48ffdb838948c6f301a4e5fa308a6;hpb=65fdf25a39d0ab07270233c1059c53e437c98639;p=citadel.git diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index 5f1357603..c2dfa85e1 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -1,7 +1,7 @@ /* * Bring external RSS feeds into rooms. * - * Copyright (c) 2007-2012 by the citadel.org team + * Copyright (c) 2007-2016 by the citadel.org team * * This program is open source software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 3. @@ -73,26 +73,6 @@ int RSSClientDebugEnabled = 0; #define DBGLOG(LEVEL) if ((LEVEL != LOG_DEBUG) || (RSSClientDebugEnabled != 0)) -#define EVRSSC_syslog(LEVEL, FORMAT, ...) \ - DBGLOG(LEVEL) syslog(LEVEL, \ - "IO[%ld]CC[%d][%ld]RSS" FORMAT, \ - IO->ID, CCID, N, __VA_ARGS__) - -#define EVRSSCM_syslog(LEVEL, FORMAT) \ - DBGLOG(LEVEL) syslog(LEVEL, \ - "IO[%ld]CC[%d][%ld]RSS" FORMAT, \ - IO->ID, CCID, N) - -#define EVRSSQ_syslog(LEVEL, FORMAT, ...) \ - DBGLOG(LEVEL) syslog(LEVEL, "RSS" FORMAT, \ - __VA_ARGS__) -#define EVRSSQM_syslog(LEVEL, FORMAT) \ - DBGLOG(LEVEL) syslog(LEVEL, "RSS" FORMAT) - -#define EVRSSCSM_syslog(LEVEL, FORMAT) \ - DBGLOG(LEVEL) syslog(LEVEL, "IO[%ld][%ld]RSS" FORMAT, \ - IO->ID, N) - typedef enum _RSSState { eRSSCreated, eRSSFetching, @@ -108,13 +88,47 @@ ConstStr RSSStates[] = { {HKEY("checking usetable")} }; + +static size_t GetLocationString( void *ptr, size_t size, size_t nmemb, void *userdata) +{ +#define LOCATION "location" + if (strncasecmp((char*)ptr, LOCATION, sizeof(LOCATION) - 1) == 0) + { + AsyncIO *IO = (AsyncIO *) userdata; + rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; + + char *pch = (char*) ptr; + char *pche; + + pche = pch + (size * nmemb); + pch += sizeof(LOCATION); + + while (isspace(*pch) || (*pch == ':')) + pch ++; + + while (isspace(*pche) || (*pche == '\0')) + pche--; + if (RSSAggr->RedirectUrl == NULL) { + RSSAggr->RedirectUrl = NewStrBufPlain(pch, pche - pch + 1); + } + else { + FlushStrBuf(RSSAggr->RedirectUrl); + StrBufPlain(RSSAggr->RedirectUrl, pch, pche - pch + 1); + } + } + return size * nmemb; +} + + static void SetRSSState(AsyncIO *IO, RSSState State) { CitContext* CCC = IO->CitContext; - if (CCC != NULL) + if (CCC != NULL) { memcpy(CCC->cs_clientname, RSSStates[State].Key, RSSStates[State].len + 1); + } } + void DeleteRoomReference(long QRnumber) { HashPos *At; @@ -187,10 +201,12 @@ void DeleteRssCfg(void *vptr) rss_aggregator *RSSAggr = (rss_aggregator *)vptr; AsyncIO *IO = &RSSAggr->IO; - if (IO->CitContext != NULL) - EVRSSCM_syslog(LOG_DEBUG, "RSS: destroying\n"); + if (IO->CitContext != NULL) { + syslog(LOG_DEBUG, "RSS: destroying\n"); + } FreeStrBuf(&RSSAggr->Url); + FreeStrBuf(&RSSAggr->RedirectUrl); FreeStrBuf(&RSSAggr->rooms); FreeStrBuf(&RSSAggr->CData); FreeStrBuf(&RSSAggr->Key); @@ -218,7 +234,7 @@ eNextState RSSAggregator_Terminate(AsyncIO *IO) { rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; - EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); + syslog(LOG_DEBUG, "RSS: Terminating."); StopCurlWatchers(IO); UnlinkRSSAggregator(RSSAggr); @@ -229,7 +245,7 @@ eNextState RSSAggregator_TerminateDB(AsyncIO *IO) { rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; - EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); + syslog(LOG_DEBUG, "RSS: Terminating."); StopDBWatchers(&RSSAggr->IO); @@ -246,7 +262,7 @@ eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO) if (pUrl == NULL) pUrl = ""; - EVRSSC_syslog(LOG_DEBUG, "RSS: Aborting by shutdown: %s.\n", pUrl); + syslog(LOG_DEBUG, "RSS: Aborting by shutdown: %s.", pUrl); StopCurlWatchers(IO); UnlinkRSSAggregator(RSSAggr); @@ -274,11 +290,24 @@ void AppendLink(StrBuf *Message, } -void rss_format_item(networker_save_message *SaveMsg) +int rss_format_item(AsyncIO *IO, networker_save_message *SaveMsg) { StrBuf *Message; int msglen = 0; + if (StrLength(SaveMsg->description) + + StrLength(SaveMsg->link) + + StrLength(SaveMsg->linkTitle) + + StrLength(SaveMsg->reLink) + + StrLength(SaveMsg->reLinkTitle) + + StrLength(SaveMsg->title) == 0) + { + syslog(LOG_INFO, "Refusing to save empty message."); + return 0; + } + + CM_Flush(&SaveMsg->Msg); + if (SaveMsg->author_or_creator != NULL) { char *From; @@ -296,36 +325,31 @@ void rss_format_item(networker_save_message *SaveMsg) if (!FromAt && StrLength (SaveMsg->author_email) > 0) { StrBufRFC2047encode(&Encoded, SaveMsg->author_or_creator); - SaveMsg->Msg.cm_fields['A'] = SmashStrBuf(&Encoded); - SaveMsg->Msg.cm_fields['P'] = - SmashStrBuf(&SaveMsg->author_email); + CM_SetAsFieldSB(&SaveMsg->Msg, eAuthor, &Encoded); + CM_SetAsFieldSB(&SaveMsg->Msg, eMessagePath, &SaveMsg->author_email); } else { if (FromAt) { - SaveMsg->Msg.cm_fields['A'] = - SmashStrBuf(&SaveMsg->author_or_creator); - SaveMsg->Msg.cm_fields['P'] = - strdup(SaveMsg->Msg.cm_fields['A']); + CM_SetAsFieldSB(&SaveMsg->Msg, eAuthor, &SaveMsg->author_or_creator); + CM_CopyField(&SaveMsg->Msg, eMessagePath, eAuthor); } else { StrBufRFC2047encode(&Encoded, SaveMsg->author_or_creator); - SaveMsg->Msg.cm_fields['A'] = - SmashStrBuf(&Encoded); - SaveMsg->Msg.cm_fields['P'] = - strdup("rss@localhost"); + CM_SetAsFieldSB(&SaveMsg->Msg, eAuthor, &Encoded); + CM_SetField(&SaveMsg->Msg, eMessagePath, HKEY("rss@localhost")); } } } else { - SaveMsg->Msg.cm_fields['A'] = strdup("rss"); + CM_SetField(&SaveMsg->Msg, eAuthor, HKEY("rss")); } - SaveMsg->Msg.cm_fields['N'] = strdup(NODENAME); + CM_SetField(&SaveMsg->Msg, eNodeName, CtdlGetConfigStr("c_nodename"), strlen(CtdlGetConfigStr("c_nodename"))); if (SaveMsg->title != NULL) { long len; char *Sbj; @@ -335,20 +359,25 @@ void rss_format_item(networker_save_message *SaveMsg) StrBufSpaceToBlank(SaveMsg->title); len = StrLength(SaveMsg->title); Sbj = html_to_ascii(ChrPtr(SaveMsg->title), len, 512, 0); - len = strlen(Sbj); - if ((len > 0) && (Sbj[len - 1] == '\n')) - { - len --; - Sbj[len] = '\0'; - } - Encoded = NewStrBufPlain(Sbj, len); - free(Sbj); - - StrBufTrim(Encoded); - StrBufRFC2047encode(&QPEncoded, Encoded); + if (!IsEmptyStr(Sbj)) { + len = strlen(Sbj); + if ((Sbj[len - 1] == '\n')) + { + len --; + Sbj[len] = '\0'; + } + Encoded = NewStrBufPlain(Sbj, len); + - SaveMsg->Msg.cm_fields['U'] = SmashStrBuf(&QPEncoded); - FreeStrBuf(&Encoded); + StrBufTrim(Encoded); + StrBufRFC2047encode(&QPEncoded, Encoded); + + CM_SetAsFieldSB(&SaveMsg->Msg, eMsgSubject, &QPEncoded); + FreeStrBuf(&Encoded); + } + if (Sbj != NULL) { + free(Sbj); + } } if (SaveMsg->link == NULL) SaveMsg->link = NewStrBufPlain(HKEY("")); @@ -375,8 +404,8 @@ void rss_format_item(networker_save_message *SaveMsg) AppendLink(Message, SaveMsg->reLink, SaveMsg->reLinkTitle, "Reply to this"); StrBufAppendBufPlain(Message, HKEY("\n"), 0); - SaveMsg->Message = Message; + return 1; } eNextState RSSSaveMessage(AsyncIO *IO) @@ -385,16 +414,17 @@ eNextState RSSSaveMessage(AsyncIO *IO) const char *Key; rss_aggregator *RSSAggr = (rss_aggregator *) IO->Data; - rss_format_item(RSSAggr->ThisMsg); - - RSSAggr->ThisMsg->Msg.cm_fields['M'] = - SmashStrBuf(&RSSAggr->ThisMsg->Message); - - CtdlSubmitMsg(&RSSAggr->ThisMsg->Msg, &RSSAggr->recp, NULL, 0); - - /* write the uidl to the use table so we don't store this item again */ + if (rss_format_item(IO, RSSAggr->ThisMsg)) + { + CM_SetAsFieldSB(&RSSAggr->ThisMsg->Msg, eMesageText, + &RSSAggr->ThisMsg->Message); - CheckIfAlreadySeen("RSS Item Insert", RSSAggr->ThisMsg->MsgGUID, IO->Now, 0, eWrite, IO->ID, CCID); + CtdlSubmitMsg(&RSSAggr->ThisMsg->Msg, &RSSAggr->recp, NULL, 0); + + /* write the uidl to the use table so we don't store this item again */ + + CheckIfAlreadySeen("RSS Item Insert", RSSAggr->ThisMsg->MsgGUID, EvGetNow(IO), 0, eWrite, CCID, IO->ID); + } if (GetNextHashPos(RSSAggr->Messages, RSSAggr->Pos, @@ -407,26 +437,27 @@ eNextState RSSSaveMessage(AsyncIO *IO) eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO) { + static const time_t antiExpire = USETABLE_ANTIEXPIRE_HIRES; +#ifndef DEBUG_RSS + time_t seenstamp = 0; const char *Key; long len; rss_aggregator *Ctx = (rss_aggregator *) IO->Data; /* Find out if we've already seen this item */ // todo: expiry? -#ifndef DEBUG_RSS SetRSSState(IO, eRSSUT); - if (CheckIfAlreadySeen("RSS Item Seen", - Ctx->ThisMsg->MsgGUID, - IO->Now, - IO->Now - USETABLE_ANTIEXPIRE, - eCheckUpdate, - IO->ID, CCID) - != 0) + seenstamp = CheckIfAlreadySeen("RSS Item Seen", + Ctx->ThisMsg->MsgGUID, + EvGetNow(IO), + antiExpire, + eCheckUpdate, + CCID, IO->ID); + if (seenstamp != 0) { /* Item has already been seen */ - EVRSSC_syslog(LOG_DEBUG, - "%s has already been seen\n", - ChrPtr(Ctx->ThisMsg->MsgGUID)); + syslog(LOG_DEBUG, "%s has already been seen - %ld < %ld", ChrPtr(Ctx->ThisMsg->MsgGUID), seenstamp, antiExpire); + SetRSSState(IO, eRSSParsing); if (GetNextHashPos(Ctx->Messages, @@ -442,6 +473,11 @@ eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO) else #endif { + /* Item has already been seen */ + syslog(LOG_DEBUG, + "%s Parsing - %ld >= %ld", + ChrPtr(Ctx->ThisMsg->MsgGUID), + seenstamp, antiExpire); SetRSSState(IO, eRSSParsing); NextDBOperation(IO, RSSSaveMessage); @@ -452,9 +488,9 @@ eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO) void UpdateLastKnownGood(pRSSConfig *pCfg, time_t now) { - OneRoomNetCfg* pRNCfg; + OneRoomNetCfg *pRNCfg; begin_critical_section(S_NETCONFIGS); - pRNCfg = CtdlGetNetCfgForRoom (pCfg->QRnumber); + pRNCfg = CtdlGetNetCfgForRoom(pCfg->QRnumber); if (pRNCfg != NULL) { RSSCfgLine *RSSCfg = (RSSCfgLine *)pRNCfg->NetConfigs[rssclient]; @@ -468,11 +504,11 @@ void UpdateLastKnownGood(pRSSConfig *pCfg, time_t now) } if (RSSCfg != NULL) { - pRNCfg->changed = 1; RSSCfg->last_known_good = now; } } - + SaveRoomNetConfigFile(pRNCfg, pCfg->QRnumber); + FreeRoomNetworkStruct(&pRNCfg); end_critical_section(S_NETCONFIGS); } @@ -487,7 +523,8 @@ eNextState RSSAggregator_AnalyseReply(AsyncIO *IO) StrBuf *guid; rss_aggregator *Ctx = (rss_aggregator *) IO->Data; - if (IO->HttpReq.httpcode != 200) + + if ((IO->HttpReq.httpcode >= 300) && (IO->HttpReq.httpcode < 400) && (Ctx->RedirectUrl != NULL)) { StrBuf *ErrMsg; long lens[2]; @@ -495,34 +532,96 @@ eNextState RSSAggregator_AnalyseReply(AsyncIO *IO) SetRSSState(IO, eRSSFailure); ErrMsg = NewStrBuf(); - EVRSSC_syslog(LOG_ALERT, "need a 200, got a %ld !\n", - IO->HttpReq.httpcode); + if (IO) { + syslog(LOG_INFO, "need a 200, got a %ld !", IO->HttpReq.httpcode); + } + strs[0] = ChrPtr(Ctx->Url); + lens[0] = StrLength(Ctx->Url); + + strs[1] = ChrPtr(Ctx->rooms); + lens[1] = StrLength(Ctx->rooms); + + if (IO->HttpReq.CurlError == NULL) + IO->HttpReq.CurlError = ""; + + StrBufPrintf(ErrMsg, + "Error while RSS-Aggregation Run of %s\n" + " need a 200, got a %ld !\n" + " Curl Error message: \n%s / %s\n" + " Redirect header points to: %s\n" + " Response text was: \n" + " \n %s\n", + ChrPtr(Ctx->Url), + IO->HttpReq.httpcode, + IO->HttpReq.errdesc, + IO->HttpReq.CurlError, + ChrPtr(Ctx->RedirectUrl), + ChrPtr(IO->HttpReq.ReplyData) + ); + + CtdlAideFPMessage( + ChrPtr(ErrMsg), + "RSS Aggregation run failure", + 2, strs, (long*) &lens, + CCID, + IO->ID, + EvGetNow(IO)); + FreeStrBuf(&ErrMsg); + syslog(LOG_DEBUG, + "RSS feed returned an invalid http status code. <%s>", + ChrPtr(Ctx->Url), + IO->HttpReq.httpcode + ); + return eAbort; + } + else if (IO->HttpReq.httpcode != 200) + { + StrBuf *ErrMsg; + long lens[2]; + const char *strs[2]; + + SetRSSState(IO, eRSSFailure); + ErrMsg = NewStrBuf(); + if (IO) { + syslog(LOG_INFO, "need a 200, got a %ld !", IO->HttpReq.httpcode); + } strs[0] = ChrPtr(Ctx->Url); lens[0] = StrLength(Ctx->Url); strs[1] = ChrPtr(Ctx->rooms); lens[1] = StrLength(Ctx->rooms); + + if (IO->HttpReq.CurlError == NULL) + IO->HttpReq.CurlError = ""; + StrBufPrintf(ErrMsg, "Error while RSS-Aggregation Run of %s\n" " need a 200, got a %ld !\n" + " Curl Error message: \n%s / %s\n" " Response text was: \n" " \n %s\n", ChrPtr(Ctx->Url), IO->HttpReq.httpcode, - ChrPtr(IO->HttpReq.ReplyData)); + IO->HttpReq.errdesc, + IO->HttpReq.CurlError, + ChrPtr(IO->HttpReq.ReplyData) + ); + CtdlAideFPMessage( ChrPtr(ErrMsg), "RSS Aggregation run failure", 2, strs, (long*) &lens, - IO->Now, - IO->ID, CCID); + CCID, + IO->ID, + EvGetNow(IO)); FreeStrBuf(&ErrMsg); - EVRSSC_syslog(LOG_DEBUG, - "RSS feed returned an invalid http status code. <%s>\n", + syslog(LOG_DEBUG, + "RSS feed returned an invalid http status code. <%s>", ChrPtr(Ctx->Url), - IO->HttpReq.httpcode); + IO->HttpReq.httpcode + ); return eAbort; } @@ -530,7 +629,7 @@ eNextState RSSAggregator_AnalyseReply(AsyncIO *IO) while (pCfg != NULL) { - UpdateLastKnownGood (pCfg, IO->Now); + UpdateLastKnownGood (pCfg, EvGetNow(IO)); if ((Ctx->roomlist_parts > 1) && (it == NULL)) { @@ -572,15 +671,15 @@ eNextState RSSAggregator_AnalyseReply(AsyncIO *IO) if (CheckIfAlreadySeen("RSS Whole", guid, - IO->Now, - IO->Now - USETABLE_ANTIEXPIRE, - eCheckUpdate, - IO->ID, CCID) + EvGetNow(IO), + EvGetNow(IO) - USETABLE_ANTIEXPIRE, + eUpdate, + CCID, IO->ID) != 0) { FreeStrBuf(&guid); - EVRSSC_syslog(LOG_DEBUG, "RSS feed already seen. <%s>\n", ChrPtr(Ctx->Url)); + syslog(LOG_DEBUG, "RSS feed already seen. <%s>", ChrPtr(Ctx->Url)); return eAbort; } FreeStrBuf(&guid); @@ -591,7 +690,6 @@ eNextState RSSAggregator_AnalyseReply(AsyncIO *IO) eNextState RSSAggregator_FinishHttp(AsyncIO *IO) { - StopCurlWatchers(IO); return CurlQueueDBOperation(IO, RSSAggregator_AnalyseReply); } @@ -603,6 +701,9 @@ int rss_do_fetching(rss_aggregator *RSSAggr) AsyncIO *IO = &RSSAggr->IO; rss_item *ri; time_t now; + CURLcode sta; + CURL *chnd; + now = time(NULL); @@ -621,16 +722,19 @@ int rss_do_fetching(rss_aggregator *RSSAggr) RSSAggregator_TerminateDB, RSSAggregator_ShutdownAbort)) { - EVRSSCM_syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); + syslog(LOG_INFO, "Unable to initialize libcurl."); return 0; } + chnd = IO->HttpReq.chnd; + OPT(HEADERDATA, IO); + OPT(HEADERFUNCTION, GetLocationString); SetRSSState(IO, eRSSCreated); safestrncpy(((CitContext*)RSSAggr->IO.CitContext)->cs_host, ChrPtr(RSSAggr->Url), sizeof(((CitContext*)RSSAggr->IO.CitContext)->cs_host)); - EVRSSC_syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(RSSAggr->Url)); + syslog(LOG_DEBUG, "Fetching RSS feed <%s>", ChrPtr(RSSAggr->Url)); ParseURL(&RSSAggr->IO.ConnectMe, RSSAggr->Url, 80); CurlPrepareURL(RSSAggr->IO.ConnectMe); @@ -649,11 +753,12 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data, OneRoomNetCfg *OneR rss_aggregator *use_this_RSSAggr = NULL; void *vptr; + syslog(LOG_DEBUG, "rssclient_scan_room(%s)", qrbuf->QRname); pthread_mutex_lock(&RSSQueueMutex); if (GetHash(RSSQueueRooms, LKEY(qrbuf->QRnumber), &vptr)) { - EVRSSQ_syslog(LOG_DEBUG, - "rssclient: [%ld] %s already in progress.\n", + syslog(LOG_DEBUG, + "rssclient: [%ld] %s already in progress.", qrbuf->QRnumber, qrbuf->QRname); pthread_mutex_unlock(&RSSQueueMutex); @@ -743,7 +848,7 @@ void rssclient_scan(void) { /* Run no more than once every 15 minutes. */ if ((now - last_run) < 900) { - EVRSSQ_syslog(LOG_DEBUG, + syslog(LOG_DEBUG, "Client: polling interval not yet reached; last run was %ldm%lds ago", ((now - last_run) / 60), ((now - last_run) % 60) @@ -761,7 +866,7 @@ void rssclient_scan(void) { pthread_mutex_unlock(&RSSQueueMutex); if ((RSSRoomCount > 0) || (RSSCount > 0)) { - EVRSSQ_syslog(LOG_DEBUG, + syslog(LOG_DEBUG, "rssclient: concurrency check failed; %d rooms and %d url's are queued", RSSRoomCount, RSSCount ); @@ -769,13 +874,13 @@ void rssclient_scan(void) { } become_session(&rss_CC); - EVRSSQM_syslog(LOG_DEBUG, "rssclient started"); + syslog(LOG_DEBUG, "rssclient started"); CtdlForEachNetCfgRoom(rssclient_scan_room, NULL, rssclient); if (GetCount(RSSFetchUrls) > 0) { pthread_mutex_lock(&RSSQueueMutex); - EVRSSQ_syslog(LOG_DEBUG, + syslog(LOG_DEBUG, "rssclient starting %d Clients", GetCount(RSSFetchUrls)); @@ -790,10 +895,11 @@ void rssclient_scan(void) { DeleteHashPos(&it); pthread_mutex_unlock(&RSSQueueMutex); } - else - EVRSSQM_syslog(LOG_DEBUG, "Nothing to do."); + else { + syslog(LOG_DEBUG, "Nothing to do."); + } - EVRSSQM_syslog(LOG_DEBUG, "rssclient ended\n"); + syslog(LOG_DEBUG, "rssclient ended"); return; }