X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Fserv_rssclient.c;h=909587d6284f21d50852117f38870c56317196b4;hb=58f686487cf5f14d5da5357c67f2e6624dbde027;hp=00c32971db81763ada94e14fdb363cac8b173a9a;hpb=271924aeff4c786b31ec293ab48c861f7fe77bd0;p=citadel.git diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index 00c32971d..909587d62 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -1,7 +1,7 @@ /* * Bring external RSS feeds into rooms. * - * Copyright (c) 2007-2012 by the citadel.org team + * Copyright (c) 2007-2017 by the citadel.org team * * This program is open source software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 3. @@ -48,484 +48,169 @@ #include "citadel_dirs.h" #include "md5.h" #include "context.h" -#include "event_client.h" #include "rss_atom_parser.h" +struct rssroom { + struct rssroom *next; + char *room; +}; -#define TMP_MSGDATA 0xFF -#define TMP_SHORTER_URL_OFFSET 0xFE -#define TMP_SHORTER_URLS 0xFD +struct rssurl { + struct rssurl *next; + char *url; + struct rssroom *rooms; +}; -time_t last_run = 0L; - -pthread_mutex_t RSSQueueMutex; /* locks the access to the following vars: */ -HashList *RSSQueueRooms = NULL; /* rss_room_counter */ -HashList *RSSFetchUrls = NULL; /*->rss_aggregator;->RefCount access locked*/ -eNextState RSSAggregator_Terminate(AsyncIO *IO); -eNextState RSSAggregator_TerminateDB(AsyncIO *IO); -eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO); +time_t last_run = 0L; struct CitContext rss_CC; +struct rssurl *rsstodo = NULL; -struct rssnetcfg *rnclist = NULL; -int RSSClientDebugEnabled = 0; -#define N ((rss_aggregator*)IO->Data)->QRnumber - -#define DBGLOG(LEVEL) if ((LEVEL != LOG_DEBUG) || (RSSClientDebugEnabled != 0)) - -#define EVRSSC_syslog(LEVEL, FORMAT, ...) 
\ - DBGLOG(LEVEL) syslog(LEVEL, \ - "IO[%ld]CC[%d][%ld]RSS" FORMAT, \ - IO->ID, CCID, N, __VA_ARGS__) - -#define EVRSSCM_syslog(LEVEL, FORMAT) \ - DBGLOG(LEVEL) syslog(LEVEL, \ - "IO[%ld]CC[%d][%ld]RSS" FORMAT, \ - IO->ID, CCID, N) - -#define EVRSSQ_syslog(LEVEL, FORMAT, ...) \ - DBGLOG(LEVEL) syslog(LEVEL, "RSS" FORMAT, \ - __VA_ARGS__) -#define EVRSSQM_syslog(LEVEL, FORMAT) \ - DBGLOG(LEVEL) syslog(LEVEL, "RSS" FORMAT) -#define EVRSSCSM_syslog(LEVEL, FORMAT) \ - DBGLOG(LEVEL) syslog(LEVEL, "IO[%ld][%ld]RSS" FORMAT, \ - IO->ID, N) - -void DeleteRoomReference(long QRnumber) +// Add a feed/room pair into the todo list +// +void rssclient_push_todo(char *rssurl, char *roomname) { - HashPos *At; - long HKLen; - const char *HK; - void *vData = NULL; - rss_room_counter *pRoomC; + struct rssurl *r = NULL; + struct rssurl *thisone = NULL; + struct rssroom *newroom = NULL; - At = GetNewHashPos(RSSQueueRooms, 0); + syslog(LOG_DEBUG, "rssclient_push_todo(%s, %s)", rssurl, roomname); - if (GetHashPosFromKey(RSSQueueRooms, LKEY(QRnumber), At)) - { - GetHashPos(RSSQueueRooms, At, &HKLen, &HK, &vData); - if (vData != NULL) - { - pRoomC = (rss_room_counter *) vData; - pRoomC->count --; - if (pRoomC->count == 0) - DeleteEntryFromHash(RSSQueueRooms, At); + for (r=rsstodo; r!=NULL; r=r->next) { + if (!strcasecmp(r->url, rssurl)) { + thisone = r; } } - DeleteHashPos(&At); -} - -void UnlinkRooms(rss_aggregator *RSSAggr) -{ - DeleteRoomReference(RSSAggr->QRnumber); - if (RSSAggr->OtherQRnumbers != NULL) - { - long HKLen; - const char *HK; - HashPos *At; - void *vData; - - At = GetNewHashPos(RSSAggr->OtherQRnumbers, 0); - while (! 
server_shutting_down && - GetNextHashPos(RSSAggr->OtherQRnumbers, - At, - &HKLen, &HK, - &vData) && - (vData != NULL)) - { - long *lData = (long*) vData; - DeleteRoomReference(*lData); - } - - DeleteHashPos(&At); + if (thisone == NULL) { + thisone = malloc(sizeof(struct rssurl)); + thisone->url = strdup(rssurl); + thisone->rooms = NULL; + thisone->next = rsstodo; + rsstodo = thisone; } -} -void UnlinkRSSAggregator(rss_aggregator *RSSAggr) -{ - HashPos *At; - - pthread_mutex_lock(&RSSQueueMutex); - UnlinkRooms(RSSAggr); - - At = GetNewHashPos(RSSFetchUrls, 0); - if (GetHashPosFromKey(RSSFetchUrls, SKEY(RSSAggr->Url), At)) - { - DeleteEntryFromHash(RSSFetchUrls, At); - } - DeleteHashPos(&At); - last_run = time(NULL); - pthread_mutex_unlock(&RSSQueueMutex); + newroom = malloc(sizeof(struct rssroom)); + newroom->room = strdup(roomname); + newroom->next = thisone->rooms; + thisone->rooms = newroom; } -void DeleteRssCfg(void *vptr) -{ - rss_aggregator *RSSAggr = (rss_aggregator *)vptr; - AsyncIO *IO = &RSSAggr->IO; - EVRSSCM_syslog(LOG_DEBUG, "RSS: destroying\n"); - - FreeStrBuf(&RSSAggr->Url); - FreeStrBuf(&RSSAggr->rooms); - FreeStrBuf(&RSSAggr->CData); - FreeStrBuf(&RSSAggr->Key); - DeleteHash(&RSSAggr->OtherQRnumbers); - - DeleteHashPos (&RSSAggr->Pos); - DeleteHash (&RSSAggr->Messages); - if (RSSAggr->recp.recp_room != NULL) - free(RSSAggr->recp.recp_room); - - if (RSSAggr->Item != NULL) - { - flush_rss_item(RSSAggr->Item); - - free(RSSAggr->Item); - } - - FreeAsyncIOContents(&RSSAggr->IO); - memset(RSSAggr, 0, sizeof(rss_aggregator)); - free(RSSAggr); -} - -eNextState RSSAggregator_Terminate(AsyncIO *IO) +// Callback function for curl +// +size_t rss_pof_write_data(void *buffer, size_t size, size_t nmemb, void *userp) { - rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; - - EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); - - StopCurlWatchers(IO); - UnlinkRSSAggregator(RSSAggr); - return eAbort; + StrBuf *Downloaded = (StrBuf *)userp; + size_t bytes = size * 
nmemb; + StrBufAppendBufPlain(Downloaded, buffer, bytes, 0); + return(bytes); } -eNextState RSSAggregator_TerminateDB(AsyncIO *IO) + +// pull one feed (possibly multiple rooms) +// +void rss_pull_one_feed(struct rssurl *url) { - rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; + struct rssroom *r; + CURL *curl; + CURLcode res; + StrBuf *Downloaded = NULL; - EVRSSCM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); + syslog(LOG_DEBUG, "rss_pull_one_feed(%s)", url->url); + curl = curl_easy_init(); + if (!curl) { + return; + } - StopDBWatchers(&RSSAggr->IO); - UnlinkRSSAggregator(RSSAggr); - return eAbort; -} + Downloaded = NewStrBuf(); -eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO) -{ - const char *pUrl; - rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data; - - pUrl = IO->ConnectMe->PlainUrl; - if (pUrl == NULL) - pUrl = ""; + curl_easy_setopt(curl, CURLOPT_URL, url->url); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); // Follow redirects + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_pof_write_data); // What to do with downloaded data + curl_easy_setopt(curl, CURLOPT_WRITEDATA, Downloaded); // Give it our StrBuf to work with + curl_easy_setopt(curl, CURLOPT_TIMEOUT, 20L); // Time out after 20 seconds + res = curl_easy_perform(curl); // Perform the request + if (res != CURLE_OK) { + syslog(LOG_WARNING, "Failed to load feed: %s", curl_easy_strerror(res)); + } + curl_easy_cleanup(curl); - EVRSSC_syslog(LOG_DEBUG, "RSS: Aborting by shutdown: %s.\n", pUrl); + // FIXME parse the feed, dummeh ... 
it's in ChrPtr(Downloaded) + for (r=url->rooms; r!=NULL; r=r->next) { + syslog(LOG_DEBUG, "Saving item to %s", r->room); + // FIXME save to rooms + } - UnlinkRSSAggregator(RSSAggr); - return eAbort; + FreeStrBuf(&Downloaded); } -eNextState RSSSaveMessage(AsyncIO *IO) -{ - long len; - const char *Key; - rss_aggregator *RSSAggr = (rss_aggregator *) IO->Data; - - RSSAggr->ThisMsg->Msg.cm_fields['M'] = - SmashStrBuf(&RSSAggr->ThisMsg->Message); - - CtdlSubmitMsg(&RSSAggr->ThisMsg->Msg, &RSSAggr->recp, NULL, 0); - - /* write the uidl to the use table so we don't store this item again */ - cdb_store(CDB_USETABLE, - SKEY(RSSAggr->ThisMsg->MsgGUID), - &RSSAggr->ThisMsg->ut, - sizeof(struct UseTable) ); - - if (GetNextHashPos(RSSAggr->Messages, - RSSAggr->Pos, - &len, &Key, - (void**) &RSSAggr->ThisMsg)) - return NextDBOperation(IO, RSS_FetchNetworkUsetableEntry); - else - return eAbort; -} -eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO) +// We have a list, now download the feeds +// +void rss_pull_feeds(void) { - const char *Key; - long len; - struct cdbdata *cdbut; - rss_aggregator *Ctx = (rss_aggregator *) IO->Data; - - /* Find out if we've already seen this item */ - strcpy(Ctx->ThisMsg->ut.ut_msgid, - ChrPtr(Ctx->ThisMsg->MsgGUID)); /// TODO - Ctx->ThisMsg->ut.ut_timestamp = time(NULL); - - cdbut = cdb_fetch(CDB_USETABLE, SKEY(Ctx->ThisMsg->MsgGUID)); -#ifndef DEBUG_RSS - if (cdbut != NULL) { - /* Item has already been seen */ - EVRSSC_syslog(LOG_DEBUG, - "%s has already been seen\n", - ChrPtr(Ctx->ThisMsg->MsgGUID)); - cdb_free(cdbut); - - /* rewrite the record anyway, to update the timestamp */ - cdb_store(CDB_USETABLE, - SKEY(Ctx->ThisMsg->MsgGUID), - &Ctx->ThisMsg->ut, sizeof(struct UseTable) ); - - if (GetNextHashPos(Ctx->Messages, - Ctx->Pos, - &len, &Key, - (void**) &Ctx->ThisMsg)) - return NextDBOperation( - IO, - RSS_FetchNetworkUsetableEntry); - else - return eAbort; - } - else -#endif - { - NextDBOperation(IO, RSSSaveMessage); - return eSendMore; + 
struct rssurl *r; + struct rssroom *rr; + + while (rsstodo != NULL) { + rss_pull_one_feed(rsstodo); + r = rsstodo; + rsstodo = rsstodo->next; + while (r->rooms != NULL) { + rr = r->rooms; + r->rooms = r->rooms->next; + free(rr->room); + free(rr); + } + free(r->url); + free(r); } } -/* - * Begin a feed parse - */ -int rss_do_fetching(rss_aggregator *RSSAggr) -{ - AsyncIO *IO = &RSSAggr->IO; - rss_item *ri; - time_t now; - - now = time(NULL); - - if ((RSSAggr->next_poll != 0) && (now < RSSAggr->next_poll)) - return 0; - - ri = (rss_item*) malloc(sizeof(rss_item)); - memset(ri, 0, sizeof(rss_item)); - RSSAggr->Item = ri; - - if (! InitcURLIOStruct(&RSSAggr->IO, - RSSAggr, - "Citadel RSS Client", - RSSAggregator_ParseReply, - RSSAggregator_Terminate, - RSSAggregator_TerminateDB, - RSSAggregator_ShutdownAbort)) - { - EVRSSCM_syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); - return 0; - } - safestrncpy(((CitContext*)RSSAggr->IO.CitContext)->cs_host, - ChrPtr(RSSAggr->Url), - sizeof(((CitContext*)RSSAggr->IO.CitContext)->cs_host)); - - EVRSSC_syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(RSSAggr->Url)); - ParseURL(&RSSAggr->IO.ConnectMe, RSSAggr->Url, 80); - CurlPrepareURL(RSSAggr->IO.ConnectMe); - - QueueCurlContext(&RSSAggr->IO); - return 1; -} - -/* - * Scan a room's netconfig to determine whether it is requesting any RSS feeds - */ +// Scan a room's netconfig looking for RSS feed parsing requests +// void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) { - StrBuf *CfgData=NULL; - StrBuf *CfgType; - StrBuf *Line; - rss_room_counter *Count = NULL; - struct stat statbuf; - char filename[PATH_MAX]; - int fd; - int Done; - rss_aggregator *RSSAggr = NULL; - rss_aggregator *use_this_RSSAggr = NULL; - void *vptr; - const char *CfgPtr, *lPtr; - const char *Err; - - pthread_mutex_lock(&RSSQueueMutex); - if (GetHash(RSSQueueRooms, LKEY(qrbuf->QRnumber), &vptr)) - { - EVRSSQ_syslog(LOG_DEBUG, - "rssclient: [%ld] %s already in progress.\n", - qrbuf->QRnumber, 
- qrbuf->QRname); - pthread_mutex_unlock(&RSSQueueMutex); - return; - } - pthread_mutex_unlock(&RSSQueueMutex); - - assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir); - - if (server_shutting_down) - return; - - /* Only do net processing for rooms that have netconfigs */ - fd = open(filename, 0); - if (fd <= 0) { - /* syslog(LOG_DEBUG, - "rssclient: %s no config.\n", - qrbuf->QRname); */ - return; - } - - if (server_shutting_down) - return; - - if (fstat(fd, &statbuf) == -1) { - EVRSSQ_syslog(LOG_DEBUG, - "ERROR: could not stat configfile '%s' - %s\n", - filename, - strerror(errno)); - return; - } - - if (server_shutting_down) - return; - - CfgData = NewStrBufPlain(NULL, statbuf.st_size + 1); + char *serialized_config = NULL; + int num_configs = 0; + char cfgline[SIZ]; + int i = 0; - if (StrBufReadBLOB(CfgData, &fd, 1, statbuf.st_size, &Err) < 0) { - close(fd); - FreeStrBuf(&CfgData); - EVRSSQ_syslog(LOG_ERR, "ERROR: reading config '%s' - %s
\n", - filename, strerror(errno)); + serialized_config = LoadRoomNetConfigFile(qrbuf->QRnumber); + if (!serialized_config) { return; } - close(fd); - if (server_shutting_down) - return; - CfgPtr = NULL; - CfgType = NewStrBuf(); - Line = NewStrBufPlain(NULL, StrLength(CfgData)); - Done = 0; - while (!Done) - { - Done = StrBufSipLine(Line, CfgData, &CfgPtr) == 0; - if (StrLength(Line) > 0) - { - lPtr = NULL; - StrBufExtract_NextToken(CfgType, Line, &lPtr, '|'); - if (!strcasecmp("rssclient", ChrPtr(CfgType))) - { - if (Count == NULL) - { - Count = malloc( - sizeof(rss_room_counter)); - Count->count = 0; - } - Count->count ++; - RSSAggr = (rss_aggregator *) malloc( - sizeof(rss_aggregator)); - - memset (RSSAggr, 0, sizeof(rss_aggregator)); - RSSAggr->QRnumber = qrbuf->QRnumber; - RSSAggr->roomlist_parts = 1; - RSSAggr->Url = NewStrBuf(); - - StrBufExtract_NextToken(RSSAggr->Url, - Line, - &lPtr, - '|'); - - pthread_mutex_lock(&RSSQueueMutex); - GetHash(RSSFetchUrls, - SKEY(RSSAggr->Url), - &vptr); - - use_this_RSSAggr = (rss_aggregator *)vptr; - if (use_this_RSSAggr != NULL) - { - long *QRnumber; - StrBufAppendBufPlain( - use_this_RSSAggr->rooms, - qrbuf->QRname, - -1, 0); - if (use_this_RSSAggr->roomlist_parts==1) - { - use_this_RSSAggr->OtherQRnumbers - = NewHash(1, lFlathash); - } - QRnumber = (long*)malloc(sizeof(long)); - *QRnumber = qrbuf->QRnumber; - Put(use_this_RSSAggr->OtherQRnumbers, - LKEY(qrbuf->QRnumber), - QRnumber, - NULL); - use_this_RSSAggr->roomlist_parts++; - - pthread_mutex_unlock(&RSSQueueMutex); - - FreeStrBuf(&RSSAggr->Url); - free(RSSAggr); - RSSAggr = NULL; - continue; - } - pthread_mutex_unlock(&RSSQueueMutex); - - RSSAggr->ItemType = RSS_UNSET; - - RSSAggr->rooms = NewStrBufPlain( - qrbuf->QRname, -1); - - pthread_mutex_lock(&RSSQueueMutex); - - Put(RSSFetchUrls, - SKEY(RSSAggr->Url), - RSSAggr, - DeleteRssCfg); - - pthread_mutex_unlock(&RSSQueueMutex); + num_configs = num_tokens(serialized_config, '\n'); + for (i=0; i<num_configs; ++i) { + extract_token(cfgline, serialized_config, i, '\n', sizeof cfgline); + if (!strncasecmp(cfgline, HKEY("rssclient|"))) { + strcpy(cfgline, &cfgline[10]); + char *vbar = strchr(cfgline, '|'); + if (vbar != NULL) { + *vbar = 0; + } + rssclient_push_todo(cfgline, qrbuf->QRname); } } - if
(Count != NULL) - { - Count->QRnumber = qrbuf->QRnumber; - pthread_mutex_lock(&RSSQueueMutex); - EVRSSQ_syslog(LOG_DEBUG, "client: [%ld] %s now starting.\n", - qrbuf->QRnumber, qrbuf->QRname); - Put(RSSQueueRooms, LKEY(qrbuf->QRnumber), Count, NULL); - pthread_mutex_unlock(&RSSQueueMutex); - } - FreeStrBuf(&CfgData); - FreeStrBuf(&CfgType); - FreeStrBuf(&Line); + + free(serialized_config); } + /* * Scan for rooms that have RSS client requests configured */ void rssclient_scan(void) { - int RSSRoomCount, RSSCount; - rss_aggregator *rptr = NULL; - void *vrptr = NULL; - HashPos *it; - long len; - const char *Key; time_t now = time(NULL); /* Run no more than once every 15 minutes. */ if ((now - last_run) < 900) { - EVRSSQ_syslog(LOG_DEBUG, + syslog(LOG_DEBUG, "Client: polling interval not yet reached; last run was %ldm%lds ago", ((now - last_run) / 60), ((now - last_run) % 60) @@ -533,68 +218,27 @@ void rssclient_scan(void) { return; } - /* - * This is a simple concurrency check to make sure only one rssclient - * run is done at a time. 
- */ - pthread_mutex_lock(&RSSQueueMutex); - RSSCount = GetCount(RSSFetchUrls); - RSSRoomCount = GetCount(RSSQueueRooms); - pthread_mutex_unlock(&RSSQueueMutex); - - if ((RSSRoomCount > 0) || (RSSCount > 0)) { - EVRSSQ_syslog(LOG_DEBUG, - "rssclient: concurrency check failed; %d rooms and %d url's are queued", - RSSRoomCount, RSSCount - ); - return; - } - become_session(&rss_CC); - EVRSSQM_syslog(LOG_DEBUG, "rssclient started\n"); + syslog(LOG_DEBUG, "rssclient started"); CtdlForEachRoom(rssclient_scan_room, NULL); - - pthread_mutex_lock(&RSSQueueMutex); - - it = GetNewHashPos(RSSFetchUrls, 0); - while (!server_shutting_down && - GetNextHashPos(RSSFetchUrls, it, &len, &Key, &vrptr) && - (vrptr != NULL)) { - rptr = (rss_aggregator *)vrptr; - if (!rss_do_fetching(rptr)) - UnlinkRSSAggregator(rptr); - } - DeleteHashPos(&it); - pthread_mutex_unlock(&RSSQueueMutex); - - EVRSSQM_syslog(LOG_DEBUG, "rssclient ended\n"); + rss_pull_feeds(); + syslog(LOG_DEBUG, "rssclient ended"); + last_run = time(NULL); return; } -void rss_cleanup(void) -{ - /* citthread_mutex_destroy(&RSSQueueMutex); TODO */ - DeleteHash(&RSSFetchUrls); - DeleteHash(&RSSQueueRooms); -} - -void LogDebugEnableRSSClient(const int n) -{ - RSSClientDebugEnabled = n; -} CTDL_MODULE_INIT(rssclient) { - if (threading) + if (!threading) + { + syslog(LOG_INFO, "%s", curl_version()); + CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER, PRIO_AGGR + 300); + } + else { CtdlFillSystemContext(&rss_CC, "rssclient"); - pthread_mutex_init(&RSSQueueMutex, NULL); - RSSQueueRooms = NewHash(1, lFlathash); - RSSFetchUrls = NewHash(1, NULL); - syslog(LOG_INFO, "%s\n", curl_version()); - CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER); - CtdlRegisterEVCleanupHook(rss_cleanup); - CtdlRegisterDebugFlagHook(HKEY("rssclient"), LogDebugEnableRSSClient, &RSSClientDebugEnabled); } return "rssclient"; } +