X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Fserv_rssclient.c;h=530bed03fe6f689b8bb96eb6af547744f980f33c;hb=c855d497545dad80942a194624c111a54cd1fdc7;hp=b2cd3747f92ccfad92bbe97544b0fdbdd507882b;hpb=c06d1f8100fedb4b90c04ab6aefca49262d7a48e;p=citadel.git diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index b2cd3747f..530bed03f 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -48,264 +48,35 @@ #include "config.h" #include "threads.h" #include "ctdl_module.h" -#include "clientsocket.h" #include "msgbase.h" #include "parsedate.h" #include "database.h" #include "citadel_dirs.h" #include "md5.h" #include "context.h" +#include "event_client.h" +#include "rss_atom_parser.h" -typedef struct rssnetcfg rssnetcfg; -struct rssnetcfg { - rssnetcfg *next; - char url[256]; - char *rooms; - time_t last_error_when; - int ItemType; -}; - -#define RSS_UNSET (1<<0) -#define RSS_RSS (1<<1) -#define RSS_ATOM (1<<2) -#define RSS_REQUIRE_BUF (1<<3) - -typedef struct _rss_item { - char *roomlist; - int done_parsing; - StrBuf *guid; - StrBuf *title; - StrBuf *link; - StrBuf *linkTitle; - StrBuf *reLink; - StrBuf *reLinkTitle; - StrBuf *description; - time_t pubdate; - StrBuf *channel_title; - int item_tag_nesting; - StrBuf *author_or_creator; - StrBuf *author_url; - StrBuf *author_email; -}rss_item; - - -typedef void (*rss_handler_func)(StrBuf *CData, - rss_item *ri, - rssnetcfg *Cfg, - const char** Attr); - -typedef struct __rss_xml_handler { - int Flags; - rss_handler_func Handler; -}rss_xml_handler; - - -typedef struct _rsscollection { - StrBuf *CData; - StrBuf *Key; - - rss_item *Item; - rssnetcfg *Cfg; - - rss_xml_handler *Current; -} rsscollection; -struct rssnetcfg *rnclist = NULL; -HashList *StartHandlers; -HashList *EndHandlers; -HashList *KnownNameSpaces; -void AddRSSStartHandler(rss_handler_func Handler, int Flags, const char *key, long len) -{ - rss_xml_handler *h; - h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler)); - h->Flags = Flags; - h->Handler = Handler; - Put(StartHandlers, key, len, h, NULL); -} -void AddRSSEndHandler(rss_handler_func Handler, int Flags, const char *key, long len) -{ - rss_xml_handler *h; - h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler)); - h->Flags = Flags; - h->Handler = Handler; - Put(EndHandlers, key, len, h, NULL); -} +#define TMP_MSGDATA 0xFF +#define TMP_SHORTER_URL_OFFSET 0xFE +#define TMP_SHORTER_URLS 0xFD -///#if 0 -//#ifdef HAVE_ICONV -#include +time_t last_run = 0L; +pthread_mutex_t RSSQueueMutex; /* locks the access to the following vars: */ +HashList *RSSQueueRooms = NULL; /* rss_room_counter */ +HashList *RSSFetchUrls = NULL; /*->rss_aggregator;->RefCount access locked*/ -/* - * dug this out of the trashcan of the midgard project, lets see if it works for us. - * original code by Alexander Bokovoy distributed under GPL V2 or later - */ +eNextState RSSAggregator_Terminate(AsyncIO *IO); +eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO); +struct CitContext rss_CC; -/* Returns: - >= 0 - successfull, 0 means conversion doesn't use multibyte sequences - -1 - error during iconv_open call - -2 - error during iconv_close call - ---------------------------------- - This function expects that multibyte encoding in 'charset' wouldn't have - characters with more than 3 bytes. It is not intended to convert UTF-8 because - we'll never receive UTF-8 in our handler (it is handled by Exat itself). -*/ -static int -fill_encoding_info (const char *charset, XML_Encoding * info) -{ - iconv_t cd = (iconv_t)(-1); - int flag; - CtdlLogPrintf(0, "RSS: fill encoding info ...\n"); - -#if G_BYTE_ORDER == G_LITTLE_ENDIAN - cd = iconv_open ("UCS-2LE", charset); -#else - cd = iconv_open ("UCS-2BE", charset); -#endif - - if (cd == (iconv_t) (-1)) - { - return -1; - } - - { - unsigned short out; - unsigned char buf[4]; - unsigned int i0, i1, i2; - int result; - flag = 0; - for (i0 = 0; i0 < 0x100; i0++) - { - buf[0] = i0; - info->map[i0] = 0; - //result = try (cd, buf, 1, &out); - if (result < 0) - { - } - else if (result > 0) - { - info->map[i0] = out; - } - else - { - for (i1 = 0; i1 < 0x100; i1++) - { - buf[1] = i1; - ///result = try (cd, buf, 2, &out); - if (result < 0) - { - } - else if (result > 0) - { - flag++; - info->map[i0] = -2; - } - else - { - for (i2 = 0; i2 < 0x100; i2++) - { - buf[2] = i2; - ////result = try (cd, buf, 3, &out); - if (result < 0) - { - } - else if (result > 0) - { - flag++; - info->map[i0] = -3; - } - } - } - } - } - } - } - - if (iconv_close (cd) < 0) - { - return -2; - } - return flag; -} - -static int -iconv_convertor (void *data, const char *s) -{ - XML_Encoding *info = data; - int res; - CtdlLogPrintf(0, "RSS: Converting ...\n"); - - if (s == NULL) - return -1; -/* - GByteArray *result; - result = g_byte_array_new (); - if (process_block (info->data, (char *) s, strlen (s), result) == 0) - { - res = *(result->data); - g_byte_array_free (result, TRUE); - return res; - } - g_byte_array_free (result, TRUE); -*/ - return -1; -} - -static void -my_release (void *data) -{ - iconv_t cd = (iconv_t) data; - if (iconv_close (cd) != 0) - { -/// TODO: uh no. exit (1); - } -} -int -handle_unknown_xml_encoding (void *encodingHandleData, - const XML_Char * name, - XML_Encoding * info) -{ - int result; - CtdlLogPrintf(0, "RSS: unknown encoding ...\n"); - result = fill_encoding_info (name, info); - if (result >= 0) - { - /* - Special case: client asked for reverse conversion, we'll provide him with - iconv descriptor which handles it. Client should release it by himself. - */ - if(encodingHandleData != NULL) - *((iconv_t *)encodingHandleData) = iconv_open(name, "UTF-8"); - /* - Optimization: we do not need conversion function if encoding is one-to-one, - info->map table will be enough - */ - if (result == 0) - { - info->data = NULL; - info->convert = NULL; - info->release = NULL; - return 1; - } - /* - We do need conversion function because this encoding uses multibyte sequences - */ - info->data = (void *) iconv_open ("UTF-8", name); - if ((int)info->data == -1) - return -1; - info->convert = iconv_convertor; - info->release = my_release; - return 1; - } - if(encodingHandleData != NULL) - *(iconv_t *)encodingHandleData = NULL; - return 0; -} - -///#endif -//#endif - -void AppendLink(StrBuf *Message, StrBuf *link, StrBuf *LinkTitle, const char *Title) +struct rssnetcfg *rnclist = NULL; +void AppendLink(StrBuf *Message, + StrBuf *link, + StrBuf *LinkTitle, + const char *Title) { if (StrLength(link) > 0) { @@ -321,999 +92,641 @@ void AppendLink(StrBuf *Message, StrBuf *link, StrBuf *LinkTitle, const char *Ti StrBufAppendBufPlain(Message, HKEY("
\n"), 0); } } -/* - * Commit a fetched and parsed RSS item to disk - */ -void rss_save_item(rss_item *ri) -{ - - struct MD5Context md5context; - u_char rawdigest[MD5_DIGEST_LEN]; - int i; - char utmsgid[SIZ]; - struct cdbdata *cdbut; - struct UseTable ut; - struct CtdlMessage *msg; - struct recptypes *recp = NULL; - int msglen = 0; - StrBuf *Message; - - recp = (struct recptypes *) malloc(sizeof(struct recptypes)); - if (recp == NULL) return; - memset(recp, 0, sizeof(struct recptypes)); - memset(&ut, 0, sizeof(struct UseTable)); - recp->recp_room = strdup(ri->roomlist); - recp->num_room = num_tokens(ri->roomlist, '|'); - recp->recptypes_magic = RECPTYPES_MAGIC; - - /* Construct a GUID to use in the S_USETABLE table. - * If one is not present in the item itself, make one up. - */ - if (ri->guid != NULL) { - StrBufSpaceToBlank(ri->guid); - StrBufTrim(ri->guid); - snprintf(utmsgid, sizeof utmsgid, "rss/%s", ChrPtr(ri->guid)); - } - else { - MD5Init(&md5context); - if (ri->title != NULL) { - MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->title), StrLength(ri->title)); - } - if (ri->link != NULL) { - MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->link), StrLength(ri->link)); - } - MD5Final(rawdigest, &md5context); - for (i=0; idescription == NULL) ri->description = NewStrBufPlain(HKEY("")); - StrBufSpaceToBlank(ri->description); - msg = malloc(sizeof(struct CtdlMessage)); - memset(msg, 0, sizeof(struct CtdlMessage)); - msg->cm_magic = CTDLMESSAGE_MAGIC; - msg->cm_anon_type = MES_NORMAL; - msg->cm_format_type = FMT_RFC822; - - if (ri->guid != NULL) { - msg->cm_fields['E'] = strdup(ChrPtr(ri->guid)); - } - - if (ri->author_or_creator != NULL) { - char *From; - StrBuf *Encoded, *QPEncoded; - StrBuf *UserName; - StrBuf *EmailAddress; - StrBuf *EncBuf; - int FromAt; - int FromLen; - - UserName = NewStrBuf(); - EmailAddress = NewStrBuf(); - EncBuf = NewStrBuf(); -////TODO! - StrBufTrim(ri->author_or_creator); - From = html_to_ascii(ChrPtr(ri->author_or_creator), - StrLength(ri->author_or_creator), - 512, 0); - FromLen = strlen(From); - if (From[FromLen - 1] == '\n') - { - From[FromLen - 1] = '\0'; - } - FromAt = strchr(From, '@') != NULL; - if (!FromAt && StrLength (ri->author_email) > 0) - { - Encoded = NewStrBuf(); - if (!IsEmptyStr(From)) - { - StrBufPrintf(Encoded, - "%s<%s>", - From, - ChrPtr(ri->author_email)); - } - else - { - StrBufPrintf(Encoded, - "<%s>", - ChrPtr(ri->author_email)); - } - } - else - { - if (FromAt) - Encoded = NewStrBufPlain(From, -1); - else - { - Encoded = NewStrBuf(); - StrBufPrintf(Encoded, - "%s<%s>", - From, - "rss@localhost"); /// TODO: get hostname? - } - } - free(From); - StrBufTrim(Encoded); - QPEncoded = StrBufSanitizeEmailRecipientVector(Encoded, UserName, EmailAddress, EncBuf); - msg->cm_fields['A'] = SmashStrBuf(&QPEncoded); - - FreeStrBuf(&Encoded); - FreeStrBuf(&UserName); - FreeStrBuf(&EmailAddress); - FreeStrBuf(&EncBuf); - - } - else { - msg->cm_fields['A'] = strdup("rss"); - } - - msg->cm_fields['N'] = strdup(NODENAME); - if (ri->title != NULL) { - long len; - char *Sbj; - StrBuf *Encoded, *QPEncoded; - - QPEncoded = NULL; - StrBufSpaceToBlank(ri->title); - len = StrLength(ri->title); - Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0); - len = strlen(Sbj); - if (Sbj[len - 1] == '\n') - { - len --; - Sbj[len] = '\0'; - } - Encoded = NewStrBufPlain(Sbj, len); - free(Sbj); - - StrBufTrim(Encoded); - StrBufRFC2047encode(&QPEncoded, Encoded); - - msg->cm_fields['U'] = SmashStrBuf(&QPEncoded); - FreeStrBuf(&Encoded); - } - msg->cm_fields['T'] = malloc(64); - snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate); - if (ri->channel_title != NULL) { - if (StrLength(ri->channel_title) > 0) { - msg->cm_fields['O'] = strdup(ChrPtr(ri->channel_title)); - } - } - if (ri->link == NULL) - ri->link = NewStrBufPlain(HKEY("")); - msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ; - - Message = NewStrBufPlain(NULL, StrLength(ri->description)); - - StrBufPlain(Message, HKEY( - "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n" - "\n")); - - StrBufAppendBuf(Message, ri->description, 0); - StrBufAppendBufPlain(Message, HKEY("

\n"), 0); - - AppendLink(Message, ri->link, ri->linkTitle, NULL); - AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this"); - StrBufAppendBufPlain(Message, HKEY("\n"), 0); - - msg->cm_fields['M'] = SmashStrBuf(&Message); - - CtdlSubmitMsg(msg, recp, NULL, 0); - CtdlFreeMessage(msg); - - /* write the uidl to the use table so we don't store this item again */ - strcpy(ut.ut_msgid, utmsgid); - ut.ut_timestamp = time(NULL); - cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) ); - } - free_recipients(recp); -} - -/* - * Convert an RDF/RSS datestamp into a time_t - */ -time_t rdf_parsedate(const char *p) +void DeleteRoomReference(long QRnumber) { - struct tm tm; - time_t t = 0; - - if (!p) return 0L; - if (strlen(p) < 10) return 0L; + HashPos *At; + long HKLen; + const char *HK; + void *vData = NULL; + rss_room_counter *pRoomC; - memset(&tm, 0, sizeof tm); + At = GetNewHashPos(RSSQueueRooms, 0); - /* - * If the timestamp appears to be in W3C datetime format, try to - * parse it. See also: http://www.w3.org/TR/NOTE-datetime - * - * This code, along with parsedate.c, is a potential candidate for - * moving into libcitadel. - */ - if ( (p[4] == '-') && (p[7] == '-') ) { - tm.tm_year = atoi(&p[0]) - 1900; - tm.tm_mon = atoi(&p[5]) - 1; - tm.tm_mday = atoi(&p[8]); - if ( (p[10] == 'T') && (p[13] == ':') ) { - tm.tm_hour = atoi(&p[11]); - tm.tm_min = atoi(&p[14]); + if (GetHashPosFromKey(RSSQueueRooms, LKEY(QRnumber), At)) + { + GetHashPos(RSSQueueRooms, At, &HKLen, &HK, &vData); + if (vData != NULL) + { + pRoomC = (rss_room_counter *) vData; + pRoomC->count --; + if (pRoomC->count == 0) + DeleteEntryFromHash(RSSQueueRooms, At); } - return mktime(&tm); } - - /* hmm... try RFC822 date stamp format */ - - t = parsedate(p); - if (t > 0) return(t); - - /* yeesh. ok, just return the current date and time. */ - return(time(NULL)); -} - -void flush_rss_item(rss_item *ri) -{ - /* Initialize the feed item data structure */ - FreeStrBuf(&ri->guid); - FreeStrBuf(&ri->title); - FreeStrBuf(&ri->link); - FreeStrBuf(&ri->author_or_creator); - FreeStrBuf(&ri->author_email); - FreeStrBuf(&ri->author_url); - FreeStrBuf(&ri->description); + DeleteHashPos(&At); } -void rss_xml_start(void *data, const char *supplied_el, const char **attr) +void UnlinkRooms(rss_aggregator *Cfg) { - rss_xml_handler *h; - rsscollection *rssc = (rsscollection*) data; - rssnetcfg *Cfg = rssc->Cfg; - rss_item *ri = rssc->Item; - void *pv; - const char *pel; - char *sep = NULL; - - /* Axe the namespace, we don't care about it */ -/// CtdlLogPrintf(0, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el); - pel = supplied_el; - while (sep = strchr(pel, ':'), sep) { - pel = sep + 1; - } - - if (pel != supplied_el) + DeleteRoomReference(Cfg->QRnumber); + if (Cfg->OtherQRnumbers != NULL) { - void *v; - - if (!GetHash(KnownNameSpaces, - supplied_el, - pel - supplied_el - 1, - &v)) + long HKLen; + const char *HK; + HashPos *At; + void *vData; + + At = GetNewHashPos(Cfg->OtherQRnumbers, 0); + while (! server_shutting_down && + GetNextHashPos(Cfg->OtherQRnumbers, + At, + &HKLen, &HK, + &vData) && + (vData != NULL)) { -#ifdef DEBUG_RSS - CtdlLogPrintf(0, "RSS: START ignoring because of wrong namespace [%s] = [%s]\n", - supplied_el); -#endif - return; + long *lData = (long*) vData; + DeleteRoomReference(*lData); } - } - - StrBufPlain(rssc->Key, pel, -1); - StrBufLowerCase(rssc->Key); - if (GetHash(StartHandlers, SKEY(rssc->Key), &pv)) - { - rssc->Current = h = (rss_xml_handler*) pv; - if (((h->Flags & RSS_UNSET) != 0) && - (Cfg->ItemType == RSS_UNSET)) - { - h->Handler(rssc->CData, ri, Cfg, attr); - } - else if (((h->Flags & RSS_RSS) != 0) && - (Cfg->ItemType == RSS_RSS)) - { - h->Handler(rssc->CData, ri, Cfg, attr); - } - else if (((h->Flags & RSS_ATOM) != 0) && - (Cfg->ItemType == RSS_ATOM)) - { - h->Handler(rssc->CData, ri, Cfg, attr); - } -#ifdef DEBUG_RSS - else - CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); -#endif + DeleteHashPos(&At); } -#ifdef DEBUG_RSS - else - CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); -#endif } -void rss_xml_end(void *data, const char *supplied_el) +void UnlinkRSSAggregator(rss_aggregator *Cfg) { - rss_xml_handler *h; - rsscollection *rssc = (rsscollection*) data; - rssnetcfg *Cfg = rssc->Cfg; - rss_item *ri = rssc->Item; - const char *pel; - char *sep = NULL; - void *pv; - - /* Axe the namespace, we don't care about it */ - pel = supplied_el; - while (sep = strchr(pel, ':'), sep) { - pel = sep + 1; - } -// CtdlLogPrintf(0, "RSS: END %s...\n", el); - if (pel != supplied_el) - { - void *v; - - if (!GetHash(KnownNameSpaces, - supplied_el, - pel - supplied_el - 1, - &v)) - { -#ifdef DEBUG_RSS - CtdlLogPrintf(0, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n", - supplied_el, ChrPtr(rssc->CData)); -#endif - FlushStrBuf(rssc->CData); - return; - } - } + HashPos *At; - StrBufPlain(rssc->Key, pel, -1); - StrBufLowerCase(rssc->Key); - if (GetHash(EndHandlers, SKEY(rssc->Key), &pv)) - { - h = (rss_xml_handler*) pv; + UnlinkRooms(Cfg); - if (((h->Flags & RSS_UNSET) != 0) && - (Cfg->ItemType == RSS_UNSET)) - { - h->Handler(rssc->CData, ri, Cfg, NULL); - } - else if (((h->Flags & RSS_RSS) != 0) && - (Cfg->ItemType == RSS_RSS)) - { - h->Handler(rssc->CData, ri, Cfg, NULL); - } - else if (((h->Flags & RSS_ATOM) != 0) && - (Cfg->ItemType == RSS_ATOM)) - { - h->Handler(rssc->CData, ri, Cfg, NULL); - } -#ifdef DEBUG_RSS - else - CtdlLogPrintf(0, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); -#endif + At = GetNewHashPos(RSSFetchUrls, 0); + if (GetHashPosFromKey(RSSFetchUrls, SKEY(Cfg->Url), At)) + { + DeleteEntryFromHash(RSSFetchUrls, At); } -#ifdef DEBUG_RSS - else - CtdlLogPrintf(0, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); -#endif - FlushStrBuf(rssc->CData); - rssc->Current = NULL; + DeleteHashPos(&At); + last_run = time(NULL); } - - - - -void RSS_item_rss_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void FreeNetworkSaveMessage (void *vMsg) { - CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RSS feed.\n"); - Cfg->ItemType = RSS_RSS; -} + networker_save_message *Msg = (networker_save_message *) vMsg; -void RSS_item_rdf_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RDF feed.\n"); - Cfg->ItemType = RSS_RSS; + CtdlFreeMessageContents(&Msg->Msg); + FreeStrBuf(&Msg->Message); + FreeStrBuf(&Msg->MsgGUID); + free(Msg); } -void ATOM_item_feed_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +eNextState AbortNetworkSaveMessage (AsyncIO *IO) { - CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an ATOM feed.\n"); - Cfg->ItemType = RSS_ATOM; + return eAbort; ///TODO } - -void RSS_item_item_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +eNextState RSSSaveMessage(AsyncIO *IO) { - ri->item_tag_nesting ++; - flush_rss_item(ri); -} + long len; + const char *Key; + rss_aggregator *Ctx = (rss_aggregator *) IO->Data; -void ATOM_item_entry_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ -/* Atom feed... */ - ri->item_tag_nesting ++; - flush_rss_item(ri); -} + Ctx->ThisMsg->Msg.cm_fields['M'] = SmashStrBuf(&Ctx->ThisMsg->Message); -void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - int i; - const char *pHref = NULL; - const char *pType = NULL; - const char *pRel = NULL; - const char *pTitle = NULL; + CtdlSubmitMsg(&Ctx->ThisMsg->Msg, &Ctx->recp, NULL, 0); - for (i = 0; Attr[i] != NULL; i+=2) - { - if (!strcmp(Attr[i], "href")) - { - pHref = Attr[i+1]; - } - else if (!strcmp(Attr[i], "rel")) - { - pRel = Attr[i+1]; - } - else if (!strcmp(Attr[i], "type")) - { - pType = Attr[i+1]; - } - else if (!strcmp(Attr[i], "title")) - { - pTitle = Attr[i+1]; - } - } - if (pHref == NULL) - return; /* WHUT? Pointing... where? */ - if ((pType != NULL) && !strcasecmp(pType, "application/atom+xml")) - return; /* these just point to other rss resources, we're not interested in them. */ - if (pRel != NULL) - { - if (!strcasecmp (pRel, "replies")) - { - NewStrBufDupAppendFlush(&ri->reLink, NULL, pHref, -1); - StrBufTrim(ri->link); - NewStrBufDupAppendFlush(&ri->reLinkTitle, NULL, pTitle, -1); - } - else if (!strcasecmp(pRel, "alternate")) /* Alternative representation of this Item... */ - { - NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1); - StrBufTrim(ri->link); - NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1); + /* write the uidl to the use table so we don't store this item again */ + cdb_store(CDB_USETABLE, + SKEY(Ctx->ThisMsg->MsgGUID), + &Ctx->ThisMsg->ut, + sizeof(struct UseTable) ); - } -#if 0 /* these are also defined, but dunno what to do with them.. */ - else if (!strcasecmp(pRel, "related")) - { - } - else if (!strcasecmp(pRel, "self")) - { - } - else if (!strcasecmp(pRel, "enclosure")) - {/* this reference can get big, and is probably the full article... */ - } - else if (!strcasecmp(pRel, "via")) - {/* this article was provided via... */ - } -#endif - } - else if (StrLength(ri->link) == 0) - { - NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1); - StrBufTrim(ri->link); - NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1); - } + if (GetNextHashPos(Ctx->Messages, + Ctx->Pos, + &len, &Key, + (void**) &Ctx->ThisMsg)) + return NextDBOperation(IO, RSS_FetchNetworkUsetableEntry); + else + return eAbort; } +eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO) +{ + const char *Key; + long len; + struct cdbdata *cdbut; + rss_aggregator *Ctx = (rss_aggregator *) IO->Data; + /* Find out if we've already seen this item */ + strcpy(Ctx->ThisMsg->ut.ut_msgid, + ChrPtr(Ctx->ThisMsg->MsgGUID)); /// TODO + Ctx->ThisMsg->ut.ut_timestamp = time(NULL); + cdbut = cdb_fetch(CDB_USETABLE, SKEY(Ctx->ThisMsg->MsgGUID)); +#ifndef DEBUG_RSS + if (cdbut != NULL) { + /* Item has already been seen */ + EV_syslog(LOG_DEBUG, + "%s has already been seen\n", + ChrPtr(Ctx->ThisMsg->MsgGUID)); + cdb_free(cdbut); -void ATOMRSS_item_title_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - if ((ri->item_tag_nesting == 0) && (StrLength(CData) > 0)) { - NewStrBufDupAppendFlush(&ri->channel_title, CData, NULL, 0); - StrBufTrim(ri->channel_title); + /* rewrite the record anyway, to update the timestamp */ + cdb_store(CDB_USETABLE, + SKEY(Ctx->ThisMsg->MsgGUID), + &Ctx->ThisMsg->ut, sizeof(struct UseTable) ); + + if (GetNextHashPos(Ctx->Messages, + Ctx->Pos, + &len, &Key, + (void**) &Ctx->ThisMsg)) + return NextDBOperation( + IO, + RSS_FetchNetworkUsetableEntry); + else + return eAbort; } -} - -void RSS_item_guid_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0); + else +#endif + { + NextDBOperation(IO, RSSSaveMessage); + return eSendMore; } } -void ATOM_item_id_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +/* + * Commit a fetched and parsed RSS item to disk + */ +void rss_save_item(rss_item *ri, rss_aggregator *Cfg) { - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0); - } -} + networker_save_message *SaveMsg; + struct MD5Context md5context; + u_char rawdigest[MD5_DIGEST_LEN]; + int msglen = 0; + StrBuf *Message; + StrBuf *guid; + AsyncIO *IO = &Cfg->IO; + int n; -void RSS_item_link_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->link, CData, NULL, 0); - StrBufTrim(ri->link); - } -} -void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->reLink, CData, NULL, 0); - StrBufTrim(ri->reLink); - } -} + SaveMsg = (networker_save_message *) malloc( + sizeof(networker_save_message)); + memset(SaveMsg, 0, sizeof(networker_save_message)); -void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->title, CData, NULL, 0); - StrBufTrim(ri->title); + /* Construct a GUID to use in the S_USETABLE table. + * If one is not present in the item itself, make one up. + */ + if (ri->guid != NULL) { + StrBufSpaceToBlank(ri->guid); + StrBufTrim(ri->guid); + guid = NewStrBufPlain(HKEY("rss/")); + StrBufAppendBuf(guid, ri->guid, 0); } -} - -void ATOM_item_content_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - long olen = StrLength (ri->description); - long clen = StrLength (CData); - if (clen > 0) - { - if (olen == 0) { - NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); - StrBufTrim(ri->description); + else { + MD5Init(&md5context); + if (ri->title != NULL) { + MD5Update(&md5context, + (const unsigned char*)SKEY(ri->title)); } - else if (olen < clen) { - FlushStrBuf(ri->description); - NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); - StrBufTrim(ri->description); + if (ri->link != NULL) { + MD5Update(&md5context, + (const unsigned char*)SKEY(ri->link)); } + MD5Final(rawdigest, &md5context); + guid = NewStrBufPlain(NULL, + MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/); + StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN); + StrBufAppendBufPlain(guid, HKEY("_rss2ctdl"), 0); } -} -void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - /* this can contain an abstract of the article. but we don't want to verwrite a full document if we already have it. */ - if ((StrLength(CData) > 0) && (StrLength(ri->description) == 0)) - { - NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); - StrBufTrim(ri->description); - } -} -void RSS_item_description_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - long olen = StrLength (ri->description); - long clen = StrLength (CData); - if (clen > 0) - { - if (olen == 0) { - NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); - StrBufTrim(ri->description); - } - else if (olen < clen) { - FlushStrBuf(ri->description); - NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0); - StrBufTrim(ri->description); - } - } -} + /* translate Item into message. */ + EVM_syslog(LOG_DEBUG, "RSS: translating item...\n"); + if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY("")); + StrBufSpaceToBlank(ri->description); + SaveMsg->Msg.cm_magic = CTDLMESSAGE_MAGIC; + SaveMsg->Msg.cm_anon_type = MES_NORMAL; + SaveMsg->Msg.cm_format_type = FMT_RFC822; -void ATOM_item_published_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - if (StrLength(CData) > 0) { - StrBufTrim(CData); - ri->pubdate = rdf_parsedate(ChrPtr(CData)); + if (ri->guid != NULL) { + SaveMsg->Msg.cm_fields['E'] = strdup(ChrPtr(ri->guid)); } -} -void ATOM_item_updated_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - if (StrLength(CData) > 0) { - StrBufTrim(CData); - ri->pubdate = rdf_parsedate(ChrPtr(CData)); - } -} + if (ri->author_or_creator != NULL) { + char *From; + StrBuf *Encoded = NULL; + int FromAt; -void RSS_item_pubdate_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - if (StrLength(CData) > 0) { - StrBufTrim(CData); - ri->pubdate = rdf_parsedate(ChrPtr(CData)); - } -} + From = html_to_ascii(ChrPtr(ri->author_or_creator), + StrLength(ri->author_or_creator), + 512, 0); + StrBufPlain(ri->author_or_creator, From, -1); + StrBufTrim(ri->author_or_creator); + free(From); + FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL; + if (!FromAt && StrLength (ri->author_email) > 0) + { + StrBufRFC2047encode(&Encoded, ri->author_or_creator); + SaveMsg->Msg.cm_fields['A'] = SmashStrBuf(&Encoded); + SaveMsg->Msg.cm_fields['P'] = + SmashStrBuf(&ri->author_email); + } + else + { + if (FromAt) + { + SaveMsg->Msg.cm_fields['A'] = + SmashStrBuf(&ri->author_or_creator); + SaveMsg->Msg.cm_fields['P'] = + strdup(SaveMsg->Msg.cm_fields['A']); + } + else + { + StrBufRFC2047encode(&Encoded, + ri->author_or_creator); + SaveMsg->Msg.cm_fields['A'] = + SmashStrBuf(&Encoded); + SaveMsg->Msg.cm_fields['P'] = + strdup("rss@localhost"); -void RSS_item_date_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - if (StrLength(CData) > 0) { - StrBufTrim(CData); - ri->pubdate = rdf_parsedate(ChrPtr(CData)); + } + if (ri->pubdate <= 0) { + ri->pubdate = time(NULL); + } + } } -} - - - -void RSS_item_author_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0); - StrBufTrim(ri->author_or_creator); + else { + SaveMsg->Msg.cm_fields['A'] = strdup("rss"); } -} + SaveMsg->Msg.cm_fields['N'] = strdup(NODENAME); + if (ri->title != NULL) { + long len; + char *Sbj; + StrBuf *Encoded, *QPEncoded; -void ATOM_item_name_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0); - StrBufTrim(ri->author_or_creator); - } -} + QPEncoded = NULL; + StrBufSpaceToBlank(ri->title); + len = StrLength(ri->title); + Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0); + len = strlen(Sbj); + if (Sbj[len - 1] == '\n') + { + len --; + Sbj[len] = '\0'; + } + Encoded = NewStrBufPlain(Sbj, len); + free(Sbj); -void ATOM_item_email_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->author_email, CData, NULL, 0); - StrBufTrim(ri->author_email); - } -} + StrBufTrim(Encoded); + StrBufRFC2047encode(&QPEncoded, Encoded); -void RSS_item_creator_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - if ((StrLength(CData) > 0) && - (StrLength(ri->author_or_creator) == 0)) - { - NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0); - StrBufTrim(ri->author_or_creator); + SaveMsg->Msg.cm_fields['U'] = SmashStrBuf(&QPEncoded); + FreeStrBuf(&Encoded); } -} - - -void ATOM_item_uri_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - if (StrLength(CData) > 0) { - NewStrBufDupAppendFlush(&ri->author_url, CData, NULL, 0); - StrBufTrim(ri->author_url); + SaveMsg->Msg.cm_fields['T'] = malloc(64); + snprintf(SaveMsg->Msg.cm_fields['T'], 64, "%ld", ri->pubdate); + if (ri->channel_title != NULL) { + if (StrLength(ri->channel_title) > 0) { + SaveMsg->Msg.cm_fields['O'] = + strdup(ChrPtr(ri->channel_title)); + } } -} + if (ri->link == NULL) + ri->link = NewStrBufPlain(HKEY("")); -void RSS_item_item_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - --ri->item_tag_nesting; - rss_save_item(ri); -} +#if 0 /* temporarily disable shorter urls. */ + SaveMsg->Msg.cm_fields[TMP_SHORTER_URLS] = + GetShorterUrls(ri->description); +#endif + msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ; -void ATOM_item_entry_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ - --ri->item_tag_nesting; - rss_save_item(ri); -} + Message = NewStrBufPlain(NULL, StrLength(ri->description)); -void RSS_item_rss_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ -// CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n"); - ri->done_parsing = 1; - -} -void RSS_item_rdf_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ -// CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n"); - ri->done_parsing = 1; -} + StrBufPlain(Message, HKEY( + "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n" + "\n")); +#if 0 /* disable shorter url for now. */ + SaveMsg->Msg.cm_fields[TMP_SHORTER_URL_OFFSET] = StrLength(Message); +#endif + StrBufAppendBuf(Message, ri->description, 0); + StrBufAppendBufPlain(Message, HKEY("

\n"), 0); + AppendLink(Message, ri->link, ri->linkTitle, NULL); + AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this"); + StrBufAppendBufPlain(Message, HKEY("\n"), 0); -void RSSATOM_item_ignore(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) -{ + SaveMsg->MsgGUID = guid; + SaveMsg->Message = Message; + + n = GetCount(Cfg->Messages) + 1; + Put(Cfg->Messages, IKEY(n), SaveMsg, FreeNetworkSaveMessage); } /* - * This callback stores up the data which appears in between tags. + * Begin a feed parse */ -void rss_xml_cdata_start(void *data) +int rss_do_fetching(rss_aggregator *Cfg) { - rsscollection *rssc = (rsscollection*) data; + rss_item *ri; + time_t now; - FlushStrBuf(rssc->CData); -} + now = time(NULL); -void rss_xml_cdata_end(void *data) -{ -} -void rss_xml_chardata(void *data, const XML_Char *s, int len) -{ - rsscollection *rssc = (rsscollection*) data; + if ((Cfg->next_poll != 0) && (now < Cfg->next_poll)) + return 0; - StrBufAppendBufPlain (rssc->CData, s, len, 0); -} + ri = (rss_item*) malloc(sizeof(rss_item)); + memset(ri, 0, sizeof(rss_item)); + Cfg->Item = ri; -/* - * Callback function for passing libcurl's output to expat for parsing - */ -size_t rss_libcurl_callback(void *ptr, size_t size, size_t nmemb, void *stream) -{ - XML_Parse((XML_Parser)stream, ptr, (size * nmemb), 0); - return (size*nmemb); -} + if (! InitcURLIOStruct(&Cfg->IO, + Cfg, + "Citadel RSS Client", + RSSAggregator_ParseReply, + RSSAggregator_Terminate, + RSSAggregator_ShutdownAbort)) + { + syslog(LOG_ALERT, "Unable to initialize libcurl.\n"); + return 0; + } + safestrncpy(((CitContext*)Cfg->IO.CitContext)->cs_host, + ChrPtr(Cfg->Url), + sizeof(((CitContext*)Cfg->IO.CitContext)->cs_host)); + syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(Cfg->Url)); + ParseURL(&Cfg->IO.ConnectMe, Cfg->Url, 80); + CurlPrepareURL(Cfg->IO.ConnectMe); -/* - * Begin a feed parse - */ -void rss_do_fetching(rssnetcfg *Cfg) { - rsscollection rssc; - rss_item ri; - XML_Parser xp; - StrBuf *Answer; - - CURL *curl; - CURLcode res; - char errmsg[1024] = ""; - char *ptr; - const char *at; - long len; + QueueCurlContext(&Cfg->IO); + return 1; +} - memset(&ri, 0, sizeof(rss_item)); - rssc.Item = &ri; - rssc.Cfg = Cfg; - CtdlLogPrintf(CTDL_DEBUG, "Fetching RSS feed <%s>\n", Cfg->url); +void DeleteRssCfg(void *vptr) +{ + rss_aggregator *rncptr = (rss_aggregator *)vptr; + AsyncIO *IO = &rncptr->IO; + EVM_syslog(LOG_DEBUG, "RSS: destroying\n"); - curl = curl_easy_init(); - if (!curl) { - CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n"); - return; - } - Answer = NewStrBufPlain(NULL, SIZ); - - curl_easy_setopt(curl, CURLOPT_URL, Cfg->url); - curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0); - curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, Answer); -// curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_libcurl_callback); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, CurlFillStrBuf_callback); - curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errmsg); - curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); -#ifdef CURLOPT_HTTP_CONTENT_DECODING - curl_easy_setopt(curl, CURLOPT_HTTP_CONTENT_DECODING, 1); - curl_easy_setopt(curl, CURLOPT_ENCODING, ""); -#endif - curl_easy_setopt(curl, CURLOPT_USERAGENT, CITADEL); - curl_easy_setopt(curl, CURLOPT_TIMEOUT, 180); /* die after 180 seconds */ - if ( - (!IsEmptyStr(config.c_ip_addr)) - && (strcmp(config.c_ip_addr, "*")) - && (strcmp(config.c_ip_addr, "::")) - && (strcmp(config.c_ip_addr, "0.0.0.0")) - ) { - curl_easy_setopt(curl, CURLOPT_INTERFACE, config.c_ip_addr); - } + FreeStrBuf(&rncptr->Url); + FreeStrBuf(&rncptr->rooms); + FreeStrBuf(&rncptr->CData); + FreeStrBuf(&rncptr->Key); + FreeStrBuf(&rncptr->IO.HttpReq.ReplyData); + DeleteHash(&rncptr->OtherQRnumbers); + FreeURL(&rncptr->IO.ConnectMe); - if (CtdlThreadCheckStop()) + DeleteHashPos (&rncptr->Pos); + DeleteHash (&rncptr->Messages); + if (rncptr->recp.recp_room != NULL) + free(rncptr->recp.recp_room); + + + if (rncptr->Item != NULL) { - curl_easy_cleanup(curl); - return; - } - - if (CtdlThreadCheckStop()) - goto shutdown ; + FreeStrBuf(&rncptr->Item->guid); + FreeStrBuf(&rncptr->Item->title); + FreeStrBuf(&rncptr->Item->link); + FreeStrBuf(&rncptr->Item->linkTitle); + FreeStrBuf(&rncptr->Item->reLink); + FreeStrBuf(&rncptr->Item->reLinkTitle); + FreeStrBuf(&rncptr->Item->description); + FreeStrBuf(&rncptr->Item->channel_title); + FreeStrBuf(&rncptr->Item->author_or_creator); + FreeStrBuf(&rncptr->Item->author_url); + FreeStrBuf(&rncptr->Item->author_email); - res = curl_easy_perform(curl); - if (res) { - CtdlLogPrintf(CTDL_ALERT, "libcurl error %d: %s\n", res, errmsg); + free(rncptr->Item); } + free(rncptr); +} - if (CtdlThreadCheckStop()) - goto shutdown ; +eNextState RSSAggregator_Terminate(AsyncIO *IO) +{ + rss_aggregator *rncptr = (rss_aggregator *)IO->Data; + EVM_syslog(LOG_DEBUG, "RSS: Terminating.\n"); + UnlinkRSSAggregator(rncptr); + return eAbort; +} +eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO) +{ + const char *pUrl; + rss_aggregator *rncptr = (rss_aggregator *)IO->Data; - memset(&ri, 0, sizeof(rss_item)); - ri.roomlist = Cfg->rooms; - rssc.CData = NewStrBufPlain(NULL, SIZ); - rssc.Key = NewStrBuf(); - at = NULL; - StrBufSipLine(rssc.Key, Answer, &at); - ptr = NULL; + pUrl = IO->ConnectMe->PlainUrl; + if (pUrl == NULL) + pUrl = ""; -#define encoding "encoding=\"" - ptr = strstr(ChrPtr(rssc.Key), encoding); - if (ptr != NULL) - { - char *pche; - - ptr += sizeof (encoding) - 1; - pche = strchr(ptr, '"'); - if (pche != NULL) - StrBufCutAt(rssc.Key, -1, pche); - else - ptr = "UTF-8"; - } - else - ptr = "UTF-8"; + EV_syslog(LOG_DEBUG, "RSS: Aborting by shutdown: %s.\n", pUrl); - xp = XML_ParserCreateNS(ptr, ':'); - if (!xp) { - CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n"); - goto shutdown; - } - FlushStrBuf(rssc.Key); -//#ifdef HAVE_ICONV -#if 0 - XML_SetUnknownEncodingHandler(xp, - handle_unknown_xml_encoding, - &rssc); -#endif -//#endif - XML_SetElementHandler(xp, rss_xml_start, rss_xml_end); - XML_SetCharacterDataHandler(xp, rss_xml_chardata); - XML_SetUserData(xp, &rssc); - XML_SetCdataSectionHandler(xp, - rss_xml_cdata_start, - rss_xml_cdata_end); - - - len = StrLength(Answer); - ptr = SmashStrBuf(&Answer); - XML_Parse(xp, ptr, len, 0); - free (ptr); - if (ri.done_parsing == 0) - XML_Parse(xp, "", 0, 1); - - - CtdlLogPrintf(CTDL_ALERT, "RSS: XML Status [%s] \n", - XML_ErrorString( - XML_GetErrorCode(xp))); - -shutdown: - curl_easy_cleanup(curl); - XML_ParserFree(xp); - - flush_rss_item(&ri); - FreeStrBuf(&rssc.CData); - FreeStrBuf(&rssc.Key); + UnlinkRSSAggregator(rncptr); + return eAbort; } - /* * Scan a room's netconfig to determine whether it is requesting any RSS feeds */ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) { + StrBuf *CfgData=NULL; + StrBuf *CfgType; + StrBuf *Line; + rss_room_counter *Count = NULL; + struct stat statbuf; char filename[PATH_MAX]; - char buf[1024]; - char instr[32]; - FILE *fp; - char feedurl[256]; - rssnetcfg *rncptr = NULL; - rssnetcfg *use_this_rncptr = NULL; - int len = 0; - char *ptr = NULL; + int fd; + int Done; + rss_aggregator *rncptr = NULL; + rss_aggregator *use_this_rncptr = NULL; + void *vptr; + const char *CfgPtr, *lPtr; + const char *Err; + + pthread_mutex_lock(&RSSQueueMutex); + if (GetHash(RSSQueueRooms, LKEY(qrbuf->QRnumber), &vptr)) + { + syslog(LOG_DEBUG, + "rssclient: [%ld] %s already in progress.\n", + qrbuf->QRnumber, + qrbuf->QRname); + pthread_mutex_unlock(&RSSQueueMutex); + return; + } + pthread_mutex_unlock(&RSSQueueMutex); assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir); - if (CtdlThreadCheckStop()) + if (server_shutting_down) return; - + /* Only do net processing for rooms that have netconfigs */ - fp = fopen(filename, "r"); - if (fp == NULL) { + fd = open(filename, 0); + if (fd <= 0) { + /* syslog(LOG_DEBUG, + "rssclient: %s no config.\n", + qrbuf->QRname); */ return; } - while (fgets(buf, sizeof buf, fp) != NULL && !CtdlThreadCheckStop()) { - buf[strlen(buf)-1] = 0; - - extract_token(instr, buf, 0, '|', sizeof instr); - if (!strcasecmp(instr, "rssclient")) { - - use_this_rncptr = NULL; - - extract_token(feedurl, buf, 1, '|', sizeof feedurl); + if (server_shutting_down) + return; - /* If any other rooms have requested the same feed, then we will just add this - * room to the target list for that client request. - */ - for (rncptr=rnclist; rncptr!=NULL; rncptr=rncptr->next) { - if (!strcmp(rncptr->url, feedurl)) { - use_this_rncptr = rncptr; - } - } + if (fstat(fd, &statbuf) == -1) { + syslog(LOG_DEBUG, + "ERROR: could not stat configfile '%s' - %s\n", + filename, + strerror(errno)); + return; + } - /* Otherwise create a new client request */ - if (use_this_rncptr == NULL) { - rncptr = (rssnetcfg *) malloc(sizeof(rssnetcfg)); - rncptr->ItemType = RSS_UNSET; - if (rncptr != NULL) { - rncptr->next = rnclist; - safestrncpy(rncptr->url, feedurl, sizeof rncptr->url); - rncptr->rooms = NULL; - rnclist = rncptr; - use_this_rncptr = rncptr; - } - } + if (server_shutting_down) + return; - /* Add the room name to the request */ - if (use_this_rncptr != NULL) { - if (use_this_rncptr->rooms == NULL) { - rncptr->rooms = strdup(qrbuf->QRname); - } - else { - len = strlen(use_this_rncptr->rooms) + strlen(qrbuf->QRname) + 5; - ptr = realloc(use_this_rncptr->rooms, len); - if (ptr != NULL) { - strcat(ptr, "|"); - strcat(ptr, qrbuf->QRname); - use_this_rncptr->rooms = ptr; - } - } - } - } + CfgData = NewStrBufPlain(NULL, statbuf.st_size + 1); + if (StrBufReadBLOB(CfgData, &fd, 1, statbuf.st_size, &Err) < 0) { + close(fd); + FreeStrBuf(&CfgData); + syslog(LOG_DEBUG, "ERROR: reading config '%s' - %s
\n", + filename, strerror(errno)); + return; } + close(fd); + if (server_shutting_down) + return; - fclose(fp); - + CfgPtr = NULL; + CfgType = NewStrBuf(); + Line = NewStrBufPlain(NULL, StrLength(CfgData)); + Done = 0; + while (!Done) + { + Done = StrBufSipLine(Line, CfgData, &CfgPtr) == 0; + if (StrLength(Line) > 0) + { + lPtr = NULL; + StrBufExtract_NextToken(CfgType, Line, &lPtr, '|'); + if (!strcasecmp("rssclient", ChrPtr(CfgType))) + { + if (Count == NULL) + { + Count = malloc(sizeof(rss_room_counter)); + Count->count = 0; + } + Count->count ++; + rncptr = (rss_aggregator *) malloc(sizeof(rss_aggregator)); + memset (rncptr, 0, sizeof(rss_aggregator)); + rncptr->roomlist_parts = 1; + rncptr->Url = NewStrBuf(); + StrBufExtract_NextToken(rncptr->Url, Line, &lPtr, '|'); + + pthread_mutex_lock(&RSSQueueMutex); + GetHash(RSSFetchUrls, SKEY(rncptr->Url), &vptr); + use_this_rncptr = (rss_aggregator *)vptr; + if (use_this_rncptr != NULL) + { + long *QRnumber; + StrBufAppendBufPlain(use_this_rncptr->rooms, + qrbuf->QRname, + -1, 0); + if (use_this_rncptr->roomlist_parts == 1) + { + use_this_rncptr->OtherQRnumbers = + NewHash(1, lFlathash); + } + QRnumber = (long*)malloc(sizeof(long)); + *QRnumber = qrbuf->QRnumber; + Put(use_this_rncptr->OtherQRnumbers, + LKEY(qrbuf->QRnumber), + QRnumber, + NULL); + use_this_rncptr->roomlist_parts++; + + pthread_mutex_unlock(&RSSQueueMutex); + + FreeStrBuf(&rncptr->Url); + free(rncptr); + rncptr = NULL; + continue; + } + pthread_mutex_unlock(&RSSQueueMutex); + + rncptr->ItemType = RSS_UNSET; + + rncptr->rooms = NewStrBufPlain(qrbuf->QRname, -1); + + pthread_mutex_lock(&RSSQueueMutex); + Put(RSSFetchUrls, SKEY(rncptr->Url), rncptr, DeleteRssCfg); + pthread_mutex_unlock(&RSSQueueMutex); + } + } + } + if (Count != NULL) + { + Count->QRnumber = qrbuf->QRnumber; + pthread_mutex_lock(&RSSQueueMutex); + syslog(LOG_DEBUG, "rssclient: [%ld] %s now starting.\n", + qrbuf->QRnumber, qrbuf->QRname); + Put(RSSQueueRooms, LKEY(qrbuf->QRnumber), Count, NULL); + pthread_mutex_unlock(&RSSQueueMutex); + } + FreeStrBuf(&CfgData); + FreeStrBuf(&CfgType); + FreeStrBuf(&Line); } /* * Scan for rooms that have RSS client requests configured */ -void rssclient_scan(void *args) { - static time_t last_run = 0L; +void rssclient_scan(void) { static int doing_rssclient = 0; - rssnetcfg *rptr = NULL; + rss_aggregator *rptr = NULL; + void *vrptr = NULL; + HashPos *it; + long len; + const char *Key; + + /* Run no more than once every 15 minutes. */ + if ((time(NULL) - last_run) < 900) { + return; + } /* - * This is a simple concurrency check to make sure only one rssclient run - * is done at a time. We could do this with a mutex, but since we + * This is a simple concurrency check to make sure only one rssclient + * run is done at a time. We could do this with a mutex, but since we * don't really require extremely fine granularity here, we'll do it * with a static variable instead. */ - if (doing_rssclient) return NULL; + if (doing_rssclient) return; doing_rssclient = 1; + if ((GetCount(RSSQueueRooms) > 0) || (GetCount(RSSFetchUrls) > 0)) + return; - CtdlLogPrintf(CTDL_DEBUG, "rssclient started\n"); + become_session(&rss_CC); + syslog(LOG_DEBUG, "rssclient started\n"); CtdlForEachRoom(rssclient_scan_room, NULL); - while (rnclist != NULL && !CtdlThreadCheckStop()) { - rss_do_fetching(rnclist); - rptr = rnclist; - rnclist = rnclist->next; - if (rptr->rooms != NULL) free(rptr->rooms); - free(rptr); + pthread_mutex_lock(&RSSQueueMutex); + + it = GetNewHashPos(RSSFetchUrls, 0); + while (!server_shutting_down && + GetNextHashPos(RSSFetchUrls, it, &len, &Key, &vrptr) && + (vrptr != NULL)) { + rptr = (rss_aggregator *)vrptr; + if (!rss_do_fetching(rptr)) + UnlinkRSSAggregator(rptr); } + DeleteHashPos(&it); + pthread_mutex_unlock(&RSSQueueMutex); - CtdlLogPrintf(CTDL_DEBUG, "rssclient ended\n"); - last_run = time(NULL); + syslog(LOG_DEBUG, "rssclient ended\n"); doing_rssclient = 0; - return NULL; + return; +} + +void rss_cleanup(void) +{ + /* citthread_mutex_destroy(&RSSQueueMutex); TODO */ + DeleteHash(&RSSFetchUrls); + DeleteHash(&RSSQueueRooms); } @@ -1321,91 +734,13 @@ CTDL_MODULE_INIT(rssclient) { if (threading) { - CtdlLogPrintf(CTDL_INFO, "%s\n", curl_version()); + CtdlFillSystemContext(&rss_CC, "rssclient"); + pthread_mutex_init(&RSSQueueMutex, NULL); + RSSQueueRooms = NewHash(1, lFlathash); + RSSFetchUrls = NewHash(1, NULL); + syslog(LOG_INFO, "%s\n", curl_version()); CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER); + CtdlRegisterCleanupHook(rss_cleanup); } - - StartHandlers = NewHash(1, NULL); - EndHandlers = NewHash(1, NULL); - - AddRSSStartHandler(RSS_item_rss_start, RSS_UNSET, HKEY("rss")); - AddRSSStartHandler(RSS_item_rdf_start, RSS_UNSET, HKEY("rdf")); - AddRSSStartHandler(ATOM_item_feed_start, RSS_UNSET, HKEY("feed")); - AddRSSStartHandler(RSS_item_item_start, RSS_RSS, HKEY("item")); - AddRSSStartHandler(ATOM_item_entry_start, RSS_ATOM, HKEY("entry")); - AddRSSStartHandler(ATOM_item_link_start, RSS_ATOM, HKEY("link")); - - AddRSSEndHandler(ATOMRSS_item_title_end, RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title")); - AddRSSEndHandler(RSS_item_guid_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("guid")); - AddRSSEndHandler(ATOM_item_id_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("id")); - AddRSSEndHandler(RSS_item_link_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("link")); -#if 0 -// hm, rss to the comments of that blog, might be interesting in future, but... - AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("commentrss")); -// comment count... - AddRSSEndHandler(RSS_item_relink_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("comments")); -#endif - AddRSSEndHandler(RSSATOM_item_title_end, RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title")); - AddRSSEndHandler(ATOM_item_content_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("content")); - AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_ATOM|RSS_REQUIRE_BUF, HKEY("encoded")); - AddRSSEndHandler(ATOM_item_summary_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("summary")); - AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("description")); - AddRSSEndHandler(ATOM_item_published_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("published")); - AddRSSEndHandler(ATOM_item_updated_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("updated")); - AddRSSEndHandler(RSS_item_pubdate_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("pubdate")); - AddRSSEndHandler(RSS_item_date_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("date")); - AddRSSEndHandler(RSS_item_author_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("author")); - AddRSSEndHandler(RSS_item_creator_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("creator")); -/* */ - AddRSSEndHandler(ATOM_item_email_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("email")); - AddRSSEndHandler(ATOM_item_name_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("name")); - AddRSSEndHandler(ATOM_item_uri_end, RSS_ATOM|RSS_REQUIRE_BUF, HKEY("uri")); -/* */ - AddRSSEndHandler(RSS_item_item_end, RSS_RSS, HKEY("item")); - AddRSSEndHandler(RSS_item_rss_end, RSS_RSS, HKEY("rss")); - AddRSSEndHandler(RSS_item_rdf_end, RSS_RSS, HKEY("rdf")); - AddRSSEndHandler(ATOM_item_entry_end, RSS_ATOM, HKEY("entry")); - - -/* at the start of atoms:
  • link to resource
  • ignore them. */ - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("seq")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("seq")); - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("li")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("li")); - -/* links to other feed generators... */ - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("feedflare")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("feedflare")); - AddRSSStartHandler(RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("browserfriendly")); - AddRSSEndHandler (RSSATOM_item_ignore, RSS_RSS|RSS_ATOM, HKEY("browserfriendly")); - - KnownNameSpaces = NewHash(1, NULL); - Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearch/1.1/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearchrss/1.0/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://backend.userland.com/creativeCommonsRssModule"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/atom/ns#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/dc/elements/1.1/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/content/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/slash/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/syndication/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://purl.org/syndication/thread/1.0"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://rssnamespace.org/feedburner/ext/1.0"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://schemas.google.com/g/2005"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://webns.net/mvcb/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://web.resource.org/cc/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://wellformedweb.org/CommentAPI/"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.georss.org/georss"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/xhtml"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/2003/01/geo/wgs84_pos#"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("http://www.w3.org/2005/Atom"), NULL, reference_free_handler); - Put(KnownNameSpaces, HKEY("urn:flickr:"), NULL, reference_free_handler); -#if 0 - /* we don't like these namespaces because of they shadow our usefull parameters. */ - Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler); -#endif return "rssclient"; }