X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Frss_atom_parser.c;h=84747d5d453de6d40ec27df136e67418af4ecaf3;hb=77c8e0da599c3aecc05f790d21558e4f2c61f4c9;hp=503de8e6c382abf2df9bac4674c034ca91e9f58f;hpb=7ed238dbbcd600038309afb183d3c31fd8283325;p=citadel.git diff --git a/citadel/modules/rssclient/rss_atom_parser.c b/citadel/modules/rssclient/rss_atom_parser.c index 503de8e6c..84747d5d4 100644 --- a/citadel/modules/rssclient/rss_atom_parser.c +++ b/citadel/modules/rssclient/rss_atom_parser.c @@ -58,6 +58,8 @@ #include "event_client.h" #include "rss_atom_parser.h" +extern pthread_mutex_t RSSQueueMutex; + HashList *StartHandlers = NULL; HashList *EndHandlers = NULL; HashList *KnownNameSpaces = NULL; @@ -135,15 +137,14 @@ void flush_rss_item(rss_item *ri) void rss_xml_start(void *data, const char *supplied_el, const char **attr) { rss_xml_handler *h; - rsscollection *rssc = (rsscollection*) data; - rssnetcfg *Cfg = rssc->Cfg; + rss_aggregator *rssc = (rss_aggregator*) data; rss_item *ri = rssc->Item; void *pv; const char *pel; char *sep = NULL; /* Axe the namespace, we don't care about it */ -/// CtdlLogPrintf(0, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el); +/// syslog(LOG_DEBUG, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el); pel = supplied_el; while (sep = strchr(pel, ':'), sep) { pel = sep + 1; @@ -159,7 +160,7 @@ void rss_xml_start(void *data, const char *supplied_el, const char **attr) &v)) { #ifdef DEBUG_RSS - CtdlLogPrintf(0, "RSS: START ignoring because of wrong namespace [%s] = [%s]\n", + syslog(LOG_DEBUG, "RSS: START ignoring because of wrong namespace [%s]\n", supplied_el); #endif return; @@ -173,36 +174,35 @@ void rss_xml_start(void *data, const char *supplied_el, const char **attr) rssc->Current = h = (rss_xml_handler*) pv; if (((h->Flags & RSS_UNSET) != 0) && - (Cfg->ItemType == RSS_UNSET)) + (rssc->ItemType == RSS_UNSET)) { - h->Handler(rssc->CData, ri, Cfg, attr); + h->Handler(rssc->CData, ri, rssc, attr); } else if (((h->Flags & RSS_RSS) != 0) && - (Cfg->ItemType == RSS_RSS)) + (rssc->ItemType == RSS_RSS)) { - h->Handler(rssc->CData, ri, Cfg, attr); + h->Handler(rssc->CData, ri, rssc, attr); } else if (((h->Flags & RSS_ATOM) != 0) && - (Cfg->ItemType == RSS_ATOM)) + (rssc->ItemType == RSS_ATOM)) { - h->Handler(rssc->CData, ri, Cfg, attr); + h->Handler(rssc->CData, ri, rssc, attr); } #ifdef DEBUG_RSS else - CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); + syslog(LOG_DEBUG, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); #endif } #ifdef DEBUG_RSS else - CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); + syslog(LOG_DEBUG, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el); #endif } void rss_xml_end(void *data, const char *supplied_el) { rss_xml_handler *h; - rsscollection *rssc = (rsscollection*) data; - rssnetcfg *Cfg = rssc->Cfg; + rss_aggregator *rssc = (rss_aggregator*) data; rss_item *ri = rssc->Item; const char *pel; char *sep = NULL; @@ -213,7 +213,7 @@ void rss_xml_end(void *data, const char *supplied_el) while (sep = strchr(pel, ':'), sep) { pel = sep + 1; } -// CtdlLogPrintf(0, "RSS: END %s...\n", el); +// syslog(LOG_DEBUG, "RSS: END %s...\n", el); if (pel != supplied_el) { void *v; @@ -224,7 +224,7 @@ void rss_xml_end(void *data, const char *supplied_el) &v)) { #ifdef DEBUG_RSS - CtdlLogPrintf(0, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n", + syslog(LOG_DEBUG, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n", supplied_el, ChrPtr(rssc->CData)); #endif FlushStrBuf(rssc->CData); @@ -239,28 +239,28 @@ void rss_xml_end(void *data, const char *supplied_el) h = (rss_xml_handler*) pv; if (((h->Flags & RSS_UNSET) != 0) && - (Cfg->ItemType == RSS_UNSET)) + (rssc->ItemType == RSS_UNSET)) { - h->Handler(rssc->CData, ri, Cfg, NULL); + h->Handler(rssc->CData, ri, rssc, NULL); } else if (((h->Flags & RSS_RSS) != 0) && - (Cfg->ItemType == RSS_RSS)) + (rssc->ItemType == RSS_RSS)) { - h->Handler(rssc->CData, ri, Cfg, NULL); + h->Handler(rssc->CData, ri, rssc, NULL); } else if (((h->Flags & RSS_ATOM) != 0) && - (Cfg->ItemType == RSS_ATOM)) + (rssc->ItemType == RSS_ATOM)) { - h->Handler(rssc->CData, ri, Cfg, NULL); + h->Handler(rssc->CData, ri, rssc, NULL); } #ifdef DEBUG_RSS else - CtdlLogPrintf(0, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); + syslog(LOG_DEBUG, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); #endif } #ifdef DEBUG_RSS else - CtdlLogPrintf(0, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); + syslog(LOG_DEBUG, "RSS: END unhandled: [%s] [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData)); #endif FlushStrBuf(rssc->CData); rssc->Current = NULL; @@ -270,39 +270,39 @@ void rss_xml_end(void *data, const char *supplied_el) -void RSS_item_rss_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSS_item_rss_start (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { - CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RSS feed.\n"); + syslog(LOG_DEBUG, "RSS: This is an RSS feed.\n"); Cfg->ItemType = RSS_RSS; } -void RSS_item_rdf_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSS_item_rdf_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { - CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RDF feed.\n"); + syslog(LOG_DEBUG, "RSS: This is an RDF feed.\n"); Cfg->ItemType = RSS_RSS; } -void ATOM_item_feed_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void ATOM_item_feed_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { - CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an ATOM feed.\n"); + syslog(LOG_DEBUG, "RSS: This is an ATOM feed.\n"); Cfg->ItemType = RSS_ATOM; } -void RSS_item_item_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSS_item_item_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { ri->item_tag_nesting ++; flush_rss_item(ri); } -void ATOM_item_entry_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void ATOM_item_entry_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { /* Atom feed... */ ri->item_tag_nesting ++; flush_rss_item(ri); } -void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { int i; const char *pHref = NULL; @@ -374,7 +374,7 @@ void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const ch -void ATOMRSS_item_title_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void ATOMRSS_item_title_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { if ((ri->item_tag_nesting == 0) && (StrLength(CData) > 0)) { NewStrBufDupAppendFlush(&ri->channel_title, CData, NULL, 0); @@ -382,14 +382,14 @@ void ATOMRSS_item_title_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const c } } -void RSS_item_guid_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSS_item_guid_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { if (StrLength(CData) > 0) { NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0); } } -void ATOM_item_id_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void ATOM_item_id_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { if (StrLength(CData) > 0) { NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0); @@ -397,14 +397,14 @@ void ATOM_item_id_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** } -void RSS_item_link_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSS_item_link_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { if (StrLength(CData) > 0) { NewStrBufDupAppendFlush(&ri->link, CData, NULL, 0); StrBufTrim(ri->link); } } -void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { if (StrLength(CData) > 0) { NewStrBufDupAppendFlush(&ri->reLink, CData, NULL, 0); @@ -412,7 +412,7 @@ void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char } } -void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { if (StrLength(CData) > 0) { NewStrBufDupAppendFlush(&ri->title, CData, NULL, 0); @@ -420,7 +420,7 @@ void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const } } -void ATOM_item_content_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void ATOM_item_content_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { long olen = StrLength (ri->description); long clen = StrLength (CData); @@ -437,7 +437,7 @@ void ATOM_item_content_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const c } } } -void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { /* this can contain an abstract of the article. but we don't want to verwrite a full document if we already have it. */ if ((StrLength(CData) > 0) && (StrLength(ri->description) == 0)) @@ -447,7 +447,7 @@ void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const c } } -void RSS_item_description_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSS_item_description_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { long olen = StrLength (ri->description); long clen = StrLength (CData); @@ -465,7 +465,7 @@ void RSS_item_description_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, cons } } -void ATOM_item_published_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void ATOM_item_published_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { if (StrLength(CData) > 0) { StrBufTrim(CData); @@ -473,7 +473,7 @@ void ATOM_item_published_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const } } -void ATOM_item_updated_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void ATOM_item_updated_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { if (StrLength(CData) > 0) { StrBufTrim(CData); @@ -481,7 +481,7 @@ void ATOM_item_updated_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const c } } -void RSS_item_pubdate_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSS_item_pubdate_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { if (StrLength(CData) > 0) { StrBufTrim(CData); @@ -490,7 +490,7 @@ void RSS_item_pubdate_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const ch } -void RSS_item_date_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSS_item_date_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { if (StrLength(CData) > 0) { StrBufTrim(CData); @@ -500,7 +500,7 @@ void RSS_item_date_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char* -void RSS_item_author_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSS_item_author_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { if (StrLength(CData) > 0) { NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0); @@ -509,7 +509,7 @@ void RSS_item_author_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char } -void ATOM_item_name_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void ATOM_item_name_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { if (StrLength(CData) > 0) { NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0); @@ -517,7 +517,7 @@ void ATOM_item_name_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char* } } -void ATOM_item_email_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void ATOM_item_email_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { if (StrLength(CData) > 0) { NewStrBufDupAppendFlush(&ri->author_email, CData, NULL, 0); @@ -525,7 +525,7 @@ void ATOM_item_email_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char } } -void RSS_item_creator_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSS_item_creator_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { if ((StrLength(CData) > 0) && (StrLength(ri->author_or_creator) == 0)) @@ -536,7 +536,7 @@ void RSS_item_creator_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const cha } -void ATOM_item_uri_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void ATOM_item_uri_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { if (StrLength(CData) > 0) { NewStrBufDupAppendFlush(&ri->author_url, CData, NULL, 0); @@ -544,33 +544,33 @@ void ATOM_item_uri_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** } } -void RSS_item_item_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSS_item_item_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { --ri->item_tag_nesting; - rss_save_item(ri); + rss_save_item(ri, Cfg); } -void ATOM_item_entry_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void ATOM_item_entry_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { --ri->item_tag_nesting; - rss_save_item(ri); + rss_save_item(ri, Cfg); } -void RSS_item_rss_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSS_item_rss_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { -// CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n"); +// syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n"); ri->done_parsing = 1; } -void RSS_item_rdf_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSS_item_rdf_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { -// CtdlLogPrintf(CTDL_DEBUG, "End of feed detected. Closing parser.\n"); +// syslog(LOG_DEBUG, "End of feed detected. Closing parser.\n"); ri->done_parsing = 1; } -void RSSATOM_item_ignore(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr) +void RSSATOM_item_ignore(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr) { } @@ -581,7 +581,7 @@ void RSSATOM_item_ignore(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char */ void rss_xml_cdata_start(void *data) { - rsscollection *rssc = (rsscollection*) data; + rss_aggregator *rssc = (rss_aggregator*) data; FlushStrBuf(rssc->CData); } @@ -591,7 +591,7 @@ void rss_xml_cdata_end(void *data) } void rss_xml_chardata(void *data, const XML_Char *s, int len) { - rsscollection *rssc = (rsscollection*) data; + rss_aggregator *rssc = (rss_aggregator*) data; StrBufAppendBufPlain (rssc->CData, s, len, 0); } @@ -609,11 +609,23 @@ size_t rss_libcurl_callback(void *ptr, size_t size, size_t nmemb, void *stream) eNextState ParseRSSReply(AsyncIO *IO) { - rsscollection *rssc; + StrBuf *Buf; + rss_aggregator *rssc; rss_item *ri; const char *at; char *ptr; long len; + const char *Key; + + + if (IO->HttpReq.httpcode != 200) + { + + EV_syslog(LOG_DEBUG, "need a 200, got a %ld !\n", + IO->HttpReq.httpcode); +// TODO: aide error message with rate limit + return eAbort; + } rssc = IO->Data; ri = rssc->Item; @@ -639,14 +651,16 @@ eNextState ParseRSSReply(AsyncIO *IO) else ptr = "UTF-8"; + syslog(LOG_DEBUG, "RSS: Now parsing [%s] \n", ChrPtr(rssc->Url)); rssc->xp = XML_ParserCreateNS(ptr, ':'); if (!rssc->xp) { - CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n"); - goto shutdown; + syslog(LOG_DEBUG, "Cannot create XML parser!\n"); + return eAbort; } FlushStrBuf(rssc->Key); + rssc->Messages = NewHash(1, Flathash); XML_SetElementHandler(rssc->xp, rss_xml_start, rss_xml_end); XML_SetCharacterDataHandler(rssc->xp, rss_xml_chardata); XML_SetUserData(rssc->xp, rssc); @@ -663,24 +677,31 @@ eNextState ParseRSSReply(AsyncIO *IO) XML_Parse(rssc->xp, "", 0, 1); - CtdlLogPrintf(CTDL_ALERT, "RSS: XML Status [%s] \n", + syslog(LOG_DEBUG, "RSS: XML Status [%s] \n", XML_ErrorString( XML_GetErrorCode(rssc->xp))); -shutdown: XML_ParserFree(rssc->xp); - flush_rss_item(ri); FreeStrBuf(&rssc->CData); FreeStrBuf(&rssc->Key); - ///Cfg->next_poll = time(NULL) + config.c_net_freq; + Buf = NewStrBufDup(rssc->rooms); + rssc->recp.recp_room = SmashStrBuf(&Buf); + rssc->recp.num_room = rssc->roomlist_parts; + rssc->recp.recptypes_magic = RECPTYPES_MAGIC; + + rssc->Pos = GetNewHashPos(rssc->Messages, 1); - return eTerminateConnection; + ///Cfg->next_poll = time(NULL) + config.c_net_freq; + if (GetNextHashPos(rssc->Messages, rssc->Pos, &len, &Key, (void**) &rssc->ThisMsg)) + return QueueDBOperation(IO, RSS_FetchNetworkUsetableEntry); + else + return eAbort; } -void rss_cleanup(void) +void rss_parser_cleanup(void) { DeleteHash(&StartHandlers); DeleteHash(&EndHandlers); @@ -774,7 +795,7 @@ CTDL_MODULE_INIT(rssparser) /* we don't like these namespaces because of they shadow our usefull parameters. */ Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler); #endif - CtdlRegisterCleanupHook(rss_cleanup); + CtdlRegisterCleanupHook(rss_parser_cleanup); } return "rssparser"; }