X-Git-Url: https://code.citadel.org/?a=blobdiff_plain;f=citadel%2Fmodules%2Frssclient%2Fserv_rssclient.c;h=72ce11ed280f8164cdaa538ce73b0bf6994d7529;hb=684e3a06c4d2ab35e39760f328a0d354ad330622;hp=909587d6284f21d50852117f38870c56317196b4;hpb=58f686487cf5f14d5da5357c67f2e6624dbde027;p=citadel.git diff --git a/citadel/modules/rssclient/serv_rssclient.c b/citadel/modules/rssclient/serv_rssclient.c index 909587d62..72ce11ed2 100644 --- a/citadel/modules/rssclient/serv_rssclient.c +++ b/citadel/modules/rssclient/serv_rssclient.c @@ -48,7 +48,7 @@ #include "citadel_dirs.h" #include "md5.h" #include "context.h" -#include "rss_atom_parser.h" +#include "internet_addressing.h" struct rssroom { struct rssroom *next; @@ -61,12 +61,247 @@ struct rssurl { struct rssroom *rooms; }; +struct rssparser { + StrBuf *CData; + struct CtdlMessage *msg; + char *link; + char *description; + char *item_id; + struct rssroom *rooms; +}; time_t last_run = 0L; struct CitContext rss_CC; struct rssurl *rsstodo = NULL; +// This handler is called whenever an XML tag opens. +// +void rss_start_element(void *data, const char *el, const char **attribute) +{ + struct rssparser *r = (struct rssparser *)data; + int i; + + if ( + (!strcasecmp(el, "entry")) + || (!strcasecmp(el, "item")) + ) { + // this is the start of a new item(rss) or entry(atom) + if (r->msg != NULL) { + CM_Free(r->msg); + r->msg = NULL; + } + r->msg = malloc(sizeof(struct CtdlMessage)); + memset(r->msg, 0, sizeof(struct CtdlMessage)); + r->msg->cm_magic = CTDLMESSAGE_MAGIC; + r->msg->cm_anon_type = MES_NORMAL; + r->msg->cm_format_type = FMT_RFC822; + } + + else if (!strcasecmp(el, "link")) { // atom feeds have the link as an attribute + for(i = 0; attribute[i]; i += 2) { + if (!strcasecmp(attribute[i], "href")) { + if (r->link != NULL) { + free(r->link); + r->link = NULL; + } + r->link = strdup(attribute[i+1]); + striplt(r->link); + } + } + } +} + + +// This handler is called whenever an XML tag closes. +// +void rss_end_element(void *data, const char *el) +{ + struct rssparser *r = (struct rssparser *)data; + + if ( // end of a new item(rss) or entry(atom) + (!strcasecmp(el, "entry")) + || (!strcasecmp(el, "item")) + ) { + + if (r->msg != NULL) { // Save the message to the rooms + + // use the link as an item id if nothing else is available + if ((r->item_id == NULL) && (r->link != NULL)) { + r->item_id = strdup(r->link); + } + + // check the use table + StrBuf *u = NewStrBuf(); + StrBufAppendPrintf(u, "rss/%s", r->item_id); + int already_seen = CheckIfAlreadySeen(u); + FreeStrBuf(&u); + + if (already_seen == 0) { + + // Compose the message text + StrBuf *TheMessage = NewStrBuf(); + StrBufAppendPrintf(TheMessage, + "Content-type: text/html\n\n" + "\n\n" + "" + ); + + if (r->description != NULL) { + StrBufAppendPrintf(TheMessage, "%s

\r\n", r->description); + free(r->description); + r->description = NULL; + } + + if (r->link != NULL) { + StrBufAppendPrintf(TheMessage, "%s\r\n", r->link, r->link); + free(r->link); + r->link = NULL; + } + + StrBufAppendPrintf(TheMessage, "\r\n"); + CM_SetField(r->msg, eMesageText, ChrPtr(TheMessage), StrLength(TheMessage)); + FreeStrBuf(&TheMessage); + + if (CM_IsEmpty(r->msg, eAuthor)) { + CM_SetField(r->msg, eAuthor, HKEY("rss")); + } + + if (CM_IsEmpty(r->msg, eTimestamp)) { + CM_SetFieldLONG(r->msg, eTimestamp, time(NULL)); + } + + // Save it to the room(s) + struct rssroom *rr = NULL; + long msgnum = (-1); + for (rr=r->rooms; rr!=NULL; rr=rr->next) { + if (rr == r->rooms) { + msgnum = CtdlSubmitMsg(r->msg, NULL, rr->room, 0); + } + else { + CtdlSaveMsgPointerInRoom(rr->room, msgnum, 0, NULL); + } + syslog(LOG_DEBUG, "Saved message %ld to %s", msgnum, rr->room); + } + } + else { + syslog(LOG_DEBUG, "%s was already seen", r->item_id); + } + + CM_Free(r->msg); + r->msg = NULL; + } + + if (r->item_id != NULL) { + free(r->item_id); + r->item_id = NULL; + } + } + + else if (!strcasecmp(el, "title")) { // item subject (rss and atom) + if ((r->msg != NULL) && (CM_IsEmpty(r->msg, eMsgSubject))) { + CM_SetField(r->msg, eMsgSubject, ChrPtr(r->CData), StrLength(r->CData)); + striplt(r->msg->cm_fields[eMsgSubject]); + } + } + + else if (!strcasecmp(el, "author")) { // author of item (rss and maybe atom) + if ((r->msg != NULL) && (CM_IsEmpty(r->msg, eAuthor))) { + CM_SetField(r->msg, eAuthor, ChrPtr(r->CData), StrLength(r->CData)); + striplt(r->msg->cm_fields[eAuthor]); + } + } + + else if (!strcasecmp(el, "pubdate")) { // date/time stamp (rss) Sat, 25 Feb 2017 14:28:01 EST + if ((r->msg)&&(r->msg->cm_fields[eTimestamp]==NULL)) { + CM_SetFieldLONG(r->msg, eTimestamp, parsedate(ChrPtr(r->CData))); + } + } + + else if (!strcasecmp(el, "updated")) { // date/time stamp (atom) 2003-12-13T18:30:02Z + if ((r->msg)&&(r->msg->cm_fields[eTimestamp]==NULL)) { + struct tm t; + char zulu; + memset(&t, 0, sizeof t); + sscanf(ChrPtr(r->CData), "%d-%d-%dT%d:%d:%d%c", &t.tm_year, &t.tm_mon, &t.tm_mday, &t.tm_hour, &t.tm_min, &t.tm_sec, &zulu); + t.tm_year -= 1900; + t.tm_mon -= 1; + CM_SetFieldLONG(r->msg, eTimestamp, mktime(&t)); + } + } + + else if (!strcasecmp(el, "link")) { // link to story (rss) + if (r->link != NULL) { + free(r->link); + r->link = NULL; + } + r->link = strdup(ChrPtr(r->CData)); + striplt(r->link); + } + + else if ( + (!strcasecmp(el, "guid")) // unique item id (rss) + || (!strcasecmp(el, "id")) // unique item id (atom) + ) { + if (r->item_id != NULL) { + free(r->item_id); + r->item_id = NULL; + } + r->item_id = strdup(ChrPtr(r->CData)); + striplt(r->item_id); + } + + else if ( + (!strcasecmp(el, "description")) // message text (rss) + || (!strcasecmp(el, "summary")) // message text (atom) + || (!strcasecmp(el, "content")) // message text (atom) + ) { + if (r->description != NULL) { + free(r->description); + r->description = NULL; + } + r->description = strdup(ChrPtr(r->CData)); + striplt(r->description); + } + + if (r->CData != NULL) { + FreeStrBuf(&r->CData); + r->CData = NULL; + } +} + + +// This handler is called whenever data appears between opening and closing tags. +// +void rss_handle_data(void *data, const char *content, int length) +{ + struct rssparser *r = (struct rssparser *)data; + + if (r->CData == NULL) { + r->CData = NewStrBuf(); + } + + StrBufAppendBufPlain(r->CData, content, length, 0); +} + + +// Feed has been downloaded, now parse it. +// +void rss_parse_feed(StrBuf *Feed, struct rssroom *rooms) +{ + struct rssparser r; + + memset(&r, 0, sizeof r); + r.rooms = rooms; + XML_Parser p = XML_ParserCreate("UTF-8"); + XML_SetElementHandler(p, rss_start_element, rss_end_element); + XML_SetCharacterDataHandler(p, rss_handle_data); + XML_SetUserData(p, (void *)&r); + XML_Parse(p, ChrPtr(Feed), StrLength(Feed), XML_TRUE); + XML_ParserFree(p); +} + + // Add a feed/room pair into the todo list // void rssclient_push_todo(char *rssurl, char *roomname) @@ -97,22 +332,10 @@ void rssclient_push_todo(char *rssurl, char *roomname) } -// Callback function for curl -// -size_t rss_pof_write_data(void *buffer, size_t size, size_t nmemb, void *userp) -{ - StrBuf *Downloaded = (StrBuf *)userp; - size_t bytes = size * nmemb; - StrBufAppendBufPlain(Downloaded, buffer, bytes, 0); - return(bytes); -} - - // pull one feed (possibly multiple rooms) // void rss_pull_one_feed(struct rssurl *url) { - struct rssroom *r; CURL *curl; CURLcode res; StrBuf *Downloaded = NULL; @@ -127,8 +350,10 @@ void rss_pull_one_feed(struct rssurl *url) Downloaded = NewStrBuf(); curl_easy_setopt(curl, CURLOPT_URL, url->url); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); // Follow redirects - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_pof_write_data); // What to do with downloaded data + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, CurlFillStrBuf_callback); // What to do with downloaded data curl_easy_setopt(curl, CURLOPT_WRITEDATA, Downloaded); // Give it our StrBuf to work with curl_easy_setopt(curl, CURLOPT_TIMEOUT, 20L); // Time out after 20 seconds res = curl_easy_perform(curl); // Perform the request @@ -137,14 +362,8 @@ void rss_pull_one_feed(struct rssurl *url) } curl_easy_cleanup(curl); - // FIXME parse the feed, dummeh ... it's in ChrPtr(Downloaded) - - for (r=url->rooms; r!=NULL; r=r->next) { - syslog(LOG_DEBUG, "Saving item to %s", r->room); - // FIXME save to rooms - } - - FreeStrBuf(&Downloaded); + rss_parse_feed(Downloaded, url->rooms); // parse the feed + FreeStrBuf(&Downloaded); // free the downloaded feed data } @@ -211,9 +430,9 @@ void rssclient_scan(void) { /* Run no more than once every 15 minutes. */ if ((now - last_run) < 900) { syslog(LOG_DEBUG, - "Client: polling interval not yet reached; last run was %ldm%lds ago", - ((now - last_run) / 60), - ((now - last_run) % 60) + "Client: polling interval not yet reached; last run was %ldm%lds ago", + ((now - last_run) / 60), + ((now - last_run) % 60) ); return; }