2 * Bring external RSS feeds into rooms.
4 * Copyright (c) 2007-2017 by the citadel.org team
6 * This program is open source software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 3.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
19 #if TIME_WITH_SYS_TIME
20 # include <sys/time.h>
33 #include <sys/types.h>
36 #include <curl/curl.h>
37 #include <libcitadel.h>
40 #include "citserver.h"
44 #include "ctdl_module.h"
46 #include "parsedate.h"
48 #include "citadel_dirs.h"
60 struct rssroom *rooms;
65 struct CtdlMessage *msg;
68 struct rssroom *rooms;
// Session context for this module: rssclient_scan() passes it to become_session() and the
// module initializer fills it in with CtdlFillSystemContext().
72 struct CitContext rss_CC;
// Head of the list of feeds waiting to be fetched.  rssclient_push_todo() searches it and links
// new entries in front of it; rss_pull_feeds() walks it until it is NULL.
73 struct rssurl *rsstodo = NULL;
76 // This is what RSS probably looks like
79 // <title><![CDATA[Felicity flexes action chops]]></title>
80 // <link>http://video.foxnews.com/v/5336254459001/</link>
81 // <author>foxnewsonline@foxnews.com (Fox News Online)</author>
83 // <pubDate>Sat, 25 Feb 2017 14:28:01 EST</pubDate>
87 // This handler is called whenever an XML tag opens.
// data is the struct rssparser that rss_parse_feed() registers as the parser's user data,
// el is the element name, and attribute is a NULL-terminated array of alternating attribute
// names and values (the loop below steps through it two entries at a time).
// NOTE(review): several original lines of this function are not visible in this view; the
// comments describe only the statements that can be seen.
89 void rss_start_element(void *data, const char *el, const char **attribute)
91 struct rssparser *r = (struct rssparser *)data;
// Element names are compared without regard to case.
95 (!strcasecmp(el, "entry"))
96 || (!strcasecmp(el, "item"))
98 // this is the start of a new item(rss) or entry(atom)
// Begin a fresh, zero-filled message for this item and stamp its fixed header values.
// NOTE(review): the malloc() result is handed to memset() with no visible NULL check.
103 r->msg = malloc(sizeof(struct CtdlMessage));
104 memset(r->msg, 0, sizeof(struct CtdlMessage));
105 r->msg->cm_magic = CTDLMESSAGE_MAGIC;
106 r->msg->cm_anon_type = MES_NORMAL;
107 r->msg->cm_format_type = FMT_RFC822;
110 else if (!strcasecmp(el, "link")) { // atom feeds have the link as an attribute
111 for(i = 0; attribute[i]; i += 2) {
112 if (!strcasecmp(attribute[i], "href")) {
// A previously stored link is tested for before the new copy is taken.
// NOTE(review): the body of this test is not visible here - confirm it frees the old string.
113 if (r->link != NULL) {
// Keep a private copy of the href value (attribute[i] is the name, attribute[i+1] its value).
117 r->link = strdup(attribute[i+1]);
125 // This handler is called whenever an XML tag closes.
// data is the struct rssparser that rss_parse_feed() registers as the parser's user data and
// el is the name of the element being closed.  The element's text is read from r->CData,
// which rss_handle_data() fills in.
// NOTE(review): a number of original lines of this function are not visible in this view, so
// the comments below describe only the statements that can be seen.
// NOTE(review): r->CData is allocated only when character data arrives and is freed at the end
// of this handler, so it can be NULL when an empty element closes; the ChrPtr(r->CData) calls
// below then depend on ChrPtr() accepting NULL - confirm.
127 void rss_end_element(void *data, const char *el)
129 struct rssparser *r = (struct rssparser *)data;
131 if ( // end of a new item(rss) or entry(atom)
132 (!strcasecmp(el, "entry"))
133 || (!strcasecmp(el, "item"))
136 if (r->msg != NULL) { // Save the message to the rooms
138 // FIXME check the use table
// Assemble an HTML body: MIME header and page opening, then the description (if any),
// then the link (if any), then the closing tags.
140 StrBuf *TheMessage = NewStrBuf();
141 StrBufAppendPrintf(TheMessage,
142 "Content-type: text/html\n\n"
144 "<html><head></head><body>"
// The description is consumed here: appended to the body, then freed and cleared.
147 if (r->description != NULL) {
148 StrBufAppendPrintf(TheMessage, "%s<br><br>\r\n", r->description);
149 free(r->description);
150 r->description = NULL;
// NOTE(review): r->link originates from the feed and is written into both the href attribute
// and the link text without any HTML escaping.
153 if (r->link != NULL) {
154 StrBufAppendPrintf(TheMessage, "<a href=\"%s\">%s</a>\r\n", r->link, r->link);
159 StrBufAppendPrintf(TheMessage, "</body></html>\r\n");
// The finished text is written to the debug log and then released.
// NOTE(review): no visible line attaches it to r->msg or submits the message - confirm
// against the lines missing from this view.
161 syslog(LOG_DEBUG, "------------------\n%s\n------------------", ChrPtr(TheMessage));
162 FreeStrBuf(&TheMessage);
// Visit every room subscribed to this feed; the only visible action is a debug log entry.
167 for (rr=r->rooms; rr!=NULL; rr=rr->next) {
168 syslog(LOG_DEBUG, "Saving item %s to %s", r->link, rr->room);
// The subject and author are each taken from the first matching element seen while a message
// is open: the field must still be NULL for the copy to happen.
178 else if (!strcasecmp(el, "title")) { // item subject (rss and atom)
179 if ((r->msg != NULL) && (r->msg->cm_fields[eMsgSubject] == NULL)) {
180 r->msg->cm_fields[eMsgSubject] = strdup(ChrPtr(r->CData));
181 striplt(r->msg->cm_fields[eMsgSubject]);
185 else if (!strcasecmp(el, "author")) { // author of item (rss and maybe atom)
186 if ((r->msg != NULL) && (r->msg->cm_fields[eAuthor] == NULL)) {
187 r->msg->cm_fields[eAuthor] = strdup(ChrPtr(r->CData));
188 striplt(r->msg->cm_fields[eAuthor]);
// NOTE(review): the bodies of the two timestamp branches are not visible in this view.
192 else if (!strcasecmp(el, "pubdate")) { // date/time stamp (rss) Sat, 25 Feb 2017 14:28:01 EST
196 else if (!strcasecmp(el, "updated")) { // date/time stamp (atom) 2003-12-13T18:30:02Z
// NOTE(review): this branch also runs when an Atom <link href="..."/> element closes.  Its href
// was already stored by rss_start_element(), and with no character data the stored value
// appears to be replaced by the (empty) element text - confirm against the missing lines.
200 else if (!strcasecmp(el, "link")) { // link to story (rss)
201 if (r->link != NULL) {
205 r->link = strdup(ChrPtr(r->CData));
210 (!strcasecmp(el, "description")) // message text (rss)
211 || (!strcasecmp(el, "summary")) // message text (atom)
// Drop any earlier description so the newest one wins, then keep a copy of the element text.
213 if (r->description != NULL) {
214 free(r->description);
215 r->description = NULL;
217 r->description = strdup(ChrPtr(r->CData));
218 striplt(r->description);
// Discard the accumulated element text so it is not carried over to a later element.
221 if (r->CData != NULL) {
222 FreeStrBuf(&r->CData);
228 // This handler is called whenever data appears between opening and closing tags.
// data is the struct rssparser registered as parser user data; content points at the text and
// length is the number of bytes to take from it.
230 void rss_handle_data(void *data, const char *content, int length)
232 struct rssparser *r = (struct rssparser *)data;
// The buffer is created on first use; rss_end_element() frees it after each closing tag.
234 if (r->CData == NULL) {
235 r->CData = NewStrBuf();
// Append rather than overwrite, so text delivered across several calls accumulates.
238 StrBufAppendBufPlain(r->CData, content, length, 0);
242 // Feed has been downloaded, now parse it.
// Feed holds the downloaded document; rooms is the list of rooms subscribed to this feed.
// NOTE(review): the declaration of r and any use of the rooms parameter are not visible in this
// view (presumably rooms is stored in r for the handlers, which read r->rooms) - confirm.
244 void rss_parse_feed(StrBuf *Feed, struct rssroom *rooms)
// Start from an all-zero parser state so every pointer the handlers test begins as NULL.
248 memset(&r, 0, sizeof r);
// NOTE(review): the parser is created with namespace processing and ':' as the separator.
// Expat then reports names of namespaced elements as URI + ':' + local name, so the
// bare-name comparisons in the handlers (e.g. "entry") may not match for feeds that declare
// a default namespace - confirm.  The creation result is not checked for NULL in the
// visible lines.
250 XML_Parser p = XML_ParserCreateNS("UTF-8", ':');
251 XML_SetElementHandler(p, rss_start_element, rss_end_element);
252 XML_SetCharacterDataHandler(p, rss_handle_data);
253 XML_SetUserData(p, (void *)&r);
// The whole buffer is parsed in a single call, flagged as the final chunk.
// NOTE(review): the return value of XML_Parse() is ignored here.
254 XML_Parse(p, ChrPtr(Feed), StrLength(Feed), XML_TRUE);
259 // Add a feed/room pair into the todo list
// rssurl is the feed address and roomname the room that subscribes to it; both strings are
// copied with strdup(), so the caller keeps ownership of its arguments.
// NOTE(review): some original lines of this function are not visible in this view.
261 void rssclient_push_todo(char *rssurl, char *roomname)
263 struct rssurl *r = NULL;
264 struct rssurl *thisone = NULL;
265 struct rssroom *newroom = NULL;
267 syslog(LOG_DEBUG, "rssclient_push_todo(%s, %s)", rssurl, roomname);
// Look for an existing todo entry with the same URL, compared without regard to case.
// NOTE(review): the action taken on a match is not visible (presumably thisone = r).
269 for (r=rsstodo; r!=NULL; r=r->next) {
270 if (!strcasecmp(r->url, rssurl)) {
// No entry yet for this URL: build one with an empty room list, linked ahead of the current head.
// NOTE(review): the malloc()/strdup() results are used without a visible NULL check.
274 if (thisone == NULL) {
275 thisone = malloc(sizeof(struct rssurl));
276 thisone->url = strdup(rssurl);
277 thisone->rooms = NULL;
278 thisone->next = rsstodo;
// Put the room at the front of this feed's room list.
282 newroom = malloc(sizeof(struct rssroom));
283 newroom->room = strdup(roomname);
284 newroom->next = thisone->rooms;
285 thisone->rooms = newroom;
289 // Callback function for curl
// Installed through CURLOPT_WRITEFUNCTION in rss_pull_one_feed(); userp is the StrBuf given
// to CURLOPT_WRITEDATA.  Each call appends size * nmemb bytes from buffer to that StrBuf.
// NOTE(review): the return statement is not visible in this view.
291 size_t rss_pof_write_data(void *buffer, size_t size, size_t nmemb, void *userp)
293 StrBuf *Downloaded = (StrBuf *)userp;
294 size_t bytes = size * nmemb;
295 StrBufAppendBufPlain(Downloaded, buffer, bytes, 0);
300 // pull one feed (possibly multiple rooms)
// Downloads url->url into a StrBuf with libcurl, passes the result to rss_parse_feed() together
// with url->rooms, and then frees the buffer.
// NOTE(review): the declarations of curl and res, and the handling of a failed
// curl_easy_init(), are not visible in this view.
302 void rss_pull_one_feed(struct rssurl *url)
306 StrBuf *Downloaded = NULL;
308 syslog(LOG_DEBUG, "rss_pull_one_feed(%s)", url->url);
310 curl = curl_easy_init();
315 Downloaded = NewStrBuf();
317 curl_easy_setopt(curl, CURLOPT_URL, url->url);
318 curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); // Follow redirects
319 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_pof_write_data); // What to do with downloaded data
320 curl_easy_setopt(curl, CURLOPT_WRITEDATA, Downloaded); // Give it our StrBuf to work with
321 curl_easy_setopt(curl, CURLOPT_TIMEOUT, 20L); // Time out after 20 seconds
322 res = curl_easy_perform(curl); // Perform the request
// A failed transfer is reported in the log.
// NOTE(review): from the visible lines the parse below does not appear to be skipped after a
// failure, so a partial or empty download may be parsed - confirm.
323 if (res != CURLE_OK) {
324 syslog(LOG_WARNING, "Failed to load feed: %s", curl_easy_strerror(res));
326 curl_easy_cleanup(curl);
328 rss_parse_feed(Downloaded, url->rooms); // parse the feed
329 FreeStrBuf(&Downloaded); // free the downloaded feed data
333 // We have a list, now download the feeds
// Consumes the global rsstodo list from the head: each entry is fetched with
// rss_pull_one_feed(), the head is advanced, and the entry's room list is then walked.
// NOTE(review): the declaration and assignment of r and the statements that release each entry
// and room are not visible in this view - presumably r is the entry just unlinked; confirm.
335 void rss_pull_feeds(void)
340 while (rsstodo != NULL) {
341 rss_pull_one_feed(rsstodo);
343 rsstodo = rsstodo->next;
344 while (r->rooms != NULL) {
346 r->rooms = r->rooms->next;
356 // Scan a room's netconfig looking for RSS feed parsing requests
// qrbuf is the room being examined; data is not referenced in the visible lines.
// Every config line beginning with "rssclient|" contributes one feed/room pair to the todo list.
// NOTE(review): the declarations of cfgline, i and num_configs are not visible in this view.
358 void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
360 char *serialized_config = NULL;
// Nothing to do for rooms without a netconfig.
365 serialized_config = LoadRoomNetConfigFile(qrbuf->QRnumber);
366 if (!serialized_config) {
// The config is treated as newline-separated lines, extracted one at a time into cfgline.
370 num_configs = num_tokens(serialized_config, '\n');
371 for (i=0; i<num_configs; ++i) {
372 extract_token(cfgline, serialized_config, i, '\n', sizeof cfgline);
373 if (!strncasecmp(cfgline, HKEY("rssclient|"))) {
// Drop the 10-character "rssclient|" prefix so cfgline starts at the URL.
// NOTE(review): source and destination overlap here, which is undefined behavior for strcpy();
// memmove(cfgline, &cfgline[10], strlen(&cfgline[10]) + 1) would be the safe form.
374 strcpy(cfgline, &cfgline[10]);
// Locate the next '|' after the URL.
// NOTE(review): what is done with vbar is not visible (presumably the string is cut there).
375 char *vbar = strchr(cfgline, '|');
379 rssclient_push_todo(cfgline, qrbuf->QRname);
383 free(serialized_config);
388 * Scan for rooms that have RSS client requests configured
// Timer hook (registered with EVT_TIMER by the module initializer): collects the RSS
// subscriptions of all rooms, provided at least 900 seconds have passed since the last run.
// NOTE(review): the declaration of last_run, the statement that opens the log call below, and
// the lines between the room scan and the "ended" message are not visible in this view.
390 void rssclient_scan(void) {
391 time_t now = time(NULL);
393 /* Run no more than once every 15 minutes. */
// NOTE(review): the elapsed-time values passed for %ld below are time_t expressions; casting
// them to long would make the format/argument match portable.
394 if ((now - last_run) < 900) {
396 "Client: polling interval not yet reached; last run was %ldm%lds ago",
397 ((now - last_run) / 60),
398 ((now - last_run) % 60)
// Run under the module's own session context while visiting every room.
403 become_session(&rss_CC);
404 syslog(LOG_DEBUG, "rssclient started");
405 CtdlForEachRoom(rssclient_scan_room, NULL);
407 syslog(LOG_DEBUG, "rssclient ended");
// Remember when this pass finished; the interval check above measures from this time.
408 last_run = time(NULL);
// Module entry point: logs the libcurl version string, registers rssclient_scan() as an
// EVT_TIMER session hook at priority PRIO_AGGR + 300, and fills in the module's session
// context under the name "rssclient".
// NOTE(review): the lines surrounding these statements (any guard condition and the return
// value) are not visible in this view.
413 CTDL_MODULE_INIT(rssclient)
417 syslog(LOG_INFO, "%s", curl_version());
418 CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER, PRIO_AGGR + 300);
422 CtdlFillSystemContext(&rss_CC, "rssclient");