3435eb4dd213aa42b598195113db1697e80e49a7
[citadel.git] / citadel / modules / rssclient / serv_rssclient.c
1 /*
2  * Bring external RSS feeds into rooms.
3  *
4  * Copyright (c) 2007-2017 by the citadel.org team
5  *
6  * This program is open source software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 3.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  */
14
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <stdio.h>
18
19 #if TIME_WITH_SYS_TIME
20 # include <sys/time.h>
21 # include <time.h>
22 #else
23 # if HAVE_SYS_TIME_H
24 #include <sys/time.h>
25 # else
26 #include <time.h>
27 # endif
28 #endif
29
30 #include <ctype.h>
31 #include <string.h>
32 #include <errno.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <expat.h>
36 #include <curl/curl.h>
37 #include <libcitadel.h>
38 #include "citadel.h"
39 #include "server.h"
40 #include "citserver.h"
41 #include "support.h"
42 #include "config.h"
43 #include "threads.h"
44 #include "ctdl_module.h"
45 #include "msgbase.h"
46 #include "parsedate.h"
47 #include "database.h"
48 #include "citadel_dirs.h"
49 #include "md5.h"
50 #include "context.h"
51
// One room that wants the items of a feed.
// Nodes form a singly linked list hanging off a struct rssurl.
struct rssroom {
        struct rssroom *next;           // following room in the list, or NULL at the end
        char *room;                     // heap-allocated room name (released with free())
};
56
// One feed URL waiting to be downloaded, together with the rooms that
// subscribe to it.  Nodes form the singly linked "todo" list.
struct rssurl {
        struct rssurl *next;            // following feed in the todo list, or NULL at the end
        char *url;                      // heap-allocated feed URL (released with free())
        struct rssroom *rooms;          // list of rooms that receive this feed
};
62
63 struct rssparser {
64         StrBuf *CData;
65         struct CtdlMessage *msg;
66 };
67
68 time_t last_run = 0L;
69 struct CitContext rss_CC;
70 struct rssurl *rsstodo = NULL;
71
72
73 // This is what RSS probably looks like
74 //
75 //      <item>
76 //              <title><![CDATA[Felicity flexes action chops]]></title>
77 //              <link>http://video.foxnews.com/v/5336254459001/</link>
78 //              <author>foxnewsonline@foxnews.com (Fox News Online)</author>
79 //              <description />
80 //              <pubDate>Sat, 25 Feb 2017 14:28:01 EST</pubDate>
81 //      </item>
82
83
84 // This handler is called whenever an XML tag opens.
85 //
86 void rss_start_element(void *data, const char *el, const char **attribute)
87 {
88         struct rssparser *r = (struct rssparser *)data;
89
90         if (
91                 (!strcasecmp(el, "entry"))
92                 || (!strcasecmp(el, "item"))
93         ) {
94                 // this is the start of a new item(rss) or entry(atom)
95                 if (r->msg != NULL) {
96                         CM_Free(r->msg);
97                         r->msg = NULL;
98                 }
99                 r->msg = malloc(sizeof(struct CtdlMessage));
100                 memset(r->msg, 0, sizeof(struct CtdlMessage));
101                 r->msg->cm_magic = CTDLMESSAGE_MAGIC;
102                 r->msg->cm_anon_type = MES_NORMAL;
103                 r->msg->cm_format_type = FMT_RFC822;
104         }
105 }
106
107
108 // This handler is called whenever an XML tag closes.
109 //
110 void rss_end_element(void *data, const char *el)
111 {
112         struct rssparser *r = (struct rssparser *)data;
113
114         if (                                                    // end of a new item(rss) or entry(atom)
115                 (!strcasecmp(el, "entry"))
116                 || (!strcasecmp(el, "item"))
117         ) {
118
119                 // FIXME check the use table
120
121                 if (r->msg != NULL) {
122                         // FIXME WRITE IT TO THE ROOMS HERE, DUMMEH
123                         CM_Free(r->msg);
124                         r->msg = NULL;
125                 }
126         }
127
128         else if (!strcasecmp(el, "title")) {                    // item subject (rss and atom)
129                 if ((r->msg != NULL) && (r->msg->cm_fields[eMsgSubject] == NULL)) {
130                         r->msg->cm_fields[eMsgSubject] = strdup(ChrPtr(r->CData));
131                 }
132         }
133
134         else if (!strcasecmp(el, "author")) {                   // author of item (rss and maybe atom)
135                 if ((r->msg != NULL) && (r->msg->cm_fields[eAuthor] == NULL)) {
136                         r->msg->cm_fields[eAuthor] = strdup(ChrPtr(r->CData));
137                 }
138         }
139
140         else if (!strcasecmp(el, "pubdate")) {                  // date/time stamp (rss) Sat, 25 Feb 2017 14:28:01 EST
141                 // FIXME parse it
142         }
143
144         else if (!strcasecmp(el, "updated")) {                  // date/time stamp (atom) 2003-12-13T18:30:02Z
145                 // FIXME parse it
146         }
147
148         if (r->CData != NULL) {
149                 FreeStrBuf(&r->CData);
150                 r->CData = NULL;
151         }
152 }
153
154
155 // This handler is called whenever data appears between opening and closing tags.
156 //
157 void rss_handle_data(void *data, const char *content, int length)
158 {
159         struct rssparser *r = (struct rssparser *)data;
160
161         if (r->CData == NULL) {
162                 r->CData = NewStrBuf();
163         }
164
165         StrBufAppendBufPlain(r->CData, content, length, 0);
166 }
167
168
169 // Feed has been downloaded, now parse it.
170 //
171 void rss_parse_feed(StrBuf *Feed)
172 {
173         struct rssparser r;
174
175         memset(&r, 0, sizeof r);
176         XML_Parser p = XML_ParserCreateNS("UTF-8", ':');
177         XML_SetElementHandler(p, rss_start_element, rss_end_element);
178         XML_SetCharacterDataHandler(p, rss_handle_data);
179         XML_SetUserData(p, (void *)&r);
180         XML_Parse(p, ChrPtr(Feed), StrLength(Feed), XML_TRUE);
181         XML_ParserFree(p);
182 }
183
184
185 // Add a feed/room pair into the todo list
186 //
187 void rssclient_push_todo(char *rssurl, char *roomname)
188 {
189         struct rssurl *r = NULL;
190         struct rssurl *thisone = NULL;
191         struct rssroom *newroom = NULL;
192
193         syslog(LOG_DEBUG, "rssclient_push_todo(%s, %s)", rssurl, roomname);
194
195         for (r=rsstodo; r!=NULL; r=r->next) {
196                 if (!strcasecmp(r->url, rssurl)) {
197                         thisone = r;
198                 }
199         }
200         if (thisone == NULL) {
201                 thisone = malloc(sizeof(struct rssurl));
202                 thisone->url = strdup(rssurl);
203                 thisone->rooms = NULL;
204                 thisone->next = rsstodo;
205                 rsstodo = thisone;
206         }
207
208         newroom = malloc(sizeof(struct rssroom));
209         newroom->room = strdup(roomname);
210         newroom->next = thisone->rooms;
211         thisone->rooms = newroom;
212 }
213
214
215 // Callback function for curl
216 //
217 size_t rss_pof_write_data(void *buffer, size_t size, size_t nmemb, void *userp)
218 {
219         StrBuf *Downloaded = (StrBuf *)userp;
220         size_t bytes = size * nmemb;
221         StrBufAppendBufPlain(Downloaded, buffer, bytes, 0);
222         return(bytes);
223 }
224
225
226 // pull one feed (possibly multiple rooms)
227 //
228 void rss_pull_one_feed(struct rssurl *url)
229 {
230         struct rssroom *r;
231         CURL *curl;
232         CURLcode res;
233         StrBuf *Downloaded = NULL;
234
235         syslog(LOG_DEBUG, "rss_pull_one_feed(%s)", url->url);
236
237         curl = curl_easy_init();
238         if (!curl) {
239                 return;
240         }
241
242         Downloaded = NewStrBuf();
243
244         curl_easy_setopt(curl, CURLOPT_URL, url->url);
245         curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);                     // Follow redirects
246         curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_pof_write_data);      // What to do with downloaded data
247         curl_easy_setopt(curl, CURLOPT_WRITEDATA, Downloaded);                  // Give it our StrBuf to work with
248         curl_easy_setopt(curl, CURLOPT_TIMEOUT, 20L);                           // Time out after 20 seconds
249         res = curl_easy_perform(curl);                                          // Perform the request
250         if (res != CURLE_OK) {
251                 syslog(LOG_WARNING, "Failed to load feed: %s", curl_easy_strerror(res));
252         }
253         curl_easy_cleanup(curl);
254
255         rss_parse_feed(Downloaded);                                             // parse the feed
256
257         for (r=url->rooms; r!=NULL; r=r->next) {                                // we might move this somewhere else
258                 syslog(LOG_DEBUG, "Saving item to %s", r->room);
259                 // FIXME save to rooms
260         }
261
262         FreeStrBuf(&Downloaded);
263 }
264
265
266 // We have a list, now download the feeds
267 //
268 void rss_pull_feeds(void)
269 {
270         struct rssurl *r;
271         struct rssroom *rr;
272
273         while (rsstodo != NULL) {
274                 rss_pull_one_feed(rsstodo);
275                 r = rsstodo;
276                 rsstodo = rsstodo->next;
277                 while (r->rooms != NULL) {
278                         rr = r->rooms;
279                         r->rooms = r->rooms->next;
280                         free(rr->room);
281                         free(rr);
282                 }
283                 free(r->url);
284                 free(r);
285         }
286 }
287
288
289 // Scan a room's netconfig looking for RSS feed parsing requests
290 //
291 void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
292 {
293         char *serialized_config = NULL;
294         int num_configs = 0;
295         char cfgline[SIZ];
296         int i = 0;
297
298         serialized_config = LoadRoomNetConfigFile(qrbuf->QRnumber);
299         if (!serialized_config) {
300                 return;
301         }
302
303         num_configs = num_tokens(serialized_config, '\n');
304         for (i=0; i<num_configs; ++i) {
305                 extract_token(cfgline, serialized_config, i, '\n', sizeof cfgline);
306                 if (!strncasecmp(cfgline, HKEY("rssclient|"))) {
307                         strcpy(cfgline, &cfgline[10]);
308                         char *vbar = strchr(cfgline, '|');
309                         if (vbar != NULL) {
310                                 *vbar = 0;
311                         }
312                         rssclient_push_todo(cfgline, qrbuf->QRname);
313                 }
314         }
315
316         free(serialized_config);
317 }
318
319
320 /*
321  * Scan for rooms that have RSS client requests configured
322  */
323 void rssclient_scan(void) {
324         time_t now = time(NULL);
325
326         /* Run no more than once every 15 minutes. */
327         if ((now - last_run) < 900) {
328                 syslog(LOG_DEBUG,
329                               "Client: polling interval not yet reached; last run was %ldm%lds ago",
330                               ((now - last_run) / 60),
331                               ((now - last_run) % 60)
332                 );
333                 return;
334         }
335
336         become_session(&rss_CC);
337         syslog(LOG_DEBUG, "rssclient started");
338         CtdlForEachRoom(rssclient_scan_room, NULL);
339         rss_pull_feeds();
340         syslog(LOG_DEBUG, "rssclient ended");
341         last_run = time(NULL);
342         return;
343 }
344
345
346 CTDL_MODULE_INIT(rssclient)
347 {
348         if (!threading)
349         {
350                 syslog(LOG_INFO, "%s", curl_version());
351                 CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER, PRIO_AGGR + 300);
352         }
353         else
354         {
355                 CtdlFillSystemContext(&rss_CC, "rssclient");
356         }
357         return "rssclient";
358 }
359