Move back to a single-threaded structure for the RSS feed puller
[citadel.git] / citadel / modules / rssclient / serv_rssclient.c
1 /*
2  * Bring external RSS feeds into rooms.
3  *
4  * Copyright (c) 2007-2017 by the citadel.org team
5  *
6  * This program is open source software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 3.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  */
14
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <stdio.h>
18
19 #if TIME_WITH_SYS_TIME
20 # include <sys/time.h>
21 # include <time.h>
22 #else
23 # if HAVE_SYS_TIME_H
24 #include <sys/time.h>
25 # else
26 #include <time.h>
27 # endif
28 #endif
29
30 #include <ctype.h>
31 #include <string.h>
32 #include <errno.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <expat.h>
36 #include <curl/curl.h>
37 #include <libcitadel.h>
38 #include "citadel.h"
39 #include "server.h"
40 #include "citserver.h"
41 #include "support.h"
42 #include "config.h"
43 #include "threads.h"
44 #include "ctdl_module.h"
45 #include "msgbase.h"
46 #include "parsedate.h"
47 #include "database.h"
48 #include "citadel_dirs.h"
49 #include "md5.h"
50 #include "context.h"
51 #include "rss_atom_parser.h"
52
// One destination room for a feed.  The rooms that subscribe to the same
// feed are chained together as a singly linked list.
struct rssroom {
        struct rssroom *next;           // following entry in the list, NULL at the end
        char *room;                     // room name; a heap copy that is released together with the entry
};
57
// One feed waiting to be downloaded, together with every room that wants
// its contents.  Feeds are chained together as a singly linked list.
struct rssurl {
        struct rssurl *next;            // following feed in the list, NULL at the end
        char *url;                      // feed URL; a heap copy that is released together with the entry
        struct rssroom *rooms;          // head of the list of destination rooms for this feed
};
63
64
65 time_t last_run = 0L;
66 struct CitContext rss_CC;
67 struct rssurl *rsstodo = NULL;
68
69
70 // Add a feed/room pair into the todo list
71 //
72 void rssclient_push_todo(char *rssurl, char *roomname)
73 {
74         struct rssurl *r = NULL;
75         struct rssurl *thisone = NULL;
76         struct rssroom *newroom = NULL;
77
78         syslog(LOG_DEBUG, "rssclient_push_todo(%s, %s)", rssurl, roomname);
79
80         for (r=rsstodo; r!=NULL; r=r->next) {
81                 if (!strcasecmp(r->url, rssurl)) {
82                         thisone = r;
83                 }
84         }
85         if (thisone == NULL) {
86                 thisone = malloc(sizeof(struct rssurl));
87                 thisone->url = strdup(rssurl);
88                 thisone->rooms = NULL;
89                 thisone->next = rsstodo;
90                 rsstodo = thisone;
91         }
92
93         newroom = malloc(sizeof(struct rssroom));
94         newroom->room = strdup(roomname);
95         newroom->next = thisone->rooms;
96         thisone->rooms = newroom;
97 }
98
99
100 // Callback function for curl
101 //
102 size_t rss_pof_write_data(void *buffer, size_t size, size_t nmemb, void *userp)
103 {
104         StrBuf *Downloaded = (StrBuf *)userp;
105         size_t bytes = size * nmemb;
106         StrBufAppendBufPlain(Downloaded, buffer, bytes, 0);
107         return(bytes);
108 }
109
110
111 // pull one feed (possibly multiple rooms)
112 //
113 void rss_pull_one_feed(struct rssurl *url)
114 {
115         struct rssroom *r;
116         CURL *curl;
117         CURLcode res;
118         StrBuf *Downloaded = NULL;
119
120         syslog(LOG_DEBUG, "rss_pull_one_feed(%s)", url->url);
121
122         curl = curl_easy_init();
123         if (!curl) {
124                 return;
125         }
126
127         Downloaded = NewStrBuf();
128
129         curl_easy_setopt(curl, CURLOPT_URL, url->url);
130         curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);                     // Follow redirects
131         curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_pof_write_data);      // What to do with downloaded data
132         curl_easy_setopt(curl, CURLOPT_WRITEDATA, Downloaded);                  // Give it our StrBuf to work with
133         curl_easy_setopt(curl, CURLOPT_TIMEOUT, 20L);                           // Time out after 20 seconds
134         res = curl_easy_perform(curl);                                          // Perform the request
135         if (res != CURLE_OK) {
136                 syslog(LOG_WARNING, "Failed to load feed: %s", curl_easy_strerror(res));
137         }
138         curl_easy_cleanup(curl);
139
140         // FIXME parse the feed, dummeh ... it's in ChrPtr(Downloaded)
141
142         for (r=url->rooms; r!=NULL; r=r->next) {
143                 syslog(LOG_DEBUG, "Saving item to %s", r->room);
144                 // FIXME save to rooms
145         }
146
147         FreeStrBuf(&Downloaded);
148 }
149
150
151 // We have a list, now download the feeds
152 //
153 void rss_pull_feeds(void)
154 {
155         struct rssurl *r;
156         struct rssroom *rr;
157
158         while (rsstodo != NULL) {
159                 rss_pull_one_feed(rsstodo);
160                 r = rsstodo;
161                 rsstodo = rsstodo->next;
162                 while (r->rooms != NULL) {
163                         rr = r->rooms;
164                         r->rooms = r->rooms->next;
165                         free(rr->room);
166                         free(rr);
167                 }
168                 free(r->url);
169                 free(r);
170         }
171 }
172
173
174 // Scan a room's netconfig looking for RSS feed parsing requests
175 //
176 void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
177 {
178         char *serialized_config = NULL;
179         int num_configs = 0;
180         char cfgline[SIZ];
181         int i = 0;
182
183         serialized_config = LoadRoomNetConfigFile(qrbuf->QRnumber);
184         if (!serialized_config) {
185                 return;
186         }
187
188         num_configs = num_tokens(serialized_config, '\n');
189         for (i=0; i<num_configs; ++i) {
190                 extract_token(cfgline, serialized_config, i, '\n', sizeof cfgline);
191                 if (!strncasecmp(cfgline, HKEY("rssclient|"))) {
192                         strcpy(cfgline, &cfgline[10]);
193                         char *vbar = strchr(cfgline, '|');
194                         if (vbar != NULL) {
195                                 *vbar = 0;
196                         }
197                         rssclient_push_todo(cfgline, qrbuf->QRname);
198                 }
199         }
200
201         free(serialized_config);
202 }
203
204
205 /*
206  * Scan for rooms that have RSS client requests configured
207  */
208 void rssclient_scan(void) {
209         time_t now = time(NULL);
210
211         /* Run no more than once every 15 minutes. */
212         if ((now - last_run) < 900) {
213                 syslog(LOG_DEBUG,
214                               "Client: polling interval not yet reached; last run was %ldm%lds ago",
215                               ((now - last_run) / 60),
216                               ((now - last_run) % 60)
217                 );
218                 return;
219         }
220
221         become_session(&rss_CC);
222         syslog(LOG_DEBUG, "rssclient started");
223         CtdlForEachRoom(rssclient_scan_room, NULL);
224         rss_pull_feeds();
225         syslog(LOG_DEBUG, "rssclient ended");
226         last_run = time(NULL);
227         return;
228 }
229
230
231 CTDL_MODULE_INIT(rssclient)
232 {
233         if (!threading)
234         {
235                 syslog(LOG_INFO, "%s", curl_version());
236                 CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER, PRIO_AGGR + 300);
237         }
238         else
239         {
240                 CtdlFillSystemContext(&rss_CC, "rssclient");
241         }
242         return "rssclient";
243 }
244