more work on libev'ing the rss-reader.
[citadel.git] / citadel / modules / rssclient / serv_rssclient.c
1 /*
2  * Bring external RSS feeds into rooms.
3  *
4  * Copyright (c) 2007-2010 by the citadel.org team
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20
21 #include <stdlib.h>
22 #include <unistd.h>
23 #include <stdio.h>
24
25 #if TIME_WITH_SYS_TIME
26 # include <sys/time.h>
27 # include <time.h>
28 #else
29 # if HAVE_SYS_TIME_H
30 #  include <sys/time.h>
31 # else
32 #  include <time.h>
33 # endif
34 #endif
35
36 #include <ctype.h>
37 #include <string.h>
38 #include <errno.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <expat.h>
42 #include <curl/curl.h>
43 #include <libcitadel.h>
44 #include "citadel.h"
45 #include "server.h"
46 #include "citserver.h"
47 #include "support.h"
48 #include "config.h"
49 #include "threads.h"
50 #include "ctdl_module.h"
51 #include "msgbase.h"
52 #include "parsedate.h"
53 #include "database.h"
54 #include "citadel_dirs.h"
55 #include "md5.h"
56 #include "context.h"
57 #include "event_client.h"
58 #include "rss_atom_parser.h"
59
60
61 #define TMP_MSGDATA 0xFF               /* cm_fields[] slot in which rss_save_item() stores the assembled message body (a StrBuf) */
62 #define TMP_SHORTER_URL_OFFSET 0xFE    /* cm_fields[] slot in which rss_save_item() stores the length of the HTML preamble preceding the description */
63 #define TMP_SHORTER_URLS 0xFD          /* cm_fields[] slot in which rss_save_item() stores the result of GetShorterUrls() */
64
65
66 struct rssnetcfg *rnclist = NULL;      /* head of the feed request list: filled by rssclient_scan_room(), drained by rssclient_scan() */
67 void AppendLink(StrBuf *Message, StrBuf *link, StrBuf *LinkTitle, const char *Title)
68 {
69         if (StrLength(link) > 0)
70         {
71                 StrBufAppendBufPlain(Message, HKEY("<a href=\""), 0);
72                 StrBufAppendBuf(Message, link, 0);
73                 StrBufAppendBufPlain(Message, HKEY("\">"), 0);
74                 if (StrLength(LinkTitle) > 0)
75                         StrBufAppendBuf(Message, LinkTitle, 0);
76                 else if ((Title != NULL) && !IsEmptyStr(Title))
77                         StrBufAppendBufPlain(Message, Title, -1, 0);
78                 else
79                         StrBufAppendBuf(Message, link, 0);
80                 StrBufAppendBufPlain(Message, HKEY("</a><br>\n"), 0);
81         }
82 }
83
/*
 * Submit a message via CtdlSubmitMsg(), free it, then write a use-table
 * record with cdb_store() and free `ut`.
 *
 * NOTE(review): as written the body does not match the parameter list:
 *  - it refers to `msg`, `recp` and `utmsgid`, none of which is declared in
 *    this function (the message parameter is spelled `Msg`);
 *  - `&ut` is the address of the pointer parameter, not of the record it
 *    points to, yet sizeof(struct UseTable) bytes are stored from it;
 *  - `ri` is not used.
 * Confirm the intended inputs before wiring this function up.
 */
84 void RSSSaveMessage(struct CtdlMessage *Msg, rss_item *ri, struct UseTable *ut)
85 {
86
87         CtdlSubmitMsg(msg, recp, NULL, 0);
88         CtdlFreeMessage(msg);
89
90         /* write the uidl to the use table so we don't store this item again */
91         cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) );
92         free(ut);       /* the record passed in by the caller is released here */
93 }
94
95
/*
 * Receive a translated RSS message together with its recipient list.
 *
 * Placeholder: the body is still empty, so neither argument is read, stored
 * or freed here.  Judging by its name and its call from rss_save_item() it
 * is meant to save the message -- TODO confirm once implemented.
 *
 * The original old-style definition had no return type and untyped
 * (implicit int) parameters although it is called with two pointers; the
 * definition now states the types explicitly.
 */
void rss_save_msg(struct CtdlMessage *msg, struct recptypes *recp)
{
	(void) msg;
	(void) recp;
}
101
102 /*
103  * Commit a fetched and parsed RSS item to disk
104  */
105 void rss_save_item(rss_item *ri)
106 {
107
108         struct MD5Context md5context;
109         u_char rawdigest[MD5_DIGEST_LEN];
110         int i;
111         char utmsgid[SIZ];
112         struct cdbdata *cdbut;
113         struct UseTable ut;
114         struct CtdlMessage *msg;
115         struct recptypes *recp = NULL;
116         int msglen = 0;
117         StrBuf *Message;
118         AsyncIO *OtherIO;
119
120         recp = (struct recptypes *) malloc(sizeof(struct recptypes));
121         if (recp == NULL) return;
122         memset(recp, 0, sizeof(struct recptypes));
123         memset(&ut, 0, sizeof(struct UseTable));
124         recp->recp_room = strdup(ri->roomlist);
125         recp->num_room = num_tokens(ri->roomlist, '|');
126         recp->recptypes_magic = RECPTYPES_MAGIC;
127    
128         /* Construct a GUID to use in the S_USETABLE table.
129          * If one is not present in the item itself, make one up.
130          */
131         if (ri->guid != NULL) {
132                 StrBufSpaceToBlank(ri->guid);
133                 StrBufTrim(ri->guid);
134                 snprintf(utmsgid, sizeof utmsgid, "rss/%s", ChrPtr(ri->guid));
135         }
136         else {
137                 MD5Init(&md5context);
138                 if (ri->title != NULL) {
139                         MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->title), StrLength(ri->title));
140                 }
141                 if (ri->link != NULL) {
142                         MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->link), StrLength(ri->link));
143                 }
144                 MD5Final(rawdigest, &md5context);
145                 for (i=0; i<MD5_DIGEST_LEN; i++) {
146                         sprintf(&utmsgid[i*2], "%02X", (unsigned char) (rawdigest[i] & 0xff));
147                         utmsgid[i*2] = tolower(utmsgid[i*2]);
148                         utmsgid[(i*2)+1] = tolower(utmsgid[(i*2)+1]);
149                 }
150                 strcat(utmsgid, "_rss2ctdl");
151         }
152
153         /* translate Item into message. */
154         CtdlLogPrintf(CTDL_DEBUG, "RSS: translating item...\n");
155         if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY(""));
156         StrBufSpaceToBlank(ri->description);
157         msg = malloc(sizeof(struct CtdlMessage));
158         memset(msg, 0, sizeof(struct CtdlMessage));
159         msg->cm_magic = CTDLMESSAGE_MAGIC;
160         msg->cm_anon_type = MES_NORMAL;
161         msg->cm_format_type = FMT_RFC822;
162
163         if (ri->guid != NULL) {
164                 msg->cm_fields['E'] = strdup(ChrPtr(ri->guid));
165         }
166
167         if (ri->author_or_creator != NULL) {
168                 char *From;
169                 StrBuf *Encoded = NULL;
170                 int FromAt;
171                         
172                 From = html_to_ascii(ChrPtr(ri->author_or_creator),
173                                      StrLength(ri->author_or_creator), 
174                                      512, 0);
175                 StrBufPlain(ri->author_or_creator, From, -1);
176                 StrBufTrim(ri->author_or_creator);
177                 free(From);
178
179                 FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL;
180                 if (!FromAt && StrLength (ri->author_email) > 0)
181                 {
182                         StrBufRFC2047encode(&Encoded, ri->author_or_creator);
183                         msg->cm_fields['A'] = SmashStrBuf(&Encoded);
184                         msg->cm_fields['P'] = SmashStrBuf(&ri->author_email);
185                 }
186                 else
187                 {
188                         if (FromAt)
189                                 msg->cm_fields['P'] = SmashStrBuf(&ri->author_or_creator);
190                         else 
191                         {
192                                 StrBufRFC2047encode(&Encoded, ri->author_or_creator);
193                                 msg->cm_fields['A'] = SmashStrBuf(&Encoded);
194                                 msg->cm_fields['P'] = strdup("rss@localhost");
195                         }
196                 }
197         }
198         else {
199                 msg->cm_fields['A'] = strdup("rss");
200         }
201
202         msg->cm_fields['N'] = strdup(NODENAME);
203         if (ri->title != NULL) {
204                 long len;
205                 char *Sbj;
206                 StrBuf *Encoded, *QPEncoded;
207
208                 QPEncoded = NULL;
209                 StrBufSpaceToBlank(ri->title);
210                 len = StrLength(ri->title);
211                 Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0);
212                 len = strlen(Sbj);
213                 if (Sbj[len - 1] == '\n')
214                 {
215                         len --;
216                         Sbj[len] = '\0';
217                 }
218                 Encoded = NewStrBufPlain(Sbj, len);
219                 free(Sbj);
220
221                 StrBufTrim(Encoded);
222                 StrBufRFC2047encode(&QPEncoded, Encoded);
223
224                 msg->cm_fields['U'] = SmashStrBuf(&QPEncoded);
225                 FreeStrBuf(&Encoded);
226         }
227         msg->cm_fields['T'] = malloc(64);
228         snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate);
229         if (ri->channel_title != NULL) {
230                 if (StrLength(ri->channel_title) > 0) {
231                         msg->cm_fields['O'] = strdup(ChrPtr(ri->channel_title));
232                 }
233         }
234         if (ri->link == NULL) 
235                 ri->link = NewStrBufPlain(HKEY(""));
236
237
238         msg->cm_fields[TMP_SHORTER_URLS] = GetShorterUrls(ri->description);
239
240         strcpy(ut->ut_msgid, utmsgid);
241         ut->ut_timestamp = time(NULL);
242
243         msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ;
244
245         Message = NewStrBufPlain(NULL, StrLength(ri->description));
246
247         StrBufPlain(Message, HKEY(
248                             "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n"
249                             "<html><body>\n"));
250         msg->cm_fields[TMP_SHORTER_URL_OFFSET] = StrLength(Message);
251         StrBufAppendBuf(Message, ri->description, 0);
252         StrBufAppendBufPlain(Message, HKEY("<br><br>\n"), 0);
253
254         AppendLink(Message, ri->link, ri->linkTitle, NULL);
255         AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this");
256         StrBufAppendBufPlain(Message, HKEY("</body></html>\n"), 0);
257
258
259         msg->cm_fields[TMP_MSGDATA] = Message;
260         
261
262         OtherIO = malloc(sizeof(AsyncIO));
263         memset(OtherIO, 0, sizeof(AsyncIO));
264         OtherIO->AsyncMsg = msg;
265         OtherIO->AsyncRcp = recp;
266
267         rss_save_msg(msg, recp);
268 //      msg->cm_fields['M'] = SmashStrBuf(&Message);
269
270         // TODO: reenable me    ExpandShortUrls(ri->description);
271
272 ///     free_recipients(recp);
273 }
274
275
276
277
278         /* Find out if we've already seen this item * /
279
280         cdbut = cdb_fetch(CDB_USETABLE, utmsgid, strlen(utmsgid));
281 #ifndef DEBUG_RSS
282         if (cdbut != NULL) {
283                 /* Item has already been seen * /
284                 CtdlLogPrintf(CTDL_DEBUG, "%s has already been seen\n", utmsgid);
285                 cdb_free(cdbut);
286
287                 /* rewrite the record anyway, to update the timestamp * /
288                 strcpy(ut.ut_msgid, utmsgid);
289                 ut.ut_timestamp = time(NULL);
290                 cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) );
291         }
292         else
293 #endif
294         {
295 */
296
297
298 /*
299  * Begin a feed parse
300  */
301 void rss_do_fetching(rssnetcfg *Cfg) {
302         rsscollection *rssc;
303         rss_item *ri;
304                 
305         time_t now;
306
307         CURL *chnd;
308         AsyncIO *IO;
309
310         now = time(NULL);
311
312         if ((Cfg->next_poll != 0) && (now < Cfg->next_poll))
313                 return;
314
315
316         ri = (rss_item*) malloc(sizeof(rss_item));
317         rssc = (rsscollection*) malloc(sizeof(rsscollection));
318         memset(ri, 0, sizeof(rss_item));
319         memset(rssc, 0, sizeof(rsscollection));
320         rssc->Item = ri;
321         rssc->Cfg = Cfg;
322         IO = &rssc->IO;
323         IO->CitContext = CloneContext(CC);
324         IO->Data = rssc;
325         ri->roomlist = Cfg->rooms;
326
327
328         CtdlLogPrintf(CTDL_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(Cfg->Url));
329         ParseURL(&IO->ConnectMe, Cfg->Url, 80);
330         CurlPrepareURL(IO->ConnectMe);
331
332         if (! evcurl_init(IO, 
333 //                        Ctx, 
334                           NULL,
335                           "Citadel RSS Client",
336                           ParseRSSReply))
337         {
338                 CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n");
339 //              goto abort;
340         }
341         chnd = IO->HttpReq.chnd;
342
343         evcurl_handle_start(IO);
344 }
345
346
347
348
349 /*
350  * Scan a room's netconfig to determine whether it is requesting any RSS feeds
351  */
352 void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
353 {
354         char filename[PATH_MAX];
355         char buf[1024];
356         char instr[32];
357         FILE *fp;
358         char feedurl[256];
359         rssnetcfg *rncptr = NULL;
360         rssnetcfg *use_this_rncptr = NULL;
361         int len = 0;
362         char *ptr = NULL;
363
364         assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir);
365
366         if (CtdlThreadCheckStop())
367                 return;
368                 
369         /* Only do net processing for rooms that have netconfigs */
370         fp = fopen(filename, "r");
371         if (fp == NULL) {
372                 return;
373         }
374
375         while (fgets(buf, sizeof buf, fp) != NULL && !CtdlThreadCheckStop()) {
376                 buf[strlen(buf)-1] = 0;
377
378                 extract_token(instr, buf, 0, '|', sizeof instr);
379                 if (!strcasecmp(instr, "rssclient")) {
380
381                         use_this_rncptr = NULL;
382
383                         extract_token(feedurl, buf, 1, '|', sizeof feedurl);
384
385                         /* If any other rooms have requested the same feed, then we will just add this
386                          * room to the target list for that client request.
387                          */
388                         for (rncptr=rnclist; rncptr!=NULL; rncptr=rncptr->next) {
389                                 if (!strcmp(ChrPtr(rncptr->Url), feedurl)) {
390                                         use_this_rncptr = rncptr;
391                                 }
392                         }
393
394                         /* Otherwise create a new client request */
395                         if (use_this_rncptr == NULL) {
396                                 rncptr = (rssnetcfg *) malloc(sizeof(rssnetcfg));
397                                 memset(rncptr, 0, sizeof(rssnetcfg));
398                                 rncptr->ItemType = RSS_UNSET;
399                                 if (rncptr != NULL) {
400                                         rncptr->next = rnclist;
401                                         rncptr->Url = NewStrBufPlain(feedurl, -1);
402                                         rncptr->rooms = NULL;
403                                         rnclist = rncptr;
404                                         use_this_rncptr = rncptr;
405                                 }
406                         }
407
408                         /* Add the room name to the request */
409                         if (use_this_rncptr != NULL) {
410                                 if (use_this_rncptr->rooms == NULL) {
411                                         rncptr->rooms = strdup(qrbuf->QRname);
412                                 }
413                                 else {
414                                         len = strlen(use_this_rncptr->rooms) + strlen(qrbuf->QRname) + 5;
415                                         ptr = realloc(use_this_rncptr->rooms, len);
416                                         if (ptr != NULL) {
417                                                 strcat(ptr, "|");
418                                                 strcat(ptr, qrbuf->QRname);
419                                                 use_this_rncptr->rooms = ptr;
420                                         }
421                                 }
422                         }
423                 }
424
425         }
426
427         fclose(fp);
428
429 }
430
431 /*
432  * Scan for rooms that have RSS client requests configured
433  */
434 void rssclient_scan(void) {
435         static time_t last_run = 0L;
436         static int doing_rssclient = 0;
437         rssnetcfg *rptr = NULL;
438
439         /*
440          * This is a simple concurrency check to make sure only one rssclient run
441          * is done at a time.  We could do this with a mutex, but since we
442          * don't really require extremely fine granularity here, we'll do it
443          * with a static variable instead.
444          */
445         if (doing_rssclient) return;
446         doing_rssclient = 1;
447
448         CtdlLogPrintf(CTDL_DEBUG, "rssclient started\n");
449         CtdlForEachRoom(rssclient_scan_room, NULL);
450
451         while (rnclist != NULL && !CtdlThreadCheckStop()) {
452                 rss_do_fetching(rnclist);
453                 rptr = rnclist;
454                 rnclist = rnclist->next;
455                 if (rptr->rooms != NULL) free(rptr->rooms);
456                 free(rptr);
457         }
458
459         CtdlLogPrintf(CTDL_DEBUG, "rssclient ended\n");
460         last_run = time(NULL);
461         doing_rssclient = 0;
462         return;
463 }
464
465
466 CTDL_MODULE_INIT(rssclient)
467 {
468         if (threading)
469         {
470                 CtdlLogPrintf(CTDL_INFO, "%s\n", curl_version());
471                 CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER);
472         }
473         return "rssclient";
474 }