Cleanup of shutdown of event contexts
[citadel.git] / citadel / modules / rssclient / serv_rssclient.c
1 /*
2  * Bring external RSS feeds into rooms.
3  *
4  * Copyright (c) 2007-2010 by the citadel.org team
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20
21 #include <stdlib.h>
22 #include <unistd.h>
23 #include <stdio.h>
24
25 #if TIME_WITH_SYS_TIME
26 # include <sys/time.h>
27 # include <time.h>
28 #else
29 # if HAVE_SYS_TIME_H
30 #  include <sys/time.h>
31 # else
32 #  include <time.h>
33 # endif
34 #endif
35
36 #include <ctype.h>
37 #include <string.h>
38 #include <errno.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <expat.h>
42 #include <curl/curl.h>
43 #include <libcitadel.h>
44 #include "citadel.h"
45 #include "server.h"
46 #include "citserver.h"
47 #include "support.h"
48 #include "config.h"
49 #include "threads.h"
50 #include "ctdl_module.h"
51 #include "msgbase.h"
52 #include "parsedate.h"
53 #include "database.h"
54 #include "citadel_dirs.h"
55 #include "md5.h"
56 #include "context.h"
57 #include "event_client.h"
58 #include "rss_atom_parser.h"
59
60
/* Temporary identifiers; none of them is referenced in the part of the file shown here. */
61 #define TMP_MSGDATA 0xFF
62 #define TMP_SHORTER_URL_OFFSET 0xFE
63 #define TMP_SHORTER_URLS 0xFD
64
/*
 * Timestamp used by rssclient_scan() to run at most once every 900 seconds;
 * it is refreshed by UnlinkRSSAggregator() each time an aggregator is removed.
 */
65 time_t last_run = 0L;
66
67 pthread_mutex_t RSSQueueMutex; /* locks the access to the following vars: */
/* Keyed by room number; values are rss_room_counter records (see rssclient_scan_room). */
68 HashList *RSSQueueRooms = NULL; /* rss_room_counter */
/* Keyed by feed URL; values are rss_aggregator records destroyed through DeleteRssCfg. */
69 HashList *RSSFetchUrls = NULL; /*->rss_aggregator;->RefCount access locked*/
70
/* Forward declarations: both are handed to InitcURLIOStruct() in rss_do_fetching(). */
71 eNextState RSSAggregator_Terminate(AsyncIO *IO);
72 eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO);
/* Session context that rssclient_scan() switches to via become_session(). */
73 struct CitContext rss_CC;
74
/* Not referenced in the part of the file shown here. */
75 struct rssnetcfg *rnclist = NULL;
76
77
78 void DeleteRoomReference(long QRnumber)
79 {
80         HashPos *At;
81         long HKLen;
82         const char *HK;
83         void *vData = NULL;
84         rss_room_counter *pRoomC;
85
86         At = GetNewHashPos(RSSQueueRooms, 0);
87
88         if (GetHashPosFromKey(RSSQueueRooms, LKEY(QRnumber), At))
89         {
90                 GetHashPos(RSSQueueRooms, At, &HKLen, &HK, &vData);
91                 if (vData != NULL)
92                 {
93                         pRoomC = (rss_room_counter *) vData;
94                         pRoomC->count --;
95                         if (pRoomC->count == 0)
96                                 DeleteEntryFromHash(RSSQueueRooms, At);
97                 }
98         }
99         DeleteHashPos(&At);
100 }
101
102 void UnlinkRooms(rss_aggregator *Cfg)
103 {
104         DeleteRoomReference(Cfg->QRnumber);
105         if (Cfg->OtherQRnumbers != NULL)
106         {
107                 long HKLen;
108                 const char *HK;
109                 HashPos *At;
110                 void *vData;
111
112                 At = GetNewHashPos(Cfg->OtherQRnumbers, 0);
113                 while (! server_shutting_down &&
114                        GetNextHashPos(Cfg->OtherQRnumbers,
115                                       At,
116                                       &HKLen, &HK,
117                                       &vData) &&
118                        (vData != NULL))
119                 {
120                         long *lData = (long*) vData;
121                         DeleteRoomReference(*lData);
122                 }
123
124                 DeleteHashPos(&At);
125         }
126 }
127
128 void UnlinkRSSAggregator(rss_aggregator *Cfg)
129 {
130         HashPos *At;
131
132         UnlinkRooms(Cfg);
133
134         At = GetNewHashPos(RSSFetchUrls, 0);
135         if (GetHashPosFromKey(RSSFetchUrls, SKEY(Cfg->Url), At))
136         {
137                 DeleteEntryFromHash(RSSFetchUrls, At);
138         }
139         DeleteHashPos(&At);
140         last_run = time(NULL);
141 }
142
143
144 void DeleteRssCfg(void *vptr)
145 {
146         rss_aggregator *RSSAggr = (rss_aggregator *)vptr;
147         AsyncIO *IO = &RSSAggr->IO;
148         EVM_syslog(LOG_DEBUG, "RSS: destroying\n");
149
150         FreeStrBuf(&RSSAggr->Url);
151         FreeStrBuf(&RSSAggr->rooms);
152         FreeStrBuf(&RSSAggr->CData);
153         FreeStrBuf(&RSSAggr->Key);
154         DeleteHash(&RSSAggr->OtherQRnumbers);
155
156         DeleteHashPos (&RSSAggr->Pos);
157         DeleteHash (&RSSAggr->Messages);
158         if (RSSAggr->recp.recp_room != NULL)
159                 free(RSSAggr->recp.recp_room);
160
161
162         if (RSSAggr->Item != NULL)
163         {
164                 flush_rss_item(RSSAggr->Item);
165
166                 free(RSSAggr->Item);
167         }
168
169         FreeAsyncIOContents(&RSSAggr->IO);
170         free(RSSAggr);
171 }
172
173 eNextState RSSAggregator_Terminate(AsyncIO *IO)
174 {
175         rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data;
176
177         EVM_syslog(LOG_DEBUG, "RSS: Terminating.\n");
178
179
180         UnlinkRSSAggregator(RSSAggr);
181         return eAbort;
182 }
183 eNextState RSSAggregator_ShutdownAbort(AsyncIO *IO)
184 {
185         const char *pUrl;
186         rss_aggregator *RSSAggr = (rss_aggregator *)IO->Data;
187
188         pUrl = IO->ConnectMe->PlainUrl;
189         if (pUrl == NULL)
190                 pUrl = "";
191
192         EV_syslog(LOG_DEBUG, "RSS: Aborting by shutdown: %s.\n", pUrl);
193
194
195         UnlinkRSSAggregator(RSSAggr);
196         return eAbort;
197 }
198
199
200 eNextState AbortNetworkSaveMessage (AsyncIO *IO)
201 {
202         return eAbort; ///TODO
203 }
204
205 eNextState RSSSaveMessage(AsyncIO *IO)
206 {
207         long len;
208         const char *Key;
209         rss_aggregator *RSSAggr = (rss_aggregator *) IO->Data;
210
211         RSSAggr->ThisMsg->Msg.cm_fields['M'] = SmashStrBuf(&RSSAggr->ThisMsg->Message);
212
213         CtdlSubmitMsg(&RSSAggr->ThisMsg->Msg, &RSSAggr->recp, NULL, 0);
214
215         /* write the uidl to the use table so we don't store this item again */
216         cdb_store(CDB_USETABLE,
217                   SKEY(RSSAggr->ThisMsg->MsgGUID),
218                   &RSSAggr->ThisMsg->ut,
219                   sizeof(struct UseTable) );
220
221         if (GetNextHashPos(RSSAggr->Messages,
222                            RSSAggr->Pos,
223                            &len, &Key,
224                            (void**) &RSSAggr->ThisMsg))
225                 return NextDBOperation(IO, RSS_FetchNetworkUsetableEntry);
226         else
227                 return eAbort;
228 }
229
230 eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO)
231 {
232         const char *Key;
233         long len;
234         struct cdbdata *cdbut;
235         rss_aggregator *Ctx = (rss_aggregator *) IO->Data;
236
237         /* Find out if we've already seen this item */
238         strcpy(Ctx->ThisMsg->ut.ut_msgid,
239                ChrPtr(Ctx->ThisMsg->MsgGUID)); /// TODO
240         Ctx->ThisMsg->ut.ut_timestamp = time(NULL);
241
242         cdbut = cdb_fetch(CDB_USETABLE, SKEY(Ctx->ThisMsg->MsgGUID));
243 #ifndef DEBUG_RSS
244         if (cdbut != NULL) {
245                 /* Item has already been seen */
246                 EV_syslog(LOG_DEBUG,
247                           "%s has already been seen\n",
248                           ChrPtr(Ctx->ThisMsg->MsgGUID));
249                 cdb_free(cdbut);
250
251                 /* rewrite the record anyway, to update the timestamp */
252                 cdb_store(CDB_USETABLE,
253                           SKEY(Ctx->ThisMsg->MsgGUID),
254                           &Ctx->ThisMsg->ut, sizeof(struct UseTable) );
255
256                 if (GetNextHashPos(Ctx->Messages,
257                                    Ctx->Pos,
258                                    &len, &Key,
259                                    (void**) &Ctx->ThisMsg))
260                         return NextDBOperation(
261                                 IO,
262                                 RSS_FetchNetworkUsetableEntry);
263                 else
264                         return eAbort;
265         }
266         else
267 #endif
268         {
269                 NextDBOperation(IO, RSSSaveMessage);
270                 return eSendMore;
271         }
272 }
273
274
275
276 /*
277  * Begin a feed parse
278  */
279 int rss_do_fetching(rss_aggregator *Cfg)
280 {
281         rss_item *ri;
282         time_t now;
283
284         now = time(NULL);
285
286         if ((Cfg->next_poll != 0) && (now < Cfg->next_poll))
287                 return 0;
288
289         ri = (rss_item*) malloc(sizeof(rss_item));
290         memset(ri, 0, sizeof(rss_item));
291         Cfg->Item = ri;
292
293         if (! InitcURLIOStruct(&Cfg->IO,
294                                Cfg,
295                                "Citadel RSS Client",
296                                RSSAggregator_ParseReply,
297                                RSSAggregator_Terminate,
298                                RSSAggregator_ShutdownAbort))
299         {
300                 syslog(LOG_ALERT, "Unable to initialize libcurl.\n");
301                 return 0;
302         }
303
304         safestrncpy(((CitContext*)Cfg->IO.CitContext)->cs_host,
305                     ChrPtr(Cfg->Url),
306                     sizeof(((CitContext*)Cfg->IO.CitContext)->cs_host));
307
308         syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(Cfg->Url));
309         ParseURL(&Cfg->IO.ConnectMe, Cfg->Url, 80);
310         CurlPrepareURL(Cfg->IO.ConnectMe);
311
312         QueueCurlContext(&Cfg->IO);
313         return 1;
314 }
315
316 /*
317  * Scan a room's netconfig to determine whether it is requesting any RSS feeds
318  */
319 void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
320 {
321         StrBuf *CfgData=NULL;
322         StrBuf *CfgType;
323         StrBuf *Line;
324         rss_room_counter *Count = NULL;
325         struct stat statbuf;
326         char filename[PATH_MAX];
327         int  fd;
328         int Done;
329         rss_aggregator *RSSAggr = NULL;
330         rss_aggregator *use_this_RSSAggr = NULL;
331         void *vptr;
332         const char *CfgPtr, *lPtr;
333         const char *Err;
334
335         pthread_mutex_lock(&RSSQueueMutex);
336         if (GetHash(RSSQueueRooms, LKEY(qrbuf->QRnumber), &vptr))
337         {
338                 syslog(LOG_DEBUG,
339                        "rssclient: [%ld] %s already in progress.\n",
340                        qrbuf->QRnumber,
341                        qrbuf->QRname);
342                 pthread_mutex_unlock(&RSSQueueMutex);
343                 return;
344         }
345         pthread_mutex_unlock(&RSSQueueMutex);
346
347         assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir);
348
349         if (server_shutting_down)
350                 return;
351
352         /* Only do net processing for rooms that have netconfigs */
353         fd = open(filename, 0);
354         if (fd <= 0) {
355                 /* syslog(LOG_DEBUG,
356                    "rssclient: %s no config.\n",
357                    qrbuf->QRname); */
358                 return;
359         }
360
361         if (server_shutting_down)
362                 return;
363
364         if (fstat(fd, &statbuf) == -1) {
365                 syslog(LOG_DEBUG,
366                        "ERROR: could not stat configfile '%s' - %s\n",
367                        filename,
368                        strerror(errno));
369                 return;
370         }
371
372         if (server_shutting_down)
373                 return;
374
375         CfgData = NewStrBufPlain(NULL, statbuf.st_size + 1);
376
377         if (StrBufReadBLOB(CfgData, &fd, 1, statbuf.st_size, &Err) < 0) {
378                 close(fd);
379                 FreeStrBuf(&CfgData);
380                 syslog(LOG_DEBUG, "ERROR: reading config '%s' - %s<br>\n",
381                         filename, strerror(errno));
382                 return;
383         }
384         close(fd);
385         if (server_shutting_down)
386                 return;
387
388         CfgPtr = NULL;
389         CfgType = NewStrBuf();
390         Line = NewStrBufPlain(NULL, StrLength(CfgData));
391         Done = 0;
392         while (!Done)
393         {
394             Done = StrBufSipLine(Line, CfgData, &CfgPtr) == 0;
395             if (StrLength(Line) > 0)
396             {
397                 lPtr = NULL;
398                 StrBufExtract_NextToken(CfgType, Line, &lPtr, '|');
399                 if (!strcasecmp("rssclient", ChrPtr(CfgType)))
400                 {
401                     if (Count == NULL)
402                     {
403                         Count = malloc(sizeof(rss_room_counter));
404                         Count->count = 0;
405                     }
406                     Count->count ++;
407                     RSSAggr = (rss_aggregator *) malloc(sizeof(rss_aggregator));
408                     memset (RSSAggr, 0, sizeof(rss_aggregator));
409                     RSSAggr->roomlist_parts = 1;
410                     RSSAggr->Url = NewStrBuf();
411                     StrBufExtract_NextToken(RSSAggr->Url, Line, &lPtr, '|');
412
413                     pthread_mutex_lock(&RSSQueueMutex);
414                     GetHash(RSSFetchUrls, SKEY(RSSAggr->Url), &vptr);
415                     use_this_RSSAggr = (rss_aggregator *)vptr;
416                     if (use_this_RSSAggr != NULL)
417                     {
418                             long *QRnumber;
419                             StrBufAppendBufPlain(use_this_RSSAggr->rooms,
420                                                  qrbuf->QRname,
421                                                  -1, 0);
422                             if (use_this_RSSAggr->roomlist_parts == 1)
423                             {
424                                     use_this_RSSAggr->OtherQRnumbers =
425                                             NewHash(1, lFlathash);
426                             }
427                             QRnumber = (long*)malloc(sizeof(long));
428                             *QRnumber = qrbuf->QRnumber;
429                             Put(use_this_RSSAggr->OtherQRnumbers,
430                                 LKEY(qrbuf->QRnumber),
431                                 QRnumber,
432                                 NULL);
433                             use_this_RSSAggr->roomlist_parts++;
434
435                             pthread_mutex_unlock(&RSSQueueMutex);
436
437                             FreeStrBuf(&RSSAggr->Url);
438                             free(RSSAggr);
439                             RSSAggr = NULL;
440                             continue;
441                     }
442                     pthread_mutex_unlock(&RSSQueueMutex);
443
444                     RSSAggr->ItemType = RSS_UNSET;
445
446                     RSSAggr->rooms = NewStrBufPlain(qrbuf->QRname, -1);
447
448                     pthread_mutex_lock(&RSSQueueMutex);
449                     Put(RSSFetchUrls, SKEY(RSSAggr->Url), RSSAggr, DeleteRssCfg);
450                     pthread_mutex_unlock(&RSSQueueMutex);
451                 }
452             }
453         }
454         if (Count != NULL)
455         {
456                 Count->QRnumber = qrbuf->QRnumber;
457                 pthread_mutex_lock(&RSSQueueMutex);
458                 syslog(LOG_DEBUG, "rssclient: [%ld] %s now starting.\n", 
459                               qrbuf->QRnumber, qrbuf->QRname);
460                 Put(RSSQueueRooms, LKEY(qrbuf->QRnumber), Count, NULL);
461                 pthread_mutex_unlock(&RSSQueueMutex);
462         }
463         FreeStrBuf(&CfgData);
464         FreeStrBuf(&CfgType);
465         FreeStrBuf(&Line);
466 }
467
468 /*
469  * Scan for rooms that have RSS client requests configured
470  */
471 void rssclient_scan(void) {
472         static int doing_rssclient = 0;
473         rss_aggregator *rptr = NULL;
474         void *vrptr = NULL;
475         HashPos  *it;
476         long len;
477         const char *Key;
478
479         /* Run no more than once every 15 minutes. */
480         if ((time(NULL) - last_run) < 900) {
481                 return;
482         }
483
484         /*
485          * This is a simple concurrency check to make sure only one rssclient
486          * run is done at a time.  We could do this with a mutex, but since we
487          * don't really require extremely fine granularity here, we'll do it
488          * with a static variable instead.
489          */
490         if (doing_rssclient) return;
491         doing_rssclient = 1;
492         if ((GetCount(RSSQueueRooms) > 0) || (GetCount(RSSFetchUrls) > 0))
493                 return;
494
495         become_session(&rss_CC);
496         syslog(LOG_DEBUG, "rssclient started\n");
497         CtdlForEachRoom(rssclient_scan_room, NULL);
498
499         pthread_mutex_lock(&RSSQueueMutex);
500
501         it = GetNewHashPos(RSSFetchUrls, 0);
502         while (!server_shutting_down &&
503                GetNextHashPos(RSSFetchUrls, it, &len, &Key, &vrptr) &&
504                (vrptr != NULL)) {
505                 rptr = (rss_aggregator *)vrptr;
506                 if (!rss_do_fetching(rptr))
507                         UnlinkRSSAggregator(rptr);
508         }
509         DeleteHashPos(&it);
510         pthread_mutex_unlock(&RSSQueueMutex);
511
512         syslog(LOG_DEBUG, "rssclient ended\n");
513         doing_rssclient = 0;
514         return;
515 }
516
517 void rss_cleanup(void)
518 {
519         /* citthread_mutex_destroy(&RSSQueueMutex); TODO */
520         DeleteHash(&RSSFetchUrls);
521         DeleteHash(&RSSQueueRooms);
522 }
523
524
525 CTDL_MODULE_INIT(rssclient)
526 {
527         if (threading)
528         {
529                 CtdlFillSystemContext(&rss_CC, "rssclient");
530                 pthread_mutex_init(&RSSQueueMutex, NULL);
531                 RSSQueueRooms = NewHash(1, lFlathash);
532                 RSSFetchUrls = NewHash(1, NULL);
533                 syslog(LOG_INFO, "%s\n", curl_version());
534                 CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER);
535                 CtdlRegisterCleanupHook(rss_cleanup);
536         }
537         return "rssclient";
538 }