don't fetch a RSS before the network frequency is elapsed
[citadel.git] / citadel / modules / rssclient / serv_rssclient.c
1 /*
2  * Bring external RSS feeds into rooms.
3  *
4  * Copyright (c) 2007-2010 by the citadel.org team
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20
21 #include <stdlib.h>
22 #include <unistd.h>
23 #include <stdio.h>
24
25 #if TIME_WITH_SYS_TIME
26 # include <sys/time.h>
27 # include <time.h>
28 #else
29 # if HAVE_SYS_TIME_H
30 #  include <sys/time.h>
31 # else
32 #  include <time.h>
33 # endif
34 #endif
35
36 #include <ctype.h>
37 #include <string.h>
38 #include <errno.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <expat.h>
42 #include <curl/curl.h>
43 #include <libcitadel.h>
44 #include "citadel.h"
45 #include "server.h"
46 #include "citserver.h"
47 #include "support.h"
48 #include "config.h"
49 #include "threads.h"
50 #include "ctdl_module.h"
51 #include "clientsocket.h"
52 #include "msgbase.h"
53 #include "parsedate.h"
54 #include "database.h"
55 #include "citadel_dirs.h"
56 #include "md5.h"
57 #include "context.h"
58
59
/*
 * One configured feed subscription.  Entries chain through `next`
 * (presumably rooted at the global rnclist -- confirm against the
 * code that builds the list).
 */
typedef struct rssnetcfg {
	struct rssnetcfg *next;     /* following entry, or NULL */
	char url[256];              /* feed URL */
	char *rooms;                /* presumably the list of destination rooms -- TODO confirm */
	time_t last_error_when;
	int ItemType;               /* RSS_UNSET, RSS_RSS or RSS_ATOM; set by the feed-type start handlers */
	time_t next_poll;
} rssnetcfg;
69
/*
 * Feed-type bits.  A handler's Flags field is a mask of these; the
 * feed's ItemType holds exactly one of the first three.
 */
#define RSS_UNSET       0x01    /* feed type not determined yet */
#define RSS_RSS         0x02    /* RSS (also used for RDF feeds) */
#define RSS_ATOM        0x04    /* Atom */
#define RSS_REQUIRE_BUF 0x08
74
75 typedef struct _rss_item {
76         char *roomlist;
77         int done_parsing;
78         StrBuf *guid;
79         StrBuf *title;
80         StrBuf *link;
81         StrBuf *linkTitle;
82         StrBuf *reLink;
83         StrBuf *reLinkTitle;
84         StrBuf *description;
85         time_t pubdate;
86         StrBuf *channel_title;
87         int item_tag_nesting;
88         StrBuf *author_or_creator;
89         StrBuf *author_url;
90         StrBuf *author_email;
91 }rss_item;
92
93
94 typedef void (*rss_handler_func)(StrBuf *CData, 
95                                  rss_item *ri, 
96                                  rssnetcfg *Cfg, 
97                                  const char** Attr);
98
99 typedef struct __rss_xml_handler {
100         int Flags;
101         rss_handler_func Handler;
102 }rss_xml_handler;
103
104
105 typedef struct _rsscollection {
106         StrBuf *CData;
107         StrBuf *Key;
108
109         rss_item *Item;
110         rssnetcfg *Cfg;
111         
112         rss_xml_handler *Current;
113 } rsscollection;
114
115 struct rssnetcfg *rnclist = NULL;
116 HashList *StartHandlers = NULL;
117 HashList *EndHandlers = NULL;
118 HashList *KnownNameSpaces = NULL;
119 HashList *UrlShorteners = NULL;
120 void AddRSSStartHandler(rss_handler_func Handler, int Flags, const char *key, long len)
121 {
122         rss_xml_handler *h;
123         h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
124         h->Flags = Flags;
125         h->Handler = Handler;
126         Put(StartHandlers, key, len, h, NULL);
127 }
128 void AddRSSEndHandler(rss_handler_func Handler, int Flags, const char *key, long len)
129 {
130         rss_xml_handler *h;
131         h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
132         h->Flags = Flags;
133         h->Handler = Handler;
134         Put(EndHandlers, key, len, h, NULL);
135 }
136
137 #if 0
138 //#ifdef HAVE_ICONV
139 #include <iconv.h>
140
141
142 /* 
143  * Dug this out of the trashcan of the Midgard project; let's see if it works for us.
144  * original code by Alexander Bokovoy <bokovoy@avilink.ne> distributed under GPL V2 or later
145  */
146
147 /* Returns: 
148  >= 0 - successful; 0 means the conversion doesn't use multibyte sequences 
149    -1 - error during iconv_open call 
150    -2 - error during iconv_close call 
151    ---------------------------------- 
152    This function expects that the multibyte encoding in 'charset' has no 
153    characters longer than 3 bytes. It is not intended to convert UTF-8 because 
154    we'll never receive UTF-8 in our handler (it is handled by Expat itself). 
155 */ 
156 static int 
157 fill_encoding_info (const char *charset, XML_Encoding * info) 
158
159   iconv_t cd = (iconv_t)(-1); 
160   int flag; 
161         CtdlLogPrintf(0, "RSS: fill encoding info ...\n");
162  
163 #if G_BYTE_ORDER == G_LITTLE_ENDIAN 
164   cd = iconv_open ("UCS-2LE", charset); 
165 #else 
166   cd = iconv_open ("UCS-2BE", charset); 
167 #endif 
168  
169   if (cd == (iconv_t) (-1)) 
170     { 
171       return -1; 
172     } 
173  
174   { 
175     unsigned short out = 0; 
176     unsigned char buf[4]; 
177     unsigned int i0, i1, i2; 
178     int result = 0; 
179     flag = 0; 
180     for (i0 = 0; i0 < 0x100; i0++) 
181       { 
182         buf[0] = i0; 
183         info->map[i0] = 0; 
184         //result = try (cd, buf, 1, &out); 
185         if (result < 0) 
186           { 
187           } 
188         else if (result > 0) 
189           { 
190             info->map[i0] = out; 
191           } 
192         else 
193           { 
194             for (i1 = 0; i1 < 0x100; i1++) 
195               { 
196                 buf[1] = i1; 
197                 ///result = try (cd, buf, 2, &out); 
198                 if (result < 0) 
199                   { 
200                   } 
201                 else if (result > 0) 
202                   { 
203                     flag++; 
204                     info->map[i0] = -2; 
205                   } 
206                 else 
207                   { 
208                     for (i2 = 0; i2 < 0x100; i2++) 
209                       { 
210                         buf[2] = i2; 
211                         ////result = try (cd, buf, 3, &out); 
212                         if (result < 0) 
213                           { 
214                           } 
215                         else if (result > 0) 
216                           { 
217                             flag++; 
218                             info->map[i0] = -3; 
219                           } 
220                       } 
221                   } 
222               } 
223           } 
224       } 
225   } 
226  
227   if (iconv_close (cd) < 0) 
228     { 
229       return -2; 
230     } 
231   return flag; 
232
233
234 static int 
235 iconv_convertor (void *data, const char *s) 
236
237   XML_Encoding *info = data; 
238   int res; 
239         CtdlLogPrintf(0, "RSS: Converting ...\n");
240
241   if (s == NULL) 
242     return -1; 
243 /*
244   GByteArray *result; 
245   result = g_byte_array_new (); 
246   if (process_block (info->data, (char *) s, strlen (s), result) == 0) 
247     { 
248       res = *(result->data); 
249       g_byte_array_free (result, TRUE); 
250       return res; 
251     } 
252   g_byte_array_free (result, TRUE); 
253 */
254   return -1; 
255
256
257 static void 
258 my_release (void *data) 
259
260   iconv_t cd = (iconv_t) data; 
261   if (iconv_close (cd) != 0) 
262     { 
263 /// TODO: uh no.      exit (1); 
264     } 
265
266 int 
267 handle_unknown_xml_encoding (void *encodingHandleData, 
268                              const XML_Char * name, 
269                              XML_Encoding * info) 
270
271   int result; 
272   CtdlLogPrintf(0, "RSS: unknown encoding ...\n");
273   result = fill_encoding_info (name, info); 
274   if (result >= 0) 
275     { 
276       /*  
277         Special case: client asked for reverse conversion, we'll provide him with 
278         iconv descriptor which handles it. Client should release it by himself. 
279       */ 
280       if(encodingHandleData != NULL) 
281             *((iconv_t *)encodingHandleData) = iconv_open(name, "UTF-8"); 
282       /*  
283          Optimization: we do not need conversion function if encoding is one-to-one,  
284          info->map table will be enough  
285        */ 
286       if (result == 0) 
287         { 
288           info->data = NULL; 
289           info->convert = NULL; 
290           info->release = NULL; 
291           return 1; 
292         } 
293       /*  
294          We do need conversion function because this encoding uses multibyte sequences 
295        */ 
296       info->data = (void *) iconv_open ("UTF-8", name); 
297       if ((int)info->data == -1) 
298         return -1; 
299       info->convert = iconv_convertor; 
300       info->release = my_release; 
301       return 1; 
302     } 
303   if(encodingHandleData != NULL)  
304     *(iconv_t *)encodingHandleData = NULL; 
305   return 0; 
306
307
308 ///#endif
309 #endif
310 size_t GetLocationString( void *ptr, size_t size, size_t nmemb, void *userdata)
311 {
312 #define LOCATION "location"
313         if (strncasecmp((char*)ptr, LOCATION, sizeof(LOCATION) - 1) == 0)
314         {
315                 StrBuf *pURL = (StrBuf*) userdata;
316                 char *pch = (char*) ptr;
317                 char *pche;
318                 
319                 pche = pch + (size * nmemb);
320                 pch += sizeof(LOCATION);
321                 
322                 while (isspace(*pch) || (*pch == ':'))
323                         pch ++;
324
325                 while (isspace(*pche) || (*pche == '\0'))
326                         pche--;
327                 
328                 FlushStrBuf(pURL);
329                 StrBufPlain(pURL, pch, pche - pch + 1); 
330         }
331         return size * nmemb;
332 }
333
334 int LookupUrl(StrBuf *ShorterUrlStr)
335 {
336         CURL *curl;
337         char errmsg[1024] = "";
338         StrBuf *Answer;
339         int rc = 0;
340
341         curl = curl_easy_init();
342         if (!curl) {
343                 CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n");
344                 return 0;
345         }
346         Answer = NewStrBufPlain(NULL, SIZ);
347
348         curl_easy_setopt(curl, CURLOPT_URL, ChrPtr(ShorterUrlStr));
349         curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
350         curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
351         curl_easy_setopt(curl, CURLOPT_WRITEDATA, Answer);
352 //      curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_libcurl_callback);
353         curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, CurlFillStrBuf_callback);
354         curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errmsg);
355         curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
356 #ifdef CURLOPT_HTTP_CONTENT_DECODING
357         curl_easy_setopt(curl, CURLOPT_HTTP_CONTENT_DECODING, 1);
358         curl_easy_setopt(curl, CURLOPT_ENCODING, "");
359 #endif
360         curl_easy_setopt(curl, CURLOPT_USERAGENT, CITADEL);
361         curl_easy_setopt(curl, CURLOPT_TIMEOUT, 180);           /* die after 180 seconds */
362         curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 0);
363
364         curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION , GetLocationString);
365         curl_easy_setopt(curl, CURLOPT_WRITEHEADER, ShorterUrlStr);
366
367
368         if (
369                 (!IsEmptyStr(config.c_ip_addr))
370                 && (strcmp(config.c_ip_addr, "*"))
371                 && (strcmp(config.c_ip_addr, "::"))
372                 && (strcmp(config.c_ip_addr, "0.0.0.0"))
373         ) {
374                 curl_easy_setopt(curl, CURLOPT_INTERFACE, config.c_ip_addr);
375         }
376
377         if (CtdlThreadCheckStop())
378                 goto shutdown ;
379
380         rc = curl_easy_perform(curl);
381         if (rc) {
382                 CtdlLogPrintf(CTDL_ALERT, "libcurl error %d: %s\n", rc, errmsg);
383                 rc = 0;
384         }
385         else 
386                 rc = 1;
387
388 shutdown:
389         curl_easy_cleanup(curl);
390
391         return rc;
392
393 }
394
395
396
397 void CrawlMessageForShorterUrls(HashList *pUrls, StrBuf *Message)
398 {
399         int nHits = 0;
400         void *pv;
401         int nShorter = 0;
402         const char *pch;
403         const char *pUrl;
404         ConstStr *pCUrl;
405
406         while (GetHash(UrlShorteners, IKEY(nShorter), &pv))
407         {
408                 nShorter++;
409                 pch = ChrPtr(Message);
410                 pUrl = strstr(pch, ChrPtr((StrBuf*)pv));
411                 while ((pUrl != NULL) && (nHits < 99))
412                 {
413                         pCUrl = malloc(sizeof(ConstStr));
414
415                         pCUrl->Key = pUrl;
416                         pch = pUrl + StrLength((StrBuf*)pv);
417                         while (isalnum(*pch)||(*pch == '-')||(*pch == '/'))
418                                 pch++;
419                         pCUrl->len = pch - pCUrl->Key;
420
421                         Put(pUrls, IKEY(nHits), pCUrl, NULL);
422                         nHits ++;
423                         pUrl = strstr(pch, ChrPtr((StrBuf*)pv));
424                 }
425         }
426 }
427
428 int SortConstStrByPosition(const void *Item1, const void *Item2)
429 {
430         const ConstStr *p1, *p2;
431         p1 = (const ConstStr*) Item1;
432         p2 = (const ConstStr*) Item2;
433         if (p1->Key == p2->Key)
434                 return 0;
435         if (p1->Key > p2->Key)
436                 return 1;
437         return -1;
438 }
439
440 void ExpandShortUrls(StrBuf *Message)
441 {
442         StrBuf *Shadow;
443         HashList *pUrls;
444         ConstStr *pCUrl;
445         const char *pch;
446         const char *pche;
447
448         /* we just suspect URL shorteners to be inside of feeds from twitter
449          * or other short content messages, so don't crawl through real blogs.
450          */
451         if (StrLength(Message) > 500)
452                 return;
453
454         pUrls = NewHash(1, Flathash);
455         CrawlMessageForShorterUrls(pUrls, Message);
456
457         if (GetCount(pUrls) > 0)
458         {
459                 StrBuf *ShorterUrlStr;
460                 HashPos *Pos;
461                 const char *Key;
462                 void *pv;
463                 long len;
464
465                 Shadow = NewStrBufPlain(NULL, StrLength(Message));
466                 SortByPayload (pUrls, SortConstStrByPosition);
467
468                 ShorterUrlStr = NewStrBufPlain(NULL, StrLength(Message));
469
470                 pch = ChrPtr(Message);
471                 pche = pch + StrLength(Message);
472                 Pos = GetNewHashPos(pUrls, 1);
473                 while (GetNextHashPos(pUrls, Pos, &len, &Key, &pv))
474                 {
475                         pCUrl = (ConstStr*) pv;
476
477                         if (pch != pCUrl->Key)
478                                 StrBufAppendBufPlain(Shadow, pch, pCUrl->Key - pch, 0);
479                         
480                         StrBufPlain(ShorterUrlStr, CKEY(*pCUrl));
481                         if (LookupUrl(ShorterUrlStr))
482                         {
483                                 StrBufAppendBufPlain(Shadow, HKEY("<a href=\""), 0);
484                                 StrBufAppendBuf(Shadow, ShorterUrlStr, 0);
485                                 StrBufAppendBufPlain(Shadow, HKEY("\">"), 0);
486                                 StrBufAppendBuf(Shadow, ShorterUrlStr, 0);
487                                 StrBufAppendBufPlain(Shadow, HKEY("["), 0);
488                                 StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0);
489                                 StrBufAppendBufPlain(Shadow, HKEY("]</a>"), 0);
490                         }
491                         else
492                         {
493                                 StrBufAppendBufPlain(Shadow, HKEY("<a href=\""), 0);
494                                 StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0);
495                                 StrBufAppendBufPlain(Shadow, HKEY("\">"), 0);
496                                 StrBufAppendBufPlain(Shadow, pCUrl->Key, pCUrl->len, 0);
497                                 StrBufAppendBufPlain(Shadow, HKEY("</a>"), 0);
498                         }
499                         pch = pCUrl->Key + pCUrl->len + 1;
500
501                 }
502                 if (pch < pche)
503                         StrBufAppendBufPlain(Shadow, pch, pche - pch, 0);
504                 FlushStrBuf(Message);
505                 StrBufAppendBuf(Message, Shadow, 0);
506
507                 FreeStrBuf(&ShorterUrlStr);
508                 FreeStrBuf(&Shadow);
509                 DeleteHashPos(&Pos);
510         }
511
512         DeleteHash(&pUrls);
513 }
514
515
516 void AppendLink(StrBuf *Message, StrBuf *link, StrBuf *LinkTitle, const char *Title)
517 {
518         if (StrLength(link) > 0)
519         {
520                 StrBufAppendBufPlain(Message, HKEY("<a href=\""), 0);
521                 StrBufAppendBuf(Message, link, 0);
522                 StrBufAppendBufPlain(Message, HKEY("\">"), 0);
523                 if (StrLength(LinkTitle) > 0)
524                         StrBufAppendBuf(Message, LinkTitle, 0);
525                 else if ((Title != NULL) && !IsEmptyStr(Title))
526                         StrBufAppendBufPlain(Message, Title, -1, 0);
527                 else
528                         StrBufAppendBuf(Message, link, 0);
529                 StrBufAppendBufPlain(Message, HKEY("</a><br>\n"), 0);
530         }
531 }
532 /*
533  * Commit a fetched and parsed RSS item to disk
534  */
535 void rss_save_item(rss_item *ri)
536 {
537
538         struct MD5Context md5context;
539         u_char rawdigest[MD5_DIGEST_LEN];
540         int i;
541         char utmsgid[SIZ];
542         struct cdbdata *cdbut;
543         struct UseTable ut;
544         struct CtdlMessage *msg;
545         struct recptypes *recp = NULL;
546         int msglen = 0;
547         StrBuf *Message;
548
549         recp = (struct recptypes *) malloc(sizeof(struct recptypes));
550         if (recp == NULL) return;
551         memset(recp, 0, sizeof(struct recptypes));
552         memset(&ut, 0, sizeof(struct UseTable));
553         recp->recp_room = strdup(ri->roomlist);
554         recp->num_room = num_tokens(ri->roomlist, '|');
555         recp->recptypes_magic = RECPTYPES_MAGIC;
556    
557         /* Construct a GUID to use in the S_USETABLE table.
558          * If one is not present in the item itself, make one up.
559          */
560         if (ri->guid != NULL) {
561                 StrBufSpaceToBlank(ri->guid);
562                 StrBufTrim(ri->guid);
563                 snprintf(utmsgid, sizeof utmsgid, "rss/%s", ChrPtr(ri->guid));
564         }
565         else {
566                 MD5Init(&md5context);
567                 if (ri->title != NULL) {
568                         MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->title), StrLength(ri->title));
569                 }
570                 if (ri->link != NULL) {
571                         MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->link), StrLength(ri->link));
572                 }
573                 MD5Final(rawdigest, &md5context);
574                 for (i=0; i<MD5_DIGEST_LEN; i++) {
575                         sprintf(&utmsgid[i*2], "%02X", (unsigned char) (rawdigest[i] & 0xff));
576                         utmsgid[i*2] = tolower(utmsgid[i*2]);
577                         utmsgid[(i*2)+1] = tolower(utmsgid[(i*2)+1]);
578                 }
579                 strcat(utmsgid, "_rss2ctdl");
580         }
581
582         /* Find out if we've already seen this item */
583
584         cdbut = cdb_fetch(CDB_USETABLE, utmsgid, strlen(utmsgid));
585 #ifndef DEBUG_RSS
586         if (cdbut != NULL) {
587                 /* Item has already been seen */
588                 CtdlLogPrintf(CTDL_DEBUG, "%s has already been seen\n", utmsgid);
589                 cdb_free(cdbut);
590
591                 /* rewrite the record anyway, to update the timestamp */
592                 strcpy(ut.ut_msgid, utmsgid);
593                 ut.ut_timestamp = time(NULL);
594                 cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) );
595         }
596         else
597 #endif
598 {
599                 /* Item has not been seen, so save it. */
600                 CtdlLogPrintf(CTDL_DEBUG, "RSS: saving item...\n");
601                 if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY(""));
602                 StrBufSpaceToBlank(ri->description);
603                 msg = malloc(sizeof(struct CtdlMessage));
604                 memset(msg, 0, sizeof(struct CtdlMessage));
605                 msg->cm_magic = CTDLMESSAGE_MAGIC;
606                 msg->cm_anon_type = MES_NORMAL;
607                 msg->cm_format_type = FMT_RFC822;
608
609                 if (ri->guid != NULL) {
610                         msg->cm_fields['E'] = strdup(ChrPtr(ri->guid));
611                 }
612
613                 if (ri->author_or_creator != NULL) {
614                         char *From;
615                         StrBuf *Encoded = NULL;
616                         int FromAt;
617                         
618                         From = html_to_ascii(ChrPtr(ri->author_or_creator),
619                                              StrLength(ri->author_or_creator), 
620                                              512, 0);
621                         StrBufPlain(ri->author_or_creator, From, -1);
622                         StrBufTrim(ri->author_or_creator);
623                         free(From);
624
625                         FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL;
626                         if (!FromAt && StrLength (ri->author_email) > 0)
627                         {
628                                 StrBufRFC2047encode(&Encoded, ri->author_or_creator);
629                                 msg->cm_fields['A'] = SmashStrBuf(&Encoded);
630                                 msg->cm_fields['P'] = SmashStrBuf(&ri->author_email);
631                         }
632                         else
633                         {
634                                 if (FromAt)
635                                         msg->cm_fields['P'] = SmashStrBuf(&ri->author_or_creator);
636                                 else 
637                                 {
638                                         StrBufRFC2047encode(&Encoded, ri->author_or_creator);
639                                         msg->cm_fields['A'] = SmashStrBuf(&Encoded);
640                                         msg->cm_fields['P'] = strdup("rss@localhost");
641                                 }
642                         }
643                 }
644                 else {
645                         msg->cm_fields['A'] = strdup("rss");
646                 }
647
648                 msg->cm_fields['N'] = strdup(NODENAME);
649                 if (ri->title != NULL) {
650                         long len;
651                         char *Sbj;
652                         StrBuf *Encoded, *QPEncoded;
653
654                         QPEncoded = NULL;
655                         StrBufSpaceToBlank(ri->title);
656                         len = StrLength(ri->title);
657                         Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0);
658                         len = strlen(Sbj);
659                         if (Sbj[len - 1] == '\n')
660                         {
661                                 len --;
662                                 Sbj[len] = '\0';
663                         }
664                         Encoded = NewStrBufPlain(Sbj, len);
665                         free(Sbj);
666
667                         StrBufTrim(Encoded);
668                         StrBufRFC2047encode(&QPEncoded, Encoded);
669
670                         msg->cm_fields['U'] = SmashStrBuf(&QPEncoded);
671                         FreeStrBuf(&Encoded);
672                 }
673                 msg->cm_fields['T'] = malloc(64);
674                 snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate);
675                 if (ri->channel_title != NULL) {
676                         if (StrLength(ri->channel_title) > 0) {
677                                 msg->cm_fields['O'] = strdup(ChrPtr(ri->channel_title));
678                         }
679                 }
680                 if (ri->link == NULL) 
681                         ri->link = NewStrBufPlain(HKEY(""));
682                 ExpandShortUrls(ri->description);
683                 msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ;
684
685                 Message = NewStrBufPlain(NULL, StrLength(ri->description));
686
687                 StrBufPlain(Message, HKEY(
688                          "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n"
689                          "<html><body>\n"));
690
691                 StrBufAppendBuf(Message, ri->description, 0);
692                 StrBufAppendBufPlain(Message, HKEY("<br><br>\n"), 0);
693
694                 AppendLink(Message, ri->link, ri->linkTitle, NULL);
695                 AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this");
696                 StrBufAppendBufPlain(Message, HKEY("</body></html>\n"), 0);
697
698                 msg->cm_fields['M'] = SmashStrBuf(&Message);
699
700                 CtdlSubmitMsg(msg, recp, NULL, 0);
701                 CtdlFreeMessage(msg);
702
703                 /* write the uidl to the use table so we don't store this item again */
704                 strcpy(ut.ut_msgid, utmsgid);
705                 ut.ut_timestamp = time(NULL);
706                 cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) );
707         }
708         free_recipients(recp);
709 }
710
711
712
/*
 * Convert an RDF/RSS datestamp into a time_t.
 *
 * Returns 0 for a NULL pointer or a string shorter than 10 characters.
 * A string of the form YYYY-MM-DD[Thh:mm...] is decoded field by field
 * (seconds and any zone suffix are ignored) and converted with mktime().
 * Anything else is handed to parsedate(); if that yields nothing
 * usable, the current time is returned.
 */
time_t rdf_parsedate(const char *p)
{
	struct tm tm;
	time_t t = 0;
	size_t len;

	if (!p) return 0L;
	len = strlen(p);
	if (len < 10) return 0L;

	memset(&tm, 0, sizeof tm);

	/*
	 * If the timestamp appears to be in W3C datetime format, try to
	 * parse it.  See also: http://www.w3.org/TR/NOTE-datetime
	 *
	 * This code, along with parsedate.c, is a potential candidate for
	 * moving into libcitadel.
	 */
	if ( (p[4] == '-') && (p[7] == '-') ) {
		tm.tm_year = atoi(&p[0]) - 1900;
		tm.tm_mon = atoi(&p[5]) - 1;
		tm.tm_mday = atoi(&p[8]);
		/* only look at p[13] when the string actually reaches it */
		if ( (len > 13) && (p[10] == 'T') && (p[13] == ':') ) {
			tm.tm_hour = atoi(&p[11]);
			tm.tm_min = atoi(&p[14]);
		}
		return mktime(&tm);
	}

	/* hmm... try RFC822 date stamp format */

	t = parsedate(p);
	if (t > 0) return(t);

	/* yeesh.  ok, just return the current date and time. */
	return(time(NULL));
}
752
753 void flush_rss_item(rss_item *ri)
754 {
755         /* Initialize the feed item data structure */
756         FreeStrBuf(&ri->guid);
757         FreeStrBuf(&ri->title);
758         FreeStrBuf(&ri->link);
759         FreeStrBuf(&ri->author_or_creator);
760         FreeStrBuf(&ri->author_email);
761         FreeStrBuf(&ri->author_url);
762         FreeStrBuf(&ri->description);
763 }
764
765 void rss_xml_start(void *data, const char *supplied_el, const char **attr)
766 {
767         rss_xml_handler *h;
768         rsscollection   *rssc = (rsscollection*) data;
769         rssnetcfg       *Cfg = rssc->Cfg;
770         rss_item        *ri = rssc->Item;
771         void            *pv;
772         const char      *pel;
773         char            *sep = NULL;
774
775         /* Axe the namespace, we don't care about it */
776 ///     CtdlLogPrintf(0, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el);
777         pel = supplied_el;
778         while (sep = strchr(pel, ':'), sep) {
779                 pel = sep + 1;
780         }
781
782         if (pel != supplied_el)
783         {
784                 void *v;
785                 
786                 if (!GetHash(KnownNameSpaces, 
787                              supplied_el, 
788                              pel - supplied_el - 1,
789                              &v))
790                 {
791 #ifdef DEBUG_RSS
792                         CtdlLogPrintf(0, "RSS: START ignoring because of wrong namespace [%s] = [%s]\n", 
793                                       supplied_el);
794 #endif
795                         return;
796                 }
797         }
798
799         StrBufPlain(rssc->Key, pel, -1);
800         StrBufLowerCase(rssc->Key);
801         if (GetHash(StartHandlers, SKEY(rssc->Key), &pv))
802         {
803                 rssc->Current = h = (rss_xml_handler*) pv;
804
805                 if (((h->Flags & RSS_UNSET) != 0) && 
806                     (Cfg->ItemType == RSS_UNSET))
807                 {
808                         h->Handler(rssc->CData, ri, Cfg, attr);
809                 }
810                 else if (((h->Flags & RSS_RSS) != 0) &&
811                     (Cfg->ItemType == RSS_RSS))
812                 {
813                         h->Handler(rssc->CData, ri, Cfg, attr);
814                 }
815                 else if (((h->Flags & RSS_ATOM) != 0) &&
816                          (Cfg->ItemType == RSS_ATOM))
817                 {
818                         h->Handler(rssc->CData, ri, Cfg, attr);                 
819                 }
820 #ifdef DEBUG_RSS
821                 else 
822                         CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el);
823 #endif
824         }
825 #ifdef DEBUG_RSS
826         else 
827                 CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel,  supplied_el);
828 #endif
829 }
830
831 void rss_xml_end(void *data, const char *supplied_el)
832 {
833         rss_xml_handler *h;
834         rsscollection   *rssc = (rsscollection*) data;
835         rssnetcfg       *Cfg = rssc->Cfg;
836         rss_item        *ri = rssc->Item;
837         const char      *pel;
838         char            *sep = NULL;
839         void            *pv;
840
841         /* Axe the namespace, we don't care about it */
842         pel = supplied_el;
843         while (sep = strchr(pel, ':'), sep) {
844                 pel = sep + 1;
845         }
846 //      CtdlLogPrintf(0, "RSS: END %s...\n", el);
847         if (pel != supplied_el)
848         {
849                 void *v;
850                 
851                 if (!GetHash(KnownNameSpaces, 
852                              supplied_el, 
853                              pel - supplied_el - 1,
854                              &v))
855                 {
856 #ifdef DEBUG_RSS
857                         CtdlLogPrintf(0, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n", 
858                                       supplied_el, ChrPtr(rssc->CData));
859 #endif
860                         FlushStrBuf(rssc->CData);
861                         return;
862                 }
863         }
864
865         StrBufPlain(rssc->Key, pel, -1);
866         StrBufLowerCase(rssc->Key);
867         if (GetHash(EndHandlers, SKEY(rssc->Key), &pv))
868         {
869                 h = (rss_xml_handler*) pv;
870
871                 if (((h->Flags & RSS_UNSET) != 0) && 
872                     (Cfg->ItemType == RSS_UNSET))
873                 {
874                         h->Handler(rssc->CData, ri, Cfg, NULL);
875                 }
876                 else if (((h->Flags & RSS_RSS) != 0) &&
877                     (Cfg->ItemType == RSS_RSS))
878                 {
879                         h->Handler(rssc->CData, ri, Cfg, NULL);
880                 }
881                 else if (((h->Flags & RSS_ATOM) != 0) &&
882                          (Cfg->ItemType == RSS_ATOM))
883                 {
884                         h->Handler(rssc->CData, ri, Cfg, NULL);
885                 }
886 #ifdef DEBUG_RSS
887                 else 
888                         CtdlLogPrintf(0, "RSS: END   unhandled: [%s]  [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData));
889 #endif
890         }
891 #ifdef DEBUG_RSS
892         else 
893                 CtdlLogPrintf(0, "RSS: END   unhandled: [%s]  [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData));
894 #endif
895         FlushStrBuf(rssc->CData);
896         rssc->Current = NULL;
897 }
898
899
900
901
902
903 void RSS_item_rss_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
904 {
905         CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RSS feed.\n");
906         Cfg->ItemType = RSS_RSS;
907 }
908
909 void RSS_item_rdf_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
910 {
911         CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RDF feed.\n");
912         Cfg->ItemType = RSS_RSS;
913 }
914
915 void ATOM_item_feed_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
916 {
917         CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an ATOM feed.\n");
918         Cfg->ItemType = RSS_ATOM;
919 }
920
921
922 void RSS_item_item_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
923 {
924         ri->item_tag_nesting ++;
925         flush_rss_item(ri);
926 }
927
928 void ATOM_item_entry_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
929 {
930 /* Atom feed... */
931         ri->item_tag_nesting ++;
932         flush_rss_item(ri);
933 }
934
935 void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
936 {
937         int i;
938         const char *pHref = NULL;
939         const char *pType = NULL;
940         const char *pRel = NULL;
941         const char *pTitle = NULL;
942
943         for (i = 0; Attr[i] != NULL; i+=2)
944         {
945                 if (!strcmp(Attr[i], "href"))
946                 {
947                         pHref = Attr[i+1];
948                 }
949                 else if (!strcmp(Attr[i], "rel"))
950                 {
951                         pRel = Attr[i+1];
952                 }
953                 else if (!strcmp(Attr[i], "type"))
954                 {
955                         pType = Attr[i+1];
956                 }
957                 else if (!strcmp(Attr[i], "title"))
958                 {
959                         pTitle = Attr[i+1];
960                 }
961         }
962         if (pHref == NULL)
963                 return; /* WHUT? Pointing... where? */
964         if ((pType != NULL) && !strcasecmp(pType, "application/atom+xml"))
965                 return; /* these just point to other rss resources, we're not interested in them. */
966         if (pRel != NULL)
967         {
968                 if (!strcasecmp (pRel, "replies"))
969                 {
970                         NewStrBufDupAppendFlush(&ri->reLink, NULL, pHref, -1);
971                         StrBufTrim(ri->link);
972                         NewStrBufDupAppendFlush(&ri->reLinkTitle, NULL, pTitle, -1);
973                 }
974                 else if (!strcasecmp(pRel, "alternate")) /* Alternative representation of this Item... */
975                 {
976                         NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1);
977                         StrBufTrim(ri->link);
978                         NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1);
979
980                 }
981 #if 0 /* these are also defined, but dunno what to do with them.. */
982                 else if (!strcasecmp(pRel, "related"))
983                 {
984                 }
985                 else if (!strcasecmp(pRel, "self"))
986                 {
987                 }
988                 else if (!strcasecmp(pRel, "enclosure"))
989                 {/* this reference can get big, and is probably the full article... */
990                 }
991                 else if (!strcasecmp(pRel, "via"))
992                 {/* this article was provided via... */
993                 }
994 #endif
995         }
996         else if (StrLength(ri->link) == 0)
997         {
998                 NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1);
999                 StrBufTrim(ri->link);
1000                 NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1);
1001         }
1002 }
1003
1004
1005
1006
1007 void ATOMRSS_item_title_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1008 {
1009         if ((ri->item_tag_nesting == 0) && (StrLength(CData) > 0)) {
1010                 NewStrBufDupAppendFlush(&ri->channel_title, CData, NULL, 0);
1011                 StrBufTrim(ri->channel_title);
1012         }
1013 }
1014
1015 void RSS_item_guid_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1016 {
1017         if (StrLength(CData) > 0) {
1018                 NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
1019         }
1020 }
1021
1022 void ATOM_item_id_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1023 {
1024         if (StrLength(CData) > 0) {
1025                 NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
1026         }
1027 }
1028
1029
1030 void RSS_item_link_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1031 {
1032         if (StrLength(CData) > 0) {
1033                 NewStrBufDupAppendFlush(&ri->link, CData, NULL, 0);
1034                 StrBufTrim(ri->link);
1035         }
1036 }
1037 void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1038 {
1039         if (StrLength(CData) > 0) {
1040                 NewStrBufDupAppendFlush(&ri->reLink, CData, NULL, 0);
1041                 StrBufTrim(ri->reLink);
1042         }
1043 }
1044
1045 void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1046 {
1047         if (StrLength(CData) > 0) {
1048                 NewStrBufDupAppendFlush(&ri->title, CData, NULL, 0);
1049                 StrBufTrim(ri->title);
1050         }
1051 }
1052
1053 void ATOM_item_content_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1054 {
1055         long olen = StrLength (ri->description);
1056         long clen = StrLength (CData);
1057         if (clen > 0) 
1058         {
1059                 if (olen == 0) {
1060                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
1061                         StrBufTrim(ri->description);
1062                 }
1063                 else if (olen < clen) {
1064                         FlushStrBuf(ri->description);
1065                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
1066                         StrBufTrim(ri->description);
1067                 }
1068         }
1069 }
1070 void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1071 {
1072         /* this can contain an abstract of the article. but we don't want to verwrite a full document if we already have it. */
1073         if ((StrLength(CData) > 0) && (StrLength(ri->description) == 0))
1074         {
1075                 NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
1076                 StrBufTrim(ri->description);
1077         }
1078 }
1079
1080 void RSS_item_description_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1081 {
1082         long olen = StrLength (ri->description);
1083         long clen = StrLength (CData);
1084         if (clen > 0) 
1085         {
1086                 if (olen == 0) {
1087                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
1088                         StrBufTrim(ri->description);
1089                 }
1090                 else if (olen < clen) {
1091                         FlushStrBuf(ri->description);
1092                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
1093                         StrBufTrim(ri->description);
1094                 }
1095         }
1096 }
1097
1098 void ATOM_item_published_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1099 {                 
1100         if (StrLength(CData) > 0) {
1101                 StrBufTrim(CData);
1102                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
1103         }
1104 }
1105
1106 void ATOM_item_updated_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1107 {
1108         if (StrLength(CData) > 0) {
1109                 StrBufTrim(CData);
1110                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
1111         }
1112 }
1113
1114 void RSS_item_pubdate_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1115 {
1116         if (StrLength(CData) > 0) {
1117                 StrBufTrim(CData);
1118                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
1119         }
1120 }
1121
1122
1123 void RSS_item_date_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1124 {
1125         if (StrLength(CData) > 0) {
1126                 StrBufTrim(CData);
1127                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
1128         }
1129 }
1130
1131
1132
1133 void RSS_item_author_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1134 {
1135         if (StrLength(CData) > 0) {
1136                 NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
1137                 StrBufTrim(ri->author_or_creator);
1138         }
1139 }
1140
1141
1142 void ATOM_item_name_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1143 {
1144         if (StrLength(CData) > 0) {
1145                 NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
1146                 StrBufTrim(ri->author_or_creator);
1147         }
1148 }
1149
1150 void ATOM_item_email_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1151 {
1152         if (StrLength(CData) > 0) {
1153                 NewStrBufDupAppendFlush(&ri->author_email, CData, NULL, 0);
1154                 StrBufTrim(ri->author_email);
1155         }
1156 }
1157
1158 void RSS_item_creator_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1159 {
1160         if ((StrLength(CData) > 0) && 
1161             (StrLength(ri->author_or_creator) == 0))
1162         {
1163                 NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
1164                 StrBufTrim(ri->author_or_creator);
1165         }
1166 }
1167
1168
1169 void ATOM_item_uri_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1170 {
1171         if (StrLength(CData) > 0) {
1172                 NewStrBufDupAppendFlush(&ri->author_url, CData, NULL, 0);
1173                 StrBufTrim(ri->author_url);
1174         }
1175 }
1176
1177 void RSS_item_item_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1178 {
1179         --ri->item_tag_nesting;
1180         rss_save_item(ri);
1181 }
1182
1183
1184 void ATOM_item_entry_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1185 {
1186         --ri->item_tag_nesting;
1187         rss_save_item(ri);
1188 }
1189
1190 void RSS_item_rss_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1191 {
1192 //              CtdlLogPrintf(CTDL_DEBUG, "End of feed detected.  Closing parser.\n");
1193         ri->done_parsing = 1;
1194         
1195 }
1196 void RSS_item_rdf_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1197 {
1198 //              CtdlLogPrintf(CTDL_DEBUG, "End of feed detected.  Closing parser.\n");
1199         ri->done_parsing = 1;
1200 }
1201
1202
1203 void RSSATOM_item_ignore(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
1204 {
1205 }
1206
1207
1208
1209 /*
1210  * This callback stores up the data which appears in between tags.
1211  */
1212 void rss_xml_cdata_start(void *data) 
1213 {
1214         rsscollection *rssc = (rsscollection*) data;
1215
1216         FlushStrBuf(rssc->CData);
1217 }
1218
/* expat callback for the end of a CDATA section; nothing to do here. */
void rss_xml_cdata_end(void *data) 
{
	(void) data;
}
1222 void rss_xml_chardata(void *data, const XML_Char *s, int len) 
1223 {
1224         rsscollection *rssc = (rsscollection*) data;
1225
1226         StrBufAppendBufPlain (rssc->CData, s, len, 0);
1227 }
1228
1229 /*
1230  * Callback function for passing libcurl's output to expat for parsing
1231  */
1232 size_t rss_libcurl_callback(void *ptr, size_t size, size_t nmemb, void *stream)
1233 {
1234         XML_Parse((XML_Parser)stream, ptr, (size * nmemb), 0);
1235         return (size*nmemb);
1236 }
1237
1238
1239
1240 /*
1241  * Begin a feed parse
1242  */
1243 void rss_do_fetching(rssnetcfg *Cfg) {
1244         rsscollection rssc;
1245         rss_item ri;
1246         XML_Parser xp = NULL;
1247         StrBuf *Answer;
1248
1249         CURL *curl;
1250         CURLcode res;
1251         char errmsg[1024] = "";
1252         char *ptr;
1253         const char *at;
1254         long len;
1255
1256         time_t now;
1257
1258         now = time(NULL);
1259
1260         if ((Cfg->next_poll != 0) && (now < Cfg->next_poll))
1261                 return;
1262         memset(&ri, 0, sizeof(rss_item));
1263         rssc.Item = &ri;
1264         rssc.Cfg = Cfg;
1265
1266         CtdlLogPrintf(CTDL_DEBUG, "Fetching RSS feed <%s>\n", Cfg->url);
1267
1268         curl = curl_easy_init();
1269         if (!curl) {
1270                 CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n");
1271                 return;
1272         }
1273         Answer = NewStrBufPlain(NULL, SIZ);
1274
1275         curl_easy_setopt(curl, CURLOPT_URL, Cfg->url);
1276         curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
1277         curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
1278         curl_easy_setopt(curl, CURLOPT_WRITEDATA, Answer);
1279 //      curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, rss_libcurl_callback);
1280         curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, CurlFillStrBuf_callback);
1281         curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, errmsg);
1282         curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
1283 #ifdef CURLOPT_HTTP_CONTENT_DECODING
1284         curl_easy_setopt(curl, CURLOPT_HTTP_CONTENT_DECODING, 1);
1285         curl_easy_setopt(curl, CURLOPT_ENCODING, "");
1286 #endif
1287         curl_easy_setopt(curl, CURLOPT_USERAGENT, CITADEL);
1288         curl_easy_setopt(curl, CURLOPT_TIMEOUT, 180);           /* die after 180 seconds */
1289         if (
1290                 (!IsEmptyStr(config.c_ip_addr))
1291                 && (strcmp(config.c_ip_addr, "*"))
1292                 && (strcmp(config.c_ip_addr, "::"))
1293                 && (strcmp(config.c_ip_addr, "0.0.0.0"))
1294         ) {
1295                 curl_easy_setopt(curl, CURLOPT_INTERFACE, config.c_ip_addr);
1296         }
1297
1298         if (CtdlThreadCheckStop())
1299         {
1300                 curl_easy_cleanup(curl);
1301                 return;
1302         }
1303         
1304         if (CtdlThreadCheckStop())
1305                 goto shutdown ;
1306
1307         res = curl_easy_perform(curl);
1308         if (res) {
1309                 CtdlLogPrintf(CTDL_ALERT, "libcurl error %d: %s\n", res, errmsg);
1310         }
1311
1312         if (CtdlThreadCheckStop())
1313                 goto shutdown ;
1314
1315
1316
1317
1318         memset(&ri, 0, sizeof(rss_item));
1319         ri.roomlist = Cfg->rooms;
1320         rssc.CData = NewStrBufPlain(NULL, SIZ);
1321         rssc.Key = NewStrBuf();
1322         at = NULL;
1323         StrBufSipLine(rssc.Key, Answer, &at);
1324         ptr = NULL;
1325
1326 #define encoding "encoding=\""
1327         ptr = strstr(ChrPtr(rssc.Key), encoding);
1328         if (ptr != NULL)
1329         {
1330                 char *pche;
1331
1332                 ptr += sizeof (encoding) - 1;
1333                 pche = strchr(ptr, '"');
1334                 if (pche != NULL)
1335                         StrBufCutAt(rssc.Key, -1, pche);
1336                 else 
1337                         ptr = "UTF-8";
1338         }
1339         else
1340                 ptr = "UTF-8";
1341
1342
1343         xp = XML_ParserCreateNS(ptr, ':');
1344         if (!xp) {
1345                 CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n");
1346                 goto shutdown;
1347         }
1348         FlushStrBuf(rssc.Key);
1349 //#ifdef HAVE_ICONV
1350 #if 0
1351         XML_SetUnknownEncodingHandler(xp,
1352                                       handle_unknown_xml_encoding,
1353                                       &rssc);
1354 #endif
1355 //#endif
1356         XML_SetElementHandler(xp, rss_xml_start, rss_xml_end);
1357         XML_SetCharacterDataHandler(xp, rss_xml_chardata);
1358         XML_SetUserData(xp, &rssc);
1359         XML_SetCdataSectionHandler(xp,
1360                                    rss_xml_cdata_start,
1361                                    rss_xml_cdata_end);
1362
1363
1364         len = StrLength(Answer);
1365         ptr = SmashStrBuf(&Answer);
1366         XML_Parse(xp, ptr, len, 0);
1367         free (ptr);
1368         if (ri.done_parsing == 0)
1369                 XML_Parse(xp, "", 0, 1);
1370
1371
1372         CtdlLogPrintf(CTDL_ALERT, "RSS: XML Status [%s] \n", 
1373                       XML_ErrorString(
1374                               XML_GetErrorCode(xp)));
1375
1376 shutdown:
1377         curl_easy_cleanup(curl);
1378         XML_ParserFree(xp);
1379
1380         flush_rss_item(&ri);
1381         FreeStrBuf(&rssc.CData);
1382         FreeStrBuf(&rssc.Key);
1383
1384         Cfg->next_poll = time(NULL) + config.c_net_freq; 
1385 }
1386
1387
1388 /*
1389  * Scan a room's netconfig to determine whether it is requesting any RSS feeds
1390  */
1391 void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
1392 {
1393         char filename[PATH_MAX];
1394         char buf[1024];
1395         char instr[32];
1396         FILE *fp;
1397         char feedurl[256];
1398         rssnetcfg *rncptr = NULL;
1399         rssnetcfg *use_this_rncptr = NULL;
1400         int len = 0;
1401         char *ptr = NULL;
1402
1403         assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir);
1404
1405         if (CtdlThreadCheckStop())
1406                 return;
1407                 
1408         /* Only do net processing for rooms that have netconfigs */
1409         fp = fopen(filename, "r");
1410         if (fp == NULL) {
1411                 return;
1412         }
1413
1414         while (fgets(buf, sizeof buf, fp) != NULL && !CtdlThreadCheckStop()) {
1415                 buf[strlen(buf)-1] = 0;
1416
1417                 extract_token(instr, buf, 0, '|', sizeof instr);
1418                 if (!strcasecmp(instr, "rssclient")) {
1419
1420                         use_this_rncptr = NULL;
1421
1422                         extract_token(feedurl, buf, 1, '|', sizeof feedurl);
1423
1424                         /* If any other rooms have requested the same feed, then we will just add this
1425                          * room to the target list for that client request.
1426                          */
1427                         for (rncptr=rnclist; rncptr!=NULL; rncptr=rncptr->next) {
1428                                 if (!strcmp(rncptr->url, feedurl)) {
1429                                         use_this_rncptr = rncptr;
1430                                 }
1431                         }
1432
1433                         /* Otherwise create a new client request */
1434                         if (use_this_rncptr == NULL) {
1435                                 rncptr = (rssnetcfg *) malloc(sizeof(rssnetcfg));
1436                                 memset(rncptr, 0, sizeof(rssnetcfg));
1437                                 rncptr->ItemType = RSS_UNSET;
1438                                 if (rncptr != NULL) {
1439                                         rncptr->next = rnclist;
1440                                         safestrncpy(rncptr->url, feedurl, sizeof rncptr->url);
1441                                         rncptr->rooms = NULL;
1442                                         rnclist = rncptr;
1443                                         use_this_rncptr = rncptr;
1444                                 }
1445                         }
1446
1447                         /* Add the room name to the request */
1448                         if (use_this_rncptr != NULL) {
1449                                 if (use_this_rncptr->rooms == NULL) {
1450                                         rncptr->rooms = strdup(qrbuf->QRname);
1451                                 }
1452                                 else {
1453                                         len = strlen(use_this_rncptr->rooms) + strlen(qrbuf->QRname) + 5;
1454                                         ptr = realloc(use_this_rncptr->rooms, len);
1455                                         if (ptr != NULL) {
1456                                                 strcat(ptr, "|");
1457                                                 strcat(ptr, qrbuf->QRname);
1458                                                 use_this_rncptr->rooms = ptr;
1459                                         }
1460                                 }
1461                         }
1462                 }
1463
1464         }
1465
1466         fclose(fp);
1467
1468 }
1469
1470 /*
1471  * Scan for rooms that have RSS client requests configured
1472  */
1473 void rssclient_scan(void) {
1474         static time_t last_run = 0L;
1475         static int doing_rssclient = 0;
1476         rssnetcfg *rptr = NULL;
1477
1478         /*
1479          * This is a simple concurrency check to make sure only one rssclient run
1480          * is done at a time.  We could do this with a mutex, but since we
1481          * don't really require extremely fine granularity here, we'll do it
1482          * with a static variable instead.
1483          */
1484         if (doing_rssclient) return;
1485         doing_rssclient = 1;
1486
1487         CtdlLogPrintf(CTDL_DEBUG, "rssclient started\n");
1488         CtdlForEachRoom(rssclient_scan_room, NULL);
1489
1490         while (rnclist != NULL && !CtdlThreadCheckStop()) {
1491                 rss_do_fetching(rnclist);
1492                 rptr = rnclist;
1493                 rnclist = rnclist->next;
1494                 if (rptr->rooms != NULL) free(rptr->rooms);
1495                 free(rptr);
1496         }
1497
1498         CtdlLogPrintf(CTDL_DEBUG, "rssclient ended\n");
1499         last_run = time(NULL);
1500         doing_rssclient = 0;
1501         return;
1502 }
1503
1504 void LoadUrlShorteners(void)
1505 {
1506         int i = 0;
1507         int fd;
1508         const char *POS = NULL;
1509         const char *Err = NULL;
1510         StrBuf *Content, *Line;
1511
1512
1513         UrlShorteners = NewHash(0, Flathash);
1514
1515         fd = open(file_citadel_urlshorteners, 0);
1516
1517         if (fd != 0)
1518         {
1519                 Content = NewStrBufPlain(NULL, SIZ);
1520                 Line = NewStrBuf();
1521                 while (POS != StrBufNOTNULL)
1522                 {
1523                         StrBufTCP_read_buffered_line_fast (Line, Content, &POS, &fd, 1, 1, &Err);
1524                         StrBufTrim(Line);
1525                         if ((*ChrPtr(Line) != '#') && (StrLength(Line) > 0))
1526                         {
1527                                 Put(UrlShorteners, IKEY(i), Line, HFreeStrBuf);
1528                                 i++;
1529                                 Line = NewStrBuf();
1530                         }
1531                         else
1532                                 FlushStrBuf(Line);
1533                         if (POS == NULL)
1534                                 POS = StrBufNOTNULL;
1535                 }
1536                 FreeStrBuf(&Line);
1537                 FreeStrBuf(&Content);
1538         }
1539         close(fd);
1540 }
1541
1542 CTDL_MODULE_INIT(rssclient)
1543 {
1544         if (threading)
1545         {
1546                 CtdlLogPrintf(CTDL_INFO, "%s\n", curl_version());
1547                 CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER);
1548         }
1549         else 
1550         {
1551                 LoadUrlShorteners ();
1552
1553                 StartHandlers = NewHash(1, NULL);
1554                 EndHandlers = NewHash(1, NULL);
1555
1556                 AddRSSStartHandler(RSS_item_rss_start,     RSS_UNSET, HKEY("rss"));
1557                 AddRSSStartHandler(RSS_item_rdf_start,     RSS_UNSET, HKEY("rdf"));
1558                 AddRSSStartHandler(ATOM_item_feed_start,    RSS_UNSET, HKEY("feed"));
1559                 AddRSSStartHandler(RSS_item_item_start,    RSS_RSS, HKEY("item"));
1560                 AddRSSStartHandler(ATOM_item_entry_start,  RSS_ATOM, HKEY("entry"));
1561                 AddRSSStartHandler(ATOM_item_link_start,   RSS_ATOM, HKEY("link"));
1562
1563                 AddRSSEndHandler(ATOMRSS_item_title_end,   RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title"));
1564                 AddRSSEndHandler(RSS_item_guid_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("guid"));
1565                 AddRSSEndHandler(ATOM_item_id_end,         RSS_ATOM|RSS_REQUIRE_BUF, HKEY("id"));
1566                 AddRSSEndHandler(RSS_item_link_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("link"));
1567 #if 0 
1568 // hm, rss to the comments of that blog, might be interesting in future, but... 
1569                 AddRSSEndHandler(RSS_item_relink_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("commentrss"));
1570 // comment count...
1571                 AddRSSEndHandler(RSS_item_relink_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("comments"));
1572 #endif
1573                 AddRSSEndHandler(RSSATOM_item_title_end,   RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title"));
1574                 AddRSSEndHandler(ATOM_item_content_end,    RSS_ATOM|RSS_REQUIRE_BUF, HKEY("content"));
1575                 AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_ATOM|RSS_REQUIRE_BUF, HKEY("encoded"));
1576                 AddRSSEndHandler(ATOM_item_summary_end,    RSS_ATOM|RSS_REQUIRE_BUF, HKEY("summary"));
1577                 AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("description"));
1578                 AddRSSEndHandler(ATOM_item_published_end,  RSS_ATOM|RSS_REQUIRE_BUF, HKEY("published"));
1579                 AddRSSEndHandler(ATOM_item_updated_end,    RSS_ATOM|RSS_REQUIRE_BUF, HKEY("updated"));
1580                 AddRSSEndHandler(RSS_item_pubdate_end,     RSS_RSS|RSS_REQUIRE_BUF, HKEY("pubdate"));
1581                 AddRSSEndHandler(RSS_item_date_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("date"));
1582                 AddRSSEndHandler(RSS_item_author_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("author"));
1583                 AddRSSEndHandler(RSS_item_creator_end,     RSS_RSS|RSS_REQUIRE_BUF, HKEY("creator"));
1584 /* <author> */
1585                 AddRSSEndHandler(ATOM_item_email_end,      RSS_ATOM|RSS_REQUIRE_BUF, HKEY("email"));
1586                 AddRSSEndHandler(ATOM_item_name_end,       RSS_ATOM|RSS_REQUIRE_BUF, HKEY("name"));
1587                 AddRSSEndHandler(ATOM_item_uri_end,        RSS_ATOM|RSS_REQUIRE_BUF, HKEY("uri"));
1588 /* </author> */
1589                 AddRSSEndHandler(RSS_item_item_end,        RSS_RSS, HKEY("item"));
1590                 AddRSSEndHandler(RSS_item_rss_end,         RSS_RSS, HKEY("rss"));
1591                 AddRSSEndHandler(RSS_item_rdf_end,         RSS_RSS, HKEY("rdf"));
1592                 AddRSSEndHandler(ATOM_item_entry_end,      RSS_ATOM, HKEY("entry"));
1593
1594
1595 /* at the start of atoms: <seq> <li>link to resource</li></seq> ignore them. */
1596                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("seq"));
1597                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("seq"));
1598                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("li"));
1599                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("li"));
1600
1601 /* links to other feed generators... */
1602                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("feedflare"));
1603                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("feedflare"));
1604                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("browserfriendly"));
1605                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("browserfriendly"));
1606
1607                 KnownNameSpaces = NewHash(1, NULL);
1608                 Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearch/1.1/"), NULL, reference_free_handler);
1609                 Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearchrss/1.0/"), NULL, reference_free_handler);
1610                 Put(KnownNameSpaces, HKEY("http://backend.userland.com/creativeCommonsRssModule"), NULL, reference_free_handler);
1611                 Put(KnownNameSpaces, HKEY("http://purl.org/atom/ns#"), NULL, reference_free_handler);
1612                 Put(KnownNameSpaces, HKEY("http://purl.org/dc/elements/1.1/"), NULL, reference_free_handler);
1613                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler);
1614                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/content/"), NULL, reference_free_handler);
1615                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/slash/"), NULL, reference_free_handler);
1616                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/syndication/"), NULL, reference_free_handler);
1617                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler);
1618                 Put(KnownNameSpaces, HKEY("http://purl.org/syndication/thread/1.0"), NULL, reference_free_handler);
1619                 Put(KnownNameSpaces, HKEY("http://rssnamespace.org/feedburner/ext/1.0"), NULL, reference_free_handler);
1620                 Put(KnownNameSpaces, HKEY("http://schemas.google.com/g/2005"), NULL, reference_free_handler);
1621                 Put(KnownNameSpaces, HKEY("http://webns.net/mvcb/"), NULL, reference_free_handler);
1622                 Put(KnownNameSpaces, HKEY("http://web.resource.org/cc/"), NULL, reference_free_handler);
1623                 Put(KnownNameSpaces, HKEY("http://wellformedweb.org/CommentAPI/"), NULL, reference_free_handler);
1624                 Put(KnownNameSpaces, HKEY("http://www.georss.org/georss"), NULL, reference_free_handler);
1625                 Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/xhtml"), NULL, reference_free_handler);
1626                 Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler);
1627                 Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler);
1628                 Put(KnownNameSpaces, HKEY("http://www.w3.org/2003/01/geo/wgs84_pos#"), NULL, reference_free_handler);
1629                 Put(KnownNameSpaces, HKEY("http://www.w3.org/2005/Atom"), NULL, reference_free_handler);
1630                 Put(KnownNameSpaces, HKEY("urn:flickr:"), NULL, reference_free_handler);
1631 #if 0
1632                 /* we don't like these namespaces because of they shadow our usefull parameters. */
1633                 Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler);
1634 #endif
1635         }
1636         return "rssclient";
1637 }