Removed the logging facility from citserver, use syslog instead
[citadel.git] / citadel / modules / rssclient / rss_atom_parser.c
1 /*
2  * Bring external RSS feeds into rooms.
3  *
4  * Copyright (c) 2007-2010 by the citadel.org team
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20
21 #include <stdlib.h>
22 #include <unistd.h>
23 #include <stdio.h>
24
25 #if TIME_WITH_SYS_TIME
26 # include <sys/time.h>
27 # include <time.h>
28 #else
29 # if HAVE_SYS_TIME_H
30 #  include <sys/time.h>
31 # else
32 #  include <time.h>
33 # endif
34 #endif
35
36 #include <ctype.h>
37 #include <string.h>
38 #include <errno.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <expat.h>
42 #include <curl/curl.h>
43 #include <libcitadel.h>
44 #include "citadel.h"
45 #include "server.h"
46 #include "citserver.h"
47 #include "support.h"
48 #include "config.h"
49 #include "threads.h"
50 #include "ctdl_module.h"
51 #include "clientsocket.h"
52 #include "msgbase.h"
53 #include "parsedate.h"
54 #include "database.h"
55 #include "citadel_dirs.h"
56 #include "md5.h"
57 #include "context.h"
58 #include "event_client.h"
59 #include "rss_atom_parser.h"
60
61 HashList *StartHandlers = NULL;
62 HashList *EndHandlers = NULL;
63 HashList *KnownNameSpaces = NULL;
64 void AddRSSStartHandler(rss_handler_func Handler, int Flags, const char *key, long len)
65 {
66         rss_xml_handler *h;
67         h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
68         h->Flags = Flags;
69         h->Handler = Handler;
70         Put(StartHandlers, key, len, h, NULL);
71 }
72 void AddRSSEndHandler(rss_handler_func Handler, int Flags, const char *key, long len)
73 {
74         rss_xml_handler *h;
75         h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
76         h->Flags = Flags;
77         h->Handler = Handler;
78         Put(EndHandlers, key, len, h, NULL);
79 }
80
81
82
/*
 * Convert an RDF/RSS datestamp into a time_t.
 *
 * Returns 0 when p is NULL or shorter than 10 characters.  Otherwise the
 * string is tried as a W3C datetime ("YYYY-MM-DD" optionally followed by
 * "Thh:mm"), then handed to parsedate(); if neither yields a result the
 * current time is returned.
 */
time_t rdf_parsedate(const char *p)
{
	struct tm tm;
	time_t t = 0;
	size_t len;

	if (!p) return 0L;
	len = strlen(p);
	if (len < 10) return 0L;

	memset(&tm, 0, sizeof tm);

	/*
	 * If the timestamp appears to be in W3C datetime format, try to
	 * parse it.  See also: http://www.w3.org/TR/NOTE-datetime
	 *
	 * This code, along with parsedate.c, is a potential candidate for
	 * moving into libcitadel.
	 *
	 * NOTE(review): seconds and any timezone designator are not read;
	 * the broken-down time is passed to mktime() as is.
	 */
	if ( (p[4] == '-') && (p[7] == '-') ) {
		tm.tm_year = atoi(&p[0]) - 1900;
		tm.tm_mon = atoi(&p[5]) - 1;
		tm.tm_mday = atoi(&p[8]);
		/*
		 * Only the first 10 characters are known to exist, so make
		 * sure index 13 lies inside the string before looking at it.
		 */
		if ( (len > 13) && (p[10] == 'T') && (p[13] == ':') ) {
			tm.tm_hour = atoi(&p[11]);
			tm.tm_min = atoi(&p[14]);
		}
		return mktime(&tm);
	}

	/* hmm... try RFC822 date stamp format */

	t = parsedate(p);
	if (t > 0) return(t);

	/* yeesh.  ok, just return the current date and time. */
	return(time(NULL));
}
122
123 void flush_rss_item(rss_item *ri)
124 {
125         /* Initialize the feed item data structure */
126         FreeStrBuf(&ri->guid);
127         FreeStrBuf(&ri->title);
128         FreeStrBuf(&ri->link);
129         FreeStrBuf(&ri->author_or_creator);
130         FreeStrBuf(&ri->author_email);
131         FreeStrBuf(&ri->author_url);
132         FreeStrBuf(&ri->description);
133 }
134
135 void rss_xml_start(void *data, const char *supplied_el, const char **attr)
136 {
137         rss_xml_handler *h;
138         rss_aggregator  *rssc = (rss_aggregator*) data;
139         rss_item        *ri = rssc->Item;
140         void            *pv;
141         const char      *pel;
142         char            *sep = NULL;
143
144         /* Axe the namespace, we don't care about it */
145 ///     CtdlLogPrintf(0, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el);
146         pel = supplied_el;
147         while (sep = strchr(pel, ':'), sep) {
148                 pel = sep + 1;
149         }
150
151         if (pel != supplied_el)
152         {
153                 void *v;
154                 
155                 if (!GetHash(KnownNameSpaces, 
156                              supplied_el, 
157                              pel - supplied_el - 1,
158                              &v))
159                 {
160 #ifdef DEBUG_RSS
161                         CtdlLogPrintf(0, "RSS: START ignoring because of wrong namespace [%s] = [%s]\n", 
162                                       supplied_el);
163 #endif
164                         return;
165                 }
166         }
167
168         StrBufPlain(rssc->Key, pel, -1);
169         StrBufLowerCase(rssc->Key);
170         if (GetHash(StartHandlers, SKEY(rssc->Key), &pv))
171         {
172                 rssc->Current = h = (rss_xml_handler*) pv;
173
174                 if (((h->Flags & RSS_UNSET) != 0) && 
175                     (rssc->ItemType == RSS_UNSET))
176                 {
177                         h->Handler(rssc->CData, ri, rssc, attr);
178                 }
179                 else if (((h->Flags & RSS_RSS) != 0) &&
180                     (rssc->ItemType == RSS_RSS))
181                 {
182                         h->Handler(rssc->CData, ri, rssc, attr);
183                 }
184                 else if (((h->Flags & RSS_ATOM) != 0) &&
185                          (rssc->ItemType == RSS_ATOM))
186                 {
187                         h->Handler(rssc->CData, ri, rssc, attr);                        
188                 }
189 #ifdef DEBUG_RSS
190                 else 
191                         CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el);
192 #endif
193         }
194 #ifdef DEBUG_RSS
195         else 
196                 CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel,  supplied_el);
197 #endif
198 }
199
200 void rss_xml_end(void *data, const char *supplied_el)
201 {
202         rss_xml_handler *h;
203         rss_aggregator  *rssc = (rss_aggregator*) data;
204         rss_item        *ri = rssc->Item;
205         const char      *pel;
206         char            *sep = NULL;
207         void            *pv;
208
209         /* Axe the namespace, we don't care about it */
210         pel = supplied_el;
211         while (sep = strchr(pel, ':'), sep) {
212                 pel = sep + 1;
213         }
214 //      CtdlLogPrintf(0, "RSS: END %s...\n", el);
215         if (pel != supplied_el)
216         {
217                 void *v;
218                 
219                 if (!GetHash(KnownNameSpaces, 
220                              supplied_el, 
221                              pel - supplied_el - 1,
222                              &v))
223                 {
224 #ifdef DEBUG_RSS
225                         CtdlLogPrintf(0, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n", 
226                                       supplied_el, ChrPtr(rssc->CData));
227 #endif
228                         FlushStrBuf(rssc->CData);
229                         return;
230                 }
231         }
232
233         StrBufPlain(rssc->Key, pel, -1);
234         StrBufLowerCase(rssc->Key);
235         if (GetHash(EndHandlers, SKEY(rssc->Key), &pv))
236         {
237                 h = (rss_xml_handler*) pv;
238
239                 if (((h->Flags & RSS_UNSET) != 0) && 
240                     (rssc->ItemType == RSS_UNSET))
241                 {
242                         h->Handler(rssc->CData, ri, rssc, NULL);
243                 }
244                 else if (((h->Flags & RSS_RSS) != 0) &&
245                     (rssc->ItemType == RSS_RSS))
246                 {
247                         h->Handler(rssc->CData, ri, rssc, NULL);
248                 }
249                 else if (((h->Flags & RSS_ATOM) != 0) &&
250                          (rssc->ItemType == RSS_ATOM))
251                 {
252                         h->Handler(rssc->CData, ri, rssc, NULL);
253                 }
254 #ifdef DEBUG_RSS
255                 else 
256                         CtdlLogPrintf(0, "RSS: END   unhandled: [%s]  [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData));
257 #endif
258         }
259 #ifdef DEBUG_RSS
260         else 
261                 CtdlLogPrintf(0, "RSS: END   unhandled: [%s]  [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData));
262 #endif
263         FlushStrBuf(rssc->CData);
264         rssc->Current = NULL;
265 }
266
267
268
269
270
271 void RSS_item_rss_start (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
272 {
273         syslog(LOG_DEBUG, "RSS: This is an RSS feed.\n");
274         Cfg->ItemType = RSS_RSS;
275 }
276
277 void RSS_item_rdf_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
278 {
279         syslog(LOG_DEBUG, "RSS: This is an RDF feed.\n");
280         Cfg->ItemType = RSS_RSS;
281 }
282
283 void ATOM_item_feed_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
284 {
285         syslog(LOG_DEBUG, "RSS: This is an ATOM feed.\n");
286         Cfg->ItemType = RSS_ATOM;
287 }
288
289
290 void RSS_item_item_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
291 {
292         ri->item_tag_nesting ++;
293         flush_rss_item(ri);
294 }
295
296 void ATOM_item_entry_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
297 {
298 /* Atom feed... */
299         ri->item_tag_nesting ++;
300         flush_rss_item(ri);
301 }
302
303 void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
304 {
305         int i;
306         const char *pHref = NULL;
307         const char *pType = NULL;
308         const char *pRel = NULL;
309         const char *pTitle = NULL;
310
311         for (i = 0; Attr[i] != NULL; i+=2)
312         {
313                 if (!strcmp(Attr[i], "href"))
314                 {
315                         pHref = Attr[i+1];
316                 }
317                 else if (!strcmp(Attr[i], "rel"))
318                 {
319                         pRel = Attr[i+1];
320                 }
321                 else if (!strcmp(Attr[i], "type"))
322                 {
323                         pType = Attr[i+1];
324                 }
325                 else if (!strcmp(Attr[i], "title"))
326                 {
327                         pTitle = Attr[i+1];
328                 }
329         }
330         if (pHref == NULL)
331                 return; /* WHUT? Pointing... where? */
332         if ((pType != NULL) && !strcasecmp(pType, "application/atom+xml"))
333                 return; /* these just point to other rss resources, we're not interested in them. */
334         if (pRel != NULL)
335         {
336                 if (!strcasecmp (pRel, "replies"))
337                 {
338                         NewStrBufDupAppendFlush(&ri->reLink, NULL, pHref, -1);
339                         StrBufTrim(ri->link);
340                         NewStrBufDupAppendFlush(&ri->reLinkTitle, NULL, pTitle, -1);
341                 }
342                 else if (!strcasecmp(pRel, "alternate")) /* Alternative representation of this Item... */
343                 {
344                         NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1);
345                         StrBufTrim(ri->link);
346                         NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1);
347
348                 }
349 #if 0 /* these are also defined, but dunno what to do with them.. */
350                 else if (!strcasecmp(pRel, "related"))
351                 {
352                 }
353                 else if (!strcasecmp(pRel, "self"))
354                 {
355                 }
356                 else if (!strcasecmp(pRel, "enclosure"))
357                 {/* this reference can get big, and is probably the full article... */
358                 }
359                 else if (!strcasecmp(pRel, "via"))
360                 {/* this article was provided via... */
361                 }
362 #endif
363         }
364         else if (StrLength(ri->link) == 0)
365         {
366                 NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1);
367                 StrBufTrim(ri->link);
368                 NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1);
369         }
370 }
371
372
373
374
375 void ATOMRSS_item_title_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
376 {
377         if ((ri->item_tag_nesting == 0) && (StrLength(CData) > 0)) {
378                 NewStrBufDupAppendFlush(&ri->channel_title, CData, NULL, 0);
379                 StrBufTrim(ri->channel_title);
380         }
381 }
382
383 void RSS_item_guid_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
384 {
385         if (StrLength(CData) > 0) {
386                 NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
387         }
388 }
389
390 void ATOM_item_id_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
391 {
392         if (StrLength(CData) > 0) {
393                 NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
394         }
395 }
396
397
398 void RSS_item_link_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
399 {
400         if (StrLength(CData) > 0) {
401                 NewStrBufDupAppendFlush(&ri->link, CData, NULL, 0);
402                 StrBufTrim(ri->link);
403         }
404 }
405 void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
406 {
407         if (StrLength(CData) > 0) {
408                 NewStrBufDupAppendFlush(&ri->reLink, CData, NULL, 0);
409                 StrBufTrim(ri->reLink);
410         }
411 }
412
413 void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
414 {
415         if (StrLength(CData) > 0) {
416                 NewStrBufDupAppendFlush(&ri->title, CData, NULL, 0);
417                 StrBufTrim(ri->title);
418         }
419 }
420
421 void ATOM_item_content_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
422 {
423         long olen = StrLength (ri->description);
424         long clen = StrLength (CData);
425         if (clen > 0) 
426         {
427                 if (olen == 0) {
428                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
429                         StrBufTrim(ri->description);
430                 }
431                 else if (olen < clen) {
432                         FlushStrBuf(ri->description);
433                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
434                         StrBufTrim(ri->description);
435                 }
436         }
437 }
438 void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
439 {
440         /* this can contain an abstract of the article. but we don't want to verwrite a full document if we already have it. */
441         if ((StrLength(CData) > 0) && (StrLength(ri->description) == 0))
442         {
443                 NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
444                 StrBufTrim(ri->description);
445         }
446 }
447
448 void RSS_item_description_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
449 {
450         long olen = StrLength (ri->description);
451         long clen = StrLength (CData);
452         if (clen > 0) 
453         {
454                 if (olen == 0) {
455                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
456                         StrBufTrim(ri->description);
457                 }
458                 else if (olen < clen) {
459                         FlushStrBuf(ri->description);
460                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
461                         StrBufTrim(ri->description);
462                 }
463         }
464 }
465
466 void ATOM_item_published_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
467 {                 
468         if (StrLength(CData) > 0) {
469                 StrBufTrim(CData);
470                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
471         }
472 }
473
474 void ATOM_item_updated_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
475 {
476         if (StrLength(CData) > 0) {
477                 StrBufTrim(CData);
478                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
479         }
480 }
481
482 void RSS_item_pubdate_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
483 {
484         if (StrLength(CData) > 0) {
485                 StrBufTrim(CData);
486                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
487         }
488 }
489
490
491 void RSS_item_date_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
492 {
493         if (StrLength(CData) > 0) {
494                 StrBufTrim(CData);
495                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
496         }
497 }
498
499
500
501 void RSS_item_author_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
502 {
503         if (StrLength(CData) > 0) {
504                 NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
505                 StrBufTrim(ri->author_or_creator);
506         }
507 }
508
509
510 void ATOM_item_name_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
511 {
512         if (StrLength(CData) > 0) {
513                 NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
514                 StrBufTrim(ri->author_or_creator);
515         }
516 }
517
518 void ATOM_item_email_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
519 {
520         if (StrLength(CData) > 0) {
521                 NewStrBufDupAppendFlush(&ri->author_email, CData, NULL, 0);
522                 StrBufTrim(ri->author_email);
523         }
524 }
525
526 void RSS_item_creator_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
527 {
528         if ((StrLength(CData) > 0) && 
529             (StrLength(ri->author_or_creator) == 0))
530         {
531                 NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
532                 StrBufTrim(ri->author_or_creator);
533         }
534 }
535
536
537 void ATOM_item_uri_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
538 {
539         if (StrLength(CData) > 0) {
540                 NewStrBufDupAppendFlush(&ri->author_url, CData, NULL, 0);
541                 StrBufTrim(ri->author_url);
542         }
543 }
544
545 void RSS_item_item_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
546 {
547         --ri->item_tag_nesting;
548         rss_save_item(ri, Cfg);
549 }
550
551
552 void ATOM_item_entry_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
553 {
554         --ri->item_tag_nesting;
555         rss_save_item(ri, Cfg);
556 }
557
558 void RSS_item_rss_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
559 {
560 //              syslog(LOG_DEBUG, "End of feed detected.  Closing parser.\n");
561         ri->done_parsing = 1;
562         
563 }
564 void RSS_item_rdf_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
565 {
566 //              syslog(LOG_DEBUG, "End of feed detected.  Closing parser.\n");
567         ri->done_parsing = 1;
568 }
569
570
571 void RSSATOM_item_ignore(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
572 {
573 }
574
575
576
577 /*
578  * This callback stores up the data which appears in between tags.
579  */
580 void rss_xml_cdata_start(void *data) 
581 {
582         rss_aggregator *rssc = (rss_aggregator*) data;
583
584         FlushStrBuf(rssc->CData);
585 }
586
/*
 * Callback for the end of a CDATA section; nothing needs to be done.
 */
void rss_xml_cdata_end(void *data) 
{
	(void) data;
}
590 void rss_xml_chardata(void *data, const XML_Char *s, int len) 
591 {
592         rss_aggregator *rssc = (rss_aggregator*) data;
593
594         StrBufAppendBufPlain (rssc->CData, s, len, 0);
595 }
596
597 /*
598  * Callback function for passing libcurl's output to expat for parsing
599  */
600 size_t rss_libcurl_callback(void *ptr, size_t size, size_t nmemb, void *stream)
601 {
602         XML_Parse((XML_Parser)stream, ptr, (size * nmemb), 0);
603         return (size*nmemb);
604 }
605
606
607
608 eNextState ParseRSSReply(AsyncIO *IO)
609 {
610         rss_aggregator *rssc;
611         rss_item *ri;
612         const char *at;
613         char *ptr;
614         long len;
615
616         rssc = IO->Data;
617         ri = rssc->Item;
618         rssc->CData = NewStrBufPlain(NULL, SIZ);
619         rssc->Key = NewStrBuf();
620         at = NULL;
621         StrBufSipLine(rssc->Key, IO->HttpReq.ReplyData, &at);
622         ptr = NULL;
623
624 #define encoding "encoding=\""
625         ptr = strstr(ChrPtr(rssc->Key), encoding);
626         if (ptr != NULL)
627         {
628                 char *pche;
629
630                 ptr += sizeof (encoding) - 1;
631                 pche = strchr(ptr, '"');
632                 if (pche != NULL)
633                         StrBufCutAt(rssc->Key, -1, pche);
634                 else 
635                         ptr = "UTF-8";
636         }
637         else
638                 ptr = "UTF-8";
639
640         syslog(LOG_DEBUG, "RSS: Now parsing [%s] \n", ChrPtr(rssc->Url));
641
642         rssc->xp = XML_ParserCreateNS(ptr, ':');
643         if (!rssc->xp) {
644                 syslog(LOG_DEBUG, "Cannot create XML parser!\n");
645                 goto shutdown;
646         }
647         FlushStrBuf(rssc->Key);
648
649         XML_SetElementHandler(rssc->xp, rss_xml_start, rss_xml_end);
650         XML_SetCharacterDataHandler(rssc->xp, rss_xml_chardata);
651         XML_SetUserData(rssc->xp, rssc);
652         XML_SetCdataSectionHandler(rssc->xp,
653                                    rss_xml_cdata_start,
654                                    rss_xml_cdata_end);
655
656
657         len = StrLength(IO->HttpReq.ReplyData);
658         ptr = SmashStrBuf(&IO->HttpReq.ReplyData);
659         XML_Parse(rssc->xp, ptr, len, 0);
660         free (ptr);
661         if (ri->done_parsing == 0)
662                 XML_Parse(rssc->xp, "", 0, 1);
663
664
665         syslog(LOG_DEBUG, "RSS: XML Status [%s] \n", 
666                       XML_ErrorString(
667                               XML_GetErrorCode(rssc->xp)));
668
669 shutdown:
670         XML_ParserFree(rssc->xp);
671
672         flush_rss_item(ri);
673         FreeStrBuf(&rssc->CData);
674         FreeStrBuf(&rssc->Key);
675
676         ///Cfg->next_poll = time(NULL) + config.c_net_freq; 
677
678         return eTerminateConnection;
679 }
680
681
682 void rss_parser_cleanup(void)
683 {
684         DeleteHash(&StartHandlers);
685         DeleteHash(&EndHandlers);
686         DeleteHash(&KnownNameSpaces);
687 }
688
689
690 CTDL_MODULE_INIT(rssparser)
691 {
692         if (!threading)
693         {
694                 StartHandlers = NewHash(1, NULL);
695                 EndHandlers = NewHash(1, NULL);
696
697                 AddRSSStartHandler(RSS_item_rss_start,     RSS_UNSET, HKEY("rss"));
698                 AddRSSStartHandler(RSS_item_rdf_start,     RSS_UNSET, HKEY("rdf"));
699                 AddRSSStartHandler(ATOM_item_feed_start,    RSS_UNSET, HKEY("feed"));
700                 AddRSSStartHandler(RSS_item_item_start,    RSS_RSS, HKEY("item"));
701                 AddRSSStartHandler(ATOM_item_entry_start,  RSS_ATOM, HKEY("entry"));
702                 AddRSSStartHandler(ATOM_item_link_start,   RSS_ATOM, HKEY("link"));
703
704                 AddRSSEndHandler(ATOMRSS_item_title_end,   RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title"));
705                 AddRSSEndHandler(RSS_item_guid_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("guid"));
706                 AddRSSEndHandler(ATOM_item_id_end,         RSS_ATOM|RSS_REQUIRE_BUF, HKEY("id"));
707                 AddRSSEndHandler(RSS_item_link_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("link"));
708 #if 0 
709 // hm, rss to the comments of that blog, might be interesting in future, but... 
710                 AddRSSEndHandler(RSS_item_relink_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("commentrss"));
711 // comment count...
712                 AddRSSEndHandler(RSS_item_relink_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("comments"));
713 #endif
714                 AddRSSEndHandler(RSSATOM_item_title_end,   RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title"));
715                 AddRSSEndHandler(ATOM_item_content_end,    RSS_ATOM|RSS_REQUIRE_BUF, HKEY("content"));
716                 AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_ATOM|RSS_REQUIRE_BUF, HKEY("encoded"));
717                 AddRSSEndHandler(ATOM_item_summary_end,    RSS_ATOM|RSS_REQUIRE_BUF, HKEY("summary"));
718                 AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("description"));
719                 AddRSSEndHandler(ATOM_item_published_end,  RSS_ATOM|RSS_REQUIRE_BUF, HKEY("published"));
720                 AddRSSEndHandler(ATOM_item_updated_end,    RSS_ATOM|RSS_REQUIRE_BUF, HKEY("updated"));
721                 AddRSSEndHandler(RSS_item_pubdate_end,     RSS_RSS|RSS_REQUIRE_BUF, HKEY("pubdate"));
722                 AddRSSEndHandler(RSS_item_date_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("date"));
723                 AddRSSEndHandler(RSS_item_author_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("author"));
724                 AddRSSEndHandler(RSS_item_creator_end,     RSS_RSS|RSS_REQUIRE_BUF, HKEY("creator"));
725 /* <author> */
726                 AddRSSEndHandler(ATOM_item_email_end,      RSS_ATOM|RSS_REQUIRE_BUF, HKEY("email"));
727                 AddRSSEndHandler(ATOM_item_name_end,       RSS_ATOM|RSS_REQUIRE_BUF, HKEY("name"));
728                 AddRSSEndHandler(ATOM_item_uri_end,        RSS_ATOM|RSS_REQUIRE_BUF, HKEY("uri"));
729 /* </author> */
730                 AddRSSEndHandler(RSS_item_item_end,        RSS_RSS, HKEY("item"));
731                 AddRSSEndHandler(RSS_item_rss_end,         RSS_RSS, HKEY("rss"));
732                 AddRSSEndHandler(RSS_item_rdf_end,         RSS_RSS, HKEY("rdf"));
733                 AddRSSEndHandler(ATOM_item_entry_end,      RSS_ATOM, HKEY("entry"));
734
735
736 /* at the start of atoms: <seq> <li>link to resource</li></seq> ignore them. */
737                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("seq"));
738                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("seq"));
739                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("li"));
740                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("li"));
741
742 /* links to other feed generators... */
743                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("feedflare"));
744                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("feedflare"));
745                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("browserfriendly"));
746                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("browserfriendly"));
747
748                 KnownNameSpaces = NewHash(1, NULL);
749                 Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearch/1.1/"), NULL, reference_free_handler);
750                 Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearchrss/1.0/"), NULL, reference_free_handler);
751                 Put(KnownNameSpaces, HKEY("http://backend.userland.com/creativeCommonsRssModule"), NULL, reference_free_handler);
752                 Put(KnownNameSpaces, HKEY("http://purl.org/atom/ns#"), NULL, reference_free_handler);
753                 Put(KnownNameSpaces, HKEY("http://purl.org/dc/elements/1.1/"), NULL, reference_free_handler);
754                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler);
755                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/content/"), NULL, reference_free_handler);
756                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/slash/"), NULL, reference_free_handler);
757                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/syndication/"), NULL, reference_free_handler);
758                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler);
759                 Put(KnownNameSpaces, HKEY("http://purl.org/syndication/thread/1.0"), NULL, reference_free_handler);
760                 Put(KnownNameSpaces, HKEY("http://rssnamespace.org/feedburner/ext/1.0"), NULL, reference_free_handler);
761                 Put(KnownNameSpaces, HKEY("http://schemas.google.com/g/2005"), NULL, reference_free_handler);
762                 Put(KnownNameSpaces, HKEY("http://webns.net/mvcb/"), NULL, reference_free_handler);
763                 Put(KnownNameSpaces, HKEY("http://web.resource.org/cc/"), NULL, reference_free_handler);
764                 Put(KnownNameSpaces, HKEY("http://wellformedweb.org/CommentAPI/"), NULL, reference_free_handler);
765                 Put(KnownNameSpaces, HKEY("http://www.georss.org/georss"), NULL, reference_free_handler);
766                 Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/xhtml"), NULL, reference_free_handler);
767                 Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler);
768                 Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler);
769                 Put(KnownNameSpaces, HKEY("http://www.w3.org/2003/01/geo/wgs84_pos#"), NULL, reference_free_handler);
770                 Put(KnownNameSpaces, HKEY("http://www.w3.org/2005/Atom"), NULL, reference_free_handler);
771                 Put(KnownNameSpaces, HKEY("urn:flickr:"), NULL, reference_free_handler);
772 #if 0
773                 /* we don't like these namespaces because of they shadow our usefull parameters. */
774                 Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler);
775 #endif
776                 CtdlRegisterCleanupHook(rss_parser_cleanup);
777         }
778         return "rssparser";
779 }