first working RSS collection with async DB-Saves
[citadel.git] / citadel / modules / rssclient / rss_atom_parser.c
1 /*
2  * Bring external RSS feeds into rooms.
3  *
4  * Copyright (c) 2007-2010 by the citadel.org team
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20
21 #include <stdlib.h>
22 #include <unistd.h>
23 #include <stdio.h>
24
25 #if TIME_WITH_SYS_TIME
26 # include <sys/time.h>
27 # include <time.h>
28 #else
29 # if HAVE_SYS_TIME_H
30 #  include <sys/time.h>
31 # else
32 #  include <time.h>
33 # endif
34 #endif
35
36 #include <ctype.h>
37 #include <string.h>
38 #include <errno.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <expat.h>
42 #include <curl/curl.h>
43 #include <libcitadel.h>
44 #include "citadel.h"
45 #include "server.h"
46 #include "citserver.h"
47 #include "support.h"
48 #include "config.h"
49 #include "threads.h"
50 #include "ctdl_module.h"
51 #include "clientsocket.h"
52 #include "msgbase.h"
53 #include "parsedate.h"
54 #include "database.h"
55 #include "citadel_dirs.h"
56 #include "md5.h"
57 #include "context.h"
58 #include "event_client.h"
59 #include "rss_atom_parser.h"
60
/* Handlers for opening tags; rss_xml_start() looks them up by lowercased element name. */
HashList *StartHandlers = NULL;
/* Handlers for closing tags; rss_xml_end() looks them up by lowercased element name. */
HashList *EndHandlers = NULL;
/* Namespace URIs whose elements are processed; elements from any other namespace are skipped. */
HashList *KnownNameSpaces = NULL;
64 void AddRSSStartHandler(rss_handler_func Handler, int Flags, const char *key, long len)
65 {
66         rss_xml_handler *h;
67         h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
68         h->Flags = Flags;
69         h->Handler = Handler;
70         Put(StartHandlers, key, len, h, NULL);
71 }
72 void AddRSSEndHandler(rss_handler_func Handler, int Flags, const char *key, long len)
73 {
74         rss_xml_handler *h;
75         h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
76         h->Flags = Flags;
77         h->Handler = Handler;
78         Put(EndHandlers, key, len, h, NULL);
79 }
80
81
82
/*
 * Convert an RDF/RSS datestamp into a time_t.
 *
 * p  NUL-terminated datestamp, may be NULL
 *
 * Returns 0 for NULL or for strings shorter than 10 characters.  Strings
 * shaped like "YYYY-MM-DD" (optionally followed by "Thh:mm") are converted
 * with mktime(); seconds and any timezone designator are not evaluated.
 * Everything else is handed to parsedate(); if that yields nothing usable,
 * the current time is returned.
 */
time_t rdf_parsedate(const char *p)
{
	struct tm tm;
	time_t t = 0;
	size_t len;

	if (!p) return 0L;
	len = strlen(p);
	if (len < 10) return 0L;

	memset(&tm, 0, sizeof tm);

	/*
	 * If the timestamp appears to be in W3C datetime format, try to
	 * parse it.  See also: http://www.w3.org/TR/NOTE-datetime
	 *
	 * This code, along with parsedate.c, is a potential candidate for
	 * moving into libcitadel.
	 */
	if ( (p[4] == '-') && (p[7] == '-') ) {
		tm.tm_year = atoi(&p[0]) - 1900;
		tm.tm_mon = atoi(&p[5]) - 1;
		tm.tm_mday = atoi(&p[8]);
		/*
		 * Only look at p[13] when the string is long enough to contain
		 * it; "YYYY-MM-DDT" alone ends at index 11.
		 */
		if ( (len > 13) && (p[10] == 'T') && (p[13] == ':') ) {
			tm.tm_hour = atoi(&p[11]);
			tm.tm_min = atoi(&p[14]);
		}
		return mktime(&tm);
	}

	/* hmm... try RFC822 date stamp format */
	t = parsedate(p);
	if (t > 0) return(t);

	/* yeesh.  ok, just return the current date and time. */
	return(time(NULL));
}
122
123 void flush_rss_item(rss_item *ri)
124 {
125         /* Initialize the feed item data structure */
126         FreeStrBuf(&ri->guid);
127         FreeStrBuf(&ri->title);
128         FreeStrBuf(&ri->link);
129         FreeStrBuf(&ri->author_or_creator);
130         FreeStrBuf(&ri->author_email);
131         FreeStrBuf(&ri->author_url);
132         FreeStrBuf(&ri->description);
133 }
134
135 void rss_xml_start(void *data, const char *supplied_el, const char **attr)
136 {
137         rss_xml_handler *h;
138         rsscollection   *rssc = (rsscollection*) data;
139         rssnetcfg       *Cfg = rssc->Cfg;
140         rss_item        *ri = rssc->Item;
141         void            *pv;
142         const char      *pel;
143         char            *sep = NULL;
144
145         /* Axe the namespace, we don't care about it */
146 ///     CtdlLogPrintf(0, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el);
147         pel = supplied_el;
148         while (sep = strchr(pel, ':'), sep) {
149                 pel = sep + 1;
150         }
151
152         if (pel != supplied_el)
153         {
154                 void *v;
155                 
156                 if (!GetHash(KnownNameSpaces, 
157                              supplied_el, 
158                              pel - supplied_el - 1,
159                              &v))
160                 {
161 #ifdef DEBUG_RSS
162                         CtdlLogPrintf(0, "RSS: START ignoring because of wrong namespace [%s] = [%s]\n", 
163                                       supplied_el);
164 #endif
165                         return;
166                 }
167         }
168
169         StrBufPlain(rssc->Key, pel, -1);
170         StrBufLowerCase(rssc->Key);
171         if (GetHash(StartHandlers, SKEY(rssc->Key), &pv))
172         {
173                 rssc->Current = h = (rss_xml_handler*) pv;
174
175                 if (((h->Flags & RSS_UNSET) != 0) && 
176                     (Cfg->ItemType == RSS_UNSET))
177                 {
178                         h->Handler(rssc->CData, ri, Cfg, attr);
179                 }
180                 else if (((h->Flags & RSS_RSS) != 0) &&
181                     (Cfg->ItemType == RSS_RSS))
182                 {
183                         h->Handler(rssc->CData, ri, Cfg, attr);
184                 }
185                 else if (((h->Flags & RSS_ATOM) != 0) &&
186                          (Cfg->ItemType == RSS_ATOM))
187                 {
188                         h->Handler(rssc->CData, ri, Cfg, attr);                 
189                 }
190 #ifdef DEBUG_RSS
191                 else 
192                         CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el);
193 #endif
194         }
195 #ifdef DEBUG_RSS
196         else 
197                 CtdlLogPrintf(0, "RSS: START unhandled: [%s] [%s]...\n", pel,  supplied_el);
198 #endif
199 }
200
201 void rss_xml_end(void *data, const char *supplied_el)
202 {
203         rss_xml_handler *h;
204         rsscollection   *rssc = (rsscollection*) data;
205         rssnetcfg       *Cfg = rssc->Cfg;
206         rss_item        *ri = rssc->Item;
207         const char      *pel;
208         char            *sep = NULL;
209         void            *pv;
210
211         /* Axe the namespace, we don't care about it */
212         pel = supplied_el;
213         while (sep = strchr(pel, ':'), sep) {
214                 pel = sep + 1;
215         }
216 //      CtdlLogPrintf(0, "RSS: END %s...\n", el);
217         if (pel != supplied_el)
218         {
219                 void *v;
220                 
221                 if (!GetHash(KnownNameSpaces, 
222                              supplied_el, 
223                              pel - supplied_el - 1,
224                              &v))
225                 {
226 #ifdef DEBUG_RSS
227                         CtdlLogPrintf(0, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n", 
228                                       supplied_el, ChrPtr(rssc->CData));
229 #endif
230                         FlushStrBuf(rssc->CData);
231                         return;
232                 }
233         }
234
235         StrBufPlain(rssc->Key, pel, -1);
236         StrBufLowerCase(rssc->Key);
237         if (GetHash(EndHandlers, SKEY(rssc->Key), &pv))
238         {
239                 h = (rss_xml_handler*) pv;
240
241                 if (((h->Flags & RSS_UNSET) != 0) && 
242                     (Cfg->ItemType == RSS_UNSET))
243                 {
244                         h->Handler(rssc->CData, ri, Cfg, NULL);
245                 }
246                 else if (((h->Flags & RSS_RSS) != 0) &&
247                     (Cfg->ItemType == RSS_RSS))
248                 {
249                         h->Handler(rssc->CData, ri, Cfg, NULL);
250                 }
251                 else if (((h->Flags & RSS_ATOM) != 0) &&
252                          (Cfg->ItemType == RSS_ATOM))
253                 {
254                         h->Handler(rssc->CData, ri, Cfg, NULL);
255                 }
256 #ifdef DEBUG_RSS
257                 else 
258                         CtdlLogPrintf(0, "RSS: END   unhandled: [%s]  [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData));
259 #endif
260         }
261 #ifdef DEBUG_RSS
262         else 
263                 CtdlLogPrintf(0, "RSS: END   unhandled: [%s]  [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData));
264 #endif
265         FlushStrBuf(rssc->CData);
266         rssc->Current = NULL;
267 }
268
269
270
271
272
273 void RSS_item_rss_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
274 {
275         CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RSS feed.\n");
276         Cfg->ItemType = RSS_RSS;
277 }
278
279 void RSS_item_rdf_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
280 {
281         CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an RDF feed.\n");
282         Cfg->ItemType = RSS_RSS;
283 }
284
285 void ATOM_item_feed_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
286 {
287         CtdlLogPrintf(CTDL_DEBUG, "RSS: This is an ATOM feed.\n");
288         Cfg->ItemType = RSS_ATOM;
289 }
290
291
292 void RSS_item_item_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
293 {
294         ri->item_tag_nesting ++;
295         flush_rss_item(ri);
296 }
297
298 void ATOM_item_entry_start(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
299 {
300 /* Atom feed... */
301         ri->item_tag_nesting ++;
302         flush_rss_item(ri);
303 }
304
305 void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
306 {
307         int i;
308         const char *pHref = NULL;
309         const char *pType = NULL;
310         const char *pRel = NULL;
311         const char *pTitle = NULL;
312
313         for (i = 0; Attr[i] != NULL; i+=2)
314         {
315                 if (!strcmp(Attr[i], "href"))
316                 {
317                         pHref = Attr[i+1];
318                 }
319                 else if (!strcmp(Attr[i], "rel"))
320                 {
321                         pRel = Attr[i+1];
322                 }
323                 else if (!strcmp(Attr[i], "type"))
324                 {
325                         pType = Attr[i+1];
326                 }
327                 else if (!strcmp(Attr[i], "title"))
328                 {
329                         pTitle = Attr[i+1];
330                 }
331         }
332         if (pHref == NULL)
333                 return; /* WHUT? Pointing... where? */
334         if ((pType != NULL) && !strcasecmp(pType, "application/atom+xml"))
335                 return; /* these just point to other rss resources, we're not interested in them. */
336         if (pRel != NULL)
337         {
338                 if (!strcasecmp (pRel, "replies"))
339                 {
340                         NewStrBufDupAppendFlush(&ri->reLink, NULL, pHref, -1);
341                         StrBufTrim(ri->link);
342                         NewStrBufDupAppendFlush(&ri->reLinkTitle, NULL, pTitle, -1);
343                 }
344                 else if (!strcasecmp(pRel, "alternate")) /* Alternative representation of this Item... */
345                 {
346                         NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1);
347                         StrBufTrim(ri->link);
348                         NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1);
349
350                 }
351 #if 0 /* these are also defined, but dunno what to do with them.. */
352                 else if (!strcasecmp(pRel, "related"))
353                 {
354                 }
355                 else if (!strcasecmp(pRel, "self"))
356                 {
357                 }
358                 else if (!strcasecmp(pRel, "enclosure"))
359                 {/* this reference can get big, and is probably the full article... */
360                 }
361                 else if (!strcasecmp(pRel, "via"))
362                 {/* this article was provided via... */
363                 }
364 #endif
365         }
366         else if (StrLength(ri->link) == 0)
367         {
368                 NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1);
369                 StrBufTrim(ri->link);
370                 NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1);
371         }
372 }
373
374
375
376
377 void ATOMRSS_item_title_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
378 {
379         if ((ri->item_tag_nesting == 0) && (StrLength(CData) > 0)) {
380                 NewStrBufDupAppendFlush(&ri->channel_title, CData, NULL, 0);
381                 StrBufTrim(ri->channel_title);
382         }
383 }
384
385 void RSS_item_guid_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
386 {
387         if (StrLength(CData) > 0) {
388                 NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
389         }
390 }
391
392 void ATOM_item_id_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
393 {
394         if (StrLength(CData) > 0) {
395                 NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
396         }
397 }
398
399
400 void RSS_item_link_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
401 {
402         if (StrLength(CData) > 0) {
403                 NewStrBufDupAppendFlush(&ri->link, CData, NULL, 0);
404                 StrBufTrim(ri->link);
405         }
406 }
407 void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
408 {
409         if (StrLength(CData) > 0) {
410                 NewStrBufDupAppendFlush(&ri->reLink, CData, NULL, 0);
411                 StrBufTrim(ri->reLink);
412         }
413 }
414
415 void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
416 {
417         if (StrLength(CData) > 0) {
418                 NewStrBufDupAppendFlush(&ri->title, CData, NULL, 0);
419                 StrBufTrim(ri->title);
420         }
421 }
422
423 void ATOM_item_content_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
424 {
425         long olen = StrLength (ri->description);
426         long clen = StrLength (CData);
427         if (clen > 0) 
428         {
429                 if (olen == 0) {
430                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
431                         StrBufTrim(ri->description);
432                 }
433                 else if (olen < clen) {
434                         FlushStrBuf(ri->description);
435                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
436                         StrBufTrim(ri->description);
437                 }
438         }
439 }
440 void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
441 {
442         /* this can contain an abstract of the article. but we don't want to verwrite a full document if we already have it. */
443         if ((StrLength(CData) > 0) && (StrLength(ri->description) == 0))
444         {
445                 NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
446                 StrBufTrim(ri->description);
447         }
448 }
449
450 void RSS_item_description_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
451 {
452         long olen = StrLength (ri->description);
453         long clen = StrLength (CData);
454         if (clen > 0) 
455         {
456                 if (olen == 0) {
457                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
458                         StrBufTrim(ri->description);
459                 }
460                 else if (olen < clen) {
461                         FlushStrBuf(ri->description);
462                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
463                         StrBufTrim(ri->description);
464                 }
465         }
466 }
467
468 void ATOM_item_published_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
469 {                 
470         if (StrLength(CData) > 0) {
471                 StrBufTrim(CData);
472                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
473         }
474 }
475
476 void ATOM_item_updated_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
477 {
478         if (StrLength(CData) > 0) {
479                 StrBufTrim(CData);
480                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
481         }
482 }
483
484 void RSS_item_pubdate_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
485 {
486         if (StrLength(CData) > 0) {
487                 StrBufTrim(CData);
488                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
489         }
490 }
491
492
493 void RSS_item_date_end (StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
494 {
495         if (StrLength(CData) > 0) {
496                 StrBufTrim(CData);
497                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
498         }
499 }
500
501
502
503 void RSS_item_author_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
504 {
505         if (StrLength(CData) > 0) {
506                 NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
507                 StrBufTrim(ri->author_or_creator);
508         }
509 }
510
511
512 void ATOM_item_name_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
513 {
514         if (StrLength(CData) > 0) {
515                 NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
516                 StrBufTrim(ri->author_or_creator);
517         }
518 }
519
520 void ATOM_item_email_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
521 {
522         if (StrLength(CData) > 0) {
523                 NewStrBufDupAppendFlush(&ri->author_email, CData, NULL, 0);
524                 StrBufTrim(ri->author_email);
525         }
526 }
527
528 void RSS_item_creator_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
529 {
530         if ((StrLength(CData) > 0) && 
531             (StrLength(ri->author_or_creator) == 0))
532         {
533                 NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
534                 StrBufTrim(ri->author_or_creator);
535         }
536 }
537
538
539 void ATOM_item_uri_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
540 {
541         if (StrLength(CData) > 0) {
542                 NewStrBufDupAppendFlush(&ri->author_url, CData, NULL, 0);
543                 StrBufTrim(ri->author_url);
544         }
545 }
546
547 void RSS_item_item_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
548 {
549         --ri->item_tag_nesting;
550         rss_save_item(ri);
551 }
552
553
554 void ATOM_item_entry_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
555 {
556         --ri->item_tag_nesting;
557         rss_save_item(ri);
558 }
559
560 void RSS_item_rss_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
561 {
562 //              CtdlLogPrintf(CTDL_DEBUG, "End of feed detected.  Closing parser.\n");
563         ri->done_parsing = 1;
564         
565 }
566 void RSS_item_rdf_end(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
567 {
568 //              CtdlLogPrintf(CTDL_DEBUG, "End of feed detected.  Closing parser.\n");
569         ri->done_parsing = 1;
570 }
571
572
573 void RSSATOM_item_ignore(StrBuf *CData, rss_item *ri, rssnetcfg *Cfg, const char** Attr)
574 {
575 }
576
577
578
579 /*
580  * This callback stores up the data which appears in between tags.
581  */
582 void rss_xml_cdata_start(void *data) 
583 {
584         rsscollection *rssc = (rsscollection*) data;
585
586         FlushStrBuf(rssc->CData);
587 }
588
/*
 * expat callback for the end of a CDATA section; nothing to do.
 */
void rss_xml_cdata_end(void *data) 
{
	(void) data;
}
592 void rss_xml_chardata(void *data, const XML_Char *s, int len) 
593 {
594         rsscollection *rssc = (rsscollection*) data;
595
596         StrBufAppendBufPlain (rssc->CData, s, len, 0);
597 }
598
599 /*
600  * Callback function for passing libcurl's output to expat for parsing
601  */
602 size_t rss_libcurl_callback(void *ptr, size_t size, size_t nmemb, void *stream)
603 {
604         XML_Parse((XML_Parser)stream, ptr, (size * nmemb), 0);
605         return (size*nmemb);
606 }
607
608
609
610 eNextState ParseRSSReply(AsyncIO *IO)
611 {
612         rsscollection *rssc;
613         rss_item *ri;
614         const char *at;
615         char *ptr;
616         long len;
617
618         rssc = IO->Data;
619         ri = rssc->Item;
620         rssc->CData = NewStrBufPlain(NULL, SIZ);
621         rssc->Key = NewStrBuf();
622         at = NULL;
623         StrBufSipLine(rssc->Key, IO->HttpReq.ReplyData, &at);
624         ptr = NULL;
625
626 #define encoding "encoding=\""
627         ptr = strstr(ChrPtr(rssc->Key), encoding);
628         if (ptr != NULL)
629         {
630                 char *pche;
631
632                 ptr += sizeof (encoding) - 1;
633                 pche = strchr(ptr, '"');
634                 if (pche != NULL)
635                         StrBufCutAt(rssc->Key, -1, pche);
636                 else 
637                         ptr = "UTF-8";
638         }
639         else
640                 ptr = "UTF-8";
641
642         CtdlLogPrintf(CTDL_ALERT, "RSS: Now parsing [%s] \n", ChrPtr(rssc->Cfg->Url));
643
644         rssc->xp = XML_ParserCreateNS(ptr, ':');
645         if (!rssc->xp) {
646                 CtdlLogPrintf(CTDL_ALERT, "Cannot create XML parser!\n");
647                 goto shutdown;
648         }
649         FlushStrBuf(rssc->Key);
650
651         XML_SetElementHandler(rssc->xp, rss_xml_start, rss_xml_end);
652         XML_SetCharacterDataHandler(rssc->xp, rss_xml_chardata);
653         XML_SetUserData(rssc->xp, rssc);
654         XML_SetCdataSectionHandler(rssc->xp,
655                                    rss_xml_cdata_start,
656                                    rss_xml_cdata_end);
657
658
659         len = StrLength(IO->HttpReq.ReplyData);
660         ptr = SmashStrBuf(&IO->HttpReq.ReplyData);
661         XML_Parse(rssc->xp, ptr, len, 0);
662         free (ptr);
663         if (ri->done_parsing == 0)
664                 XML_Parse(rssc->xp, "", 0, 1);
665
666
667         CtdlLogPrintf(CTDL_ALERT, "RSS: XML Status [%s] \n", 
668                       XML_ErrorString(
669                               XML_GetErrorCode(rssc->xp)));
670
671 shutdown:
672         XML_ParserFree(rssc->xp);
673
674         flush_rss_item(ri);
675         FreeStrBuf(&rssc->CData);
676         FreeStrBuf(&rssc->Key);
677
678         ///Cfg->next_poll = time(NULL) + config.c_net_freq; 
679
680         return eTerminateConnection;
681 }
682
683
684 void rss_cleanup(void)
685 {
686         DeleteHash(&StartHandlers);
687         DeleteHash(&EndHandlers);
688         DeleteHash(&KnownNameSpaces);
689 }
690
691
692 CTDL_MODULE_INIT(rssparser)
693 {
694         if (!threading)
695         {
696                 StartHandlers = NewHash(1, NULL);
697                 EndHandlers = NewHash(1, NULL);
698
699                 AddRSSStartHandler(RSS_item_rss_start,     RSS_UNSET, HKEY("rss"));
700                 AddRSSStartHandler(RSS_item_rdf_start,     RSS_UNSET, HKEY("rdf"));
701                 AddRSSStartHandler(ATOM_item_feed_start,    RSS_UNSET, HKEY("feed"));
702                 AddRSSStartHandler(RSS_item_item_start,    RSS_RSS, HKEY("item"));
703                 AddRSSStartHandler(ATOM_item_entry_start,  RSS_ATOM, HKEY("entry"));
704                 AddRSSStartHandler(ATOM_item_link_start,   RSS_ATOM, HKEY("link"));
705
706                 AddRSSEndHandler(ATOMRSS_item_title_end,   RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title"));
707                 AddRSSEndHandler(RSS_item_guid_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("guid"));
708                 AddRSSEndHandler(ATOM_item_id_end,         RSS_ATOM|RSS_REQUIRE_BUF, HKEY("id"));
709                 AddRSSEndHandler(RSS_item_link_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("link"));
710 #if 0 
711 // hm, rss to the comments of that blog, might be interesting in future, but... 
712                 AddRSSEndHandler(RSS_item_relink_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("commentrss"));
713 // comment count...
714                 AddRSSEndHandler(RSS_item_relink_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("comments"));
715 #endif
716                 AddRSSEndHandler(RSSATOM_item_title_end,   RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title"));
717                 AddRSSEndHandler(ATOM_item_content_end,    RSS_ATOM|RSS_REQUIRE_BUF, HKEY("content"));
718                 AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_ATOM|RSS_REQUIRE_BUF, HKEY("encoded"));
719                 AddRSSEndHandler(ATOM_item_summary_end,    RSS_ATOM|RSS_REQUIRE_BUF, HKEY("summary"));
720                 AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("description"));
721                 AddRSSEndHandler(ATOM_item_published_end,  RSS_ATOM|RSS_REQUIRE_BUF, HKEY("published"));
722                 AddRSSEndHandler(ATOM_item_updated_end,    RSS_ATOM|RSS_REQUIRE_BUF, HKEY("updated"));
723                 AddRSSEndHandler(RSS_item_pubdate_end,     RSS_RSS|RSS_REQUIRE_BUF, HKEY("pubdate"));
724                 AddRSSEndHandler(RSS_item_date_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("date"));
725                 AddRSSEndHandler(RSS_item_author_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("author"));
726                 AddRSSEndHandler(RSS_item_creator_end,     RSS_RSS|RSS_REQUIRE_BUF, HKEY("creator"));
727 /* <author> */
728                 AddRSSEndHandler(ATOM_item_email_end,      RSS_ATOM|RSS_REQUIRE_BUF, HKEY("email"));
729                 AddRSSEndHandler(ATOM_item_name_end,       RSS_ATOM|RSS_REQUIRE_BUF, HKEY("name"));
730                 AddRSSEndHandler(ATOM_item_uri_end,        RSS_ATOM|RSS_REQUIRE_BUF, HKEY("uri"));
731 /* </author> */
732                 AddRSSEndHandler(RSS_item_item_end,        RSS_RSS, HKEY("item"));
733                 AddRSSEndHandler(RSS_item_rss_end,         RSS_RSS, HKEY("rss"));
734                 AddRSSEndHandler(RSS_item_rdf_end,         RSS_RSS, HKEY("rdf"));
735                 AddRSSEndHandler(ATOM_item_entry_end,      RSS_ATOM, HKEY("entry"));
736
737
738 /* at the start of atoms: <seq> <li>link to resource</li></seq> ignore them. */
739                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("seq"));
740                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("seq"));
741                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("li"));
742                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("li"));
743
744 /* links to other feed generators... */
745                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("feedflare"));
746                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("feedflare"));
747                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("browserfriendly"));
748                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("browserfriendly"));
749
750                 KnownNameSpaces = NewHash(1, NULL);
751                 Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearch/1.1/"), NULL, reference_free_handler);
752                 Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearchrss/1.0/"), NULL, reference_free_handler);
753                 Put(KnownNameSpaces, HKEY("http://backend.userland.com/creativeCommonsRssModule"), NULL, reference_free_handler);
754                 Put(KnownNameSpaces, HKEY("http://purl.org/atom/ns#"), NULL, reference_free_handler);
755                 Put(KnownNameSpaces, HKEY("http://purl.org/dc/elements/1.1/"), NULL, reference_free_handler);
756                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler);
757                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/content/"), NULL, reference_free_handler);
758                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/slash/"), NULL, reference_free_handler);
759                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/syndication/"), NULL, reference_free_handler);
760                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler);
761                 Put(KnownNameSpaces, HKEY("http://purl.org/syndication/thread/1.0"), NULL, reference_free_handler);
762                 Put(KnownNameSpaces, HKEY("http://rssnamespace.org/feedburner/ext/1.0"), NULL, reference_free_handler);
763                 Put(KnownNameSpaces, HKEY("http://schemas.google.com/g/2005"), NULL, reference_free_handler);
764                 Put(KnownNameSpaces, HKEY("http://webns.net/mvcb/"), NULL, reference_free_handler);
765                 Put(KnownNameSpaces, HKEY("http://web.resource.org/cc/"), NULL, reference_free_handler);
766                 Put(KnownNameSpaces, HKEY("http://wellformedweb.org/CommentAPI/"), NULL, reference_free_handler);
767                 Put(KnownNameSpaces, HKEY("http://www.georss.org/georss"), NULL, reference_free_handler);
768                 Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/xhtml"), NULL, reference_free_handler);
769                 Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler);
770                 Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler);
771                 Put(KnownNameSpaces, HKEY("http://www.w3.org/2003/01/geo/wgs84_pos#"), NULL, reference_free_handler);
772                 Put(KnownNameSpaces, HKEY("http://www.w3.org/2005/Atom"), NULL, reference_free_handler);
773                 Put(KnownNameSpaces, HKEY("urn:flickr:"), NULL, reference_free_handler);
774 #if 0
775                 /* we don't like these namespaces because of they shadow our usefull parameters. */
776                 Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler);
777 #endif
778                 CtdlRegisterCleanupHook(rss_cleanup);
779         }
780         return "rssparser";
781 }