Straighten http client generation
[citadel.git] / citadel / modules / rssclient / rss_atom_parser.c
1 /*
2  * Bring external RSS feeds into rooms.
3  *
4  * Copyright (c) 2007-2010 by the citadel.org team
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20
21 #include <stdlib.h>
22 #include <unistd.h>
23 #include <stdio.h>
24
25 #if TIME_WITH_SYS_TIME
26 # include <sys/time.h>
27 # include <time.h>
28 #else
29 # if HAVE_SYS_TIME_H
30 #  include <sys/time.h>
31 # else
32 #  include <time.h>
33 # endif
34 #endif
35
36 #include <ctype.h>
37 #include <string.h>
38 #include <errno.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <expat.h>
42 #include <curl/curl.h>
43 #include <libcitadel.h>
44 #include "citadel.h"
45 #include "server.h"
46 #include "citserver.h"
47 #include "support.h"
48 #include "config.h"
49 #include "threads.h"
50 #include "ctdl_module.h"
51 #include "clientsocket.h"
52 #include "msgbase.h"
53 #include "parsedate.h"
54 #include "database.h"
55 #include "citadel_dirs.h"
56 #include "md5.h"
57 #include "context.h"
58 #include "event_client.h"
59 #include "rss_atom_parser.h"
60
61 extern pthread_mutex_t RSSQueueMutex;
62
63 HashList *StartHandlers = NULL;   /* lower-cased element name -> rss_xml_handler, consulted by rss_xml_start() */
64 HashList *EndHandlers = NULL;     /* lower-cased element name -> rss_xml_handler, consulted by rss_xml_end() */
65 HashList *KnownNameSpaces = NULL; /* namespace URIs we accept; elements from any other namespace are skipped */
66 void AddRSSStartHandler(rss_handler_func Handler, int Flags, const char *key, long len)
67 {
68         rss_xml_handler *h;
69         h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
70         h->Flags = Flags;
71         h->Handler = Handler;
72         Put(StartHandlers, key, len, h, NULL);
73 }
74 void AddRSSEndHandler(rss_handler_func Handler, int Flags, const char *key, long len)
75 {
76         rss_xml_handler *h;
77         h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
78         h->Flags = Flags;
79         h->Handler = Handler;
80         Put(EndHandlers, key, len, h, NULL);
81 }
82
83
84
/*
 * Convert an RDF/RSS datestamp into a time_t.
 *
 * p  NUL-terminated datestamp; may be NULL.
 *
 * Returns 0 when p is NULL or shorter than 10 characters.
 *
 * Strings shaped like "YYYY-MM-DD" (W3C datetime, see
 * http://www.w3.org/TR/NOTE-datetime) are decoded here.  An optional
 * "Thh:mm" part supplies hour and minute; seconds and any timezone
 * designator are not evaluated, and the result comes from mktime(), i.e.
 * the fields are interpreted as local time.
 *
 * Anything else is handed to parsedate() (RFC822 style).  If that does not
 * yield a positive value either, the current time is returned.
 *
 * This code, along with parsedate.c, is a potential candidate for
 * moving into libcitadel.
 */
time_t rdf_parsedate(const char *p)
{
        struct tm tm;
        time_t t = 0;
        size_t len;

        if (!p) return 0L;
        len = strlen(p);
        if (len < 10) return 0L;

        memset(&tm, 0, sizeof tm);

        if ( (p[4] == '-') && (p[7] == '-') ) {
                tm.tm_year = atoi(&p[0]) - 1900;
                tm.tm_mon = atoi(&p[5]) - 1;
                tm.tm_mday = atoi(&p[8]);
                /*
                 * Only 10 characters are guaranteed so far; make sure
                 * index 13 exists before looking at it.  With len > 13,
                 * p[14] is at worst the terminating NUL.
                 */
                if ( (len > 13) && (p[10] == 'T') && (p[13] == ':') ) {
                        tm.tm_hour = atoi(&p[11]);
                        tm.tm_min = atoi(&p[14]);
                }
                return mktime(&tm);
        }

        /* hmm... try RFC822 date stamp format */
        t = parsedate(p);
        if (t > 0) return(t);

        /* yeesh.  ok, just return the current date and time. */
        return(time(NULL));
}
124
125 void flush_rss_item(rss_item *ri)
126 {
127         /* Initialize the feed item data structure */
128         FreeStrBuf(&ri->guid);
129         FreeStrBuf(&ri->title);
130         FreeStrBuf(&ri->link);
131         FreeStrBuf(&ri->author_or_creator);
132         FreeStrBuf(&ri->author_email);
133         FreeStrBuf(&ri->author_url);
134         FreeStrBuf(&ri->description);
135 }
136
137 void rss_xml_start(void *data, const char *supplied_el, const char **attr)
138 {
139         rss_xml_handler *h;
140         rss_aggregator  *rssc = (rss_aggregator*) data;
141         rss_item        *ri = rssc->Item;
142         void            *pv;
143         const char      *pel;
144         char            *sep = NULL;
145
146         /* Axe the namespace, we don't care about it */
147 ///     syslog(LOG_DEBUG, "RSS: supplied el %d: %s...\n", rssc->Cfg->ItemType, supplied_el);
148         pel = supplied_el;
149         while (sep = strchr(pel, ':'), sep) {
150                 pel = sep + 1;
151         }
152
153         if (pel != supplied_el)
154         {
155                 void *v;
156                 
157                 if (!GetHash(KnownNameSpaces, 
158                              supplied_el, 
159                              pel - supplied_el - 1,
160                              &v))
161                 {
162 #ifdef DEBUG_RSS
163                         syslog(LOG_DEBUG, "RSS: START ignoring because of wrong namespace [%s]\n", 
164                                       supplied_el);
165 #endif
166                         return;
167                 }
168         }
169
170         StrBufPlain(rssc->Key, pel, -1);
171         StrBufLowerCase(rssc->Key);
172         if (GetHash(StartHandlers, SKEY(rssc->Key), &pv))
173         {
174                 rssc->Current = h = (rss_xml_handler*) pv;
175
176                 if (((h->Flags & RSS_UNSET) != 0) && 
177                     (rssc->ItemType == RSS_UNSET))
178                 {
179                         h->Handler(rssc->CData, ri, rssc, attr);
180                 }
181                 else if (((h->Flags & RSS_RSS) != 0) &&
182                     (rssc->ItemType == RSS_RSS))
183                 {
184                         h->Handler(rssc->CData, ri, rssc, attr);
185                 }
186                 else if (((h->Flags & RSS_ATOM) != 0) &&
187                          (rssc->ItemType == RSS_ATOM))
188                 {
189                         h->Handler(rssc->CData, ri, rssc, attr);                        
190                 }
191 #ifdef DEBUG_RSS
192                 else 
193                         syslog(LOG_DEBUG, "RSS: START unhandled: [%s] [%s]...\n", pel, supplied_el);
194 #endif
195         }
196 #ifdef DEBUG_RSS
197         else 
198                 syslog(LOG_DEBUG, "RSS: START unhandled: [%s] [%s]...\n", pel,  supplied_el);
199 #endif
200 }
201
202 void rss_xml_end(void *data, const char *supplied_el)
203 {
204         rss_xml_handler *h;
205         rss_aggregator  *rssc = (rss_aggregator*) data;
206         rss_item        *ri = rssc->Item;
207         const char      *pel;
208         char            *sep = NULL;
209         void            *pv;
210
211         /* Axe the namespace, we don't care about it */
212         pel = supplied_el;
213         while (sep = strchr(pel, ':'), sep) {
214                 pel = sep + 1;
215         }
216 //      syslog(LOG_DEBUG, "RSS: END %s...\n", el);
217         if (pel != supplied_el)
218         {
219                 void *v;
220                 
221                 if (!GetHash(KnownNameSpaces, 
222                              supplied_el, 
223                              pel - supplied_el - 1,
224                              &v))
225                 {
226 #ifdef DEBUG_RSS
227                         syslog(LOG_DEBUG, "RSS: END ignoring because of wrong namespace [%s] = [%s]\n", 
228                                       supplied_el, ChrPtr(rssc->CData));
229 #endif
230                         FlushStrBuf(rssc->CData);
231                         return;
232                 }
233         }
234
235         StrBufPlain(rssc->Key, pel, -1);
236         StrBufLowerCase(rssc->Key);
237         if (GetHash(EndHandlers, SKEY(rssc->Key), &pv))
238         {
239                 h = (rss_xml_handler*) pv;
240
241                 if (((h->Flags & RSS_UNSET) != 0) && 
242                     (rssc->ItemType == RSS_UNSET))
243                 {
244                         h->Handler(rssc->CData, ri, rssc, NULL);
245                 }
246                 else if (((h->Flags & RSS_RSS) != 0) &&
247                     (rssc->ItemType == RSS_RSS))
248                 {
249                         h->Handler(rssc->CData, ri, rssc, NULL);
250                 }
251                 else if (((h->Flags & RSS_ATOM) != 0) &&
252                          (rssc->ItemType == RSS_ATOM))
253                 {
254                         h->Handler(rssc->CData, ri, rssc, NULL);
255                 }
256 #ifdef DEBUG_RSS
257                 else 
258                         syslog(LOG_DEBUG, "RSS: END   unhandled: [%s]  [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData));
259 #endif
260         }
261 #ifdef DEBUG_RSS
262         else 
263                 syslog(LOG_DEBUG, "RSS: END   unhandled: [%s]  [%s] = [%s]...\n", pel, supplied_el, ChrPtr(rssc->CData));
264 #endif
265         FlushStrBuf(rssc->CData);
266         rssc->Current = NULL;
267 }
268
269
270
271
272
273 void RSS_item_rss_start (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
274 {
275         syslog(LOG_DEBUG, "RSS: This is an RSS feed.\n");
276         Cfg->ItemType = RSS_RSS;
277 }
278
279 void RSS_item_rdf_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
280 {
281         syslog(LOG_DEBUG, "RSS: This is an RDF feed.\n");
282         Cfg->ItemType = RSS_RSS;
283 }
284
285 void ATOM_item_feed_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
286 {
287         syslog(LOG_DEBUG, "RSS: This is an ATOM feed.\n");
288         Cfg->ItemType = RSS_ATOM;
289 }
290
291
292 void RSS_item_item_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
293 {
294         ri->item_tag_nesting ++;
295         flush_rss_item(ri);
296 }
297
298 void ATOM_item_entry_start(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
299 {
300 /* Atom feed... */
301         ri->item_tag_nesting ++;
302         flush_rss_item(ri);
303 }
304
305 void ATOM_item_link_start (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
306 {
307         int i;
308         const char *pHref = NULL;
309         const char *pType = NULL;
310         const char *pRel = NULL;
311         const char *pTitle = NULL;
312
313         for (i = 0; Attr[i] != NULL; i+=2)
314         {
315                 if (!strcmp(Attr[i], "href"))
316                 {
317                         pHref = Attr[i+1];
318                 }
319                 else if (!strcmp(Attr[i], "rel"))
320                 {
321                         pRel = Attr[i+1];
322                 }
323                 else if (!strcmp(Attr[i], "type"))
324                 {
325                         pType = Attr[i+1];
326                 }
327                 else if (!strcmp(Attr[i], "title"))
328                 {
329                         pTitle = Attr[i+1];
330                 }
331         }
332         if (pHref == NULL)
333                 return; /* WHUT? Pointing... where? */
334         if ((pType != NULL) && !strcasecmp(pType, "application/atom+xml"))
335                 return; /* these just point to other rss resources, we're not interested in them. */
336         if (pRel != NULL)
337         {
338                 if (!strcasecmp (pRel, "replies"))
339                 {
340                         NewStrBufDupAppendFlush(&ri->reLink, NULL, pHref, -1);
341                         StrBufTrim(ri->link);
342                         NewStrBufDupAppendFlush(&ri->reLinkTitle, NULL, pTitle, -1);
343                 }
344                 else if (!strcasecmp(pRel, "alternate")) /* Alternative representation of this Item... */
345                 {
346                         NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1);
347                         StrBufTrim(ri->link);
348                         NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1);
349
350                 }
351 #if 0 /* these are also defined, but dunno what to do with them.. */
352                 else if (!strcasecmp(pRel, "related"))
353                 {
354                 }
355                 else if (!strcasecmp(pRel, "self"))
356                 {
357                 }
358                 else if (!strcasecmp(pRel, "enclosure"))
359                 {/* this reference can get big, and is probably the full article... */
360                 }
361                 else if (!strcasecmp(pRel, "via"))
362                 {/* this article was provided via... */
363                 }
364 #endif
365         }
366         else if (StrLength(ri->link) == 0)
367         {
368                 NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1);
369                 StrBufTrim(ri->link);
370                 NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1);
371         }
372 }
373
374
375
376
377 void ATOMRSS_item_title_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
378 {
379         if ((ri->item_tag_nesting == 0) && (StrLength(CData) > 0)) {
380                 NewStrBufDupAppendFlush(&ri->channel_title, CData, NULL, 0);
381                 StrBufTrim(ri->channel_title);
382         }
383 }
384
385 void RSS_item_guid_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
386 {
387         if (StrLength(CData) > 0) {
388                 NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
389         }
390 }
391
392 void ATOM_item_id_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
393 {
394         if (StrLength(CData) > 0) {
395                 NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
396         }
397 }
398
399
400 void RSS_item_link_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
401 {
402         if (StrLength(CData) > 0) {
403                 NewStrBufDupAppendFlush(&ri->link, CData, NULL, 0);
404                 StrBufTrim(ri->link);
405         }
406 }
407 void RSS_item_relink_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
408 {
409         if (StrLength(CData) > 0) {
410                 NewStrBufDupAppendFlush(&ri->reLink, CData, NULL, 0);
411                 StrBufTrim(ri->reLink);
412         }
413 }
414
415 void RSSATOM_item_title_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
416 {
417         if (StrLength(CData) > 0) {
418                 NewStrBufDupAppendFlush(&ri->title, CData, NULL, 0);
419                 StrBufTrim(ri->title);
420         }
421 }
422
423 void ATOM_item_content_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
424 {
425         long olen = StrLength (ri->description);
426         long clen = StrLength (CData);
427         if (clen > 0) 
428         {
429                 if (olen == 0) {
430                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
431                         StrBufTrim(ri->description);
432                 }
433                 else if (olen < clen) {
434                         FlushStrBuf(ri->description);
435                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
436                         StrBufTrim(ri->description);
437                 }
438         }
439 }
440 void ATOM_item_summary_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
441 {
442         /* this can contain an abstract of the article. but we don't want to verwrite a full document if we already have it. */
443         if ((StrLength(CData) > 0) && (StrLength(ri->description) == 0))
444         {
445                 NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
446                 StrBufTrim(ri->description);
447         }
448 }
449
450 void RSS_item_description_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
451 {
452         long olen = StrLength (ri->description);
453         long clen = StrLength (CData);
454         if (clen > 0) 
455         {
456                 if (olen == 0) {
457                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
458                         StrBufTrim(ri->description);
459                 }
460                 else if (olen < clen) {
461                         FlushStrBuf(ri->description);
462                         NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
463                         StrBufTrim(ri->description);
464                 }
465         }
466 }
467
468 void ATOM_item_published_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
469 {                 
470         if (StrLength(CData) > 0) {
471                 StrBufTrim(CData);
472                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
473         }
474 }
475
476 void ATOM_item_updated_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
477 {
478         if (StrLength(CData) > 0) {
479                 StrBufTrim(CData);
480                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
481         }
482 }
483
484 void RSS_item_pubdate_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
485 {
486         if (StrLength(CData) > 0) {
487                 StrBufTrim(CData);
488                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
489         }
490 }
491
492
493 void RSS_item_date_end (StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
494 {
495         if (StrLength(CData) > 0) {
496                 StrBufTrim(CData);
497                 ri->pubdate = rdf_parsedate(ChrPtr(CData));
498         }
499 }
500
501
502
503 void RSS_item_author_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
504 {
505         if (StrLength(CData) > 0) {
506                 NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
507                 StrBufTrim(ri->author_or_creator);
508         }
509 }
510
511
512 void ATOM_item_name_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
513 {
514         if (StrLength(CData) > 0) {
515                 NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
516                 StrBufTrim(ri->author_or_creator);
517         }
518 }
519
520 void ATOM_item_email_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
521 {
522         if (StrLength(CData) > 0) {
523                 NewStrBufDupAppendFlush(&ri->author_email, CData, NULL, 0);
524                 StrBufTrim(ri->author_email);
525         }
526 }
527
528 void RSS_item_creator_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
529 {
530         if ((StrLength(CData) > 0) && 
531             (StrLength(ri->author_or_creator) == 0))
532         {
533                 NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
534                 StrBufTrim(ri->author_or_creator);
535         }
536 }
537
538
539 void ATOM_item_uri_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
540 {
541         if (StrLength(CData) > 0) {
542                 NewStrBufDupAppendFlush(&ri->author_url, CData, NULL, 0);
543                 StrBufTrim(ri->author_url);
544         }
545 }
546
547 void RSS_item_item_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
548 {
549         --ri->item_tag_nesting;
550         rss_save_item(ri, Cfg);
551 }
552
553
554 void ATOM_item_entry_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
555 {
556         --ri->item_tag_nesting;
557         rss_save_item(ri, Cfg);
558 }
559
560 void RSS_item_rss_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
561 {
562 //              syslog(LOG_DEBUG, "End of feed detected.  Closing parser.\n");
563         ri->done_parsing = 1;
564         
565 }
566 void RSS_item_rdf_end(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
567 {
568 //              syslog(LOG_DEBUG, "End of feed detected.  Closing parser.\n");
569         ri->done_parsing = 1;
570 }
571
572
573 void RSSATOM_item_ignore(StrBuf *CData, rss_item *ri, rss_aggregator *Cfg, const char** Attr)
574 {
575 }
576
577
578
579 /*
580  * This callback stores up the data which appears in between tags.
581  */
582 void rss_xml_cdata_start(void *data) 
583 {
584         rss_aggregator *rssc = (rss_aggregator*) data;
585
586         FlushStrBuf(rssc->CData);
587 }
588
/*
 * expat callback: a CDATA section ends.  Nothing to do.
 */
void rss_xml_cdata_end(void *data) 
{
        (void) data;
}
592 void rss_xml_chardata(void *data, const XML_Char *s, int len) 
593 {
594         rss_aggregator *rssc = (rss_aggregator*) data;
595
596         StrBufAppendBufPlain (rssc->CData, s, len, 0);
597 }
598
599 /*
600  * Callback function for passing libcurl's output to expat for parsing
601  */
602 size_t rss_libcurl_callback(void *ptr, size_t size, size_t nmemb, void *stream)
603 {
604         XML_Parse((XML_Parser)stream, ptr, (size * nmemb), 0);
605         return (size*nmemb);
606 }
607
608
609
610 eNextState RSSAggregator_ParseReply(AsyncIO *IO)
611 {
612         StrBuf *Buf;
613         rss_aggregator *rssc;
614         rss_item *ri;
615         const char *at;
616         char *ptr;
617         long len;
618         const char *Key;
619
620
621         if (IO->HttpReq.httpcode != 200)
622         {
623
624                 EV_syslog(LOG_DEBUG, "need a 200, got a %ld !\n",
625                           IO->HttpReq.httpcode);
626 // TODO: aide error message with rate limit
627                 return eAbort;
628         }
629
630         rssc = IO->Data;
631         ri = rssc->Item;
632         rssc->CData = NewStrBufPlain(NULL, SIZ);
633         rssc->Key = NewStrBuf();
634         at = NULL;
635         StrBufSipLine(rssc->Key, IO->HttpReq.ReplyData, &at);
636         ptr = NULL;
637
638 #define encoding "encoding=\""
639         ptr = strstr(ChrPtr(rssc->Key), encoding);
640         if (ptr != NULL)
641         {
642                 char *pche;
643
644                 ptr += sizeof (encoding) - 1;
645                 pche = strchr(ptr, '"');
646                 if (pche != NULL)
647                         StrBufCutAt(rssc->Key, -1, pche);
648                 else 
649                         ptr = "UTF-8";
650         }
651         else
652                 ptr = "UTF-8";
653
654         syslog(LOG_DEBUG, "RSS: Now parsing [%s] \n", ChrPtr(rssc->Url));
655
656         rssc->xp = XML_ParserCreateNS(ptr, ':');
657         if (!rssc->xp) {
658                 syslog(LOG_DEBUG, "Cannot create XML parser!\n");
659                 return eAbort;
660         }
661         FlushStrBuf(rssc->Key);
662
663         rssc->Messages = NewHash(1, Flathash);
664         XML_SetElementHandler(rssc->xp, rss_xml_start, rss_xml_end);
665         XML_SetCharacterDataHandler(rssc->xp, rss_xml_chardata);
666         XML_SetUserData(rssc->xp, rssc);
667         XML_SetCdataSectionHandler(rssc->xp,
668                                    rss_xml_cdata_start,
669                                    rss_xml_cdata_end);
670
671
672         len = StrLength(IO->HttpReq.ReplyData);
673         ptr = SmashStrBuf(&IO->HttpReq.ReplyData);
674         XML_Parse(rssc->xp, ptr, len, 0);
675         free (ptr);
676         if (ri->done_parsing == 0)
677                 XML_Parse(rssc->xp, "", 0, 1);
678
679
680         syslog(LOG_DEBUG, "RSS: XML Status [%s] \n", 
681                       XML_ErrorString(
682                               XML_GetErrorCode(rssc->xp)));
683
684         XML_ParserFree(rssc->xp);
685         flush_rss_item(ri);
686         FreeStrBuf(&rssc->CData);
687         FreeStrBuf(&rssc->Key);
688
689         Buf = NewStrBufDup(rssc->rooms);
690         rssc->recp.recp_room = SmashStrBuf(&Buf);
691         rssc->recp.num_room = rssc->roomlist_parts;
692         rssc->recp.recptypes_magic = RECPTYPES_MAGIC;
693
694         rssc->Pos = GetNewHashPos(rssc->Messages, 1);
695
696         ///Cfg->next_poll = time(NULL) + config.c_net_freq; 
697         if (GetNextHashPos(rssc->Messages, rssc->Pos, &len, &Key, (void**) &rssc->ThisMsg))
698                 return QueueDBOperation(IO, RSS_FetchNetworkUsetableEntry);
699         else
700                 return eAbort;
701 }
702
703
704 void rss_parser_cleanup(void)
705 {
706         DeleteHash(&StartHandlers);
707         DeleteHash(&EndHandlers);
708         DeleteHash(&KnownNameSpaces);
709 }
710
711
712 CTDL_MODULE_INIT(rssparser)
713 {
714         if (!threading)
715         {
716                 StartHandlers = NewHash(1, NULL);
717                 EndHandlers = NewHash(1, NULL);
718
719                 AddRSSStartHandler(RSS_item_rss_start,     RSS_UNSET, HKEY("rss"));
720                 AddRSSStartHandler(RSS_item_rdf_start,     RSS_UNSET, HKEY("rdf"));
721                 AddRSSStartHandler(ATOM_item_feed_start,    RSS_UNSET, HKEY("feed"));
722                 AddRSSStartHandler(RSS_item_item_start,    RSS_RSS, HKEY("item"));
723                 AddRSSStartHandler(ATOM_item_entry_start,  RSS_ATOM, HKEY("entry"));
724                 AddRSSStartHandler(ATOM_item_link_start,   RSS_ATOM, HKEY("link"));
725
726                 AddRSSEndHandler(ATOMRSS_item_title_end,   RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title"));
727                 AddRSSEndHandler(RSS_item_guid_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("guid"));
728                 AddRSSEndHandler(ATOM_item_id_end,         RSS_ATOM|RSS_REQUIRE_BUF, HKEY("id"));
729                 AddRSSEndHandler(RSS_item_link_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("link"));
730 #if 0 
731 // hm, rss to the comments of that blog, might be interesting in future, but... 
732                 AddRSSEndHandler(RSS_item_relink_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("commentrss"));
733 // comment count...
734                 AddRSSEndHandler(RSS_item_relink_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("comments"));
735 #endif
736                 AddRSSEndHandler(RSSATOM_item_title_end,   RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title"));
737                 AddRSSEndHandler(ATOM_item_content_end,    RSS_ATOM|RSS_REQUIRE_BUF, HKEY("content"));
738                 AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_ATOM|RSS_REQUIRE_BUF, HKEY("encoded"));
739                 AddRSSEndHandler(ATOM_item_summary_end,    RSS_ATOM|RSS_REQUIRE_BUF, HKEY("summary"));
740                 AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("description"));
741                 AddRSSEndHandler(ATOM_item_published_end,  RSS_ATOM|RSS_REQUIRE_BUF, HKEY("published"));
742                 AddRSSEndHandler(ATOM_item_updated_end,    RSS_ATOM|RSS_REQUIRE_BUF, HKEY("updated"));
743                 AddRSSEndHandler(RSS_item_pubdate_end,     RSS_RSS|RSS_REQUIRE_BUF, HKEY("pubdate"));
744                 AddRSSEndHandler(RSS_item_date_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("date"));
745                 AddRSSEndHandler(RSS_item_author_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("author"));
746                 AddRSSEndHandler(RSS_item_creator_end,     RSS_RSS|RSS_REQUIRE_BUF, HKEY("creator"));
747 /* <author> */
748                 AddRSSEndHandler(ATOM_item_email_end,      RSS_ATOM|RSS_REQUIRE_BUF, HKEY("email"));
749                 AddRSSEndHandler(ATOM_item_name_end,       RSS_ATOM|RSS_REQUIRE_BUF, HKEY("name"));
750                 AddRSSEndHandler(ATOM_item_uri_end,        RSS_ATOM|RSS_REQUIRE_BUF, HKEY("uri"));
751 /* </author> */
752                 AddRSSEndHandler(RSS_item_item_end,        RSS_RSS, HKEY("item"));
753                 AddRSSEndHandler(RSS_item_rss_end,         RSS_RSS, HKEY("rss"));
754                 AddRSSEndHandler(RSS_item_rdf_end,         RSS_RSS, HKEY("rdf"));
755                 AddRSSEndHandler(ATOM_item_entry_end,      RSS_ATOM, HKEY("entry"));
756
757
758 /* at the start of atoms: <seq> <li>link to resource</li></seq> ignore them. */
759                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("seq"));
760                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("seq"));
761                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("li"));
762                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("li"));
763
764 /* links to other feed generators... */
765                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("feedflare"));
766                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("feedflare"));
767                 AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("browserfriendly"));
768                 AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("browserfriendly"));
769
770                 KnownNameSpaces = NewHash(1, NULL);
771                 Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearch/1.1/"), NULL, reference_free_handler);
772                 Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearchrss/1.0/"), NULL, reference_free_handler);
773                 Put(KnownNameSpaces, HKEY("http://backend.userland.com/creativeCommonsRssModule"), NULL, reference_free_handler);
774                 Put(KnownNameSpaces, HKEY("http://purl.org/atom/ns#"), NULL, reference_free_handler);
775                 Put(KnownNameSpaces, HKEY("http://purl.org/dc/elements/1.1/"), NULL, reference_free_handler);
776                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler);
777                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/content/"), NULL, reference_free_handler);
778                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/slash/"), NULL, reference_free_handler);
779                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/syndication/"), NULL, reference_free_handler);
780                 Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler);
781                 Put(KnownNameSpaces, HKEY("http://purl.org/syndication/thread/1.0"), NULL, reference_free_handler);
782                 Put(KnownNameSpaces, HKEY("http://rssnamespace.org/feedburner/ext/1.0"), NULL, reference_free_handler);
783                 Put(KnownNameSpaces, HKEY("http://schemas.google.com/g/2005"), NULL, reference_free_handler);
784                 Put(KnownNameSpaces, HKEY("http://webns.net/mvcb/"), NULL, reference_free_handler);
785                 Put(KnownNameSpaces, HKEY("http://web.resource.org/cc/"), NULL, reference_free_handler);
786                 Put(KnownNameSpaces, HKEY("http://wellformedweb.org/CommentAPI/"), NULL, reference_free_handler);
787                 Put(KnownNameSpaces, HKEY("http://www.georss.org/georss"), NULL, reference_free_handler);
788                 Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/xhtml"), NULL, reference_free_handler);
789                 Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler);
790                 Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler);
791                 Put(KnownNameSpaces, HKEY("http://www.w3.org/2003/01/geo/wgs84_pos#"), NULL, reference_free_handler);
792                 Put(KnownNameSpaces, HKEY("http://www.w3.org/2005/Atom"), NULL, reference_free_handler);
793                 Put(KnownNameSpaces, HKEY("urn:flickr:"), NULL, reference_free_handler);
794 #if 0
795                 /* we don't like these namespaces because of they shadow our usefull parameters. */
796                 Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler);
797 #endif
798                 CtdlRegisterCleanupHook(rss_parser_cleanup);
799         }
800         return "rssparser";
801 }