4 * Copyright 2003-2004 Rene Puls <rpuls@gmx.net> and
5 * Oliver Feiler <kiza@kcore.de>
7 * http://kiza.kcore.de/software/snownews/
8 * http://home.kcore.de/~kianga/study/c/xmlparse.c
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as
14 * published by the Free Software Foundation.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32 #include "conversions.h"
/* Scratch pointer: DeXML() points it at each snapshot node while that node is
 * being allocated, filled in and linked into the list below. */
35 struct newsitem *copy;
/* Head of the snapshot list. DeXML() builds it from the feed's current items
 * before re-parsing and frees it afterwards; parse_rdf10_item() walks it to
 * carry readstatus over to freshly parsed items when saverestore == 1. */
36 struct newsitem *firstcopy;
38 /* During the parsens one calls, if we meet a <channel> element.
39 * The function returns a new Struct for the new feed. */
41 void parse_rdf10_channel(struct feed *feed, xmlDocPtr doc, xmlNodePtr node) {
44 /* Free everything before we write to it again. */
47 free (feed->description);
49 if (feed->items != NULL) {
50 while (feed->items->next_ptr != NULL) {
51 feed->items = feed->items->next_ptr;
52 free (feed->items->prev_ptr->data->title);
53 free (feed->items->prev_ptr->data->link);
54 free (feed->items->prev_ptr->data->guid);
55 free (feed->items->prev_ptr->data->description);
56 free (feed->items->prev_ptr->data);
57 free (feed->items->prev_ptr);
59 free (feed->items->data->title);
60 free (feed->items->data->link);
61 free (feed->items->data->guid);
62 free (feed->items->data->description);
63 free (feed->items->data);
67 /* At the moment we have still no Items, so set the list to null. */
71 feed->description = NULL;
73 /* Go through all <channel> tags and extract the information */
74 for (cur = node; cur != NULL; cur = cur->next) {
75 if (cur->type != XML_ELEMENT_NODE)
77 if (xmlStrcmp(cur->name, "title") == 0) {
78 feed->title = xmlNodeListGetString(doc, cur->children, 1);
79 CleanupString (feed->title, 1);
80 /* Remove trailing newline */
81 if (feed->title != NULL) {
82 if (strlen(feed->title) > 1) {
83 if (feed->title[strlen(feed->title)-1] == '\n')
84 feed->title[strlen(feed->title)-1] = '\0';
88 else if (xmlStrcmp(cur->name, "link") == 0) {
89 feed->link = xmlNodeListGetString(doc, cur->children, 1);
90 /* Remove trailing newline */
91 if (feed->link != NULL) {
92 if (strlen(feed->link) > 1) {
93 if (feed->link[strlen(feed->link)-1] == '\n')
94 feed->link[strlen(feed->link)-1] = '\0';
98 else if (xmlStrcmp(cur->name, "description") == 0) {
99 feed->description = xmlNodeListGetString(doc, cur->children, 1);
100 CleanupString (feed->description, 0);
106 void parse_rdf20_channel(struct feed *feed, xmlDocPtr doc, xmlNodePtr node)
110 /* Free everything before we write to it again. */
113 free (feed->description);
115 if (feed->items != NULL) {
116 while (feed->items->next_ptr != NULL) {
117 feed->items = feed->items->next_ptr;
118 free (feed->items->prev_ptr->data->title);
119 free (feed->items->prev_ptr->data->link);
120 free (feed->items->prev_ptr->data->guid);
121 free (feed->items->prev_ptr->data->description);
122 free (feed->items->prev_ptr->data);
123 free (feed->items->prev_ptr);
125 free (feed->items->data->title);
126 free (feed->items->data->link);
127 free (feed->items->data->guid);
128 free (feed->items->data->description);
129 free (feed->items->data);
133 /* Im Augenblick haben wir noch keine Items, also die Liste auf NULL setzen. */
137 feed->description = NULL;
139 /* Alle Tags im <channel> Tag durchgehen und die Informationen extrahieren */
140 for (cur = node; cur != NULL; cur = cur->next) {
141 if (cur->type != XML_ELEMENT_NODE)
143 if (xmlStrcmp(cur->name, "title") == 0) {
144 feed->title = xmlNodeListGetString(doc, cur->children, 1);
145 CleanupString (feed->title, 1);
146 /* Remove trailing newline */
147 if (feed->title != NULL) {
148 if (strlen(feed->title) > 1) {
149 if (feed->title[strlen(feed->title)-1] == '\n')
150 feed->title[strlen(feed->title)-1] = '\0';
154 else if (xmlStrcmp(cur->name, "link") == 0) {
155 feed->link = xmlNodeListGetString(doc, cur->children, 1);
156 /* Remove trailing newline */
157 if (feed->link != NULL) {
158 if (strlen(feed->link) > 1) {
159 if (feed->link[strlen(feed->link)-1] == '\n')
160 feed->link[strlen(feed->link)-1] = '\0';
164 else if (xmlStrcmp(cur->name, "description") == 0) {
165 feed->description = xmlNodeListGetString(doc, cur->children, 1);
166 CleanupString (feed->description, 0);
167 } else if (xmlStrcmp(cur->name, "item") == 0) {
168 parse_rdf10_item(feed, doc, cur->children);
173 /* This function is called each mark, if we meet on. As parameter it needs the
174 * current new feed (new feed struct *), as well as the current XML
175 * document-acts and the current element, both comes directly of libxml.
178 void parse_rdf10_item(struct feed *feed, xmlDocPtr doc, xmlNodePtr node)
181 xmlChar *readstatusstring;
183 struct newsitem *item;
184 struct newsitem *current;
186 /* Speicher für ein neues Newsitem reservieren */
187 item = malloc(sizeof (struct newsitem));
188 item->data = malloc (sizeof (struct newsdata));
190 item->data->title = NULL;
191 item->data->link = NULL;
192 item->data->guid = NULL;
193 item->data->description = NULL;
194 item->data->readstatus = 0;
195 item->data->parent = feed;
197 /* Alle Tags im <item> Tag durchgehen und die Informationen extrahieren.
198 Selbe Vorgehensweise wie in der parse_channel() Funktion */
199 for (cur = node; cur != NULL; cur = cur->next) {
200 if (cur->type != XML_ELEMENT_NODE)
202 if (xmlStrcmp(cur->name, "title") == 0) {
203 item->data->title = xmlNodeListGetString(doc, cur->children, 1);
204 CleanupString (item->data->title, 1);
205 /* Remove trailing newline */
206 if (item->data->title != NULL) {
207 if (strlen(item->data->title) > 1) {
208 if (item->data->title[strlen(item->data->title)-1] == '\n')
209 item->data->title[strlen(item->data->title)-1] = '\0';
213 else if (xmlStrcmp(cur->name, "link") == 0) {
214 item->data->link = xmlNodeListGetString(doc, cur->children, 1);
215 if (item->data->link == NULL) {
216 if (xmlStrcmp(cur->name, "guid") == 0)
217 item->data->link = xmlNodeListGetString(doc, cur->children, 1);
219 /* Remove trailing newline */
220 if (item->data->link != NULL) {
221 if (strlen(item->data->link) > 1) {
222 if (item->data->link[strlen(item->data->link)-1] == '\n')
223 item->data->link[strlen(item->data->link)-1] = '\0';
227 else if (xmlStrcmp(cur->name, "guid") == 0) {
228 item->data->guid = xmlNodeListGetString(doc, cur->children, 1);
229 if (item->data->guid == NULL) {
230 if (xmlStrcmp(cur->name, "guid") == 0)
231 item->data->guid = xmlNodeListGetString(doc, cur->children, 1);
233 /* Remove trailing newline */
234 if (item->data->guid != NULL) {
235 if (strlen(item->data->guid) > 1) {
236 if (item->data->guid[strlen(item->data->guid)-1] == '\n')
237 item->data->guid[strlen(item->data->guid)-1] = '\0';
241 else if (xmlStrcmp(cur->name, "description") == 0) {
242 item->data->description = xmlNodeListGetString(doc, cur->children, 1);
243 CleanupString (item->data->description, 0);
245 else if (xmlStrcmp(cur->name, "readstatus") == 0) {
246 /* Will cause memory leak otherwise, xmlNodeListGetString must be freed. */
247 readstatusstring = xmlNodeListGetString(doc, cur->children, 1);
248 item->data->readstatus = atoi (readstatusstring);
249 xmlFree (readstatusstring);
253 /* If saverestore == 1, restore readstatus. */
254 if (saverestore == 1) {
255 for (current = firstcopy; current != NULL; current = current->next_ptr) {
256 if ((current->data->link != NULL) && (item->data->link != NULL)) {
257 if ((current->data->title != NULL) && (item->data->title != NULL)) {
258 if ((strcmp(item->data->link, current->data->link) == 0) &&
259 (strcmp(item->data->title, current->data->title) == 0))
260 item->data->readstatus = current->data->readstatus;
262 if (strcmp(item->data->link, current->data->link) == 0)
263 item->data->readstatus = current->data->readstatus;
269 item->next_ptr = NULL;
270 if (feed->items == NULL) {
271 item->prev_ptr = NULL;
274 item->prev_ptr = feed->items;
275 while (item->prev_ptr->next_ptr != NULL)
276 item->prev_ptr = item->prev_ptr->next_ptr;
277 item->prev_ptr->next_ptr = item;
/* Parse the XML text held in cur_ptr->feed and refill the feed structure.
 *
 * Steps visible in this function:
 *  - If the feed already has items, the link, title and readstatus of each
 *    one are copied into the global snapshot list (firstcopy).
 *  - The text is parsed with xmlRecoverMemory().
 *  - Root <RDF>: top-level <channel> goes to parse_rdf10_channel(), <item>
 *    goes to parse_rdf10_item(), and <lastmodified> is stored in
 *    cur_ptr->lastmodified.
 *  - Root <rss>: <channel> goes to parse_rdf20_channel().
 *  - When saverestore == 1 the snapshot list is freed again.
 *  - cur_ptr->original is replaced by a copy of cur_ptr->title; a NULL title
 *    is first set to a copy of cur_ptr->feedurl.
 *
 * NOTE(review): the return statements and several short statements of this
 * function are not visible in this excerpt, so the return codes and the
 * error paths (e.g. after a failed parse) still need to be confirmed. */
284 int DeXML (struct feed *cur_ptr) {
287 struct newsitem *cur_item;
289 if (cur_ptr->feed == NULL)
293 /* If cur_ptr->items != NULL we can stash each item's readstatus
294 so it can be restored after parsing. */
295 if (cur_ptr->items != NULL) {
300 /* Copy current newsitem struct. */
301 for (cur_item = cur_ptr->items; cur_item != NULL; cur_item = cur_item->next_ptr) {
302 copy = malloc (sizeof(struct newsitem));
303 copy->data = malloc (sizeof (struct newsdata));
304 copy->data->title = NULL;
305 copy->data->link = NULL;
306 copy->data->guid = NULL;
307 copy->data->description = NULL;
308 copy->data->readstatus = cur_item->data->readstatus;
309 if (cur_item->data->link != NULL)
310 copy->data->link = strdup (cur_item->data->link);
311 if (cur_item->data->title != NULL)
312 copy->data->title = strdup (cur_item->data->title);
314 copy->next_ptr = NULL;
315 if (firstcopy == NULL) {
316 copy->prev_ptr = NULL;
319 copy->prev_ptr = firstcopy;
320 while (copy->prev_ptr->next_ptr != NULL)
321 copy->prev_ptr = copy->prev_ptr->next_ptr;
322 copy->prev_ptr->next_ptr = copy;
328 parse an XML in-memory document and build a tree.
329 In case the document is not Well Formed, a tree is built anyway. */
330 doc = xmlRecoverMemory(cur_ptr->feed, strlen(cur_ptr->feed));
335 /* Find the root element (in our case it should be named "<RDF:RDF>").
336 The RDF: prefix is ignored for now, until the Jaguar
337 figures out how exactly it can be read (yep). */
338 cur = xmlDocGetRootElement(doc);
345 /* Check that the element really is named <RDF> */
346 if (xmlStrcmp(cur->name, "RDF") == 0) {
348 /* Now we walk through all elements in the document. This loop
349 itself only visits the top-level elements
350 (in HTML these would be just HEAD and BODY), so it does not descend
351 through the whole structure. That is the job of the functions
352 we call from inside the loop. */
353 for (cur = cur->children; cur != NULL; cur = cur->next) {
354 if (cur->type != XML_ELEMENT_NODE)
356 if (xmlStrcmp(cur->name, "channel") == 0)
357 parse_rdf10_channel(cur_ptr, doc, cur->children);
358 if (xmlStrcmp(cur->name, "item") == 0)
359 parse_rdf10_item(cur_ptr, doc, cur->children);
360 /* Last-Modified is only used when reading from internal feeds (disk cache). */
361 if (xmlStrcmp(cur->name, "lastmodified") == 0)
362 cur_ptr->lastmodified = xmlNodeListGetString(doc, cur->children, 1);
364 } else if (xmlStrcmp(cur->name, "rss") == 0) {
365 for (cur = cur->children; cur != NULL; cur = cur->next) {
366 if (cur->type != XML_ELEMENT_NODE)
368 if (xmlStrcmp(cur->name, "channel") == 0)
369 parse_rdf20_channel(cur_ptr, doc, cur->children);
378 if (saverestore == 1) {
379 /* Free the snapshot list headed by firstcopy. */
380 while (firstcopy->next_ptr != NULL) {
381 firstcopy = firstcopy->next_ptr;
382 free (firstcopy->prev_ptr->data->link);
383 free (firstcopy->prev_ptr->data->guid);
384 free (firstcopy->prev_ptr->data->title);
385 free (firstcopy->prev_ptr->data);
386 free (firstcopy->prev_ptr);
388 free (firstcopy->data->link);
389 free (firstcopy->data->guid);
390 free (firstcopy->data->title);
391 free (firstcopy->data);
395 if (cur_ptr->original != NULL)
396 free (cur_ptr->original);
398 /* Set -> title to something if it's a NULL pointer to avoid crash with strdup below. */
399 if (cur_ptr->title == NULL)
400 cur_ptr->title = strdup (cur_ptr->feedurl);
401 cur_ptr->original = strdup (cur_ptr->title);