]> code.citadel.org Git - citadel.git/blob - rss2ctdl/netio.c
Initial revision
[citadel.git] / rss2ctdl / netio.c
1 /*
2  * $Id$
3  * 
4  * Copyright 2003-2004 Oliver Feiler <kiza@kcore.de>
5  *
6  * netio.c
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  *
21  */
22
23 /* OS X needs this, otherwise socklen_t is not defined. */
24 #ifdef __APPLE__
25 #       define _BSD_SOCKLEN_T_
26 #endif
27
28 /* BeOS does not define socklen_t. Using uint as suggested by port creator. */
29 #ifdef __BEOS__
30 #       define socklen_t unsigned int
31 #endif
32
33 #include <string.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <unistd.h>
39 //#include <fcntl.h>
40 #include <sys/types.h>
41 #include <sys/socket.h>
42 #include <netinet/in.h>
43 #include <netdb.h>
44 #include <sys/stat.h>
45 #include <sys/time.h>
46 #include <assert.h>
47
48 #include "config.h"
49 #include "main.h"
50 #include "conversions.h"
51 #include "net-support.h"
52 #include "io-internal.h"
53 #include "zlib_interface.h"
54
55 static int const MAX_HTTP_REDIRECTS = 10;       /* Maximum number of redirects we will follow. */
56 static int const NET_TIMEOUT = 20;                      /* Global network timeout in sec */
57 static int const NET_READ = 1;                          /* NetPoll rw mode: wait until the socket is readable. */
58 static int const NET_WRITE = 2;                         /* NetPoll rw mode: wait until the socket is writable. */
59
60 extern char *proxyname;                                         /* Hostname of proxyserver. */
61 extern unsigned short proxyport;                        /* Port on proxyserver to use. 0 means no proxy is used. */
62
63 /* Masquerade as Firefox on Linux to increase the share of both in web server statistics. */
64 char *useragent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0";
65
66 /* Waits NET_TIMEOUT seconds for the socket to return data.
67  *
68  * Returns
69  *
70  *      0       Socket is ready
71  *      -1      Error occured (netio_error is set)
72  */
73 int NetPoll (struct feed * cur_ptr, int * my_socket, int rw) {
74         fd_set rfdsr;
75         fd_set rfdsw;
76         struct timeval tv;
77         int retval;                             /* FD_ISSET + assert == Heisenbug? */
78         
79         /* Set global network timeout */
80         tv.tv_sec = NET_TIMEOUT;
81         tv.tv_usec = 0;
82         
83         FD_ZERO(&rfdsr);
84         FD_ZERO(&rfdsw);
85         
86         if (rw == NET_READ) {
87                 FD_SET(*my_socket, &rfdsr);
88                 if (select (*my_socket+1, &rfdsr, NULL, NULL, &tv) == 0) {
89                         /* Timed out */
90                         cur_ptr->netio_error = NET_ERR_TIMEOUT;
91                         return -1;
92                 }
93                 retval = FD_ISSET (*my_socket, &rfdsr);
94                 assert (retval);
95                 if (!retval) {
96                         /* Wtf? */
97                         cur_ptr->netio_error = NET_ERR_UNKNOWN;
98                         return -1;
99                 }
100         } else if (rw == NET_WRITE) {
101                 FD_SET(*my_socket, &rfdsw);
102                 if (select (*my_socket+1, NULL, &rfdsw, NULL, &tv) == 0) {
103                         /* Timed out */
104                         cur_ptr->netio_error = NET_ERR_TIMEOUT;
105                         return -1;
106                 }
107                 retval = FD_ISSET (*my_socket, &rfdsw);
108                 assert (retval);
109                 if (!retval) {
110                         /* Wtf? */
111                         cur_ptr->netio_error = NET_ERR_UNKNOWN;
112                         return -1;
113                 }
114         } else {
115                 cur_ptr->netio_error = NET_ERR_UNKNOWN;
116                 return -1;
117         }
118         
119         return 0;
120 }
121
122
123 /* Connect network sockets.
124  *
125  * Returns
126  *
127  *      0       Connected
128  *      -1      Error occured (netio_error is set)
129  */
130 int NetConnect (int * my_socket, char * host, struct feed * cur_ptr, int httpproto, int suppressoutput) {
131         char tmp[512];
132         struct sockaddr_in address;     
133         struct hostent *remotehost;
134         socklen_t len;
135         char *realhost;
136         unsigned short port;
137         
138         realhost = strdup(host);
139         if (sscanf (host, "%[^:]:%hd", realhost, &port) != 2) {
140                 port = 80;
141         }
142         
143         /* Create a inet stream TCP socket. */
144         *my_socket = socket (AF_INET, SOCK_STREAM, 0);
145         if (*my_socket == -1) {
146                 cur_ptr->netio_error = NET_ERR_SOCK_ERR;
147                 return -1;
148         }
149         
150         /* If proxyport is 0 we didn't execute the if http_proxy statement in main
151            so there is no proxy. On any other value of proxyport do proxyrequests instead. */
152         if (proxyport == 0) {
153                 /* Lookup remote IP. */
154                 remotehost = gethostbyname (realhost);
155                 if (remotehost == NULL) {
156                         close (*my_socket);
157                         free (realhost);
158                         cur_ptr->netio_error = NET_ERR_HOST_NOT_FOUND;
159                         return -1;
160                 }
161                 
162                 /* Set the remote address. */
163                 address.sin_family = AF_INET;
164                 address.sin_port = htons(port);
165                 memcpy (&address.sin_addr.s_addr, remotehost->h_addr_list[0], remotehost->h_length);
166                         
167                 /* Connect socket. */
168                 cur_ptr->connectresult = connect (*my_socket, (struct sockaddr *) &address, sizeof(address));
169                 
170                 /* Check if we're already connected.
171                    BSDs will return 0 on connect even in nonblock if connect was fast enough. */
172                 if (cur_ptr->connectresult != 0) {
173                         /* If errno is not EINPROGRESS, the connect went wrong. */
174                         if (errno != EINPROGRESS) {
175                                 close (*my_socket);
176                                 free (realhost);
177                                 cur_ptr->netio_error = NET_ERR_CONN_REFUSED;
178                                 return -1;
179                         }
180                         
181                         if ((NetPoll (cur_ptr, my_socket, NET_WRITE)) == -1) {
182                                 close (*my_socket);
183                                 free (realhost);
184                                 return -1;
185                         }
186                         
187                         /* We get errno of connect back via getsockopt SO_ERROR (into connectresult). */
188                         len = sizeof(cur_ptr->connectresult);
189                         getsockopt(*my_socket, SOL_SOCKET, SO_ERROR, &cur_ptr->connectresult, &len);
190                         
191                         if (cur_ptr->connectresult != 0) {
192                                 close (*my_socket);
193                                 free (realhost);
194                                 cur_ptr->netio_error = NET_ERR_CONN_FAILED;     /* ->strerror(cur_ptr->connectresult) */
195                                 return -1;
196                         }
197                 }
198         } else {
199                 /* Lookup proxyserver IP. */
200                 remotehost = gethostbyname (proxyname);
201                 if (remotehost == NULL) {
202                         close (*my_socket);
203                         free (realhost);
204                         cur_ptr->netio_error = NET_ERR_HOST_NOT_FOUND;
205                         return -1;
206                 }
207                 
208                 /* Set the remote address. */
209                 address.sin_family = AF_INET;
210                 address.sin_port = htons(proxyport);
211                 memcpy (&address.sin_addr.s_addr, remotehost->h_addr_list[0], remotehost->h_length);
212                 
213                 /* Connect socket. */
214                 cur_ptr->connectresult = connect (*my_socket, (struct sockaddr *) &address, sizeof(address));
215                 
216                 /* Check if we're already connected.
217                    BSDs will return 0 on connect even in nonblock if connect was fast enough. */
218                 if (cur_ptr->connectresult != 0) {
219                         if (errno != EINPROGRESS) {
220                                 close (*my_socket);
221                                 free (realhost);
222                                 cur_ptr->netio_error = NET_ERR_CONN_REFUSED;
223                                 return -1;
224                         }
225                 
226                         if ((NetPoll (cur_ptr, my_socket, NET_WRITE)) == -1) {
227                                 close (*my_socket);
228                                 free (realhost);
229                                 return -1;
230                         }
231                         
232                         len = sizeof(cur_ptr->connectresult);
233                         getsockopt(*my_socket, SOL_SOCKET, SO_ERROR, &cur_ptr->connectresult, &len);
234                         
235                         if (cur_ptr->connectresult != 0) {
236                                 close (*my_socket);
237                                 free (realhost);
238                                 cur_ptr->netio_error = NET_ERR_CONN_FAILED;     /* ->strerror(cur_ptr->connectresult) */
239                                 return -1;
240                         }
241                 }
242         }
243         
244         free (realhost);
245         return 0;
246 }
247
248
249 /*
250  * Main network function.
251  * (Now with a useful function description *g*)
252  *
253  * This function returns the HTTP request's body (deflating gzip encoded data
254  * if needed).
255  * Updates passed feed struct with values gathered from webserver.
256  * Handles all redirection and HTTP status decoding.
257  * Returns NULL pointer if no data was received and sets netio_error.
258  */
259 char * NetIO (int * my_socket, char * host, char * url, struct feed * cur_ptr, char * authdata, int httpproto, int suppressoutput) {
260         char netbuf[4096];                      /* Network read buffer. */
261         char *body;                                     /* XML body. */
262         unsigned int length;
263         FILE *stream;                           /* Stream socket. */
264         int chunked = 0;                        /* Content-Encoding: chunked received? */
265         int redirectcount;                      /* Number of HTTP redirects followed. */
266         char httpstatus[4];                     /* HTTP status sent by server. */
267         char servreply[128];                    /* First line of server reply */
268         char *tmpstatus;
269         char *savestart;                        /* Save start position of pointers. */
270         char *tmphost;                          /* Pointers needed to strsep operation. */
271         char *newhost;                          /* New hostname if we need to redirect. */
272         char *newurl;                           /* New document name ". */
273         char *newlocation;
274         char *tmpstring;                        /* Temp pointers. */
275         char *freeme, *freeme2;
276         char *redirecttarget;
277         int retval;
278         int handled;
279         int tmphttpstatus;
280         int inflate = 0;                        /* Whether feed data needs decompressed with zlib. */
281         int len;
282         char * inflatedbody;
283         int quirksmode = 0;                     /* IIS operation mode. */
284         int authfailed = 0;                     /* Avoid repeating failed auth requests endlessly. */
285
286         
287         if (!suppressoutput) {
288                 if (cur_ptr->title == NULL)
289                         fprintf(stderr, "Downloading http://%s%s\n", host, url);
290                 else
291                         fprintf(stderr, "Downloading %s\n", cur_ptr->title);
292
293         }
294         
295         redirectcount = 0;
296         
297         /* Goto label to redirect reconnect. */
298         tryagain:
299         
300         /* Reconstruct digest authinfo for every request so we don't reuse
301            the same nonce value for more than one request.
302            This happens one superflous time on 303 redirects. */
303         if ((cur_ptr->authinfo != NULL) && (cur_ptr->servauth != NULL)) {
304                 if (strstr (cur_ptr->authinfo, " Digest ") != NULL) {
305                         NetSupportAuth(cur_ptr, authdata, url, cur_ptr->servauth);
306                 }
307         }
308         
309         /* Open socket. */      
310         stream = fdopen (*my_socket, "r+");
311         if (stream == NULL) {
312                 /* This is a serious non-continueable OS error as it will probably not
313                    go away if we retry.
314                    
315                    BeOS will stupidly return SUCCESS here making this code silently fail on BeOS. */
316                 cur_ptr->netio_error = NET_ERR_SOCK_ERR;
317                 return NULL;
318         }
319         
320         /* Again is proxyport == 0, non proxy mode, otherwise make proxy requests. */
321         if (proxyport == 0) {
322                 /* Request URL from HTTP server. */
323                 if (cur_ptr->lastmodified != NULL) {
324                         fprintf(stream,
325                                         "GET %s HTTP/1.0\r\nAccept-Encoding: gzip\r\nUser-Agent: %s\r\nConnection: close\r\nHost: %s\r\nIf-Modified-Since: %s\r\n%s%s\r\n",
326                                         url,
327                                         useragent,
328                                         host,
329                                         cur_ptr->lastmodified,
330                                         (cur_ptr->authinfo ? cur_ptr->authinfo : ""),
331                                         (cur_ptr->cookies ? cur_ptr->cookies : ""));
332                 } else {
333                         fprintf(stream,
334                                         "GET %s HTTP/1.0\r\nAccept-Encoding: gzip\r\nUser-Agent: %s\r\nConnection: close\r\nHost: %s\r\n%s%s\r\n",
335                                         url,
336                                         useragent,
337                                         host,
338                                         (cur_ptr->authinfo ? cur_ptr->authinfo : ""),
339                                         (cur_ptr->cookies ? cur_ptr->cookies : ""));
340                 }
341                 fflush(stream);         /* We love Solaris, don't we? */
342         } else {
343                 /* Request URL from HTTP server. */
344                 if (cur_ptr->lastmodified != NULL) {
345                         fprintf(stream,
346                                         "GET http://%s%s HTTP/1.0\r\nAccept-Encoding: gzip\r\nUser-Agent: %s\r\nConnection: close\r\nHost: %s\r\nIf-Modified-Since: %s\r\n%s%s\r\n",
347                                         host,
348                                         url,
349                                         useragent,
350                                         host,
351                                         cur_ptr->lastmodified,
352                                         (cur_ptr->authinfo ? cur_ptr->authinfo : ""),
353                                         (cur_ptr->cookies ? cur_ptr->cookies : ""));
354                 } else {
355                         fprintf(stream,
356                                         "GET http://%s%s HTTP/1.0\r\nAccept-Encoding: gzip\r\nUser-Agent: %s\r\nConnection: close\r\nHost: %s\r\n%s%s\r\n",
357                                         host,
358                                         url,
359                                         useragent,
360                                         host,
361                                         (cur_ptr->authinfo ? cur_ptr->authinfo : ""),
362                                         (cur_ptr->cookies ? cur_ptr->cookies : ""));
363                 }
364                 fflush(stream);         /* We love Solaris, don't we? */
365         }
366         
367         if ((NetPoll (cur_ptr, my_socket, NET_READ)) == -1) {
368                 fclose (stream);
369                 return NULL;
370         }
371         
372         if ((fgets (servreply, sizeof(servreply), stream)) == NULL) {
373                 fclose (stream);
374                 return NULL;
375         }
376         if (checkValidHTTPHeader(servreply, sizeof(servreply)) != 0) {
377                 cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR;
378                 fclose (stream);
379                 return NULL;
380         }
381         
382         tmpstatus = strdup(servreply);
383         savestart = tmpstatus;
384
385         memset (httpstatus, 0, 4);      /* Nullify string so valgrind shuts up. */
386         /* Set pointer to char after first space.
387            HTTP/1.0 200 OK
388                     ^
389            Copy three bytes into httpstatus. */
390         strsep (&tmpstatus, " ");
391         if (tmpstatus == NULL) {
392                 cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR;
393                 fclose (stream);
394                 free (savestart);       /* Probably more leaks when doing auth and abort here. */
395                 return NULL;
396         }
397         strncpy (httpstatus, tmpstatus, 3);
398         free (savestart);
399         
400         cur_ptr->lasthttpstatus = atoi (httpstatus);
401         
402         /* If the redirectloop was run newhost and newurl were allocated.
403            We need to free them here. */
404         if ((redirectcount > 0) && (authdata == NULL)) {
405                 free (host);
406                 free (url);
407         }
408         
409         tmphttpstatus = cur_ptr->lasthttpstatus;
410         handled = 1;
411         /* Check HTTP server response and handle redirects. */
412         do {
413                 switch (tmphttpstatus) {
414                         case 200:       /* OK */
415                                 /* Received good status from server, clear problem field. */
416                                 cur_ptr->netio_error = NET_ERR_OK;
417                                 cur_ptr->problem = 0;
418                                 break;
419                         case 300:       /* Multiple choice and everything 300 not handled is fatal. */
420                                 cur_ptr->netio_error = NET_ERR_HTTP_NON_200;
421                                 fclose (stream);
422                                 return NULL;
423                         case 301:
424                                 /* Permanent redirect. Change feed->feedurl to new location.
425                                    Done some way down when we have extracted the new url. */
426                         case 302:       /* Found */
427                         case 303:       /* See Other */
428                         case 307:       /* Temp redirect. This is HTTP/1.1 */
429                                 redirectcount++;
430                         
431                                 /* Give up if we reach MAX_HTTP_REDIRECTS to avoid loops. */
432                                 if (redirectcount > MAX_HTTP_REDIRECTS) {
433                                         cur_ptr->netio_error = NET_ERR_REDIRECT_COUNT_ERR;
434                                         fclose (stream);
435                                         return NULL;
436                                 }
437                                 
438                                 while (!feof(stream)) {
439                                         if ((fgets (netbuf, sizeof(netbuf), stream)) == NULL) {
440                                                 /* Something bad happened. Server sent stupid stuff. */
441                                                 cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR;
442                                                 fclose (stream);
443                                                 return NULL;
444                                         }
445                                         
446                                         if (checkValidHTTPHeader(netbuf, sizeof(netbuf)) != 0) {
447                                                 cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR;
448                                                 fclose (stream);
449                                                 return NULL;
450                                         }
451         
452                                         /* Split netbuf into hostname and trailing url.
453                                            Place hostname in *newhost and tail into *newurl.
454                                            Close old connection and reconnect to server.
455                                            
456                                            Do not touch any of the following code! :P */
457                                         if (strncasecmp (netbuf, "Location", 8) == 0) {
458                                                 redirecttarget = strdup (netbuf);
459                                                 freeme = redirecttarget;
460                                                 
461                                                 /* Remove trailing \r\n from line. */
462                                                 redirecttarget[strlen(redirecttarget)-2] = 0;
463                                                 
464                                                 /* In theory pointer should now be after the space char
465                                                    after the word "Location:" */
466                                                 strsep (&redirecttarget, " ");
467                                                 
468                                                 if (redirecttarget == NULL) {
469                                                         cur_ptr->problem = 1;
470                                                         cur_ptr->netio_error = NET_ERR_REDIRECT_ERR;
471                                                         free (freeme);
472                                                         fclose (stream);
473                                                         return NULL;
474                                                 }
475                                                 
476                                                 /* Location must start with "http", otherwise switch on quirksmode. */
477                                                 if (strncmp(redirecttarget, "http", 4) != 0)
478                                                         quirksmode = 1;
479                                                 
480                                                 /* If the Location header is invalid we need to construct
481                                                    a correct one here before proceeding with the program.
482                                                    This makes headers like
483                                                    "Location: fuck-the-protocol.rdf" work.
484                                                    In violalation of RFC1945, RFC2616. */
485                                                 if (quirksmode) {
486                                                         len = 7 + strlen(host) + strlen(redirecttarget) + 3;
487                                                         newlocation = malloc(len);
488                                                         memset (newlocation, 0, len);
489                                                         strcat (newlocation, "http://");
490                                                         strcat (newlocation, host);
491                                                         if (redirecttarget[0] != '/')
492                                                                 strcat (newlocation, "/");
493                                                         strcat (newlocation, redirecttarget);
494                                                 } else
495                                                         newlocation = strdup (redirecttarget);
496                                                 
497                                                 /* This also frees redirecttarget. */
498                                                 free (freeme);
499                                                 
500                                                 /* Change cur_ptr->feedurl on 301. */
501                                                 if (cur_ptr->lasthttpstatus == 301) {
502                                                         /* Check for valid redirection URL */
503                                                         if (checkValidHTTPURL(newlocation) != 0) {
504                                                                 cur_ptr->problem = 1;
505                                                                 cur_ptr->netio_error = NET_ERR_REDIRECT_ERR;
506                                                                 fclose (stream);
507                                                                 return NULL;
508                                                         }
509                                                         if (!suppressoutput) {
510                                                                 fprintf(stderr, "URL points to permanent redirect, updating with new location...\n");
511                                                         }
512                                                         free (cur_ptr->feedurl);
513                                                         if (authdata == NULL)
514                                                                 cur_ptr->feedurl = strdup (newlocation);
515                                                         else {
516                                                                 /* Include authdata in newly constructed URL. */
517                                                                 len = strlen(authdata) + strlen(newlocation) + 2;
518                                                                 cur_ptr->feedurl = malloc (len);
519                                                                 newurl = strdup(newlocation);
520                                                                 freeme2 = newurl;
521                                                                 strsep (&newurl, "/");
522                                                                 strsep (&newurl, "/");
523                                                                 snprintf (cur_ptr->feedurl, len, "http://%s@%s", authdata, newurl);
524                                                                 free (freeme2);
525                                                         }
526                                                 }
527                                                 
528                                                 freeme = newlocation;
529                                                 strsep (&newlocation, "/");
530                                                 strsep (&newlocation, "/");
531                                                 tmphost = newlocation;
532                                                 /* The following line \0-terminates tmphost in overwriting the first
533                                                    / after the hostname. */
534                                                 strsep (&newlocation, "/");
535                                                 
536                                                 /* newlocation must now be the absolute path on newhost.
537                                                    If not we've been redirected to somewhere totally stupid
538                                                    (oh yeah, no offsite linking, go to our fucking front page).
539                                                    Say goodbye to the webserver in this case. In fact, we don't
540                                                    even say goodbye, but just drop the connection. */
541                                                 if (newlocation == NULL) {
542                                                         cur_ptr->netio_error = NET_ERR_REDIRECT_ERR;
543                                                         fclose (stream);
544                                                         return NULL;
545                                                 }
546                                                 
547                                                 newhost = strdup (tmphost);
548                                                 newlocation--;
549                                                 newlocation[0] = '/';
550                                                 newurl = strdup (newlocation);
551                                         
552                                                 free (freeme);
553                                                 
554                                                 /* Close connection. */ 
555                                                 fclose (stream);
556                                                 
557                                                 /* Reconnect to server. */
558                                                 if ((NetConnect (my_socket, newhost, cur_ptr, httpproto, suppressoutput)) != 0) {
559                                                         return NULL;
560                                                 }
561                                         
562                                                 host = newhost;
563                                                 url = newurl;
564                                                 
565                                                 goto tryagain;
566                                         }
567                                 }
568                                 break;
569                         case 304:
570                                 /* Not modified received. We can close stream and return from here.
571                                    Not very friendly though. :) */
572                                 fclose (stream);
573                                 /* Received good status from server, clear problem field. */
574                                 cur_ptr->netio_error = NET_ERR_OK;
575                                 cur_ptr->problem = 0;
576                                 
577                                 /* This should be freed everywhere where we return
578                                    and current feed uses auth. */
579                                 if ((redirectcount > 0) && (authdata != NULL)) {
580                                         free (host);
581                                         free (url);
582                                 }
583                                 return NULL;
584                         case 401:
585                                 /* Authorization.
586                                    Parse rest of header and rerequest URL from server using auth mechanism
587                                    requested in WWW-Authenticate header field. (Basic or Digest) */
588                                 break;
589                         case 404:
590                                 cur_ptr->netio_error = NET_ERR_HTTP_404;
591                                 fclose (stream);
592                                 return NULL;
593                         case 410: /* The feed is gone. Politely remind the user to unsubscribe. */
594                                 cur_ptr->netio_error = NET_ERR_HTTP_410;
595                                 fclose (stream);
596                                 return NULL;
597                         case 400:
598                                 cur_ptr->netio_error = NET_ERR_HTTP_NON_200;
599                                 fclose (stream);
600                                 return NULL;
601                         default:
602                                 /* unknown error codes have to be treated like the base class */
603                                 if (handled) {
604                                         /* first pass, modify error code to base class */
605                                         handled = 0;
606                                         tmphttpstatus -= tmphttpstatus % 100;
607                                 } else {
608                                         /* second pass, give up on unknown error base class */
609                                         cur_ptr->netio_error = NET_ERR_HTTP_NON_200;
610                                         fclose (stream);
611                                         return NULL;
612                                 }
613                 }
614         } while(!handled);
615         
616         /* Read rest of HTTP header and parse what we need. */
617         while (!feof(stream)) { 
618                 if ((NetPoll (cur_ptr, my_socket, NET_READ)) == -1) {
619                         fclose (stream);
620                         return NULL;
621                 }
622
623                 if ((fgets (netbuf, sizeof(netbuf), stream)) == NULL)
624                         break;
625                 
626                 if (checkValidHTTPHeader(netbuf, sizeof(netbuf)) != 0) {
627                         cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR;
628                         fclose (stream);
629                         return NULL;
630                 }
631                 
632                 if (strncasecmp (netbuf, "Transfer-Encoding", 17) == 0) {
633                         /* Chunked transfer encoding. HTTP/1.1 extension.
634                            http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1 */
635                         if (strstr (netbuf, "chunked") != NULL)
636                                 chunked = 1;
637                 }
638                 /* Get last modified date. This is only relevant on HTTP 200. */
639                 if ((strncasecmp (netbuf, "Last-Modified", 13) == 0) &&
640                         (cur_ptr->lasthttpstatus == 200)) {
641                         tmpstring = strdup(netbuf);
642                         freeme = tmpstring;
643                         strsep (&tmpstring, " ");
644                         if (tmpstring == NULL)
645                                 free (freeme);
646                         else {
647                                 free(cur_ptr->lastmodified);
648                                 cur_ptr->lastmodified = strdup(tmpstring);
649                                 if (cur_ptr->lastmodified[strlen(cur_ptr->lastmodified)-1] == '\n')
650                                         cur_ptr->lastmodified[strlen(cur_ptr->lastmodified)-1] = '\0';
651                                 if (cur_ptr->lastmodified[strlen(cur_ptr->lastmodified)-1] == '\r')
652                                         cur_ptr->lastmodified[strlen(cur_ptr->lastmodified)-1] = '\0';
653                                 free(freeme);
654                         }
655                 }
656                 if (strncasecmp (netbuf, "Content-Encoding", 16) == 0) {
657                         if (strstr (netbuf, "gzip") != NULL)
658                                 inflate = 1;
659                 }
660                 if (strncasecmp (netbuf, "Content-Type", 12) == 0) {
661                         tmpstring = strdup(netbuf);
662                         freeme = tmpstring;
663                         strsep(&tmpstring, " ");
664                         if (tmpstring == NULL)
665                                 free (freeme);
666                         else {
667                                 freeme2 = NULL;
668                                 freeme2 = strstr(tmpstring, ";");
669                                 if (freeme2 != NULL)
670                                         freeme2[0] = '\0';
671                                 free(cur_ptr->content_type);
672                                 cur_ptr->content_type = strdup(tmpstring);
673                                 if (cur_ptr->content_type[strlen(cur_ptr->content_type)-1] == '\n')
674                                         cur_ptr->content_type[strlen(cur_ptr->content_type)-1] = '\0';
675                                 if (cur_ptr->content_type[strlen(cur_ptr->content_type)-1] == '\r')
676                                         cur_ptr->content_type[strlen(cur_ptr->content_type)-1] = '\0';
677                                 free(freeme);
678                         }
679                 }
680                 /* HTTP authentication
681                  *
682                  * RFC 2617 */
683                 if ((strncasecmp (netbuf, "WWW-Authenticate", 16) == 0) &&
684                         (cur_ptr->lasthttpstatus == 401)) {
685                         if (authfailed) {
686                                 /* Don't repeat authrequest if it already failed before! */
687                                 cur_ptr->netio_error = NET_ERR_AUTH_FAILED;
688                                 fclose (stream);
689                                 return NULL;
690                         }
691
692                         /* Remove trailing \r\n from line. */
693                         if (netbuf[strlen(netbuf)-1] == '\n')
694                                 netbuf[strlen(netbuf)-1] = '\0';
695                         if (netbuf[strlen(netbuf)-1] == '\r')
696                                 netbuf[strlen(netbuf)-1] = '\0';
697                         
698                         authfailed++;
699                         
700                         /* Make a copy of the WWW-Authenticate header. We use it to
701                            reconstruct a new auth reply on every loop. */
702                         free (cur_ptr->servauth);
703                         
704                         cur_ptr->servauth = strdup (netbuf);
705                         
706                         /* Load authinfo into cur_ptr->authinfo. */
707                         retval = NetSupportAuth(cur_ptr, authdata, url, netbuf);
708                         
709                         switch (retval) {
710                                 case 1:
711                                         cur_ptr->netio_error = NET_ERR_AUTH_NO_AUTHINFO;
712                                         fclose (stream);
713                                         return NULL;
714                                         break;
715                                 case 2:
716                                         cur_ptr->netio_error = NET_ERR_AUTH_GEN_AUTH_ERR;
717                                         fclose (stream);
718                                         return NULL;
719                                         break;
720                                 case -1:
721                                         cur_ptr->netio_error = NET_ERR_AUTH_UNSUPPORTED;
722                                         fclose (stream);
723                                         return NULL;
724                                         break;
725                                 default:
726                                         break;
727                         }
728                         
729                         /* Close current connection and reconnect to server. */
730                         fclose (stream);
731                         if ((NetConnect (my_socket, host, cur_ptr, httpproto, suppressoutput)) != 0) {
732                                 return NULL;
733                         }
734
735                         /* Now that we have an authinfo, repeat the current request. */
736                         goto tryagain;
737                 }
738                 /* This seems to be optional and probably not worth the effort since we
739                    don't issue a lot of consecutive requests. */
740                 /*if ((strncasecmp (netbuf, "Authentication-Info", 19) == 0) ||
741                         (cur_ptr->lasthttpstatus == 200)) {
742                 
743                 }*/
744                 
745                 /* HTTP RFC 2616, Section 19.3 Tolerant Applications.
746                    Accept CRLF and LF line ends in the header field. */
747                 if ((strcmp(netbuf, "\r\n") == 0) || (strcmp(netbuf, "\n") == 0))
748                         break;
749         }
750         
751         /* If the redirectloop was run newhost and newurl were allocated.
752            We need to free them here.
753            But _after_ the authentication code since it needs these values! */
754         if ((redirectcount > 0) && (authdata != NULL)) {
755                 free (host);
756                 free (url);
757         }
758         
759         /**********************
760          * End of HTTP header *
761          **********************/
762         
763         /* Init pointer so strncat works.
764            Workaround class hack. */
765         body = malloc(1);
766         body[0] = '\0';
767         
768         length = 0;
769
770         /* Read stream until EOF and return it to parent. */
771         while (!feof(stream)) {
772                 if ((NetPoll (cur_ptr, my_socket, NET_READ)) == -1) {
773                         fclose (stream);
774                         return NULL;
775                 }
776                 
777                 /* Since we handle binary data if we read compressed input we
778                    need to use fread instead of fgets after reading the header. */ 
779                 retval = fread (netbuf, 1, sizeof(netbuf), stream);
780                 if (retval == 0)
781                         break;
782                 body = realloc (body, length+retval);
783                 memcpy (body+length, netbuf, retval);
784                 length += retval;
785                 if (retval != 4096)
786                         break;
787         }
788         body = realloc(body, length+1);
789         body[length] = '\0';
790         
791         cur_ptr->content_length = length;
792         
793         /* Close connection. */
794         fclose (stream);
795         
796         if (chunked) {
797                 if (decodechunked(body, &length) == NULL) {
798                         free (body);
799                         cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR;
800                         return NULL;
801                 }
802         }
803         
804         /* If inflate==1 we need to decompress the content.. */
805         if (inflate == 1) {
806                 /* gzipinflate */
807                 /*inflatedbody = gzip_uncompress (body, length, &cur_ptr->content_length);
808                 if (inflatedbody == NULL) {
809                         free (body);
810                         cur_ptr->netio_error = NET_ERR_GZIP_ERR;
811                         return NULL;
812                 }*/
813                 if (jg_gzip_uncompress (body, length, (void **)&inflatedbody, &cur_ptr->content_length) != 0) {
814                         free (body);
815                         cur_ptr->netio_error = NET_ERR_GZIP_ERR;
816                         return NULL;
817                 }
818                 
819                 /* Copy uncompressed data back to body. */
820                 free (body);
821                 body = inflatedbody;
822         }
823         
824         return body;
825 }
826
827 /* Returns allocated string with body of webserver reply.
828    Various status info put into struct feed *cur_ptr.
829    Set suppressoutput=1 to disable diagnostic output. */
830 char *DownloadFeed(char *url, struct feed *cur_ptr, int suppressoutput) {
831         int my_socket = 0;
832         int url_fixup = 0;
833         char *host;                             /* Needs to freed. */
834         char *tmphost;
835         char *freeme;
836         char *returndata;
837         char *authdata = NULL;
838         char *tmpstr;
839         int httpproto = 0;                      /* 0: http; 1: https */
840         
841         if (checkValidHTTPURL(url) != 0) {
842                 cur_ptr->problem = 1;
843                 cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR;
844                 return NULL;
845         }
846         /* strstr will match _any_ substring. Not good, use strncasecmp with length 5! */
847         if (strncasecmp (url, "https", 5) == 0)
848                 httpproto = 1;
849         else
850                 httpproto = 0;
851         
852         strsep (&url, "/");
853         strsep (&url, "/");
854         tmphost = url;
855         strsep (&url, "/");
856         if (url == NULL) {
857                 /* Assume "/" is input is exhausted. */
858                 url = strdup("/");
859                 url_fixup = 1;
860         }
861         
862         /* If tmphost contains an '@', extract username and pwd. */
863         if (strchr (tmphost, '@') != NULL) {
864                 tmpstr = tmphost;
865                 strsep (&tmphost, "@");
866                 authdata = strdup (tmpstr);
867         }
868         
869         host = strdup (tmphost);
870         
871         /* netio() might change pointer of host to something else if redirect
872            loop is executed. Make a copy so we can correctly free everything. */
873         freeme = host;
874         /* Only run if url was != NULL above. */
875         if (!url_fixup) {
876                 url--;
877                 url[0] = '/';
878                 if (url[strlen(url)-1] == '\n') {
879                         url[strlen(url)-1] = '\0';
880                 }
881         }
882         
883         if ((NetConnect (&my_socket, host, cur_ptr, httpproto, suppressoutput)) != 0) {
884                 free (freeme);
885                 free (authdata);
886                 if (url_fixup)
887                         free(url);
888                 cur_ptr->problem = 1;
889                 return NULL;
890         }
891         returndata = NetIO (&my_socket, host, url, cur_ptr, authdata, httpproto, suppressoutput);
892         if ((returndata == NULL) && (cur_ptr->netio_error != NET_ERR_OK)) {
893                 cur_ptr->problem = 1;
894         }
895         
896         /* url will be freed in the calling function. */
897         free (freeme);          /* This is *host. */
898         free (authdata);
899         if (url_fixup)
900                 free(url);
901         
902         return returndata;
903 }