]> code.citadel.org Git - citadel.git/blob - rss2ctdl/netio.c
347854d092cb9bedbb952898c85c3b9734cb70b7
[citadel.git] / rss2ctdl / netio.c
1 /*
2  * $Id$
3  * 
4  * Copyright 2003-2004 Oliver Feiler <kiza@kcore.de>
5  *
6  * netio.c
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  *
21  */
22
23 /* OS X needs this, otherwise socklen_t is not defined. */
24 #ifdef __APPLE__
25 #       define _BSD_SOCKLEN_T_
26 #endif
27
28 /* BeOS does not define socklen_t. Using uint as suggested by port creator. */
29 #ifdef __BEOS__
30 #       define socklen_t unsigned int
31 #endif
32
33 #include <string.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <unistd.h>
39 //#include <fcntl.h>
40 #include <sys/types.h>
41 #include <sys/socket.h>
42 #include <netinet/in.h>
43 #include <netdb.h>
44 #include <sys/stat.h>
45 #include <sys/time.h>
46 #include <assert.h>
47
48 #include "config.h"
49 #include "main.h"
50 #include "conversions.h"
51 #include "net-support.h"
52 #include "io-internal.h"
53 #include "zlib_interface.h"
54
static int const MAX_HTTP_REDIRECTS = 10;       /* Maximum number of redirects we will follow. */
static int const NET_TIMEOUT = 20;                      /* Global network timeout in sec */
static int const NET_READ = 1;                          /* NetPoll mode: wait until the socket is readable. */
static int const NET_WRITE = 2;                         /* NetPoll mode: wait until the socket is writable. */

extern char *proxyname;                                         /* Hostname of proxyserver. */
extern unsigned short proxyport;                        /* Port on proxyserver to use. 0 means no proxy is used. */

/* Masquerade as Firefox on Linux to increase the share of both in web server statistics. */
/* char *useragent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0"; */
/* On second thought, let's be honest about this. */
/* Value sent in the User-Agent header of every HTTP request built in this file. */
char *useragent = "Citadel RSS Service/0.1 (multiple subscribers) (+http://www.citadel.org/)";
67
68 /* Waits NET_TIMEOUT seconds for the socket to return data.
69  *
70  * Returns
71  *
72  *      0       Socket is ready
73  *      -1      Error occured (netio_error is set)
74  */
75 int NetPoll (struct feed * cur_ptr, int * my_socket, int rw) {
76         fd_set rfdsr;
77         fd_set rfdsw;
78         struct timeval tv;
79         int retval;                             /* FD_ISSET + assert == Heisenbug? */
80         
81         /* Set global network timeout */
82         tv.tv_sec = NET_TIMEOUT;
83         tv.tv_usec = 0;
84         
85         FD_ZERO(&rfdsr);
86         FD_ZERO(&rfdsw);
87         
88         if (rw == NET_READ) {
89                 FD_SET(*my_socket, &rfdsr);
90                 if (select (*my_socket+1, &rfdsr, NULL, NULL, &tv) == 0) {
91                         /* Timed out */
92                         cur_ptr->netio_error = NET_ERR_TIMEOUT;
93                         return -1;
94                 }
95                 retval = FD_ISSET (*my_socket, &rfdsr);
96                 assert (retval);
97                 if (!retval) {
98                         /* Wtf? */
99                         cur_ptr->netio_error = NET_ERR_UNKNOWN;
100                         return -1;
101                 }
102         } else if (rw == NET_WRITE) {
103                 FD_SET(*my_socket, &rfdsw);
104                 if (select (*my_socket+1, NULL, &rfdsw, NULL, &tv) == 0) {
105                         /* Timed out */
106                         cur_ptr->netio_error = NET_ERR_TIMEOUT;
107                         return -1;
108                 }
109                 retval = FD_ISSET (*my_socket, &rfdsw);
110                 assert (retval);
111                 if (!retval) {
112                         /* Wtf? */
113                         cur_ptr->netio_error = NET_ERR_UNKNOWN;
114                         return -1;
115                 }
116         } else {
117                 cur_ptr->netio_error = NET_ERR_UNKNOWN;
118                 return -1;
119         }
120         
121         return 0;
122 }
123
124
125 /* Connect network sockets.
126  *
127  * Returns
128  *
129  *      0       Connected
130  *      -1      Error occured (netio_error is set)
131  */
132 int NetConnect (int * my_socket, char * host, struct feed * cur_ptr, int httpproto, int suppressoutput) {
133         char tmp[512];
134         struct sockaddr_in address;     
135         struct hostent *remotehost;
136         socklen_t len;
137         char *realhost;
138         unsigned short port;
139         
140         realhost = strdup(host);
141         if (sscanf (host, "%[^:]:%hd", realhost, &port) != 2) {
142                 port = 80;
143         }
144         
145         /* Create a inet stream TCP socket. */
146         *my_socket = socket (AF_INET, SOCK_STREAM, 0);
147         if (*my_socket == -1) {
148                 cur_ptr->netio_error = NET_ERR_SOCK_ERR;
149                 return -1;
150         }
151         
152         /* If proxyport is 0 we didn't execute the if http_proxy statement in main
153            so there is no proxy. On any other value of proxyport do proxyrequests instead. */
154         if (proxyport == 0) {
155                 /* Lookup remote IP. */
156                 remotehost = gethostbyname (realhost);
157                 if (remotehost == NULL) {
158                         close (*my_socket);
159                         free (realhost);
160                         cur_ptr->netio_error = NET_ERR_HOST_NOT_FOUND;
161                         return -1;
162                 }
163                 
164                 /* Set the remote address. */
165                 address.sin_family = AF_INET;
166                 address.sin_port = htons(port);
167                 memcpy (&address.sin_addr.s_addr, remotehost->h_addr_list[0], remotehost->h_length);
168                         
169                 /* Connect socket. */
170                 cur_ptr->connectresult = connect (*my_socket, (struct sockaddr *) &address, sizeof(address));
171                 
172                 /* Check if we're already connected.
173                    BSDs will return 0 on connect even in nonblock if connect was fast enough. */
174                 if (cur_ptr->connectresult != 0) {
175                         /* If errno is not EINPROGRESS, the connect went wrong. */
176                         if (errno != EINPROGRESS) {
177                                 close (*my_socket);
178                                 free (realhost);
179                                 cur_ptr->netio_error = NET_ERR_CONN_REFUSED;
180                                 return -1;
181                         }
182                         
183                         if ((NetPoll (cur_ptr, my_socket, NET_WRITE)) == -1) {
184                                 close (*my_socket);
185                                 free (realhost);
186                                 return -1;
187                         }
188                         
189                         /* We get errno of connect back via getsockopt SO_ERROR (into connectresult). */
190                         len = sizeof(cur_ptr->connectresult);
191                         getsockopt(*my_socket, SOL_SOCKET, SO_ERROR, &cur_ptr->connectresult, &len);
192                         
193                         if (cur_ptr->connectresult != 0) {
194                                 close (*my_socket);
195                                 free (realhost);
196                                 cur_ptr->netio_error = NET_ERR_CONN_FAILED;     /* ->strerror(cur_ptr->connectresult) */
197                                 return -1;
198                         }
199                 }
200         } else {
201                 /* Lookup proxyserver IP. */
202                 remotehost = gethostbyname (proxyname);
203                 if (remotehost == NULL) {
204                         close (*my_socket);
205                         free (realhost);
206                         cur_ptr->netio_error = NET_ERR_HOST_NOT_FOUND;
207                         return -1;
208                 }
209                 
210                 /* Set the remote address. */
211                 address.sin_family = AF_INET;
212                 address.sin_port = htons(proxyport);
213                 memcpy (&address.sin_addr.s_addr, remotehost->h_addr_list[0], remotehost->h_length);
214                 
215                 /* Connect socket. */
216                 cur_ptr->connectresult = connect (*my_socket, (struct sockaddr *) &address, sizeof(address));
217                 
218                 /* Check if we're already connected.
219                    BSDs will return 0 on connect even in nonblock if connect was fast enough. */
220                 if (cur_ptr->connectresult != 0) {
221                         if (errno != EINPROGRESS) {
222                                 close (*my_socket);
223                                 free (realhost);
224                                 cur_ptr->netio_error = NET_ERR_CONN_REFUSED;
225                                 return -1;
226                         }
227                 
228                         if ((NetPoll (cur_ptr, my_socket, NET_WRITE)) == -1) {
229                                 close (*my_socket);
230                                 free (realhost);
231                                 return -1;
232                         }
233                         
234                         len = sizeof(cur_ptr->connectresult);
235                         getsockopt(*my_socket, SOL_SOCKET, SO_ERROR, &cur_ptr->connectresult, &len);
236                         
237                         if (cur_ptr->connectresult != 0) {
238                                 close (*my_socket);
239                                 free (realhost);
240                                 cur_ptr->netio_error = NET_ERR_CONN_FAILED;     /* ->strerror(cur_ptr->connectresult) */
241                                 return -1;
242                         }
243                 }
244         }
245         
246         free (realhost);
247         return 0;
248 }
249
250
251 /*
252  * Main network function.
253  * (Now with a useful function description *g*)
254  *
255  * This function returns the HTTP response body (inflating gzip-encoded data
256  * if needed).
257  * Updates passed feed struct with values gathered from webserver.
258  * Handles all redirection and HTTP status decoding.
259  * Returns NULL pointer if no data was received and sets netio_error.
260  */
261 char * NetIO (int * my_socket, char * host, char * url, struct feed * cur_ptr, char * authdata, int httpproto, int suppressoutput) {
262         char netbuf[4096];                      /* Network read buffer. */
263         char *body;                                     /* XML body. */
264         unsigned int length;
265         FILE *stream;                           /* Stream socket. */
266         int chunked = 0;                        /* Content-Encoding: chunked received? */
267         int redirectcount;                      /* Number of HTTP redirects followed. */
268         char httpstatus[4];                     /* HTTP status sent by server. */
269         char servreply[128];                    /* First line of server reply */
270         char *tmpstatus;
271         char *savestart;                        /* Save start position of pointers. */
272         char *tmphost;                          /* Pointers needed to strsep operation. */
273         char *newhost;                          /* New hostname if we need to redirect. */
274         char *newurl;                           /* New document name ". */
275         char *newlocation;
276         char *tmpstring;                        /* Temp pointers. */
277         char *freeme, *freeme2;
278         char *redirecttarget;
279         int retval;
280         int handled;
281         int tmphttpstatus;
282         int inflate = 0;                        /* Whether feed data needs decompressed with zlib. */
283         int len;
284         char * inflatedbody;
285         int quirksmode = 0;                     /* IIS operation mode. */
286         int authfailed = 0;                     /* Avoid repeating failed auth requests endlessly. */
287
288         
289         if (!suppressoutput) {
290                 if (cur_ptr->title == NULL)
291                         fprintf(stderr, "Downloading http://%s%s\n", host, url);
292                 else
293                         fprintf(stderr, "Downloading %s\n", cur_ptr->title);
294
295         }
296         
297         redirectcount = 0;
298         
299         /* Goto label to redirect reconnect. */
300         tryagain:
301         
302         /* Reconstruct digest authinfo for every request so we don't reuse
303            the same nonce value for more than one request.
304            This happens one superflous time on 303 redirects. */
305         if ((cur_ptr->authinfo != NULL) && (cur_ptr->servauth != NULL)) {
306                 if (strstr (cur_ptr->authinfo, " Digest ") != NULL) {
307                         NetSupportAuth(cur_ptr, authdata, url, cur_ptr->servauth);
308                 }
309         }
310         
311         /* Open socket. */      
312         stream = fdopen (*my_socket, "r+");
313         if (stream == NULL) {
314                 /* This is a serious non-continueable OS error as it will probably not
315                    go away if we retry.
316                    
317                    BeOS will stupidly return SUCCESS here making this code silently fail on BeOS. */
318                 cur_ptr->netio_error = NET_ERR_SOCK_ERR;
319                 return NULL;
320         }
321         
322         /* Again is proxyport == 0, non proxy mode, otherwise make proxy requests. */
323         if (proxyport == 0) {
324                 /* Request URL from HTTP server. */
325                 if (cur_ptr->lastmodified != NULL) {
326                         fprintf(stream,
327                                         "GET %s HTTP/1.0\r\nAccept-Encoding: gzip\r\nUser-Agent: %s\r\nConnection: close\r\nHost: %s\r\nIf-Modified-Since: %s\r\n%s%s\r\n",
328                                         url,
329                                         useragent,
330                                         host,
331                                         cur_ptr->lastmodified,
332                                         (cur_ptr->authinfo ? cur_ptr->authinfo : ""),
333                                         (cur_ptr->cookies ? cur_ptr->cookies : ""));
334                 } else {
335                         fprintf(stream,
336                                         "GET %s HTTP/1.0\r\nAccept-Encoding: gzip\r\nUser-Agent: %s\r\nConnection: close\r\nHost: %s\r\n%s%s\r\n",
337                                         url,
338                                         useragent,
339                                         host,
340                                         (cur_ptr->authinfo ? cur_ptr->authinfo : ""),
341                                         (cur_ptr->cookies ? cur_ptr->cookies : ""));
342                 }
343                 fflush(stream);         /* We love Solaris, don't we? */
344         } else {
345                 /* Request URL from HTTP server. */
346                 if (cur_ptr->lastmodified != NULL) {
347                         fprintf(stream,
348                                         "GET http://%s%s HTTP/1.0\r\nAccept-Encoding: gzip\r\nUser-Agent: %s\r\nConnection: close\r\nHost: %s\r\nIf-Modified-Since: %s\r\n%s%s\r\n",
349                                         host,
350                                         url,
351                                         useragent,
352                                         host,
353                                         cur_ptr->lastmodified,
354                                         (cur_ptr->authinfo ? cur_ptr->authinfo : ""),
355                                         (cur_ptr->cookies ? cur_ptr->cookies : ""));
356                 } else {
357                         fprintf(stream,
358                                         "GET http://%s%s HTTP/1.0\r\nAccept-Encoding: gzip\r\nUser-Agent: %s\r\nConnection: close\r\nHost: %s\r\n%s%s\r\n",
359                                         host,
360                                         url,
361                                         useragent,
362                                         host,
363                                         (cur_ptr->authinfo ? cur_ptr->authinfo : ""),
364                                         (cur_ptr->cookies ? cur_ptr->cookies : ""));
365                 }
366                 fflush(stream);         /* We love Solaris, don't we? */
367         }
368         
369         if ((NetPoll (cur_ptr, my_socket, NET_READ)) == -1) {
370                 fclose (stream);
371                 return NULL;
372         }
373         
374         if ((fgets (servreply, sizeof(servreply), stream)) == NULL) {
375                 fclose (stream);
376                 return NULL;
377         }
378         if (checkValidHTTPHeader(servreply, sizeof(servreply)) != 0) {
379                 cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR;
380                 fclose (stream);
381                 return NULL;
382         }
383         
384         tmpstatus = strdup(servreply);
385         savestart = tmpstatus;
386
387         memset (httpstatus, 0, 4);      /* Nullify string so valgrind shuts up. */
388         /* Set pointer to char after first space.
389            HTTP/1.0 200 OK
390                     ^
391            Copy three bytes into httpstatus. */
392         strsep (&tmpstatus, " ");
393         if (tmpstatus == NULL) {
394                 cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR;
395                 fclose (stream);
396                 free (savestart);       /* Probably more leaks when doing auth and abort here. */
397                 return NULL;
398         }
399         strncpy (httpstatus, tmpstatus, 3);
400         free (savestart);
401         
402         cur_ptr->lasthttpstatus = atoi (httpstatus);
403         
404         /* If the redirectloop was run newhost and newurl were allocated.
405            We need to free them here. */
406         if ((redirectcount > 0) && (authdata == NULL)) {
407                 free (host);
408                 free (url);
409         }
410         
411         tmphttpstatus = cur_ptr->lasthttpstatus;
412         handled = 1;
413         /* Check HTTP server response and handle redirects. */
414         do {
415                 switch (tmphttpstatus) {
416                         case 200:       /* OK */
417                                 /* Received good status from server, clear problem field. */
418                                 cur_ptr->netio_error = NET_ERR_OK;
419                                 cur_ptr->problem = 0;
420                                 break;
421                         case 300:       /* Multiple choice and everything 300 not handled is fatal. */
422                                 cur_ptr->netio_error = NET_ERR_HTTP_NON_200;
423                                 fclose (stream);
424                                 return NULL;
425                         case 301:
426                                 /* Permanent redirect. Change feed->feedurl to new location.
427                                    Done some way down when we have extracted the new url. */
428                         case 302:       /* Found */
429                         case 303:       /* See Other */
430                         case 307:       /* Temp redirect. This is HTTP/1.1 */
431                                 redirectcount++;
432                         
433                                 /* Give up if we reach MAX_HTTP_REDIRECTS to avoid loops. */
434                                 if (redirectcount > MAX_HTTP_REDIRECTS) {
435                                         cur_ptr->netio_error = NET_ERR_REDIRECT_COUNT_ERR;
436                                         fclose (stream);
437                                         return NULL;
438                                 }
439                                 
440                                 while (!feof(stream)) {
441                                         if ((fgets (netbuf, sizeof(netbuf), stream)) == NULL) {
442                                                 /* Something bad happened. Server sent stupid stuff. */
443                                                 cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR;
444                                                 fclose (stream);
445                                                 return NULL;
446                                         }
447                                         
448                                         if (checkValidHTTPHeader(netbuf, sizeof(netbuf)) != 0) {
449                                                 cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR;
450                                                 fclose (stream);
451                                                 return NULL;
452                                         }
453         
454                                         /* Split netbuf into hostname and trailing url.
455                                            Place hostname in *newhost and tail into *newurl.
456                                            Close old connection and reconnect to server.
457                                            
458                                            Do not touch any of the following code! :P */
459                                         if (strncasecmp (netbuf, "Location", 8) == 0) {
460                                                 redirecttarget = strdup (netbuf);
461                                                 freeme = redirecttarget;
462                                                 
463                                                 /* Remove trailing \r\n from line. */
464                                                 redirecttarget[strlen(redirecttarget)-2] = 0;
465                                                 
466                                                 /* In theory pointer should now be after the space char
467                                                    after the word "Location:" */
468                                                 strsep (&redirecttarget, " ");
469                                                 
470                                                 if (redirecttarget == NULL) {
471                                                         cur_ptr->problem = 1;
472                                                         cur_ptr->netio_error = NET_ERR_REDIRECT_ERR;
473                                                         free (freeme);
474                                                         fclose (stream);
475                                                         return NULL;
476                                                 }
477                                                 
478                                                 /* Location must start with "http", otherwise switch on quirksmode. */
479                                                 if (strncmp(redirecttarget, "http", 4) != 0)
480                                                         quirksmode = 1;
481                                                 
482                                                 /* If the Location header is invalid we need to construct
483                                                    a correct one here before proceeding with the program.
484                                                    This makes headers like
485                                                    "Location: fuck-the-protocol.rdf" work.
486                                                    In violalation of RFC1945, RFC2616. */
487                                                 if (quirksmode) {
488                                                         len = 7 + strlen(host) + strlen(redirecttarget) + 3;
489                                                         newlocation = malloc(len);
490                                                         memset (newlocation, 0, len);
491                                                         strcat (newlocation, "http://");
492                                                         strcat (newlocation, host);
493                                                         if (redirecttarget[0] != '/')
494                                                                 strcat (newlocation, "/");
495                                                         strcat (newlocation, redirecttarget);
496                                                 } else
497                                                         newlocation = strdup (redirecttarget);
498                                                 
499                                                 /* This also frees redirecttarget. */
500                                                 free (freeme);
501                                                 
502                                                 /* Change cur_ptr->feedurl on 301. */
503                                                 if (cur_ptr->lasthttpstatus == 301) {
504                                                         /* Check for valid redirection URL */
505                                                         if (checkValidHTTPURL(newlocation) != 0) {
506                                                                 cur_ptr->problem = 1;
507                                                                 cur_ptr->netio_error = NET_ERR_REDIRECT_ERR;
508                                                                 fclose (stream);
509                                                                 return NULL;
510                                                         }
511                                                         if (!suppressoutput) {
512                                                                 fprintf(stderr, "URL points to permanent redirect, updating with new location...\n");
513                                                         }
514                                                         free (cur_ptr->feedurl);
515                                                         if (authdata == NULL)
516                                                                 cur_ptr->feedurl = strdup (newlocation);
517                                                         else {
518                                                                 /* Include authdata in newly constructed URL. */
519                                                                 len = strlen(authdata) + strlen(newlocation) + 2;
520                                                                 cur_ptr->feedurl = malloc (len);
521                                                                 newurl = strdup(newlocation);
522                                                                 freeme2 = newurl;
523                                                                 strsep (&newurl, "/");
524                                                                 strsep (&newurl, "/");
525                                                                 snprintf (cur_ptr->feedurl, len, "http://%s@%s", authdata, newurl);
526                                                                 free (freeme2);
527                                                         }
528                                                 }
529                                                 
530                                                 freeme = newlocation;
531                                                 strsep (&newlocation, "/");
532                                                 strsep (&newlocation, "/");
533                                                 tmphost = newlocation;
534                                                 /* The following line \0-terminates tmphost in overwriting the first
535                                                    / after the hostname. */
536                                                 strsep (&newlocation, "/");
537                                                 
538                                                 /* newlocation must now be the absolute path on newhost.
539                                                    If not we've been redirected to somewhere totally stupid
540                                                    (oh yeah, no offsite linking, go to our fucking front page).
541                                                    Say goodbye to the webserver in this case. In fact, we don't
542                                                    even say goodbye, but just drop the connection. */
543                                                 if (newlocation == NULL) {
544                                                         cur_ptr->netio_error = NET_ERR_REDIRECT_ERR;
545                                                         fclose (stream);
546                                                         return NULL;
547                                                 }
548                                                 
549                                                 newhost = strdup (tmphost);
550                                                 newlocation--;
551                                                 newlocation[0] = '/';
552                                                 newurl = strdup (newlocation);
553                                         
554                                                 free (freeme);
555                                                 
556                                                 /* Close connection. */ 
557                                                 fclose (stream);
558                                                 
559                                                 /* Reconnect to server. */
560                                                 if ((NetConnect (my_socket, newhost, cur_ptr, httpproto, suppressoutput)) != 0) {
561                                                         return NULL;
562                                                 }
563                                         
564                                                 host = newhost;
565                                                 url = newurl;
566                                                 
567                                                 goto tryagain;
568                                         }
569                                 }
570                                 break;
571                         case 304:
572                                 /* Not modified received. We can close stream and return from here.
573                                    Not very friendly though. :) */
574                                 fclose (stream);
575                                 /* Received good status from server, clear problem field. */
576                                 cur_ptr->netio_error = NET_ERR_OK;
577                                 cur_ptr->problem = 0;
578                                 
579                                 /* This should be freed everywhere where we return
580                                    and current feed uses auth. */
581                                 if ((redirectcount > 0) && (authdata != NULL)) {
582                                         free (host);
583                                         free (url);
584                                 }
585                                 return NULL;
586                         case 401:
587                                 /* Authorization.
588                                    Parse rest of header and rerequest URL from server using auth mechanism
589                                    requested in WWW-Authenticate header field. (Basic or Digest) */
590                                 break;
591                         case 404:
592                                 cur_ptr->netio_error = NET_ERR_HTTP_404;
593                                 fclose (stream);
594                                 return NULL;
595                         case 410: /* The feed is gone. Politely remind the user to unsubscribe. */
596                                 cur_ptr->netio_error = NET_ERR_HTTP_410;
597                                 fclose (stream);
598                                 return NULL;
599                         case 400:
600                                 cur_ptr->netio_error = NET_ERR_HTTP_NON_200;
601                                 fclose (stream);
602                                 return NULL;
603                         default:
604                                 /* unknown error codes have to be treated like the base class */
605                                 if (handled) {
606                                         /* first pass, modify error code to base class */
607                                         handled = 0;
608                                         tmphttpstatus -= tmphttpstatus % 100;
609                                 } else {
610                                         /* second pass, give up on unknown error base class */
611                                         cur_ptr->netio_error = NET_ERR_HTTP_NON_200;
612                                         fclose (stream);
613                                         return NULL;
614                                 }
615                 }
616         } while(!handled);
617         
618         /* Read rest of HTTP header and parse what we need. */
619         while (!feof(stream)) { 
620                 if ((NetPoll (cur_ptr, my_socket, NET_READ)) == -1) {
621                         fclose (stream);
622                         return NULL;
623                 }
624
625                 if ((fgets (netbuf, sizeof(netbuf), stream)) == NULL)
626                         break;
627                 
628                 if (checkValidHTTPHeader(netbuf, sizeof(netbuf)) != 0) {
629                         cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR;
630                         fclose (stream);
631                         return NULL;
632                 }
633                 
634                 if (strncasecmp (netbuf, "Transfer-Encoding", 17) == 0) {
635                         /* Chunked transfer encoding. HTTP/1.1 extension.
636                            http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1 */
637                         if (strstr (netbuf, "chunked") != NULL)
638                                 chunked = 1;
639                 }
640                 /* Get last modified date. This is only relevant on HTTP 200. */
641                 if ((strncasecmp (netbuf, "Last-Modified", 13) == 0) &&
642                         (cur_ptr->lasthttpstatus == 200)) {
643                         tmpstring = strdup(netbuf);
644                         freeme = tmpstring;
645                         strsep (&tmpstring, " ");
646                         if (tmpstring == NULL)
647                                 free (freeme);
648                         else {
649                                 free(cur_ptr->lastmodified);
650                                 cur_ptr->lastmodified = strdup(tmpstring);
651                                 if (cur_ptr->lastmodified[strlen(cur_ptr->lastmodified)-1] == '\n')
652                                         cur_ptr->lastmodified[strlen(cur_ptr->lastmodified)-1] = '\0';
653                                 if (cur_ptr->lastmodified[strlen(cur_ptr->lastmodified)-1] == '\r')
654                                         cur_ptr->lastmodified[strlen(cur_ptr->lastmodified)-1] = '\0';
655                                 free(freeme);
656                         }
657                 }
658                 if (strncasecmp (netbuf, "Content-Encoding", 16) == 0) {
659                         if (strstr (netbuf, "gzip") != NULL)
660                                 inflate = 1;
661                 }
662                 if (strncasecmp (netbuf, "Content-Type", 12) == 0) {
663                         tmpstring = strdup(netbuf);
664                         freeme = tmpstring;
665                         strsep(&tmpstring, " ");
666                         if (tmpstring == NULL)
667                                 free (freeme);
668                         else {
669                                 freeme2 = NULL;
670                                 freeme2 = strstr(tmpstring, ";");
671                                 if (freeme2 != NULL)
672                                         freeme2[0] = '\0';
673                                 free(cur_ptr->content_type);
674                                 cur_ptr->content_type = strdup(tmpstring);
675                                 if (cur_ptr->content_type[strlen(cur_ptr->content_type)-1] == '\n')
676                                         cur_ptr->content_type[strlen(cur_ptr->content_type)-1] = '\0';
677                                 if (cur_ptr->content_type[strlen(cur_ptr->content_type)-1] == '\r')
678                                         cur_ptr->content_type[strlen(cur_ptr->content_type)-1] = '\0';
679                                 free(freeme);
680                         }
681                 }
682                 /* HTTP authentication
683                  *
684                  * RFC 2617 */
685                 if ((strncasecmp (netbuf, "WWW-Authenticate", 16) == 0) &&
686                         (cur_ptr->lasthttpstatus == 401)) {
687                         if (authfailed) {
688                                 /* Don't repeat authrequest if it already failed before! */
689                                 cur_ptr->netio_error = NET_ERR_AUTH_FAILED;
690                                 fclose (stream);
691                                 return NULL;
692                         }
693
694                         /* Remove trailing \r\n from line. */
695                         if (netbuf[strlen(netbuf)-1] == '\n')
696                                 netbuf[strlen(netbuf)-1] = '\0';
697                         if (netbuf[strlen(netbuf)-1] == '\r')
698                                 netbuf[strlen(netbuf)-1] = '\0';
699                         
700                         authfailed++;
701                         
702                         /* Make a copy of the WWW-Authenticate header. We use it to
703                            reconstruct a new auth reply on every loop. */
704                         free (cur_ptr->servauth);
705                         
706                         cur_ptr->servauth = strdup (netbuf);
707                         
708                         /* Load authinfo into cur_ptr->authinfo. */
709                         retval = NetSupportAuth(cur_ptr, authdata, url, netbuf);
710                         
711                         switch (retval) {
712                                 case 1:
713                                         cur_ptr->netio_error = NET_ERR_AUTH_NO_AUTHINFO;
714                                         fclose (stream);
715                                         return NULL;
716                                         break;
717                                 case 2:
718                                         cur_ptr->netio_error = NET_ERR_AUTH_GEN_AUTH_ERR;
719                                         fclose (stream);
720                                         return NULL;
721                                         break;
722                                 case -1:
723                                         cur_ptr->netio_error = NET_ERR_AUTH_UNSUPPORTED;
724                                         fclose (stream);
725                                         return NULL;
726                                         break;
727                                 default:
728                                         break;
729                         }
730                         
731                         /* Close current connection and reconnect to server. */
732                         fclose (stream);
733                         if ((NetConnect (my_socket, host, cur_ptr, httpproto, suppressoutput)) != 0) {
734                                 return NULL;
735                         }
736
737                         /* Now that we have an authinfo, repeat the current request. */
738                         goto tryagain;
739                 }
740                 /* This seems to be optional and probably not worth the effort since we
741                    don't issue a lot of consecutive requests. */
742                 /*if ((strncasecmp (netbuf, "Authentication-Info", 19) == 0) ||
743                         (cur_ptr->lasthttpstatus == 200)) {
744                 
745                 }*/
746                 
747                 /* HTTP RFC 2616, Section 19.3 Tolerant Applications.
748                    Accept CRLF and LF line ends in the header field. */
749                 if ((strcmp(netbuf, "\r\n") == 0) || (strcmp(netbuf, "\n") == 0))
750                         break;
751         }
752         
753         /* If the redirectloop was run newhost and newurl were allocated.
754            We need to free them here.
755            But _after_ the authentication code since it needs these values! */
756         if ((redirectcount > 0) && (authdata != NULL)) {
757                 free (host);
758                 free (url);
759         }
760         
761         /**********************
762          * End of HTTP header *
763          **********************/
764         
765         /* Init pointer so strncat works.
766            Workaround class hack. */
767         body = malloc(1);
768         body[0] = '\0';
769         
770         length = 0;
771
772         /* Read stream until EOF and return it to parent. */
773         while (!feof(stream)) {
774                 if ((NetPoll (cur_ptr, my_socket, NET_READ)) == -1) {
775                         fclose (stream);
776                         return NULL;
777                 }
778                 
779                 /* Since we handle binary data if we read compressed input we
780                    need to use fread instead of fgets after reading the header. */ 
781                 retval = fread (netbuf, 1, sizeof(netbuf), stream);
782                 if (retval == 0)
783                         break;
784                 body = realloc (body, length+retval);
785                 memcpy (body+length, netbuf, retval);
786                 length += retval;
787                 if (retval != 4096)
788                         break;
789         }
790         body = realloc(body, length+1);
791         body[length] = '\0';
792         
793         cur_ptr->content_length = length;
794         
795         /* Close connection. */
796         fclose (stream);
797         
798         if (chunked) {
799                 if (decodechunked(body, &length) == NULL) {
800                         free (body);
801                         cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR;
802                         return NULL;
803                 }
804         }
805         
806         /* If inflate==1 we need to decompress the content.. */
807         if (inflate == 1) {
808                 /* gzipinflate */
809                 /*inflatedbody = gzip_uncompress (body, length, &cur_ptr->content_length);
810                 if (inflatedbody == NULL) {
811                         free (body);
812                         cur_ptr->netio_error = NET_ERR_GZIP_ERR;
813                         return NULL;
814                 }*/
815                 if (jg_gzip_uncompress (body, length, (void **)&inflatedbody, &cur_ptr->content_length) != 0) {
816                         free (body);
817                         cur_ptr->netio_error = NET_ERR_GZIP_ERR;
818                         return NULL;
819                 }
820                 
821                 /* Copy uncompressed data back to body. */
822                 free (body);
823                 body = inflatedbody;
824         }
825         
826         return body;
827 }
828
829 /* Returns allocated string with body of webserver reply.
830    Various status info put into struct feed *cur_ptr.
831    Set suppressoutput=1 to disable diagnostic output. */
832 char *DownloadFeed(char *url, struct feed *cur_ptr, int suppressoutput) {
833         int my_socket = 0;
834         int url_fixup = 0;
835         char *host;                             /* Needs to freed. */
836         char *tmphost;
837         char *freeme;
838         char *returndata;
839         char *authdata = NULL;
840         char *tmpstr;
841         int httpproto = 0;                      /* 0: http; 1: https */
842         
843         if (checkValidHTTPURL(url) != 0) {
844                 cur_ptr->problem = 1;
845                 cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR;
846                 return NULL;
847         }
848         /* strstr will match _any_ substring. Not good, use strncasecmp with length 5! */
849         if (strncasecmp (url, "https", 5) == 0)
850                 httpproto = 1;
851         else
852                 httpproto = 0;
853         
854         strsep (&url, "/");
855         strsep (&url, "/");
856         tmphost = url;
857         strsep (&url, "/");
858         if (url == NULL) {
859                 /* Assume "/" is input is exhausted. */
860                 url = strdup("/");
861                 url_fixup = 1;
862         }
863         
864         /* If tmphost contains an '@', extract username and pwd. */
865         if (strchr (tmphost, '@') != NULL) {
866                 tmpstr = tmphost;
867                 strsep (&tmphost, "@");
868                 authdata = strdup (tmpstr);
869         }
870         
871         host = strdup (tmphost);
872         
873         /* netio() might change pointer of host to something else if redirect
874            loop is executed. Make a copy so we can correctly free everything. */
875         freeme = host;
876         /* Only run if url was != NULL above. */
877         if (!url_fixup) {
878                 url--;
879                 url[0] = '/';
880                 if (url[strlen(url)-1] == '\n') {
881                         url[strlen(url)-1] = '\0';
882                 }
883         }
884         
885         if ((NetConnect (&my_socket, host, cur_ptr, httpproto, suppressoutput)) != 0) {
886                 free (freeme);
887                 free (authdata);
888                 if (url_fixup)
889                         free(url);
890                 cur_ptr->problem = 1;
891                 return NULL;
892         }
893         returndata = NetIO (&my_socket, host, url, cur_ptr, authdata, httpproto, suppressoutput);
894         if ((returndata == NULL) && (cur_ptr->netio_error != NET_ERR_OK)) {
895                 cur_ptr->problem = 1;
896         }
897         
898         /* url will be freed in the calling function. */
899         free (freeme);          /* This is *host. */
900         free (authdata);
901         if (url_fixup)
902                 free(url);
903         
904         return returndata;
905 }