From 6ecb720a1c5a2b5b4088d250d0287d064e5ff2e7 Mon Sep 17 00:00:00 2001 From: Art Cancro Date: Sat, 1 Jan 2005 17:32:35 +0000 Subject: [PATCH] Initial revision --- rss2ctdl/COPYING.txt | 339 ++++++++++++++ rss2ctdl/Makefile | 12 + rss2ctdl/README.txt | 39 ++ rss2ctdl/config.h | 82 ++++ rss2ctdl/conversions.c | 156 +++++++ rss2ctdl/conversions.h | 31 ++ rss2ctdl/digcalc.c | 123 ++++++ rss2ctdl/digcalc.h | 47 ++ rss2ctdl/do_feeds.sh | 19 + rss2ctdl/io-internal.c | 195 ++++++++ rss2ctdl/io-internal.h | 28 ++ rss2ctdl/main.c | 197 +++++++++ rss2ctdl/main.h | 28 ++ rss2ctdl/md5.c | 270 ++++++++++++ rss2ctdl/md5.h | 56 +++ rss2ctdl/net-support.c | 282 ++++++++++++ rss2ctdl/net-support.h | 30 ++ rss2ctdl/netio.c | 903 ++++++++++++++++++++++++++++++++++++++ rss2ctdl/netio.h | 55 +++ rss2ctdl/os-support.c | 92 ++++ rss2ctdl/os-support.h | 35 ++ rss2ctdl/rss2ctdl | Bin 0 -> 39984 bytes rss2ctdl/setup.h | 35 ++ rss2ctdl/xmlparse.c | 404 +++++++++++++++++ rss2ctdl/xmlparse.h | 33 ++ rss2ctdl/zlib_interface.c | 165 +++++++ rss2ctdl/zlib_interface.h | 22 + 27 files changed, 3678 insertions(+) create mode 100644 rss2ctdl/COPYING.txt create mode 100644 rss2ctdl/Makefile create mode 100644 rss2ctdl/README.txt create mode 100644 rss2ctdl/config.h create mode 100644 rss2ctdl/conversions.c create mode 100644 rss2ctdl/conversions.h create mode 100644 rss2ctdl/digcalc.c create mode 100644 rss2ctdl/digcalc.h create mode 100755 rss2ctdl/do_feeds.sh create mode 100644 rss2ctdl/io-internal.c create mode 100644 rss2ctdl/io-internal.h create mode 100644 rss2ctdl/main.c create mode 100644 rss2ctdl/main.h create mode 100644 rss2ctdl/md5.c create mode 100644 rss2ctdl/md5.h create mode 100644 rss2ctdl/net-support.c create mode 100644 rss2ctdl/net-support.h create mode 100644 rss2ctdl/netio.c create mode 100644 rss2ctdl/netio.h create mode 100644 rss2ctdl/os-support.c create mode 100644 rss2ctdl/os-support.h create mode 100755 rss2ctdl/rss2ctdl create mode 100644 rss2ctdl/setup.h create mode 100644 
rss2ctdl/xmlparse.c create mode 100644 rss2ctdl/xmlparse.h create mode 100644 rss2ctdl/zlib_interface.c create mode 100644 rss2ctdl/zlib_interface.h diff --git a/rss2ctdl/COPYING.txt b/rss2ctdl/COPYING.txt new file mode 100644 index 000000000..a43ea2126 --- /dev/null +++ b/rss2ctdl/COPYING.txt @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. 
+ + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) 
Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. 
+ + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/rss2ctdl/Makefile b/rss2ctdl/Makefile new file mode 100644 index 000000000..026521307 --- /dev/null +++ b/rss2ctdl/Makefile @@ -0,0 +1,12 @@ +CC=gcc +OBJFILES=zlib_interface.o os-support.o io-internal.o md5.o digcalc.o \ + net-support.o conversions.o xmlparse.o netio.o \ + main.o +CFLAGS=`xml2-config --cflags` +LDFLAGS=`xml2-config --libs` + +rss2ctdl: $(OBJFILES) + $(CC) $(CFLAGS) $(OBJFILES) $(LDFLAGS) -o rss2ctdl + +clean: + rm -f *.o rss2ctdl diff --git a/rss2ctdl/README.txt b/rss2ctdl/README.txt new file mode 100644 index 000000000..3e3faa63d --- /dev/null +++ b/rss2ctdl/README.txt @@ -0,0 +1,39 @@ + RSS2CTDL -- an RSS to Citadel gateway + +Main program (c)2004 by Art Cancro +RSS parser (c)2003-2004 by Oliver Feiler + and Rene Puls + +RSS2CTDL is an RSS-to-Citadel gateway. It allows you to pull external RSS +feeds into Citadel rooms. Feed URL's are polled whenever you run the program. +Each item is converted to a Citadel message and submitted into the network +queue. The message-ID is derived from a unique hash of the GUID tag of +each item. If there is no GUID tag (which, unfortunately, is the case for +the vast majority of feeds) then we hash the title/description/link tags +instead. We then dump it all into the queue and let the loopzapper handle +the dupes. + +We are distributing RSS2CTDL as a standalone utility, only as a temporary +measure. Eventually it will be bundled with the Citadel server, and it will +be invoked by the main server process. At that time, this standalone +distribution will be discontinued. + +RSS2CTDL requires the "libxml2" library, which is probably already installed +on your host system. If not, get it from http://www.xmlsoft.org + +Here's how to make it work: + +1. Run "make" to build the program. + (There is no "configure" and there is no "make install" either. The + makefile will produce an "rss2ctdl" binary, which is all you need.) +2. Edit the "do_feeds.sh" script to your liking. 
Tell it the feeds you + want to receive, and the rooms you want to deposit them into. +3. Create those rooms if they do not already exist. +4. Configure your crontab to run do_feeds.sh every half hour. (You can go + more or less often if you wish, but once every half hour seems to be the + frequency generally agreed upon in the community to be optimal.) + +Do be aware that rss2ctdl must have write access to $CTDL/network/spoolin +in order to submit its messages into the Citadel spool. In practice, this +generally means that it should be run by the crontab of the user under which +the Citadel service is running ... or by root if you wish. diff --git a/rss2ctdl/config.h b/rss2ctdl/config.h new file mode 100644 index 000000000..807edc471 --- /dev/null +++ b/rss2ctdl/config.h @@ -0,0 +1,82 @@ +/* + * $Id$ + * + * Copyright 2003 Oliver Feiler + * + * config.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef CONFIG_H +#define CONFIG_H + +#include "netio.h" + +/* Set your charset here. ISO-8859-1 is default. */ +#ifndef TARGET_CHARSET +#define TARGET_CHARSET "ISO-8859-1" +#endif + +struct feed { + char *feedurl; /* Non hashified URL */ + char *feed; /* Raw XML */ + int content_length; + char *title; + char *link; + char *description; + char *lastmodified; /* Content of header as sent by the server. 
*/ + int lasthttpstatus; + char *content_type; + netio_error_type netio_error; /* See netio.h */ + int connectresult; /* Socket errno */ + char *cookies; /* Login cookies for this feed. */ + char *authinfo; /* HTTP authinfo string. */ + char *servauth; /* Server supplied authorization header. */ + struct newsitem *items; + int problem; /* Set if there was a problem + * downloading the feed. */ + char *original; /* Original feed title. */ +}; + +struct newsitem { + struct newsdata *data; + struct newsitem *next_ptr, *prev_ptr; /* Pointer to next/prev item in double linked list */ +}; + +struct newsdata { + struct feed *parent; + int readstatus; /* 0: unread, 1: read */ + char *title; + char *link; + char *guid; /* Not always present */ + char *description; +}; + +extern struct feed *first_ptr; + +#ifdef LOCALEPATH +# include +# include +#endif + +#ifdef LOCALEPATH +# define _(String) gettext (String) +#else +# define _(String) (String)s +# define ngettext(Singular, Plural, n) (Plural) +#endif + +#endif diff --git a/rss2ctdl/conversions.c b/rss2ctdl/conversions.c new file mode 100644 index 000000000..62e01c6cb --- /dev/null +++ b/rss2ctdl/conversions.c @@ -0,0 +1,156 @@ +/* + * $Id$ + * + * Copyright 2003-2004 Oliver Feiler and + * Rene Puls + * + * conversions.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
/*
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <stdlib.h>
#include <string.h>

/*
 * Base64-encode inbuf_size bytes starting at inbuf.
 *
 * Returns a newly allocated, NUL-terminated string that the caller must
 * free(), or NULL if memory cannot be allocated (or if inbuf is NULL while
 * inbuf_size is non-zero).  An empty input yields an empty string.
 */
char *base64encode(char const *inbuf, unsigned int inbuf_size) {
	static char const alphabet[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

	/* Read the input as unsigned bytes: a plain char may be signed, and a
	 * sign-extended byte >= 0x80 would corrupt the 24 bit group and
	 * produce a negative index into the alphabet. */
	unsigned char const *in = (unsigned char const *)inbuf;
	size_t remaining = inbuf_size;
	size_t groups = remaining / 3 + (remaining % 3 != 0);
	char *outbuf;
	char *out;
	unsigned long bits;

	if (inbuf == NULL && inbuf_size > 0)
		return NULL;

	/* Every (possibly partial) 3 byte group becomes 4 output characters. */
	if (groups > ((size_t)-1 - 1) / 4)
		return NULL;
	outbuf = malloc(groups * 4 + 1);
	if (outbuf == NULL)
		return NULL;
	out = outbuf;

	while (remaining >= 3) {
		bits = ((unsigned long)in[0] << 16) |
		       ((unsigned long)in[1] << 8) |
		        (unsigned long)in[2];
		*out++ = alphabet[(bits >> 18) & 0x3f];
		*out++ = alphabet[(bits >> 12) & 0x3f];
		*out++ = alphabet[(bits >> 6) & 0x3f];
		*out++ = alphabet[bits & 0x3f];
		in += 3;
		remaining -= 3;
	}

	/* One or two trailing bytes: pad the final group with '='. */
	if (remaining > 0) {
		bits = (unsigned long)in[0] << 16;
		if (remaining == 2)
			bits |= (unsigned long)in[1] << 8;
		*out++ = alphabet[(bits >> 18) & 0x3f];
		*out++ = alphabet[(bits >> 12) & 0x3f];
		*out++ = (remaining == 2) ? alphabet[(bits >> 6) & 0x3f] : '=';
		*out++ = '=';
	}

	*out = '\0';
	return outbuf;
}

/*
 * Decode an HTTP "chunked" body in place.
 *
 * chunked must be NUL-terminated (the chunk sizes are parsed with
 * strtoul) and *inputlen is the number of bytes it holds.  On success the
 * decoded data is left at the start of the buffer, NUL-terminated,
 * *inputlen is set to its length and chunked is returned.
 *
 * Returns NULL on invalid input: a missing CRLF after a chunk size line or
 * a chunk that claims more bytes than remain in the buffer.  The buffer
 * contents are unspecified in that case.
 */
char *decodechunked(char *chunked, unsigned int *inputlen) {
	char *src;
	char *dest;
	char *end;
	unsigned long chunklen;

	if (chunked == NULL || inputlen == NULL)
		return NULL;

	src = dest = chunked;
	end = chunked + *inputlen;

	/* We can reuse the same buffer to dechunkify it:
	 * the data size will never increase.
	 * strtoul also skips the CRLF that terminates the previous chunk. */
	while ((chunklen = strtoul(src, &src, 16)) != 0) {
		/* skip chunk-extension part */
		while (src < end && *src != '\0' && *src != '\r')
			src++;
		/* The size line must end in CRLF inside the buffer; never step
		 * over the terminator into memory we do not own. */
		if (end - src < 2 || src[0] != '\r' || src[1] != '\n')
			return NULL;
		src += 2;
		/* insane chunk length. Well... */
		if (chunklen > (unsigned long)(end - src))
			return NULL;
		memmove(dest, src, chunklen);
		dest += chunklen;
		src += chunklen;
		/* and go to the next chunk */
	}
	*dest = '\0';
	*inputlen = (unsigned int)(dest - chunked);

	return chunked;
}

/*
 * Remove leading whitespace (spaces, newlines, tabs) in place.
 * Only ASCII bytes are inspected, so UTF-8 sequences are left intact.
 * tidyness:	0 = only strip characters from the beginning of the string
 *		1 = additionally turn every tab and newline along the
 *		    string into a space.
 * A NULL string is ignored.
 */
void CleanupString (char * string, int tidyness) {
	size_t skip;
	size_t len;
	size_t i;

	/* If we are passed a NULL pointer, leave it alone and return. */
	if (string == NULL)
		return;

	/* Shift the remainder down once, terminator included, instead of
	 * moving the whole string for every leading character. */
	skip = strspn(string, "\n \t");
	len = strlen(string + skip);
	if (skip > 0)
		memmove(string, string + skip, len + 1);

	/* Eat newlines and tabs along the whole string. */
	if (tidyness == 1) {
		for (i = 0; i < len; i++) {
			if ((string[i] == '\t') || (string[i] == '\n'))
				string[i] = ' ';
		}
	}
}
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef CONVERSIONS_H +#define CONVERSIONS_H + +char *base64encode(char const *inbuf, unsigned int inbuf_size); +char* decodechunked(char * chunked, unsigned int *inputlen); +void CleanupString (char * string, int tidyness); + +#endif diff --git a/rss2ctdl/digcalc.c b/rss2ctdl/digcalc.c new file mode 100644 index 000000000..4114c0626 --- /dev/null +++ b/rss2ctdl/digcalc.c @@ -0,0 +1,123 @@ +/* + * $Id$ + * + * Copyright 2003-2004 Oliver Feiler + * + * digcalc.c + * + * This is the sample implementation from RFC 2617. + * The code has been modified to work with Colin Plumb's + * MD5 implementation rather than using RSA's. 
+ */
+
+#include "md5.h"
+
+#include <string.h>
+#include "digcalc.h"
+
+/*
+ * Convert the HASHLEN-byte binary digest in Bin into HASHHEXLEN lowercase
+ * hexadecimal characters in Hex, followed by a terminating NUL.
+ */
+void CvtHex(
+    IN HASH Bin,
+    OUT HASHHEX Hex
+    )
+{
+    static const char hexdigits[] = "0123456789abcdef";
+    unsigned short i;
+    unsigned char j;
+
+    for (i = 0; i < HASHLEN; i++) {
+        /* HASH is an array of plain char; go through unsigned char so the
+         * shift never operates on a negative value. */
+        j = (unsigned char) Bin[i];
+        Hex[i*2] = hexdigits[(j >> 4) & 0xf];
+        Hex[i*2+1] = hexdigits[j & 0xf];
+    }
+    Hex[HASHHEXLEN] = '\0';
+}
+
+/*
+ * calculate H(A1) as per spec
+ *
+ * Hashes "username:realm:password".  When pszAlg is exactly "md5-sess" the
+ * result is hashed again together with the server nonce and client nonce.
+ * The hex form of the final digest is written to SessionKey.
+ */
+void DigestCalcHA1(
+    IN char * pszAlg,
+    IN char * pszUserName,
+    IN char * pszRealm,
+    IN char * pszPassword,
+    IN char * pszNonce,
+    IN char * pszCNonce,
+    OUT HASHHEX SessionKey
+    )
+{
+    struct MD5Context Md5Ctx;
+    HASH HA1;
+
+    MD5Init(&Md5Ctx);
+    MD5Update(&Md5Ctx, (unsigned char const *) pszUserName, strlen(pszUserName));
+    MD5Update(&Md5Ctx, (unsigned char const *) ":", 1);
+    MD5Update(&Md5Ctx, (unsigned char const *) pszRealm, strlen(pszRealm));
+    MD5Update(&Md5Ctx, (unsigned char const *) ":", 1);
+    MD5Update(&Md5Ctx, (unsigned char const *) pszPassword, strlen(pszPassword));
+    MD5Final((unsigned char *) HA1, &Md5Ctx);
+    if (strcmp(pszAlg, "md5-sess") == 0) {
+
+        MD5Init(&Md5Ctx);
+        MD5Update(&Md5Ctx, (unsigned char const *) HA1, HASHLEN);
+        MD5Update(&Md5Ctx, (unsigned char const *) ":", 1);
+        MD5Update(&Md5Ctx, (unsigned char const *) pszNonce, strlen(pszNonce));
+        MD5Update(&Md5Ctx, (unsigned char const *) ":", 1);
+        MD5Update(&Md5Ctx, (unsigned char const *) pszCNonce, strlen(pszCNonce));
+        MD5Final((unsigned char *) HA1, &Md5Ctx);
+    }
+    CvtHex(HA1, SessionKey);
+}
+
+/*
+ * calculate request-digest/response-digest as per HTTP Digest spec
+ *
+ * H(A2) is the hash of "method:uri", extended with ":HEntity" when pszQop
+ * is "auth-int".  The response is the hash of "HA1:nonce:" followed, when
+ * pszQop is non-empty, by "nc:cnonce:qop:", and finally the hex of H(A2).
+ */
+void DigestCalcResponse(
+    IN HASHHEX HA1,           /* H(A1) */
+    IN char * pszNonce,       /* nonce from server */
+    IN char * pszNonceCount,  /* 8 hex digits */
+    IN char * pszCNonce,      /* client nonce */
+    IN char * pszQop,         /* qop-value: "", "auth", "auth-int" */
+    IN char * pszMethod,      /* method from the request */
+    IN char * pszDigestUri,   /* requested URL */
+    IN HASHHEX HEntity,       /* H(entity body) if qop="auth-int" */
+    OUT HASHHEX Response      /* request-digest or response-digest */
+    )
+{
+    struct MD5Context Md5Ctx;
+    HASH HA2;
+    HASH RespHash;
+    HASHHEX HA2Hex;
+
+    /* calculate H(A2) */
+    MD5Init(&Md5Ctx);
+    MD5Update(&Md5Ctx, (unsigned char const *) pszMethod, strlen(pszMethod));
+    MD5Update(&Md5Ctx, (unsigned char const *) ":", 1);
+    MD5Update(&Md5Ctx, (unsigned char const *) pszDigestUri, strlen(pszDigestUri));
+    if (strcmp(pszQop, "auth-int") == 0) {
+        MD5Update(&Md5Ctx, (unsigned char const *) ":", 1);
+        MD5Update(&Md5Ctx, (unsigned char const *) HEntity, HASHHEXLEN);
+    }
+    MD5Final((unsigned char *) HA2, &Md5Ctx);
+    CvtHex(HA2, HA2Hex);
+
+    /* calculate response */
+    MD5Init(&Md5Ctx);
+    MD5Update(&Md5Ctx, (unsigned char const *) HA1, HASHHEXLEN);
+    MD5Update(&Md5Ctx, (unsigned char const *) ":", 1);
+    MD5Update(&Md5Ctx, (unsigned char const *) pszNonce, strlen(pszNonce));
+    MD5Update(&Md5Ctx, (unsigned char const *) ":", 1);
+    if (*pszQop) {
+
+        MD5Update(&Md5Ctx, (unsigned char const *) pszNonceCount, strlen(pszNonceCount));
+        MD5Update(&Md5Ctx, (unsigned char const *) ":", 1);
+        MD5Update(&Md5Ctx, (unsigned char const *) pszCNonce, strlen(pszCNonce));
+        MD5Update(&Md5Ctx, (unsigned char const *) ":", 1);
+        MD5Update(&Md5Ctx, (unsigned char const *) pszQop, strlen(pszQop));
+        MD5Update(&Md5Ctx, (unsigned char const *) ":", 1);
+    }
+    MD5Update(&Md5Ctx, (unsigned char const *) HA2Hex, HASHHEXLEN);
+    MD5Final((unsigned char *) RespHash, &Md5Ctx);
+    CvtHex(RespHash, Response);
+}
diff --git a/rss2ctdl/digcalc.h b/rss2ctdl/digcalc.h
new file mode 100644
index 000000000..225163945
--- /dev/null
+++ b/rss2ctdl/digcalc.h
@@ -0,0 +1,47 @@
+/*
+ * $Id$
+ *
+ * Copyright 2003-2004 Oliver Feiler
+ *
+ * digcalc.h
+ *
+ * This is the sample implementation from RFC 2617.
+ * The code has been modified to work with Colin Plumb's
+ * MD5 implementation rather than using RSA's.
+ */
+
+#ifndef DIGCALC_H
+#define DIGCALC_H
+
+/* Size in bytes of a binary MD5 digest. */
+#define HASHLEN 16
+typedef char HASH[HASHLEN];
+/* Number of hex characters in a digest; HASHHEX adds room for the NUL. */
+#define HASHHEXLEN 32
+typedef char HASHHEX[HASHHEXLEN+1];
+/* Empty markers that only document parameter direction. */
+#define IN
+#define OUT
+
+/* Write the lowercase hex form of Bin, NUL-terminated, into Hex. */
+void CvtHex(
+    IN HASH Bin,
+    OUT HASHHEX Hex
+    );
+
+/* calculate H(A1) as per HTTP Digest spec */
+void DigestCalcHA1(
+    IN char * pszAlg,
+    IN char * pszUserName,
+    IN char * pszRealm,
+    IN char * pszPassword,
+    IN char * pszNonce,
+    IN char * pszCNonce,
+    OUT HASHHEX SessionKey
+    );
+
+/* calculate request-digest/response-digest as per HTTP Digest spec */
+void DigestCalcResponse(
+    IN HASHHEX HA1,           /* H(A1) */
+    IN char * pszNonce,       /* nonce from server */
+    IN char * pszNonceCount,  /* 8 hex digits */
+    IN char * pszCNonce,      /* client nonce */
+    IN char * pszQop,         /* qop-value: "", "auth", "auth-int" */
+    IN char * pszMethod,      /* method from the request */
+    IN char * pszDigestUri,   /* requested URL */
+    IN HASHHEX HEntity,       /* H(entity body) if qop="auth-int" */
+    OUT HASHHEX Response      /* request-digest or response-digest */
+    );
+
+#endif /* !DIGCALC_H */
diff --git a/rss2ctdl/do_feeds.sh b/rss2ctdl/do_feeds.sh
new file mode 100755
index 000000000..5c2e35953
--- /dev/null
+++ b/rss2ctdl/do_feeds.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# Temporary RSS feed suck-o-matic script for Uncensored.
+#
+# This script is UNSUPPORTED. It is part of a technology preview for
+# functionality which will eventually ship as part of the Citadel system.
+
+# Paths to the RSS2CTDL binary and to the Citadel directory
+PROG=/usr/local/rss2ctdl/rss2ctdl
+CTDL=/appl/citadel
+
+# Do one of these for each feed. You need the URL of the feed, the name
+# of the room to dump it into, and a domain name to stamp onto messages
+# and message ID's.
+#
+$PROG http://lxer.com/module/newswire/headlines.rss LXer lxer.com $CTDL
+$PROG http://slashdot.org/index.rss Slashdot slashdot.org $CTDL
+$PROG http://www.groklaw.net/backend/GrokLaw.rdf Groklaw groklaw.net $CTDL
+$PROG http://www.ioerror.us/feed/rss2/ Lizard ioerror.us $CTDL
diff --git a/rss2ctdl/io-internal.c b/rss2ctdl/io-internal.c
new file mode 100644
index 000000000..7d9cb21a9
--- /dev/null
+++ b/rss2ctdl/io-internal.c
@@ -0,0 +1,195 @@
+/*
+ * $Id$
+ *
+ * Copyright 2003-2004 Oliver Feiler
+ *
+ * io-internal.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/* NOTE(review): the header names of the nine system includes below are
+ * missing from this copy of the patch; restore them from the upstream file. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "config.h"
+
+#include "main.h"
+#include "conversions.h"
+#include "netio.h"
+#include "xmlparse.h"
+#include "io-internal.h"
+
+extern char *browser;
+
+/*
+ * Write a short description of an HTTP status code into errorstring.
+ * At most size bytes are written, including the terminating NUL.
+ * Codes without a dedicated text are rendered as "HTTP <code>!".
+ */
+void GetHTTPErrorString (char * errorstring, int size, int httpstatus) {
+    switch (httpstatus) {
+        case 400:
+            snprintf(errorstring, size, "Bad request");
+            break;
+        case 402:
+            snprintf(errorstring, size, "Payment required");
+            break;
+        case 403:
+            snprintf(errorstring, size, "Access denied");
+            break;
+        case 500:
+            snprintf(errorstring, size, "Internal server error");
+            break;
+        case 501:
+            snprintf(errorstring, size, "Not implemented");
+            break;
+        case 502:
+            /* 502 shares the 503 text. */
+        case 503:
+            snprintf(errorstring, size, "Service unavailable");
+            break;
+        default:
+            /* Bounded like every other branch; this one used to ignore size. */
+            snprintf(errorstring, size, "HTTP %d!", httpstatus);
+    }
+}
+
+/*
+ * Print a message on stderr describing cur_ptr->netio_error, prefixed with
+ * the feed title.  Nothing is printed when suppressoutput is non-zero, or
+ * for NET_ERR_OK, NET_ERR_UNKNOWN and unrecognised error values.
+ */
+void PrintUpdateError (int suppressoutput, struct feed * cur_ptr) {
+    netio_error_type err;
+    char httperrstr[64];
+
+    err = cur_ptr->netio_error;
+
+    if (!suppressoutput) {
+        switch (err) {
+            case NET_ERR_OK:
+                break;
+            case NET_ERR_URL_INVALID:
+                fprintf(stderr, "%s: Invalid URL!\n", cur_ptr->title);
+                break;
+            case NET_ERR_SOCK_ERR:
+                fprintf(stderr, "%s: Couldn't create network socket!\n", cur_ptr->title);
+                break;
+            case NET_ERR_HOST_NOT_FOUND:
+                fprintf(stderr, "%s: Can't resolve host!\n", cur_ptr->title);
+                break;
+            case NET_ERR_CONN_REFUSED:
+                fprintf(stderr, "%s: Connection refused!\n", cur_ptr->title);
+                break;
+            case NET_ERR_CONN_FAILED:
+                fprintf(stderr, "%s: Couldn't connect to server: %s\n",
+                    cur_ptr->title,
+                    (strerror(cur_ptr->connectresult) ? strerror(cur_ptr->connectresult) : "(null)"));
+                break;
+            case NET_ERR_TIMEOUT:
+                fprintf(stderr, "%s: Connection timed out.\n", cur_ptr->title);
+                break;
+            case NET_ERR_UNKNOWN:
+                break;
+            case NET_ERR_REDIRECT_COUNT_ERR:
+                fprintf(stderr, "%s: Too many HTTP redirects encountered! Giving up.\n", cur_ptr->title);
+                break;
+            case NET_ERR_REDIRECT_ERR:
+                fprintf(stderr, "%s: Server sent an invalid redirect!\n", cur_ptr->title);
+                break;
+            case NET_ERR_HTTP_410:
+            case NET_ERR_HTTP_404:
+                fprintf(stderr, "%s: This feed no longer exists. Please unsubscribe!\n", cur_ptr->title);
+                break;
+            case NET_ERR_HTTP_NON_200:
+                GetHTTPErrorString(httperrstr, sizeof(httperrstr), cur_ptr->lasthttpstatus);
+                fprintf(stderr, "%s: Could not download feed: %s\n", cur_ptr->title, httperrstr);
+                break;
+            case NET_ERR_HTTP_PROTO_ERR:
+                fprintf(stderr, "%s: Error in server reply.\n", cur_ptr->title);
+                break;
+            case NET_ERR_AUTH_FAILED:
+                fprintf(stderr, "%s: Authentication failed!\n", cur_ptr->title);
+                break;
+            case NET_ERR_AUTH_NO_AUTHINFO:
+                fprintf(stderr, "%s: URL does not contain authentication information!\n", cur_ptr->title);
+                break;
+            case NET_ERR_AUTH_GEN_AUTH_ERR:
+                fprintf(stderr, "%s: Could not generate authentication information!\n", cur_ptr->title);
+                break;
+            case NET_ERR_AUTH_UNSUPPORTED:
+                fprintf(stderr, "%s: Unsupported authentication method requested by server!\n", cur_ptr->title);
+                break;
+            case NET_ERR_GZIP_ERR:
+                fprintf(stderr, "%s: Error decompressing server reply!\n", cur_ptr->title);
+                break;
+            default:
+                break;
+        }
+        /* Must be inside if(!suppressoutput) statement! */
+    }
+}
+
+
+/* Update given feed from server.
+ * Reload XML document and replace in memory cur_ptr->feed with it.
+ *
+ * Returns 0 when the feed was downloaded and parsed, 1 otherwise
+ * (NULL argument, out of memory, download failure or invalid XML).
+ */
+int UpdateFeed (struct feed * cur_ptr) {
+    char *tmpname;
+    char *freeme;
+
+    if (cur_ptr == NULL) {
+        return 1;
+    }
+
+    /* Need to work on a copy of ->feedurl, because DownloadFeed() changes the pointer. */
+    tmpname = strdup (cur_ptr->feedurl);
+    if (tmpname == NULL) {
+        return 1;
+    }
+    freeme = tmpname; /* Need to make a copy, otherwise we cannot free all RAM. */
+    free (cur_ptr->feed);
+
+    cur_ptr->feed = DownloadFeed (tmpname, cur_ptr, 0);
+    free (freeme);
+
+    /* Set title and link structure to something.
+     * To the feedurl in this case, so the program shows something
+     * as a placeholder instead of crashing. */
+    if (cur_ptr->title == NULL)
+        cur_ptr->title = strdup (cur_ptr->feedurl);
+    if (cur_ptr->link == NULL)
+        cur_ptr->link = strdup (cur_ptr->feedurl);
+
+    /* If the download function returns a NULL pointer return from here. */
+    if (cur_ptr->feed == NULL) {
+        if (cur_ptr->problem == 1)
+            PrintUpdateError (0, cur_ptr);
+        return 1;
+    }
+
+    if ((DeXML (cur_ptr)) != 0) {
+        fprintf(stderr, "Invalid XML! Cannot parse this feed!\n");
+
+        /* Activate feed problem flag. */
+        cur_ptr->problem = 1;
+        return 1;
+    }
+
+    /* We don't need these anymore. Free the raw XML to save some memory. */
+    free (cur_ptr->feed);
+    cur_ptr->feed = NULL;
+
+    return 0;
+}
+
+
diff --git a/rss2ctdl/io-internal.h b/rss2ctdl/io-internal.h
new file mode 100644
index 000000000..fd7f2c727
--- /dev/null
+++ b/rss2ctdl/io-internal.h
@@ -0,0 +1,28 @@
+/*
+ * $Id$
+ *
+ * Copyright 2003-2004 Oliver Feiler
+ *
+ * io-internal.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef IO_INTERNAL_H
+#define IO_INTERNAL_H
+
+int UpdateFeed (struct feed * cur_ptr);
+
+#endif
diff --git a/rss2ctdl/main.c b/rss2ctdl/main.c
new file mode 100644
index 000000000..c4e8190fb
--- /dev/null
+++ b/rss2ctdl/main.c
@@ -0,0 +1,197 @@
+/*
+ * $Id$
+ *
+ * rss2ctdl -- a utility to pull RSS feeds into Citadel rooms.
+ *
+ * Main program is (c)2004 by Art Cancro
+ * RSS parser is (c)2003-2004 by Oliver Feiler
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/* NOTE(review): the header names of the ten system includes below are
+ * missing from this copy of the patch; restore them from the upstream file. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "config.h"
+#include "main.h"
+#include "io-internal.h"
+#include "conversions.h"
+#include "md5.h"
+#include "digcalc.h"
+
+struct feed *first_ptr = NULL;
+struct entity *first_entity = NULL;
+
+/*
+ * If you want to use a proxy server, you can hack the following two lines.
+ */
+char *proxyname = "";
+unsigned short proxyport = 0;
+
+/*
+ * Main function of program.
+ *
+ * argv[1] is the feed URL, argv[2] the room name, argv[3] the name stamped
+ * onto message IDs and the originating-node field, and argv[4] the directory
+ * under which network/spoolin/ is written.  The feed is downloaded and every
+ * item is appended as one message to a newly created spool file.
+ */
+int main (int argc, char *argv[]) {
+    struct feed *new_ptr;
+    char *url;
+    char tmp[512];
+    struct newsitem *itemptr;
+    FILE *fp;
+    char md5msgid[256];
+    MD5_CTX md5context;
+    HASHHEX md5context_hex;
+    int saved_errno;
+    int n;
+
+#ifdef LOCALEPATH
+    setlocale (LC_ALL, "");
+    bindtextdomain ("rss2ctdl", LOCALEPATH);
+    textdomain ("rss2ctdl");
+#endif
+
+    if (argc != 5) {
+        /* NOTE(review): the argument names were missing from the usage
+         * text; these are inferred from how argv[1..4] are used below. */
+        fprintf(stderr,
+            "%s: usage:\n %s feedurl roomname nodename citadel_dir\n",
+            argv[0], argv[0]);
+        exit(1);
+    }
+
+    /* Init the pRNG. See about.c for usages of rand() ;) */
+    srand(time(0));
+
+    url = strdup(argv[1]);
+    if (url == NULL) {
+        fprintf(stderr, "%s: out of memory\n", argv[0]);
+        exit(1);
+    }
+    CleanupString(url, 0);
+
+    /* Support that stupid feed:// "protocol" */
+    if (strncasecmp (url, "feed://", 7) == 0)
+        memcpy (url, "http", 4);
+
+    /* If URL does not start with the protocol specification,
+       assume http://
+       -> tmp[512] -> we can "only" use max 504 chars from url ("http://" == 7). */
+    if ((strncasecmp (url, "http://", 7) != 0) &&
+        (strncasecmp (url, "https://", 8) != 0)) {
+        if (strlen (url) < 504) {
+            snprintf (tmp, sizeof(tmp), "http://%s", url);
+            free (url);
+            url = strdup (tmp);
+            if (url == NULL) {
+                fprintf(stderr, "%s: out of memory\n", argv[0]);
+                exit(1);
+            }
+        } else {
+            free (url);
+            return 2;
+        }
+    }
+
+    new_ptr = malloc (sizeof(struct feed));
+    if (new_ptr == NULL) {
+        fprintf(stderr, "%s: out of memory\n", argv[0]);
+        free (url);
+        exit(1);
+    }
+    new_ptr->feedurl = strdup(url);
+    new_ptr->feed = NULL;
+    new_ptr->content_length = 0;
+    new_ptr->title = NULL;
+    new_ptr->link = NULL;
+    new_ptr->description = NULL;
+    new_ptr->lastmodified = NULL;
+    new_ptr->lasthttpstatus = 0;
+    new_ptr->content_type = NULL;
+    new_ptr->netio_error = NET_ERR_OK;
+    new_ptr->connectresult = 0;
+    new_ptr->cookies = NULL;
+    new_ptr->authinfo = NULL;
+    new_ptr->servauth = NULL;
+    new_ptr->items = NULL;
+    new_ptr->problem = 0;
+    new_ptr->original = NULL;
+
+    /* Don't need url text anymore. */
+    free (url);
+
+    if (new_ptr->feedurl == NULL) {
+        fprintf(stderr, "%s: out of memory\n", argv[0]);
+        exit(1);
+    }
+
+    /* Download new feed and DeXMLize it. */
+    if ((UpdateFeed (new_ptr)) != 0) {
+        exit(1);
+    }
+
+    /* argv[4] is caller-supplied, so the path is built with a bound. */
+    n = snprintf(tmp, sizeof(tmp), "%s/network/spoolin/rssfeed.%ld",
+        argv[4], (long) time(NULL));
+    if (n < 0 || (size_t) n >= sizeof(tmp)) {
+        fprintf(stderr, "%s: spool file name too long\n", argv[0]);
+        exit(1);
+    }
+    fp = fopen(tmp, "w");
+    if (fp == NULL) {
+        /* fprintf() may overwrite errno, so keep the fopen() value. */
+        saved_errno = errno;
+        fprintf(stderr, "%s: cannot open %s: %s\n",
+            argv[0], tmp, strerror(saved_errno));
+        exit(saved_errno);
+    }
+
+    for (itemptr = new_ptr->items; itemptr != NULL; itemptr = itemptr->next_ptr) {
+        fprintf(stderr, "--> %s\n",
+            (itemptr->data->title != NULL) ? itemptr->data->title : "(no title)");
+        fprintf(fp, "%c", 255);    /* Start of message */
+        fprintf(fp, "A");          /* Non-anonymous */
+        fprintf(fp, "%c", 4);      /* MIME */
+        fprintf(fp, "Prss%c", 0);  /* path */
+
+        /* The message ID will be an MD5 hash of the GUID.
+         * If there is no GUID present, we construct a message ID based
+         * on an MD5 hash of each item. Citadel's loopzapper will automatically
+         * reject items with message ID's which have already been submitted.
+         */
+        MD5Init(&md5context);
+        if (itemptr->data->guid != NULL) {
+            MD5Update(&md5context, (unsigned char const *) itemptr->data->guid, strlen(itemptr->data->guid));
+        }
+        else {
+            if (itemptr->data->title != NULL) {
+                MD5Update(&md5context, (unsigned char const *) itemptr->data->title, strlen(itemptr->data->title));
+            }
+            if (itemptr->data->description != NULL) {
+                MD5Update(&md5context, (unsigned char const *) itemptr->data->description, strlen(itemptr->data->description));
+            }
+            if (itemptr->data->link != NULL) {
+                MD5Update(&md5context, (unsigned char const *) itemptr->data->link, strlen(itemptr->data->link));
+            }
+        }
+        MD5Final((unsigned char *) md5msgid, &md5context);
+        CvtHex(md5msgid, md5context_hex);
+
+        fprintf(fp, "I%s@%s%c", md5context_hex, argv[3], 0);  /* ID */
+
+        fprintf(fp, "T%ld%c", (long) time(NULL), 0);  /* time */
+        fprintf(fp, "Arss%c", 0);                     /* author */
+        fprintf(fp, "O%s%c", argv[2], 0);             /* room */
+        fprintf(fp, "C%s%c", argv[2], 0);             /* room */
+        fprintf(fp, "N%s%c", argv[3], 0);             /* orig node */
+        if (itemptr->data->title != NULL) {
+            fprintf(fp, "U%s%c", itemptr->data->title, 0);  /* subject */
+        }
+
+        fprintf(fp, "M");  /* msg text */
+        fprintf(fp, "Content-type: text/html\r\n\r\n");
+        fprintf(fp, "\r\n");
+        if (itemptr->data->description != NULL) {
+            fprintf(fp, "%s\n", itemptr->data->description);
+        }
+        if (itemptr->data->link != NULL) {
+            /* NOTE(review): the markup inside these two literals was missing
+             * from this copy of the patch (the second format had one %s for
+             * two arguments).  Two line breaks and an anchor are a
+             * reconstruction; confirm against the upstream file. */
+            fprintf(fp, "<br><br>\r\n");
+            fprintf(fp, "<a href=\"%s\">%s</a>\n",
+                itemptr->data->link,
+                itemptr->data->link);
+        }
+        fprintf(fp, "\r\n");
+        fprintf(fp, "%c", 0);
+    }
+
+    /* fclose() flushes; a failure here means the spool file is incomplete. */
+    if (fclose(fp) != 0) {
+        saved_errno = errno;
+        fprintf(stderr, "%s: cannot write %s: %s\n",
+            argv[0], tmp, strerror(saved_errno));
+        exit(1);
+    }
+
+    /* Be lazy and let the operating system free all the memory. */
+    return(0);
+}
diff --git a/rss2ctdl/main.h b/rss2ctdl/main.h
new file mode 100644
index 000000000..a5440f4b5
--- /dev/null
+++ b/rss2ctdl/main.h
@@ -0,0 +1,28 @@
+/*
+ * $Id$
+ *
+ * Copyright 2003-2004 Oliver Feiler
+ *
+ * main.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef MAIN_H
+#define MAIN_H
+
+#include "config.h"
+
+#endif
diff --git a/rss2ctdl/md5.c b/rss2ctdl/md5.c
new file mode 100644
index 000000000..676881210
--- /dev/null
+++ b/rss2ctdl/md5.c
@@ -0,0 +1,270 @@
+/*
+ * $Id$
+ *
+ * Copyright 2003-2004 Oliver Feiler
+ *
+ * md5.c
+ *
+ * This code has been slightly modified from its original.
+ * The endian check via evaluating endian.h has been
+ * replaced with the code in void byteReverse().
+ */
+
+/*
+ * This code implements the MD5 message-digest algorithm.
+ * The algorithm is due to Ron Rivest. This code was
+ * written by Colin Plumb in 1993, no copyright is claimed.
+ * This code is in the public domain; do with it what you wish.
+ *
+ * Equivalent code is available from RSA Data Security, Inc.
+ * This code has been tested against that, and is equivalent,
+ * except that you don't need to include two pages of legalese
+ * with every copy.
+ *
+ * To compute the message digest of a chunk of bytes, declare an
+ * MD5Context structure, pass it to MD5Init, call MD5Update as
+ * needed on buffers full of bytes, and then call MD5Final, which
+ * will fill a supplied 16-byte array with the digest.
+ */
+
+#include <string.h> /* for memcpy() */
+#include "md5.h"
+
+void byteReverse(unsigned char *buf, unsigned longs);
+
+/* -1: not determined yet, 1: first byte of 0xdeadbeef is 0xde, 0: otherwise. */
+static int endian_check = -1;
+
+/*
+ * Note: this code is harmless on little-endian machines.
+ *
+ * Rewrites `longs` consecutive 4-byte groups of buf in place so that each
+ * holds the word assembled from its bytes in little-endian order.  Returns
+ * without touching buf when the run-time check says no swap is needed.
+ * longs must be at least 1.
+ */
+void byteReverse(unsigned char *buf, unsigned longs)
+{
+    uint32 t;
+    static uint32 d = 0xdeadbeef;
+    unsigned char *b = (unsigned char *) &d;
+
+    if (endian_check == -1) {
+        if (b[0] == 0xde)
+            endian_check = 1;
+        else
+            endian_check = 0;
+    }
+
+    if (endian_check == 0)
+        return;
+
+    do {
+        t = (uint32) ((unsigned) buf[3] << 8 | buf[2]) << 16 |
+            ((unsigned) buf[1] << 8 | buf[0]);
+        *(uint32 *) buf = t;
+        buf += 4;
+    } while (--longs);
+}
+
+/*
+ * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
+ * initialization constants.
+ */
+void MD5Init(struct MD5Context *ctx)
+{
+    ctx->buf[0] = 0x67452301;
+    ctx->buf[1] = 0xefcdab89;
+    ctx->buf[2] = 0x98badcfe;
+    ctx->buf[3] = 0x10325476;
+
+    ctx->bits[0] = 0;
+    ctx->bits[1] = 0;
+}
+
+/*
+ * Update context to reflect the concatenation of another buffer full
+ * of bytes.
+ */
+void MD5Update(struct MD5Context *ctx, unsigned char const *buf, unsigned len)
+{
+    uint32 t;
+
+    /* Update bitcount */
+
+    t = ctx->bits[0];
+    if ((ctx->bits[0] = t + ((uint32) len << 3)) < t)
+        ctx->bits[1]++; /* Carry from low to high */
+    ctx->bits[1] += len >> 29;
+
+    t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */
+
+    /* Handle any leading odd-sized chunks */
+
+    if (t) {
+        unsigned char *p = (unsigned char *) ctx->in + t;
+
+        t = 64 - t;
+        if (len < t) {
+            memcpy(p, buf, len);
+            return;
+        }
+        memcpy(p, buf, t);
+        byteReverse(ctx->in, 16);
+        MD5Transform(ctx->buf, (uint32 *) ctx->in);
+        buf += t;
+        len -= t;
+    }
+    /* Process data in 64-byte chunks */
+
+    while (len >= 64) {
+        memcpy(ctx->in, buf, 64);
+        byteReverse(ctx->in, 16);
+        MD5Transform(ctx->buf, (uint32 *) ctx->in);
+        buf += 64;
+        len -= 64;
+    }
+
+    /* Handle any remaining bytes of data. */
+
+    memcpy(ctx->in, buf, len);
+}
+
+/*
+ * Final wrapup - pad to 64-byte boundary with the bit pattern
+ * 1 0* (64-bit count of bits processed, MSB-first)
+ *
+ * Writes the 16-byte digest to `digest` and then clears the whole context.
+ */
+void MD5Final(unsigned char digest[16], struct MD5Context *ctx)
+{
+    unsigned count;
+    unsigned char *p;
+
+    /* Compute number of bytes mod 64 */
+    count = (ctx->bits[0] >> 3) & 0x3F;
+
+    /* Set the first char of padding to 0x80. This is safe since there is
+       always at least one byte free */
+    p = ctx->in + count;
+    *p++ = 0x80;
+
+    /* Bytes of padding needed to make 64 bytes */
+    count = 64 - 1 - count;
+
+    /* Pad out to 56 mod 64 */
+    if (count < 8) {
+        /* Two lots of padding: Pad the first block to 64 bytes */
+        memset(p, 0, count);
+        byteReverse(ctx->in, 16);
+        MD5Transform(ctx->buf, (uint32 *) ctx->in);
+
+        /* Now fill the next block with 56 bytes */
+        memset(ctx->in, 0, 56);
+    } else {
+        /* Pad block to 56 bytes */
+        memset(p, 0, count - 8);
+    }
+    byteReverse(ctx->in, 14);
+
+    /* Append length in bits and transform */
+    ((uint32 *) ctx->in)[14] = ctx->bits[0];
+    ((uint32 *) ctx->in)[15] = ctx->bits[1];
+
+    MD5Transform(ctx->buf, (uint32 *) ctx->in);
+    byteReverse((unsigned char *) ctx->buf, 4);
+    memcpy(digest, ctx->buf, 16);
+    /* sizeof(*ctx), not sizeof(ctx): the latter is only the size of the
+     * pointer and left most of the context uncleared. */
+    memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */
+}
+
+/* The four core functions - F1 is optimized somewhat */
+
+/* #define F1(x, y, z) (x & y | ~x & z) */
+#define F1(x, y, z) (z ^ (x & (y ^ z)))
+#define F2(x, y, z) F1(z, x, y)
+#define F3(x, y, z) (x ^ y ^ z)
+#define F4(x, y, z) (y ^ (x | ~z))
+
+/* This is the central step in the MD5 algorithm.
+ * Add f(x, y, z) + data to w, rotate w left by s bits, then add x. */
+#define MD5STEP(f, w, x, y, z, data, s) \
+    ( w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x )
+
+/*
+ * The core of the MD5 algorithm, this alters an existing MD5 hash to
+ * reflect the addition of 16 longwords of new data. MD5Update blocks
+ * the data and converts bytes into longwords for this routine.
+ */
+void MD5Transform(uint32 buf[4], uint32 const in[16])
+{
+    register uint32 a, b, c, d;
+
+    /* Work on local copies of the four state words. */
+    a = buf[0];
+    b = buf[1];
+    c = buf[2];
+    d = buf[3];
+
+    /* First group of 16 steps, using F1; input words taken in order 0..15. */
+    MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
+    MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
+    MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
+    MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
+    MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
+    MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
+    MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
+    MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
+    MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
+    MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
+    MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
+    MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
+    MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
+    MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
+    MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
+    MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
+    /* Second group of 16 steps, using F2. */
+    MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
+    MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
+    MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
+    MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
+    MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
+    MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
+    MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
+    MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
+    MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
+    MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
+    MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
+    MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
+    MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
+    MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
+    MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
+    MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
+    /* Third group of 16 steps, using F3. */
+    MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
+    MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
+    MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
+    MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
+    MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
+    MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
+    MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
+    MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
+    MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
+    MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
+    MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
+    MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
+    MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
+    MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
+    MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
+    MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
+    /* Fourth group of 16 steps, using F4. */
+    MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
+    MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
+    MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
+    MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
+    MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
+    MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
+    MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
+    MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
+    MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
+    MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
+    MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
+    MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
+    MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
+    MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
+    MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
+    MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
+    /* Fold the result back into the caller's state. */
+    buf[0] += a;
+    buf[1] += b;
+    buf[2] += c;
+    buf[3] += d;
+}
diff --git a/rss2ctdl/md5.h b/rss2ctdl/md5.h
new file mode 100644
index 000000000..5003e6d09
--- /dev/null
+++ b/rss2ctdl/md5.h
@@ -0,0 +1,56 @@
+/*
+ * $Id$
+ *
+ * Copyright 2003-2004 Oliver Feiler
+ *
+ * md5.h
+ *
+ * This code has been slightly modified from its original.
+ * The endian check via evaluating endian.h has been
+ * replaced with the code in void byteReverse().
+ */
+
+/*
+ * This code implements the MD5 message-digest algorithm.
+ * The algorithm is due to Ron Rivest.
This code was
+ * written by Colin Plumb in 1993, no copyright is claimed.
+ * This code is in the public domain; do with it what you wish.
+ *
+ * Equivalent code is available from RSA Data Security, Inc.
+ * This code has been tested against that, and is equivalent,
+ * except that you don't need to include two pages of legalese
+ * with every copy.
+ *
+ * To compute the message digest of a chunk of bytes, declare an
+ * MD5Context structure, pass it to MD5Init, call MD5Update as
+ * needed on buffers full of bytes, and then call MD5Final, which
+ * will fill a supplied 16-byte array with the digest.
+ */
+
+#ifndef MD5_H
+#define MD5_H
+
+#include <stdint.h>
+
+/*
+ * The MD5 code rotates with w>>(32-s) and accesses its 64-byte input buffer
+ * through (uint32 *) casts, so uint32 must be exactly 32 bits wide.
+ * `unsigned long` is 64 bits on LP64 platforms, hence the fixed-width type.
+ */
+typedef uint32_t uint32;
+
+/* Running state of one MD5 computation. */
+struct MD5Context {
+    uint32 buf[4];          /* state words, folded into by MD5Transform */
+    uint32 bits[2];         /* count of bits hashed so far: low word, high word */
+    unsigned char in[64];   /* input block being accumulated */
+};
+
+void MD5Init(struct MD5Context *context);
+void MD5Update(struct MD5Context *context, unsigned char const *buf,
+    unsigned len);
+void MD5Final(unsigned char digest[16], struct MD5Context *context);
+void MD5Transform(uint32 buf[4], uint32 const in[16]);
+
+/*
+ * This is needed to make RSAREF happy on some MS-DOS compilers.
+ */
+typedef struct MD5Context MD5_CTX;
+
+#endif /* !MD5_H */
diff --git a/rss2ctdl/net-support.c b/rss2ctdl/net-support.c
new file mode 100644
index 000000000..907f25077
--- /dev/null
+++ b/rss2ctdl/net-support.c
@@ -0,0 +1,282 @@
+/*
+ * $Id$
+ *
+ * Copyright 2003-2004 Oliver Feiler
+ *
+ * net-support.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include + +#include "config.h" +#include "conversions.h" + +#include "digcalc.h" + +char * ConstructBasicAuth (char * username, char * password) { + int len; + char * authinfo; + char * authstring; + char * tmpstr; + + /* Create base64 authinfo. + + RFC 2617. Basic HTTP authentication. + Authorization: Basic username:password[base64 encoded] */ + + /* Construct the cleartext authstring. */ + len = strlen(username) + 1 + strlen(password) + 1; + authstring = malloc (len); + snprintf (authstring, len, "%s:%s", username, password); + + tmpstr = base64encode (authstring, len-1); + + /* "Authorization: Basic " + base64str + \r\n\0 */ + len = 21 + strlen(tmpstr) + 3; + authinfo = malloc (len); + snprintf (authinfo, len, "Authorization: Basic %s\r\n", tmpstr); + + free (tmpstr); + free (authstring); + + return authinfo; +} + +char * GetRandomBytes (void) { + char * randomness = NULL; + char raw[8]; + int i; + FILE * devrandom; + + devrandom = fopen ("/dev/random", "r"); + if (devrandom == NULL) { + /* Use rand() if we don't have access to /dev/random. */ + for (i = 0; i <= 7; i++) { + raw[i] = 1+(float)rand() / (float)RAND_MAX * 255; + } + } else { + fread (raw, 8, 1, devrandom); + fclose (devrandom); + } + + randomness = malloc (17); + snprintf (randomness, 17, "%hhx%hhx%hhx%hhx%hhx%hhx%hhx%hhx", + raw[0], raw[1], raw[2], raw[3], raw[4], raw[5], raw[6], raw[7]); + + return randomness; +} + +char * ConstructDigestAuth (char * username, char * password, char * url, char * authdata) { + char * authinfo; /* Authorization header as sent to the server. */ + char * token; + int len; + char * realm = NULL; /* Variables for the overcomplicated and annoying HTTP digest algo. 
*/ + char * qop = NULL; + char * nonce = NULL; + char * opaque = NULL; + char * cnonce; + char szNonceCount[9] = "00000001"; /* Can be always 1 if we never use the same cnonce twice. */ + HASHHEX HA1; + HASHHEX HA2 = ""; + HASHHEX Response; + + cnonce = GetRandomBytes(); + + while (1) { + token = strsep (&authdata, ", "); + + if (token == NULL) + break; + + if (strncasecmp (token, "realm", 5) == 0) { + len = strlen(token)-8; + memmove (token, token+7, len); + token[len] = '\0'; + realm = strdup (token); + } else if (strncasecmp (token, "qop", 3) == 0) { + len = strlen(token)-6; + memmove (token, token+5, len); + token[len] = '\0'; + qop = strdup (token); + } else if (strncasecmp (token, "nonce", 5) == 0) { + len = strlen(token)-8; + memmove (token, token+7, len); + token[len] = '\0'; + nonce = strdup (token); + } else if (strncasecmp (token, "opaque", 6) == 0) { + len = strlen(token)-9; + memmove (token, token+8, len); + token[len] = '\0'; + opaque = strdup (token); + } + } + + DigestCalcHA1 ("md5", username, realm, password, nonce, cnonce, HA1); + DigestCalcResponse(HA1, nonce, szNonceCount, cnonce, "auth", "GET", url, HA2, Response); + + /* Determine length of Authorize header. 
+ * + * Authorization: Digest username="(username)", realm="(realm)", + * nonce="(nonce)", uri="(url)", algorithm=MD5, response="(Response)", + * qop=(auth), nc=(szNonceCount), cnonce="deadbeef" + */ + if (opaque == NULL) + len = 32 + strlen(username) + 10 + strlen(realm) + 10 + strlen(nonce) + 8 + strlen(url) + 28 + strlen(Response) + 16 + strlen(szNonceCount) + 10 + strlen(cnonce) + 4 ; + else + len = 32 + strlen(username) + 10 + strlen(realm) + 10 + strlen(nonce) + 8 + strlen(url) + 28 + strlen(Response) + 16 + strlen(szNonceCount) + 10 + strlen(cnonce) + 10 + strlen(opaque) + 4; + + authinfo = malloc (len); + + if (opaque == NULL) { + snprintf (authinfo, len, "Authorization: Digest username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", algorithm=MD5, response=\"%s\", qop=auth, nc=%s, cnonce=\"%s\"\r\n", + username, realm, nonce, url, Response, szNonceCount, cnonce); + } else { + snprintf (authinfo, len, "Authorization: Digest username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", algorithm=MD5, response=\"%s\", qop=auth, nc=%s, cnonce=\"%s\", opaque=\"%s\"\r\n", + username, realm, nonce, url, Response, szNonceCount, cnonce, opaque); + } + + free (realm); + free (qop); + free (nonce); + free (cnonce); + free (opaque); + + return authinfo; +} + + +/* +Authorization: Digest username="(username)", realm="(realm)", +nonce="(nonce)", uri="(url)", algorithm=MD5, response="(Response)", +qop=(auth), nc=(szNonceCount), cnonce="deadbeef" +*/ +int NetSupportAuth (struct feed * cur_ptr, char * authdata, char * url, char * netbuf) { + char * header; + char * tmpstr; + char * freeme; + char * username = NULL; + char * password = NULL; + char * authtype = NULL; + + /* Reset cur_ptr->authinfo. */ + free (cur_ptr->authinfo); + cur_ptr->authinfo = NULL; + + /* Catch invalid authdata. */ + if (authdata == NULL) { + return 1; + } else if (strchr (authdata, ':') == NULL){ + /* No authinfo found in URL. This should not happen. 
*/ + return 1; + } + + tmpstr = strdup (authdata); + freeme = tmpstr; + + strsep (&tmpstr, ":"); + username = strdup (freeme); + password = strdup (tmpstr); + + /* Free allocated string in tmpstr. */ + free (freeme); + + /* Extract requested auth type from webserver reply. */ + header = strdup (netbuf); + freeme = header; + strsep (&header, " "); + authtype = header; + + /* Catch invalid server replies. authtype should contain at least _something_. */ + if (authtype == NULL) { + free (freeme); + free (username); + free (password); + return -1; + } + + strsep (&header, " "); + /* header now contains: + Basic auth: realm + Digest auth: realm + a lot of other stuff somehow needed by digest auth. */ + + /* Determine auth type the server requests. */ + if (strncasecmp (authtype, "Basic", 5) == 0) { + /* Basic auth. */ + cur_ptr->authinfo = ConstructBasicAuth (username, password); + } else if (strncasecmp (authtype, "Digest", 6) == 0) { + /* Digest auth. */ + cur_ptr->authinfo = ConstructDigestAuth (username, password, url, header); + } else { + /* Unkown auth type. */ + free (freeme); + free (username); + free (password); + return -1; + } + + free (username); + free (password); + free (freeme); + + if (cur_ptr->authinfo == NULL) { + return 2; + } + + return 0; +} + +/* HTTP header may only contain ASCII characters. + * + * Ensure that we don't hit the terminating \0 in a string + * with the for loop. + * The function also ensures that there is no NULL byte in the string. + * If given binary data return at once if we read beyond + * the boundary of sizeof(header). 
/*
 * Validate one HTTP header line held in a buffer of `size` bytes.
 *
 * The terminating \0 is searched for inside the first `size` bytes only,
 * so binary data without a terminator is rejected without reading beyond
 * the buffer.  Every byte before the terminator must be in the range
 * 32..127 or be '\r' or '\n'.
 *
 * Returns 0 if the line is acceptable, -1 otherwise (also for a NULL
 * pointer, a non-positive size, or a missing terminator).
 */
int checkValidHTTPHeader (const unsigned char * header, int size) {
	const unsigned char * end;
	const unsigned char * p;

	if ((header == NULL) || (size <= 0))
		return -1;

	/* Bounded replacement for strlen(): never looks at header[size]. */
	end = memchr (header, '\0', (size_t) size);
	if (end == NULL)
		return -1;

	for (p = header; p < end; p++) {
		if (((*p < 32) || (*p > 127)) &&
			(*p != '\r') && (*p != '\n'))
			return -1;
	}
	return 0;
}

/*
 * Validate a feed URL: it must start with "http://" (compared without
 * regard to case) and consist only of bytes in the range 32..126.
 *
 * Returns 0 if the URL is acceptable, -1 otherwise (also for NULL).
 */
int checkValidHTTPURL (const unsigned char * url) {
	const unsigned char * p;

	if (url == NULL)
		return -1;

	if (strncasecmp ((const char *) url, "http://", 7) != 0)
		return -1;

	for (p = url; *p != '\0'; p++) {
		if ((*p < 32) || (*p > 126))
			return -1;
	}

	return 0;
}
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef NET_SUPPORT_H +#define NET_SUPPORT_H + +int NetSupportAuth (struct feed * cur_ptr, char * authdata, char * url, char * netbuf); +int checkValidHTTPHeader (const unsigned char * header, int size); +int checkValidHTTPURL (const unsigned char * url); + +#endif diff --git a/rss2ctdl/netio.c b/rss2ctdl/netio.c new file mode 100644 index 000000000..f9a63709f --- /dev/null +++ b/rss2ctdl/netio.c @@ -0,0 +1,903 @@ +/* + * $Id$ + * + * Copyright 2003-2004 Oliver Feiler + * + * netio.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* OS X needs this, otherwise socklen_t is not defined. */ +#ifdef __APPLE__ +# define _BSD_SOCKLEN_T_ +#endif + +/* BeOS does not define socklen_t. Using uint as suggested by port creator. 
*/ +#ifdef __BEOS__ +# define socklen_t unsigned int +#endif + +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "main.h" +#include "conversions.h" +#include "net-support.h" +#include "io-internal.h" +#include "zlib_interface.h" + +static int const MAX_HTTP_REDIRECTS = 10; /* Maximum number of redirects we will follow. */ +static int const NET_TIMEOUT = 20; /* Global network timeout in sec */ +static int const NET_READ = 1; +static int const NET_WRITE = 2; + +extern char *proxyname; /* Hostname of proxyserver. */ +extern unsigned short proxyport; /* Port on proxyserver to use. */ + +/* Masquerade as Firefox on Linux to increase the share of both in web server statistics. */ +char *useragent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0"; + +/* Waits NET_TIMEOUT seconds for the socket to return data. + * + * Returns + * + * 0 Socket is ready + * -1 Error occured (netio_error is set) + */ +int NetPoll (struct feed * cur_ptr, int * my_socket, int rw) { + fd_set rfdsr; + fd_set rfdsw; + struct timeval tv; + int retval; /* FD_ISSET + assert == Heisenbug? */ + + /* Set global network timeout */ + tv.tv_sec = NET_TIMEOUT; + tv.tv_usec = 0; + + FD_ZERO(&rfdsr); + FD_ZERO(&rfdsw); + + if (rw == NET_READ) { + FD_SET(*my_socket, &rfdsr); + if (select (*my_socket+1, &rfdsr, NULL, NULL, &tv) == 0) { + /* Timed out */ + cur_ptr->netio_error = NET_ERR_TIMEOUT; + return -1; + } + retval = FD_ISSET (*my_socket, &rfdsr); + assert (retval); + if (!retval) { + /* Wtf? */ + cur_ptr->netio_error = NET_ERR_UNKNOWN; + return -1; + } + } else if (rw == NET_WRITE) { + FD_SET(*my_socket, &rfdsw); + if (select (*my_socket+1, NULL, &rfdsw, NULL, &tv) == 0) { + /* Timed out */ + cur_ptr->netio_error = NET_ERR_TIMEOUT; + return -1; + } + retval = FD_ISSET (*my_socket, &rfdsw); + assert (retval); + if (!retval) { + /* Wtf? 
*/ + cur_ptr->netio_error = NET_ERR_UNKNOWN; + return -1; + } + } else { + cur_ptr->netio_error = NET_ERR_UNKNOWN; + return -1; + } + + return 0; +} + + +/* Connect network sockets. + * + * Returns + * + * 0 Connected + * -1 Error occured (netio_error is set) + */ +int NetConnect (int * my_socket, char * host, struct feed * cur_ptr, int httpproto, int suppressoutput) { + char tmp[512]; + struct sockaddr_in address; + struct hostent *remotehost; + socklen_t len; + char *realhost; + unsigned short port; + + realhost = strdup(host); + if (sscanf (host, "%[^:]:%hd", realhost, &port) != 2) { + port = 80; + } + + /* Create a inet stream TCP socket. */ + *my_socket = socket (AF_INET, SOCK_STREAM, 0); + if (*my_socket == -1) { + cur_ptr->netio_error = NET_ERR_SOCK_ERR; + return -1; + } + + /* If proxyport is 0 we didn't execute the if http_proxy statement in main + so there is no proxy. On any other value of proxyport do proxyrequests instead. */ + if (proxyport == 0) { + /* Lookup remote IP. */ + remotehost = gethostbyname (realhost); + if (remotehost == NULL) { + close (*my_socket); + free (realhost); + cur_ptr->netio_error = NET_ERR_HOST_NOT_FOUND; + return -1; + } + + /* Set the remote address. */ + address.sin_family = AF_INET; + address.sin_port = htons(port); + memcpy (&address.sin_addr.s_addr, remotehost->h_addr_list[0], remotehost->h_length); + + /* Connect socket. */ + cur_ptr->connectresult = connect (*my_socket, (struct sockaddr *) &address, sizeof(address)); + + /* Check if we're already connected. + BSDs will return 0 on connect even in nonblock if connect was fast enough. */ + if (cur_ptr->connectresult != 0) { + /* If errno is not EINPROGRESS, the connect went wrong. 
*/ + if (errno != EINPROGRESS) { + close (*my_socket); + free (realhost); + cur_ptr->netio_error = NET_ERR_CONN_REFUSED; + return -1; + } + + if ((NetPoll (cur_ptr, my_socket, NET_WRITE)) == -1) { + close (*my_socket); + free (realhost); + return -1; + } + + /* We get errno of connect back via getsockopt SO_ERROR (into connectresult). */ + len = sizeof(cur_ptr->connectresult); + getsockopt(*my_socket, SOL_SOCKET, SO_ERROR, &cur_ptr->connectresult, &len); + + if (cur_ptr->connectresult != 0) { + close (*my_socket); + free (realhost); + cur_ptr->netio_error = NET_ERR_CONN_FAILED; /* ->strerror(cur_ptr->connectresult) */ + return -1; + } + } + } else { + /* Lookup proxyserver IP. */ + remotehost = gethostbyname (proxyname); + if (remotehost == NULL) { + close (*my_socket); + free (realhost); + cur_ptr->netio_error = NET_ERR_HOST_NOT_FOUND; + return -1; + } + + /* Set the remote address. */ + address.sin_family = AF_INET; + address.sin_port = htons(proxyport); + memcpy (&address.sin_addr.s_addr, remotehost->h_addr_list[0], remotehost->h_length); + + /* Connect socket. */ + cur_ptr->connectresult = connect (*my_socket, (struct sockaddr *) &address, sizeof(address)); + + /* Check if we're already connected. + BSDs will return 0 on connect even in nonblock if connect was fast enough. */ + if (cur_ptr->connectresult != 0) { + if (errno != EINPROGRESS) { + close (*my_socket); + free (realhost); + cur_ptr->netio_error = NET_ERR_CONN_REFUSED; + return -1; + } + + if ((NetPoll (cur_ptr, my_socket, NET_WRITE)) == -1) { + close (*my_socket); + free (realhost); + return -1; + } + + len = sizeof(cur_ptr->connectresult); + getsockopt(*my_socket, SOL_SOCKET, SO_ERROR, &cur_ptr->connectresult, &len); + + if (cur_ptr->connectresult != 0) { + close (*my_socket); + free (realhost); + cur_ptr->netio_error = NET_ERR_CONN_FAILED; /* ->strerror(cur_ptr->connectresult) */ + return -1; + } + } + } + + free (realhost); + return 0; +} + + +/* + * Main network function. 
+ * (Now with a useful function description *g*) + * + * This function returns the HTTP request's body (deflating gzip encoded data + * if needed). + * Updates passed feed struct with values gathered from webserver. + * Handles all redirection and HTTP status decoding. + * Returns NULL pointer if no data was received and sets netio_error. + */ +char * NetIO (int * my_socket, char * host, char * url, struct feed * cur_ptr, char * authdata, int httpproto, int suppressoutput) { + char netbuf[4096]; /* Network read buffer. */ + char *body; /* XML body. */ + unsigned int length; + FILE *stream; /* Stream socket. */ + int chunked = 0; /* Content-Encoding: chunked received? */ + int redirectcount; /* Number of HTTP redirects followed. */ + char httpstatus[4]; /* HTTP status sent by server. */ + char servreply[128]; /* First line of server reply */ + char *tmpstatus; + char *savestart; /* Save start position of pointers. */ + char *tmphost; /* Pointers needed to strsep operation. */ + char *newhost; /* New hostname if we need to redirect. */ + char *newurl; /* New document name ". */ + char *newlocation; + char *tmpstring; /* Temp pointers. */ + char *freeme, *freeme2; + char *redirecttarget; + int retval; + int handled; + int tmphttpstatus; + int inflate = 0; /* Whether feed data needs decompressed with zlib. */ + int len; + char * inflatedbody; + int quirksmode = 0; /* IIS operation mode. */ + int authfailed = 0; /* Avoid repeating failed auth requests endlessly. */ + + + if (!suppressoutput) { + if (cur_ptr->title == NULL) + fprintf(stderr, "Downloading http://%s%s\n", host, url); + else + fprintf(stderr, "Downloading %s\n", cur_ptr->title); + + } + + redirectcount = 0; + + /* Goto label to redirect reconnect. */ + tryagain: + + /* Reconstruct digest authinfo for every request so we don't reuse + the same nonce value for more than one request. + This happens one superflous time on 303 redirects. 
*/ + if ((cur_ptr->authinfo != NULL) && (cur_ptr->servauth != NULL)) { + if (strstr (cur_ptr->authinfo, " Digest ") != NULL) { + NetSupportAuth(cur_ptr, authdata, url, cur_ptr->servauth); + } + } + + /* Open socket. */ + stream = fdopen (*my_socket, "r+"); + if (stream == NULL) { + /* This is a serious non-continueable OS error as it will probably not + go away if we retry. + + BeOS will stupidly return SUCCESS here making this code silently fail on BeOS. */ + cur_ptr->netio_error = NET_ERR_SOCK_ERR; + return NULL; + } + + /* Again is proxyport == 0, non proxy mode, otherwise make proxy requests. */ + if (proxyport == 0) { + /* Request URL from HTTP server. */ + if (cur_ptr->lastmodified != NULL) { + fprintf(stream, + "GET %s HTTP/1.0\r\nAccept-Encoding: gzip\r\nUser-Agent: %s\r\nConnection: close\r\nHost: %s\r\nIf-Modified-Since: %s\r\n%s%s\r\n", + url, + useragent, + host, + cur_ptr->lastmodified, + (cur_ptr->authinfo ? cur_ptr->authinfo : ""), + (cur_ptr->cookies ? cur_ptr->cookies : "")); + } else { + fprintf(stream, + "GET %s HTTP/1.0\r\nAccept-Encoding: gzip\r\nUser-Agent: %s\r\nConnection: close\r\nHost: %s\r\n%s%s\r\n", + url, + useragent, + host, + (cur_ptr->authinfo ? cur_ptr->authinfo : ""), + (cur_ptr->cookies ? cur_ptr->cookies : "")); + } + fflush(stream); /* We love Solaris, don't we? */ + } else { + /* Request URL from HTTP server. */ + if (cur_ptr->lastmodified != NULL) { + fprintf(stream, + "GET http://%s%s HTTP/1.0\r\nAccept-Encoding: gzip\r\nUser-Agent: %s\r\nConnection: close\r\nHost: %s\r\nIf-Modified-Since: %s\r\n%s%s\r\n", + host, + url, + useragent, + host, + cur_ptr->lastmodified, + (cur_ptr->authinfo ? cur_ptr->authinfo : ""), + (cur_ptr->cookies ? cur_ptr->cookies : "")); + } else { + fprintf(stream, + "GET http://%s%s HTTP/1.0\r\nAccept-Encoding: gzip\r\nUser-Agent: %s\r\nConnection: close\r\nHost: %s\r\n%s%s\r\n", + host, + url, + useragent, + host, + (cur_ptr->authinfo ? cur_ptr->authinfo : ""), + (cur_ptr->cookies ? 
cur_ptr->cookies : "")); + } + fflush(stream); /* We love Solaris, don't we? */ + } + + if ((NetPoll (cur_ptr, my_socket, NET_READ)) == -1) { + fclose (stream); + return NULL; + } + + if ((fgets (servreply, sizeof(servreply), stream)) == NULL) { + fclose (stream); + return NULL; + } + if (checkValidHTTPHeader(servreply, sizeof(servreply)) != 0) { + cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR; + fclose (stream); + return NULL; + } + + tmpstatus = strdup(servreply); + savestart = tmpstatus; + + memset (httpstatus, 0, 4); /* Nullify string so valgrind shuts up. */ + /* Set pointer to char after first space. + HTTP/1.0 200 OK + ^ + Copy three bytes into httpstatus. */ + strsep (&tmpstatus, " "); + if (tmpstatus == NULL) { + cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR; + fclose (stream); + free (savestart); /* Probably more leaks when doing auth and abort here. */ + return NULL; + } + strncpy (httpstatus, tmpstatus, 3); + free (savestart); + + cur_ptr->lasthttpstatus = atoi (httpstatus); + + /* If the redirectloop was run newhost and newurl were allocated. + We need to free them here. */ + if ((redirectcount > 0) && (authdata == NULL)) { + free (host); + free (url); + } + + tmphttpstatus = cur_ptr->lasthttpstatus; + handled = 1; + /* Check HTTP server response and handle redirects. */ + do { + switch (tmphttpstatus) { + case 200: /* OK */ + /* Received good status from server, clear problem field. */ + cur_ptr->netio_error = NET_ERR_OK; + cur_ptr->problem = 0; + break; + case 300: /* Multiple choice and everything 300 not handled is fatal. */ + cur_ptr->netio_error = NET_ERR_HTTP_NON_200; + fclose (stream); + return NULL; + case 301: + /* Permanent redirect. Change feed->feedurl to new location. + Done some way down when we have extracted the new url. */ + case 302: /* Found */ + case 303: /* See Other */ + case 307: /* Temp redirect. This is HTTP/1.1 */ + redirectcount++; + + /* Give up if we reach MAX_HTTP_REDIRECTS to avoid loops. 
*/ + if (redirectcount > MAX_HTTP_REDIRECTS) { + cur_ptr->netio_error = NET_ERR_REDIRECT_COUNT_ERR; + fclose (stream); + return NULL; + } + + while (!feof(stream)) { + if ((fgets (netbuf, sizeof(netbuf), stream)) == NULL) { + /* Something bad happened. Server sent stupid stuff. */ + cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR; + fclose (stream); + return NULL; + } + + if (checkValidHTTPHeader(netbuf, sizeof(netbuf)) != 0) { + cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR; + fclose (stream); + return NULL; + } + + /* Split netbuf into hostname and trailing url. + Place hostname in *newhost and tail into *newurl. + Close old connection and reconnect to server. + + Do not touch any of the following code! :P */ + if (strncasecmp (netbuf, "Location", 8) == 0) { + redirecttarget = strdup (netbuf); + freeme = redirecttarget; + + /* Remove trailing \r\n from line. */ + redirecttarget[strlen(redirecttarget)-2] = 0; + + /* In theory pointer should now be after the space char + after the word "Location:" */ + strsep (&redirecttarget, " "); + + if (redirecttarget == NULL) { + cur_ptr->problem = 1; + cur_ptr->netio_error = NET_ERR_REDIRECT_ERR; + free (freeme); + fclose (stream); + return NULL; + } + + /* Location must start with "http", otherwise switch on quirksmode. */ + if (strncmp(redirecttarget, "http", 4) != 0) + quirksmode = 1; + + /* If the Location header is invalid we need to construct + a correct one here before proceeding with the program. + This makes headers like + "Location: fuck-the-protocol.rdf" work. + In violalation of RFC1945, RFC2616. */ + if (quirksmode) { + len = 7 + strlen(host) + strlen(redirecttarget) + 3; + newlocation = malloc(len); + memset (newlocation, 0, len); + strcat (newlocation, "http://"); + strcat (newlocation, host); + if (redirecttarget[0] != '/') + strcat (newlocation, "/"); + strcat (newlocation, redirecttarget); + } else + newlocation = strdup (redirecttarget); + + /* This also frees redirecttarget. 
*/ + free (freeme); + + /* Change cur_ptr->feedurl on 301. */ + if (cur_ptr->lasthttpstatus == 301) { + /* Check for valid redirection URL */ + if (checkValidHTTPURL(newlocation) != 0) { + cur_ptr->problem = 1; + cur_ptr->netio_error = NET_ERR_REDIRECT_ERR; + fclose (stream); + return NULL; + } + if (!suppressoutput) { + fprintf(stderr, "URL points to permanent redirect, updating with new location...\n"); + } + free (cur_ptr->feedurl); + if (authdata == NULL) + cur_ptr->feedurl = strdup (newlocation); + else { + /* Include authdata in newly constructed URL. */ + len = strlen(authdata) + strlen(newlocation) + 2; + cur_ptr->feedurl = malloc (len); + newurl = strdup(newlocation); + freeme2 = newurl; + strsep (&newurl, "/"); + strsep (&newurl, "/"); + snprintf (cur_ptr->feedurl, len, "http://%s@%s", authdata, newurl); + free (freeme2); + } + } + + freeme = newlocation; + strsep (&newlocation, "/"); + strsep (&newlocation, "/"); + tmphost = newlocation; + /* The following line \0-terminates tmphost in overwriting the first + / after the hostname. */ + strsep (&newlocation, "/"); + + /* newlocation must now be the absolute path on newhost. + If not we've been redirected to somewhere totally stupid + (oh yeah, no offsite linking, go to our fucking front page). + Say goodbye to the webserver in this case. In fact, we don't + even say goodbye, but just drop the connection. */ + if (newlocation == NULL) { + cur_ptr->netio_error = NET_ERR_REDIRECT_ERR; + fclose (stream); + return NULL; + } + + newhost = strdup (tmphost); + newlocation--; + newlocation[0] = '/'; + newurl = strdup (newlocation); + + free (freeme); + + /* Close connection. */ + fclose (stream); + + /* Reconnect to server. */ + if ((NetConnect (my_socket, newhost, cur_ptr, httpproto, suppressoutput)) != 0) { + return NULL; + } + + host = newhost; + url = newurl; + + goto tryagain; + } + } + break; + case 304: + /* Not modified received. We can close stream and return from here. + Not very friendly though. 
:) */ + fclose (stream); + /* Received good status from server, clear problem field. */ + cur_ptr->netio_error = NET_ERR_OK; + cur_ptr->problem = 0; + + /* This should be freed everywhere where we return + and current feed uses auth. */ + if ((redirectcount > 0) && (authdata != NULL)) { + free (host); + free (url); + } + return NULL; + case 401: + /* Authorization. + Parse rest of header and rerequest URL from server using auth mechanism + requested in WWW-Authenticate header field. (Basic or Digest) */ + break; + case 404: + cur_ptr->netio_error = NET_ERR_HTTP_404; + fclose (stream); + return NULL; + case 410: /* The feed is gone. Politely remind the user to unsubscribe. */ + cur_ptr->netio_error = NET_ERR_HTTP_410; + fclose (stream); + return NULL; + case 400: + cur_ptr->netio_error = NET_ERR_HTTP_NON_200; + fclose (stream); + return NULL; + default: + /* unknown error codes have to be treated like the base class */ + if (handled) { + /* first pass, modify error code to base class */ + handled = 0; + tmphttpstatus -= tmphttpstatus % 100; + } else { + /* second pass, give up on unknown error base class */ + cur_ptr->netio_error = NET_ERR_HTTP_NON_200; + fclose (stream); + return NULL; + } + } + } while(!handled); + + /* Read rest of HTTP header and parse what we need. */ + while (!feof(stream)) { + if ((NetPoll (cur_ptr, my_socket, NET_READ)) == -1) { + fclose (stream); + return NULL; + } + + if ((fgets (netbuf, sizeof(netbuf), stream)) == NULL) + break; + + if (checkValidHTTPHeader(netbuf, sizeof(netbuf)) != 0) { + cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR; + fclose (stream); + return NULL; + } + + if (strncasecmp (netbuf, "Transfer-Encoding", 17) == 0) { + /* Chunked transfer encoding. HTTP/1.1 extension. + http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1 */ + if (strstr (netbuf, "chunked") != NULL) + chunked = 1; + } + /* Get last modified date. This is only relevant on HTTP 200. 
*/ + if ((strncasecmp (netbuf, "Last-Modified", 13) == 0) && + (cur_ptr->lasthttpstatus == 200)) { + tmpstring = strdup(netbuf); + freeme = tmpstring; + strsep (&tmpstring, " "); + if (tmpstring == NULL) + free (freeme); + else { + free(cur_ptr->lastmodified); + cur_ptr->lastmodified = strdup(tmpstring); + if (cur_ptr->lastmodified[strlen(cur_ptr->lastmodified)-1] == '\n') + cur_ptr->lastmodified[strlen(cur_ptr->lastmodified)-1] = '\0'; + if (cur_ptr->lastmodified[strlen(cur_ptr->lastmodified)-1] == '\r') + cur_ptr->lastmodified[strlen(cur_ptr->lastmodified)-1] = '\0'; + free(freeme); + } + } + if (strncasecmp (netbuf, "Content-Encoding", 16) == 0) { + if (strstr (netbuf, "gzip") != NULL) + inflate = 1; + } + if (strncasecmp (netbuf, "Content-Type", 12) == 0) { + tmpstring = strdup(netbuf); + freeme = tmpstring; + strsep(&tmpstring, " "); + if (tmpstring == NULL) + free (freeme); + else { + freeme2 = NULL; + freeme2 = strstr(tmpstring, ";"); + if (freeme2 != NULL) + freeme2[0] = '\0'; + free(cur_ptr->content_type); + cur_ptr->content_type = strdup(tmpstring); + if (cur_ptr->content_type[strlen(cur_ptr->content_type)-1] == '\n') + cur_ptr->content_type[strlen(cur_ptr->content_type)-1] = '\0'; + if (cur_ptr->content_type[strlen(cur_ptr->content_type)-1] == '\r') + cur_ptr->content_type[strlen(cur_ptr->content_type)-1] = '\0'; + free(freeme); + } + } + /* HTTP authentication + * + * RFC 2617 */ + if ((strncasecmp (netbuf, "WWW-Authenticate", 16) == 0) && + (cur_ptr->lasthttpstatus == 401)) { + if (authfailed) { + /* Don't repeat authrequest if it already failed before! */ + cur_ptr->netio_error = NET_ERR_AUTH_FAILED; + fclose (stream); + return NULL; + } + + /* Remove trailing \r\n from line. */ + if (netbuf[strlen(netbuf)-1] == '\n') + netbuf[strlen(netbuf)-1] = '\0'; + if (netbuf[strlen(netbuf)-1] == '\r') + netbuf[strlen(netbuf)-1] = '\0'; + + authfailed++; + + /* Make a copy of the WWW-Authenticate header. We use it to + reconstruct a new auth reply on every loop. 
*/ + free (cur_ptr->servauth); + + cur_ptr->servauth = strdup (netbuf); + + /* Load authinfo into cur_ptr->authinfo. */ + retval = NetSupportAuth(cur_ptr, authdata, url, netbuf); + + switch (retval) { + case 1: + cur_ptr->netio_error = NET_ERR_AUTH_NO_AUTHINFO; + fclose (stream); + return NULL; + break; + case 2: + cur_ptr->netio_error = NET_ERR_AUTH_GEN_AUTH_ERR; + fclose (stream); + return NULL; + break; + case -1: + cur_ptr->netio_error = NET_ERR_AUTH_UNSUPPORTED; + fclose (stream); + return NULL; + break; + default: + break; + } + + /* Close current connection and reconnect to server. */ + fclose (stream); + if ((NetConnect (my_socket, host, cur_ptr, httpproto, suppressoutput)) != 0) { + return NULL; + } + + /* Now that we have an authinfo, repeat the current request. */ + goto tryagain; + } + /* This seems to be optional and probably not worth the effort since we + don't issue a lot of consecutive requests. */ + /*if ((strncasecmp (netbuf, "Authentication-Info", 19) == 0) || + (cur_ptr->lasthttpstatus == 200)) { + + }*/ + + /* HTTP RFC 2616, Section 19.3 Tolerant Applications. + Accept CRLF and LF line ends in the header field. */ + if ((strcmp(netbuf, "\r\n") == 0) || (strcmp(netbuf, "\n") == 0)) + break; + } + + /* If the redirectloop was run newhost and newurl were allocated. + We need to free them here. + But _after_ the authentication code since it needs these values! */ + if ((redirectcount > 0) && (authdata != NULL)) { + free (host); + free (url); + } + + /********************** + * End of HTTP header * + **********************/ + + /* Init pointer so strncat works. + Workaround class hack. */ + body = malloc(1); + body[0] = '\0'; + + length = 0; + + /* Read stream until EOF and return it to parent. */ + while (!feof(stream)) { + if ((NetPoll (cur_ptr, my_socket, NET_READ)) == -1) { + fclose (stream); + return NULL; + } + + /* Since we handle binary data if we read compressed input we + need to use fread instead of fgets after reading the header. 
*/ + retval = fread (netbuf, 1, sizeof(netbuf), stream); + if (retval == 0) + break; + body = realloc (body, length+retval); + memcpy (body+length, netbuf, retval); + length += retval; + if (retval != 4096) + break; + } + body = realloc(body, length+1); + body[length] = '\0'; + + cur_ptr->content_length = length; + + /* Close connection. */ + fclose (stream); + + if (chunked) { + if (decodechunked(body, &length) == NULL) { + free (body); + cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR; + return NULL; + } + } + + /* If inflate==1 we need to decompress the content.. */ + if (inflate == 1) { + /* gzipinflate */ + /*inflatedbody = gzip_uncompress (body, length, &cur_ptr->content_length); + if (inflatedbody == NULL) { + free (body); + cur_ptr->netio_error = NET_ERR_GZIP_ERR; + return NULL; + }*/ + if (jg_gzip_uncompress (body, length, (void **)&inflatedbody, &cur_ptr->content_length) != 0) { + free (body); + cur_ptr->netio_error = NET_ERR_GZIP_ERR; + return NULL; + } + + /* Copy uncompressed data back to body. */ + free (body); + body = inflatedbody; + } + + return body; +} + +/* Returns allocated string with body of webserver reply. + Various status info put into struct feed *cur_ptr. + Set suppressoutput=1 to disable diagnostic output. */ +char *DownloadFeed(char *url, struct feed *cur_ptr, int suppressoutput) { + int my_socket = 0; + int url_fixup = 0; + char *host; /* Needs to freed. */ + char *tmphost; + char *freeme; + char *returndata; + char *authdata = NULL; + char *tmpstr; + int httpproto = 0; /* 0: http; 1: https */ + + if (checkValidHTTPURL(url) != 0) { + cur_ptr->problem = 1; + cur_ptr->netio_error = NET_ERR_HTTP_PROTO_ERR; + return NULL; + } + /* strstr will match _any_ substring. Not good, use strncasecmp with length 5! */ + if (strncasecmp (url, "https", 5) == 0) + httpproto = 1; + else + httpproto = 0; + + strsep (&url, "/"); + strsep (&url, "/"); + tmphost = url; + strsep (&url, "/"); + if (url == NULL) { + /* Assume "/" is input is exhausted. 
*/ + url = strdup("/"); + url_fixup = 1; + } + + /* If tmphost contains an '@', extract username and pwd. */ + if (strchr (tmphost, '@') != NULL) { + tmpstr = tmphost; + strsep (&tmphost, "@"); + authdata = strdup (tmpstr); + } + + host = strdup (tmphost); + + /* netio() might change pointer of host to something else if redirect + loop is executed. Make a copy so we can correctly free everything. */ + freeme = host; + /* Only run if url was != NULL above. */ + if (!url_fixup) { + url--; + url[0] = '/'; + if (url[strlen(url)-1] == '\n') { + url[strlen(url)-1] = '\0'; + } + } + + if ((NetConnect (&my_socket, host, cur_ptr, httpproto, suppressoutput)) != 0) { + free (freeme); + free (authdata); + if (url_fixup) + free(url); + cur_ptr->problem = 1; + return NULL; + } + returndata = NetIO (&my_socket, host, url, cur_ptr, authdata, httpproto, suppressoutput); + if ((returndata == NULL) && (cur_ptr->netio_error != NET_ERR_OK)) { + cur_ptr->problem = 1; + } + + /* url will be freed in the calling function. */ + free (freeme); /* This is *host. */ + free (authdata); + if (url_fixup) + free(url); + + return returndata; +} diff --git a/rss2ctdl/netio.h b/rss2ctdl/netio.h new file mode 100644 index 000000000..2ef3638e7 --- /dev/null +++ b/rss2ctdl/netio.h @@ -0,0 +1,55 @@ +/* + * $Id$ + * + * Copyright 2003-2004 Oliver Feiler + * + * netio.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
/* Result codes of the network layer, as assigned to the netio_error
   field of struct feed by the functions in netio.c. */
typedef enum {
	NET_ERR_OK,						/* Set on HTTP 200 and on 304 (not modified). */
	/* Init errors */
	NET_ERR_URL_INVALID,			/* Not assigned anywhere in netio.c. */
	/* Connect errors */
	NET_ERR_SOCK_ERR,				/* socket() or fdopen() on the socket failed. */
	NET_ERR_HOST_NOT_FOUND,			/* gethostbyname() returned no entry. */
	NET_ERR_CONN_REFUSED,			/* connect() failed with errno != EINPROGRESS. */
	NET_ERR_CONN_FAILED,			/* SO_ERROR was non-zero after a pending connect. */
	NET_ERR_TIMEOUT,				/* select() timed out in NetPoll. */
	NET_ERR_UNKNOWN,				/* Unexpected state in NetPoll. */
	/* Transfer errors */
	NET_ERR_REDIRECT_COUNT_ERR,		/* More than MAX_HTTP_REDIRECTS redirects. */
	NET_ERR_REDIRECT_ERR,			/* Location header unusable or invalid. */
	NET_ERR_HTTP_410,				/* Server answered 410. */
	NET_ERR_HTTP_404,				/* Server answered 404. */
	NET_ERR_HTTP_NON_200,			/* 300, 400 or another status that is not handled. */
	NET_ERR_HTTP_PROTO_ERR,			/* Invalid header/status line, bad chunked data or invalid URL. */
	NET_ERR_AUTH_FAILED,			/* Server asked for authentication a second time. */
	NET_ERR_AUTH_NO_AUTHINFO,		/* NetSupportAuth returned 1 (no credentials). */
	NET_ERR_AUTH_GEN_AUTH_ERR,		/* NetSupportAuth returned 2 (no header generated). */
	NET_ERR_AUTH_UNSUPPORTED,		/* NetSupportAuth returned -1 (invalid/unknown scheme). */
	NET_ERR_GZIP_ERR				/* jg_gzip_uncompress failed. */
} netio_error_type;
/******************************************************************************
 * This is a replacement for strsep which is not portable (missing on Solaris).
 *
 * http://www.winehq.com/hypermail/wine-patches/2001/11/0024.html
 *
 * The following function was written by François Gouget.
 *
 * Returns the token starting at *str, \0-terminated at the first character
 * found in delims, and advances *str past that delimiter.  When no
 * delimiter is left the rest of the string is returned and *str becomes
 * NULL; when *str is already NULL, NULL is returned.
 */
#ifdef SUN
char* strsep(char** str, const char* delims)
{
    char* token;

    if (*str==NULL) {
        /* No more tokens */
        return NULL;
    }

    token=*str;
    while (**str!='\0') {
        if (strchr(delims,**str)!=NULL) {
            **str='\0';
            (*str)++;
            return token;
        }
        (*str)++;
    }
    /* There is no other token */
    *str=NULL;
    return token;
}
#endif

/* Case-insensitive substring search.
 * strcasestr adapted from: http://www.unixpapa.com/incnote/string.html
 *
 * Returns a pointer to the first occurrence of b in a, compared without
 * regard to case, or NULL if there is none.  An empty b yields NULL
 * (unlike the GNU strcasestr, which returns a).
 */
char *s_strcasestr(char *a, char *b) {
	size_t l;
	char f[3];
	size_t lena = strlen(a);
	size_t lenb = strlen(b);

	/* Both cases of b's first character; strcspn() then jumps straight to
	   the next candidate position.  The <ctype.h> functions require a value
	   representable as unsigned char, hence the casts. */
	f[0] = (char) tolower((unsigned char) *b);
	f[1] = (char) toupper((unsigned char) *b);
	f[2] = '\0';

	for (l = strcspn(a, f); l != lena; l += strcspn(a + l + 1, f) + 1)
		if (strncasecmp(a + l, b, lenb) == 0)
			return(a + l);
	return(NULL);
}


/* Private malloc wrapper. Aborts program execution if malloc fails.
 * A request for 0 bytes is served as 1 byte, because malloc(0) may return
 * NULL without being out of memory, which must not abort the program. */
void * s_malloc (size_t size) {
	void *newmem;

	if (size == 0)
		size = 1;

	newmem = malloc (size);

	if (newmem == NULL) {
		fprintf(stderr, "Error allocating memory: %s\n", strerror(errno));
		abort();
	}

	return newmem;
}
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef OS_SUPPORT_H +#define OS_SUPPORT_H + +#ifdef SUN +char* strsep(char** str, const char* delims); +#endif + +char *s_strcasestr(char *a, char *b); +void * s_malloc (size_t size); + +#endif diff --git a/rss2ctdl/rss2ctdl b/rss2ctdl/rss2ctdl new file mode 100755 index 0000000000000000000000000000000000000000..ec813ca1169c66b160e4e1072e9e76d3c003cb25 GIT binary patch literal 39984 zcmeIb4R}=5wKsl}Oklt~B-*H`R7Zsh3Lz*cB5EKEAIitbho99kBoj_B$;8YI1{H-4 z(=r`nvFfe2)fQ`Oty;Cc+Nu?sh!SnBMQbg#T#7AiPnxJvQ;mwu`}?iE_n9*x>Am-T z-v9sqJpboCu(S8tYwfkyT6^t}bM~HdZVoP zHmsi7SU07yp*hi35sOxwuF9fnk$u6^RZ^C6Fi`Glgart^=Bmj0JmM1&=4<#U#I#K* z!dVC>A0b>?KmQxT3ppf8jooQH54!r2J_(`(d!vp~rpy2A&- zM+2s>v0ZqvZAMG#&_5G&7V+X+h)*3vHqrx6LV66sF$i!8@df&Xxuzg-4#F1@jx(8v zt906r_!NW+4SS=P3KaAJBHA>q6+@Xgqz8_!~6*u7=|pzC*)(8eW8$^e1chM;iYTVqS(n z+lkjjTAzzFeHeKxe-e?njE|j#bW1zN3tUH_MX%EI59#`hZ(k^Q%X(Gg57YQFG<>PX zb3EZiUuAu#0&XiU_OnkMZXIpis_8$`<)>--TQz-;#L;0mAb%{sLY8J`L~J z`0F*lM>Twsh9A-JgPMMqhAB6%Yc;>SwY^G!;6@ixz`GmhOkoprqG)~iV-XPw>oYvOVc)FU-!t*X$I%mnEY71HOTO*Nq z(OM=EEr`UIN2BpzW27n49Jj&^%?)v)R>WItn_8G!9;uD4i?l9@G(}t2Gh=DAF0!~G z7DrJ~H8iiW!u2R-g%>PdHh0eA@Ur>yR|HpvSI(KcI2dLD)@X$wU9p?X>Dzeh8v@`HSvaMGkG?Vb&RVoJETGm^LKpbtg>g%E{k!GtY($oY^ ztypslw2IeT^=l&W7_&(ok2XfHi-0&1t+$$L8c_o|#G{EuD;BN2CK9(|kw(f;U*DLB 
z*+e$i*2E$-W6kPlD-fD5>#qTMycPcy9cY9e6+tP*JXTxNT(8-nFqI`#muR6(F;>`G z1Ixr4nqb~AO$syC*EB#N)q0TMj>h7v*EiRIBxcmAZH&eMf+p6|jJOVFAQ+3ZSX3F> zL|fvP9gjB0tVkOiqFxMC6OW>m7cE1B)ipGS6R}7g>ZM(E6E@W}kY8&ftO>)kw7_1h z#)jF%2O@Pf@fws_9gC?VRyYC;7A#&gw>o?}+R>z@(S-}~uLz+4fshZu{A0gkuECjn z#;T;sQ4}goTJp>NE0FTcSN~XpS`RBOst*1o<)6Tr<9~p0Lj#r(KI=y$#6=!Qpbrwy z>u|vrSw91wd5XT!rLV)-HX^9TykaacICd0cGKnL9B>1#3M%fz~7g^Ru#!z}QV=P{_ zFveo#7RDGgw=%|}>JG+`>@LPwINr?|i>Pfn|9gxvNIt;0)UqCC3;`cwe7I#j&KLv8 zlZ>&*dxkLrCnh36`~= zaRBRg#wS`CxRk&YbJjb#^j4`>aWIWHZE@eF5vR0-4n(XyEZ=ngD*=RrK-CE0fX@`}a z(Fo156I$>W+F98NainY}yZ2>rO=u%bT=y=4ZzN1t>h2Jj9$}*etnSAIrf1jz!rKI< zhuE-~)xA~VTL{AvR`*7Mw-ScNR(FfQcMz^3e6_%L5nf0*B=FsYLxigYrbpSA60Q`O zo@HN6I3O@R%%(!!Wdc7;xP`DK@MDDA2=^UC#)QWSZzQ}=;3o-hA-qfAX9#a4+#xVM z(!PuEV*=AN?QMj&2}}>QA0WI{;Fk$MMtGyZy9hr?xJBUC33m{_THxJ;UnCq7cn{%S zgsTLmN87szR|-tew)YVZ2uu&R9l~V-_Y&?SYzf>)cz|%h zA0S*txI!0eIsU4$PKm_5_pMtGaR?4kApgtrRJo@zfvc%#7V zvG$XMTLfm$wL1u3Eiikq{UYHI;AwAPm3}L^zb~|Mxm}5MpY8Pd{1;tp`~KncJLjW8 z`?qdIUthrnx31WkwzkQm@62 zPJfu}@ZCo^`+=3*I{^( zdN%di7{I$ykAIrYlC&%Jn48+|ruMk0eL9s1`qQZ=0c3(D>C`hww5J~E$!623=aKG8 zy-2z;%Svu2x8kER!SXJ02RPP>2iDHCUxazqPHh(!A?H5W24baC-vf)D;|=~jgHL+# z$?c@F;?tAoZl;0a=VqicY$qoMjNu7Wsm?oyHa-p!K*ufsd6g}yVf#%KmEj0 zZ$@fQc8o};?n05|cK-F3e&sI6ow<`h#&_rR_ff&Vw{){Awq+vr^tk+OV3{D}%d?Z#)7|IA~k@!T&=~ASb zI~4`pqo8c)#_n0_F~xU85sxVjDn_Q&j)G`*3C*e6>pLHdNJ>F={+^g>>4MVK0=9q7 z%JU3s=tg;}j9PAcQZ26Q$E_o$8}8 zdrQ_@*+h9ZQI@%rrpACWX;;$mqfLo-E+NtLfpm)2%E(`LjGW2tl<+(EDKXQ**CnIG z`MSy2g$!^kebx#&zm`m2$dmQWkEAI=_V1wc+M@R0?vOJ=95H}2JTyf1D-E~)OtAl) z;Ol7I1xP_lcAdnoY@#IOgq8oMIueH|Tsru2&nfMzb}I-y-@iQMj8d$Mg8fzLV1Lh} zT|qwlfN#AlJ_jPLQb0+T4jxEuIAA5Fqb5Q6oSh$1W?#0_d00{X z?#U6{uL)?<9EYFg2vVBhbs?ZTXImyZkE&cw^2(u^`AwCHi4-Gd&*(0Re?wMJFh`5O zJI8yH=FK%$uJQvp0>37>B#&ThziWWMf1nI-W*$K(N3c^99GOSZmLvGSCg@jwB0`Q^ zTZK6WB8Qyw)D)^gt-@ou;`O@t6Q1HF_NzI9peDE@kKpYb!FWX=-l+MPu*FpikI>jl zbM~2+!#eMa`aQF9*tQ(@mm13@ist-y4*RgeiZ`xE_LkEH6D3{2K3K}?o+S1{bNkWN z=P1&2a3B*Lpto{Z^JiOp&JR=rWv=94#Gw@3Z@_u~LbNAVRMi+Y&>8!kjhYN&R$Gac 
zI7;X>^ZoCM4(V#b$+ogV=obvSOwc|nagrov7bGyfqdKgpoQKq$tcCB18q!fF=Sn@W zJ&zl6NCv;1gu?d34_!mwqv}D)uBzEIfhl@YPTQwcV+ajqXQ5$~3HGVQvl{EVf&-0M z1SZe5@i&_|9!A?Zb1!UL%nb5I=a4iDD>4hi`m(K*yQ8S|r&jv8&Oc+!1XKuU)UI@A z=UX&Dl?Nw?Q7>Xx&8!0LfplRq==ZgwgZh&jVCA1k(^6Qg;N;*MCaQethkpI8r2Gf> zJbjm>O4hABoxHoMuzaI3NMTvdhX7{m_GhX%d`mrLo%@Q4P}|(H%-phbf(K$FP$FAZ zo>}78c<1v)bIa3n${|yyQZUE0nS|=W8qFuRw3gwFaz(PFvD=dF;fQd z0p~^4DBaJ1Ob`b&;!%Y_8?F_<=olMvZst0T%U@-_00u`4{D%+ae zPeTjbbv@9=i(t4e_<%b%M@d9O|mFnzX15PAQKo$-;DYNj0=8^-2oJVLg zn!~qt=32C>BK?9Y-g7kaQ9YMBitp$g-!X=GisGADRg!!zgiLw?R<3|n3RDi4qqW^S z@i0gUhEOC0xTG_W&HL2hE=exR?p|`mwsj1-V@(Q|7qGs=?#4uwA_ZiqVv)R3;L#Px#G@=ogE_!cBS8# z_R5Jn)4wq!$(=q!^}(i|2N%vfx^qWK!H)FKX>XjkH~sq@nctATx2f}wErIc!orf2^ znch2X=ZUYRyK-c1o$qh@#}U)!p4a)@Q3byoELU=8nc>zq<)Xgi=wU=FH&h>N+Hva6 zuPhiww6TWly-nS3KlPhSh7oO?A%keywNDieBU-?a^%d{FYh%MOqK!9Hu-ZlbpT~z0 zZK5HAXv@B5S;L4n*^oiBxfAbOH;ibNhODpnq{FvfGmL1{4HZOdx^+$AFrv*gWDu?O znJ-^9jA*kB+54NGJJJ96A|)EVXz!3-)HmhZe|T$DXGbx7jZXB6*NNPgf@mF!e{p{2 zj#2P6I?)?mCvwdJU%Tk?tG>|LIR?H)=iYg!bHij8&s=m==X2$7v_s@_U8rx$eedp? 
zG>lwsGr`eTl+T07hLp>-1x$AHF;|^3q+A~7hG;W=lgADtn(IP+#ee(tTPF@9n%hh; z*{K6B&K^cIx8Fgu_3xj0*f65G&W+Z5yZrLtFrv9G)HmhRqsCo4jA-tV0MYzkEGrsD zG`HWOHQy|Zt{Fx&*STS}b^r0ZR#&uXd(#PyX33qN#(Z{XvDMk(Q%*MR6+_|b2a#?b zb!%Jaj>D9LO?$(Tc%AE{ntPf$JIj=FP1|WmT$@9b{;p#&QXH*(D<_IOYW7We=Eyl$ z3?Yg;#KEV2_?x{K4k3!iGTqxApEqg* zbg|BX8^3$P5Tdwl2a|m3hbzueqU`5?(V% zE{_rVig#RgMPwM!+$Mpkn$P&4Wf;+1_kd{0!^$wf3@MuHtq^VMcW=0U7}4C`(^tIg zrU#v=_WM(Z5zTdISS>nveYuF1+2GG4{6!$6t3jfaY|d=20ktroWp_j~QZyF$({ z)Lk#uqgdo}HT(2_E>uf!U!e&0x&&(r?4`JslwvQsxl*jmNxH?#gy07*fw$OgZmtw7 zcNV+F%B5I~A&_d?gIhw*57hlA3fLju$>M3@PyZ~7=~PE)>ib-bYh6Att9vZOSyVV4 zZ#eqAg81EB5oD~h<1a?-u~O{sIbI&MUv+b(*f{4Fx7au-_Lw2a$^Um#6-37)PiXWj zb;lu1v8k8)M@EBbrnqX2an9sH{6j8*SDRUGu80+I{wW4xBcevM@-c?MZ(k-h9P6~c zrEQovm3L1bC41Ga3UdS5#CTz_+vMf+Ds}a0Pc4Lt!Q9a8U2`*}yyt3DPIxKbxh0q9 zsdb&3D>{@o(_9@Ct5q(6SJ?AaZYDSukHbqhD|b-VD|6lv9cc#Tlt&E7;(&XzCN-Bk zKXZ#Ir~IQ!;H~)uH&=>{bvC-ilnZ^&CGZxzHJ2N34TmOLXY!SWQ*AV<^PrI!&NH!H zXJ+JieVKknFORjaJe1-SsC`d$2#7=(X1xhW*ab!Vov< z5USU?%mei~7b*ykf#u;6p2a|)@m)basYJK%+nd41{xbeJTNP3 zUZXKOOX;)0cSHE|4*$H(KfV0Z!$1GvpKktfLiQbyWbNU6Vt7zkS9+AcWW}c54rtXC zlpSoXVpnhGh@s{AOZ7(rSaky{a-hjUco+kN|Xge$2 zncnrmL(V;T$di^`7yBO|ZV#4+ob^VDzT}2JZn*LcGCO~<{WfUW{C^WU@jx+gp_rl1 zxkDAglNIp^JO}B;^TA}tNO6}B|5NOe3HBBR`?xX4mhW@^v`3rE>AoJ~v5q4916{|L z4ymKk)G;Ezj$*0f>qFOZgQ;VG_ffizV*4Xq$MJ{M@v`DFj2;z3*KxF|V_)}AU{LBY z!v0Lx@wAefbJLxI6xHiYJ)`n<;qF(Tvr{n~Y_CWEsO`0<`vO%D$Kq^aH{285JDYIW zbA2D7Z=52VUg)zW?Wvj2JJc?I0OB&+4+6Uce^9bL_#*Q5$(VkVY^PNwH|)W#%qBRA zG?U+XQmdy%5yyEz!37H5=iHSCmpNMutf~f=kP~e*hE%L zT7a_@P7h;_b4p&>eU7%j7O2b_Y49EaK2WwR*a3}2fIpiIEx>De@E+$m1ydkrqcjo> z-0OV$2i*qPz1m|XPREd>&8Q76&vDTl(iY`no-^Pc*~FfIQRuE*p|?ed zN)gdJaJmnV8kwQhExP{;@Fkzo_hN2dz*O}mf9Oq~S zX@PsHHIbD^^uCc=*zGr#!RAw?mB5zKK5Uj+JtL7oj3?*&%>0||9V-m|&ZAyiQYTopCeHwj)mq%$0b2B}(r2;y>>Y(aP8NO2 zDJo6f4h(!urc56O8(EzO`P`f7m%=? z#@o|p;o_?mc3&-q}BX#t{ef9rRzOKWFV^~2uWutR*HpsXh;8)Q~y zNeS-zk)E_Gji!cFeQ*^mEcQLIhh5yg4UM4tz=+=&=K=A?nWN)JNF0cxF9IGFFYKAX zGcGq5_IzICfRFlV7r%3%l7u?=dpb40pQ=pR(JU!QoUJ>7IQ9YbcJ!

B^&6gG!D@ zB{+phV5fzmZQWGNEdVlA zGm~xD953$vE|eD&F)xE1-?=jtI|1>?Cfnk7-c@Q0MwB>j42>vrwhxUccfNNh0!IdJ z9hA#TuUC29OW1itWr`&7ga?r`x5QaAL~faL#t^yX&gTc^LWa@atZd6n=i}ch-{KDU zOqhv=>#8boQ|!DdPxim%%+f$|!vM|7pP8WzVdR2%ard2S#<#a&>_INIJMgM7=j!$w zWTTEmz(;|VssEwZlq*)3p!cn%^HRs^&x}kw8d18LKU-ttVy**rfhv<6C@B5%iNsuq z>J0K!g)FKrSqRTAO<98pM&$_5sk`?%_^7(#`iM z1BrBH&JPuVNS9PP8|liNTNJXWx{T5#>OG>DInf*vlJzX2SP$jIdTf`rWn$`~V$~Y4 z%4tU$E89};eEO-j<3Og0M+>^=;l993+*smyI@$YLY~Ab1oVW5b%AH**L)8A??_^)~ zvd#6r66e~X5oOM$LnF$a^AAP9!6yyMr5hfp@`mbUpD3&5bv>+ChsZ5+UKt{{+<9_P zE@b$=H;bL@4wV7h{m(mDuQS($Pv&|2~+YFh{`7cH^MVR&B&fOgpr*eW;M>>b%Z& zt6IwUOe3qG<@)Ww%UaWfI$xp{ed1E=*XaR*bf zcC2%|&cZ6KFJpC8@u3ZRy`=TnEonV^OInZLl9rQNq~|b(-3p#WHKTeIzk14z>MH;Z za=0%TjY?BL(ACNU+|zo$)$&!tpTy-*D|=#tI_@cwCfjhnbMptdu}~mKcTlL3;&{K; zxUpV>g^Hl4+OJ0VpA1EGa*l`&cq?eAgvSAEe&ssg?}kxgj8OtB5y&sc^ssL5+t1`$ z_gKS*N80)_gYEDug_eVSnnmem{OHi5a*4A}5sAwCr5A!xPURA3l_GHa+b@P9s>~5l z1H zOUMdgWs#mZQnJ*z#haHttjT-MVzq#l&rFAym#G3VR`7*Ahi6vxsSJ_F-}4qBAxh8n zdDb*|ZlKEGNmDOs&USa8-mYRA*i(P$W}Y=y*+?Kot7`(ROZXT{J>RYZT8+cUD{b$) z3zV79^RF6=N2vUC(Y=B(!?6p=fb2ptB)gCd$_f$_YI9RL8F1pCS4a#jY82uM_=Ojw z8FQlw+WSyCzjKeF^9vK*0?Bhl)x=nxDOyYVJ|cSNWsamC$_?kLqPd1RN@GRUD%?-u zX3F352PI`lQL{zZcxe8_)`R@Z(qH@O&^Iay-haFZp17CKRF%k23&5thn@x*6 zegDFrm9f9?%9WpO%ImosOeqlO?p5ze8?b?5Q#86{>ZKN4KlpQfgh|(QU6e z)2%D*`LVWm?+%d`7DwY)=C^iiqSqA09mx8D7Z1{R;9oK@yh7Xz90d>0Qa83OO4&jA zSpN^1?{}&W$;Zn3wERr^z*v>l-7JRFmCCL3L1h8AMr4*b-G^rSo!9d-QPFd5PId-+ zjq_ubA)`+f9{7N(ICz!w4G(7ULH|!X#h~oTY<=^{W53C+f~jFScxkVUaq9WVC7XK3 z!*%cNgAd?GTj*e2seb$=CC>*~PX+L&eR~^JKJdXq=@;=^Euhl_>C}G2UXlU8nuI8l zkd!CSi~GU8G=-TJ)#EW0&fou|SW?M-CzL+4`A)Kj%bhRnqejA&hmU}Yqg*`S_UVxxGv zjE@+pDe9BwZbculN>jg6lc}o9Iaj-RYCoux z+xgc@;CDOH4Jw^6zmPfrYx*UMUe&`9$Wu=p>*-24;8BRPR3b1+hLnFiUsj(^{e!jO zB)wFJkz6HkfInN=E(LL(E9BVHzGay!%cyOc!pLM*>5S6?S5Jft$+n5Oe|Z)9I?q1~ z&NE~2ob1b)qknfyyE?aNwqhf$ggmnIwMeKeJA=-N$pu?I&XHUef&nI*O4lL>wh!WZ zj^gU!^+oW46QD{~f|Bh5K!LJk$HWkRV1U~+{(xz#NC-F5iugqcZTi~>SM-`{3uXG^ 
zeDS+=qt1wPAhZ=Fh8oClXrrDzm~)$#^Pook8aR6zwU}Ncjk-b=`foSt?W#G3Y}5r> zU{9lRx5|0<8OWd4s2iB$ZPaHp+o2luw_^VMMy>ipG01DwQwDRbS6n@f`V}$wf1^=< zF1$>m#?Nv+=eeH`61~7HdVYiU9EW1M%l$z$my!iiU|3<-b{E_jR`C8A^gEnKBDyOT za8u=Os!XSn8v<6M3NIg!d+Fn3OS8mY%OOYowC^+}W{>)LUO76coy=iJD{NP4zvd^L zDy{e|Nldrm)0n^t>vk5xCiP*;wyBQz(ZFO|rc0G$@$+WqsJFC5{Px|TR-NL)rzI1U z4r;&v%z8MmmAeC5iLSc)C@kG`0~*cAXNa)T3L=7vNF%Z>5Vs^dqXoVX>FsU8fs#3&Jw<5)J&hdIbe)!Ar|KlA*lC2+ zdEgx_n5vVYB4R49w(?IXuhjN6Rh#&RWZWtlW!-AhMVmtR>%`-f`U9pGB?>`Ri$;hj z{f0=1LQoND#31z{oZXU>>C-1^fy$mnqkf-Zr|KlA*lC2+*=vYYodgvTQ(pC1TW_wm zcBw6pv%oEq!HFU}gR3POyUU%+J`^wF*zF-cLHSl!%5|aNqU*4%*w@|&7iv#kiUtcg z)ylM3Ie8fDRhb?JUl#`I1w5vxy-zBK%byCPz;F4ggq+C#eh8B#!UXe#s@ndUluxHt z(v#$GIu$}1lJe(im`_B+KE;T=?V?X{SVJg7&Slyp+gaR7oHLXRN1CD-WyslyA0iYt zjagw)_6agFl|D4MtTNNp<1qhFjGIp4drlG5jI>!;fo43XxEDTtlXfdUHprEqdr0{Y z$H>cF(8MxW6g*+vvbdYZ%lSRukb+eSa4 zCf_)O$%SB|B%sUr?G^CnkaL1ki4PBEU|4{zM#|vhQb9UZ386FcH=UY^wAh20&Ucl% zMS@H^IGG?~s-8lMkTX@0B)1c^5U5{sd0G`O+&8G&Rijp zOP708W%FmdU?s{v{1e?UN+x8B?T%Ekz^t3v#!X=_NrmxO%86J%`=gYDk){vZ3gX3A zs5(7HFGW9+WsOMPNU7CQ=4vGiDgjHbx3v}=h>pW}m8<>3A1kSgx?eKQG1jU150$B! 
zL!?})vZOf*RdX0V(jqr$?Td1(zol8H=Alh8@;9BDE)C|lY2tRt3pw94(&lGpOg2nO z7TLsD`$y6;tWhbpM#Kt8rDW~gujI_6D7EPrhA;C%&KZVpe)c$(ZAvZ%EjzaoRF(W# zX_VYf&@#h>+gGHOo=|K&t;E+pihyM4q#jeYO)Uh}E@Ec=s?wd-)SUIMG;~y5vc;Wt z8sT9SsV|tzNZ>aR7}P)+&pF!?dR<1vaaSJo3$SDf5>Pmmfko z#-+@kQ7R9i+>a?Ext%ks6`!J}SW)WE@yO~*y^hJX4jJiP|NhZ-0_`&QPAcos!+1Xb zU8-?BGH=fDFWp={kj-wDHWO2R=@Dsiv_f(BVS4)UF4^wKUV*;dR-P=S8V{)>z&hL| zn|IdP`lzk}50-z38>+#x(ylj*1VheB1&37i1XcyPS?O^5Y4p30lU8(C8BQ=rzx}eH zjyDo;sCQsf4)un}w|2Dsx{xg}97!Rwha%6@hXAU_#hr|S%hdMEb;!&IYa=si=CT25H^s4-v&C2XQi_Jc#1kShm z2ZDM)83~>Rs|TP-zST?Y0U>);vF2)m$0cA3W95$?`fpnKpt8^qR-UeycfSQIcggsU zgm_@_0j>nZz*%izHU4ksP|Y_13aG3gtrKwmrZmcHo#k4{>VU_}0q5t6XHj+Fkk$z} z-_9ZPTPNTobEv_s6L9Kt$h_8Bs^s#v&Pb>iauTKqN>hAi6V!l5%hEcr z-(j&w^XsuYtt#^jBj@75nW+Oe6T6k>8=sl@5x9&_9P7b7qi{Pn3(i~WyZ@rpmj{=k zcsT2;yT^1VqGVDHVBBQa4|V^Tu5)saGA`VuJ!mo*Wm5c$$8YZ-b36YQ#81+Ro5!d` zHeT1ArWpiZ7@yp86O4v?MXdQfwrNs=dTetd9?&UDZe!-GSbYfPM|hm5C~lZ1#edspXhu*_@G;!kV73O zsQV47H1)@i#YixizDIPY(ziTvlsUh3A)MVlO1DI+U(C(F@YAkTAKBz7TViU$tg}ln z`40*z|E849e^BtD;_p$A@Bdf!ksF=01;!~JQWd17&6C^N^4u+7gULtvL6Jy>hx?S$ zY4l1i(-=!=W*H$Z60Jf`jUvt5sVJyhLD?o%1KPfH^HFk7gldSw>0njEXm1T(jvN8_ zsz*{|$2jLYepMp5oimmd$Ilb6C?%C)x#cz8l8IBKlxQ+ni|0P!$XToB=Hzx6hZAez zfXNPjs691S@grL;=)-2rheT=8-&j2Tk-3z|&Wraf+umS;N|^_Q=X zm6t_L9Q@;e{qz$3^E`2Mzcb@E#$5y0Xwt9Sl9W8Z=?d;Mz6jXgliuQc|M+Fd#aF-CUYuUyS6r4JnrDd+^(s4cAVSovIKXQd|!B^ zu^)wV=&A<3DXPyYQW*5M)3Ec;-&}&*$q=zU*cWnwrf&4(ea=Y3Wxwd&=Y04*&9)!A z)pGC!r;BG~@$M`h4wv;f6b&gRI4~|#&5_!0XXmo@WWhIO;RjTxs~_joL(akdWX}aS zd<(g~PVo=4EdcFzJ~Fxv;63h|>t9q}t|@c_;1#G?{OKC@OC@LpRnOaw)RPW6{1rsv zb10l_8*lNBy zea@50FX?k5?RxRKKBrhoht&{T0S^W3W{sF^c#=A%Gk>o#Ic|Sne1?x$ZqUf5bI2`% zgh~3GI^_}P@?DnKT})phGrbk(9A(Uhms^4W1q`O~yGCbnIo?Qvr55)EL(VM<4+Z(l z2C3>}RaIAt@36%@Vs&35i}#YX^PDNh9{9C!zouWLS>WVlzk8$!Dcl!u!b6I;DJsP| zQl;dWk6_2TdE?wvz)k79JaC!l=1q1}m2PUfo0{pStL(IFF($WjgUU)Q0YA8f^XR>- zi+**dN?{z9W>BvBeR_a$)fs{h=yKP&#N$|byNSi!53?JXtz0=;YH!c+`|CFC z20oh;vi$;4Pf$ANmj;Emgy(V}H@S3X5B1%-e9Qi2j()SrhiA3HZ$i%f%CLG%8ArFu 
ztoYd;3E{&$U6K*qN4Vw$oq30_LFZ6{Tsc?}yzcuD z4&_eaAsosLhvNowC>er7iPIrA@T%e(<4tut0-ZI__S9~%(r9(-DTsy4F^Ipby%F(5 zaX^e{c-;X>>{oMkFZm7|Hn0|L&sSW2CLmw8nU`U`K)kSj^>4Yn%q$c9*eSP=Lqjsge-nhF89kO7rQ~; z-qM`M7N`Hji@WEe716M|10Ex7hPS|TyyZBBhhIpk(@mJ;g(`Eutqs3*Em0r%pu27YU1TE*!V({f)*lFuRq`0bxa-K@Zb zIzDQ%g3FgLTRtmrJ-*NrPBhm>n_5~Uu^7>T2{BM;{L*M3nuxa~;(@vZKj##v!xx_v zExw=xx~aGO{3jwXW_ZvOo3!S&sCivYV?$k_sb)<>ZD95KcqA66kG2LDT)rq2sKdv9 zD43?L&Q%g<;+Kl*0u8Z1a};t!G?^K?Q@OCKGBW)LKD) z%PAn=PQs_3pawpYgfCV>UFtfwrY_JLxi%4r#jQ}ydVcIm;D%Ow#Ar@!EewNdn;QTv zYQ`t3nrj*Z_?{9zd=*e1iCPM|8k$fITF7v+ImeueLl(%)sO-W6>VG>3Gnkx z2BAK2g(d;Eo*!QVVq$Zmv2hY?n$KU62I5hzgEE4mkq>2&3%?sx$EL4u)ax?|(6r)XRngb2m2MhyELIT+HAe8QELby^SRJcvZCD+VVmZB(V??iOZj7QS$t9fWpn<^QWVAO#)F8p|7#D0g;uZE%AiJ;RVsRVCfY#zr)W#TqTllQkN$U` zeLc{=RX_aavg3!{4~NJ5k5vZ8Z!Xq69d&X%`{I- z4L%VTh?`bKm9wm-x*1dODYuwKg{I)MX{xO!#H=~Uh_*IdFLs+1m|GLWh`_$sYE7++ zteeVjmqnXMZrg4DDu9I7GVEtpSE_58%KBG-CkHHkNv+AcHriq}N1JOSRxO6yS!#~Ee7>9cEj3h8k?Ge z3yddX*7AAtty&w78ELfeIl*{Sw639^!*?s35f=v?-YfCp0&m8aM6YjXY^<3&qoOh} z@zQD2&IznKC$N}b{0%gmIrGeOFv?9?wc?yW>$+LfD$c5yF)6Shg74r>J-xDW`m|}4 zX9ebCG^~%dO`V3%)Gdv~L(#@Y3nO?#w4xSr#WC+$6Rx;w)|Im+*mc%CJy3C|)V7-t zqw{+agag{$tk%=;QPJQ^^bi?-S4Rw2WQ_&zv#`!%kdA zz-o71m=%yO`;A_RNkdSJ>ZdHxW|^|00bM|Hl_3AeD^qN#f=A*1cXauGtWu>@z*=lR zCrQ=Z&VcT~>7ylzVK2tX7T-I>{Hc}}YVw?ni4w~my1{iA8!+WxhnY{!MHLm8qFh^5 zVaQyGF)>z;)-^2-4^K2-1E*dL4|m(rLX*c)OOA|w;pVPf-x9%hi7&b2k|`Y8^yC*o zk5JXctSa;exj3^VVl`_bvqsbLW^-Civ^Jg>nB5wUHgWhvvKim|tiQId8OhpsT_Y4b zZ!|uGi3u13LY_eg#F#rFHdQZfreYL~qDN0fw`7@$35|8ubs&JVs#%4fmPF-hHlH%( zJftz@)#6LpA(TO4(S%qP8lu)(iLykf>}8TyEq|BF-&OK=iED^B3^6MZkF>?7+VQ5w z(W9-|3s){#eBSK2%jR7M9az!qxyu1B2Q+(5VBzxMd>H}Hn-H5lbq+J93hnHvQczYr zFdS|Y zpg3;d*zWH>`=h-FaENUp?#ZnM)zz~C6Y-t+Nr5ve@Grnw1M|k|fr-npm|IvA*HevP zr<{cq`~NNSe=YX^4~MCUt#69gthOrfsb3Y_CWVPH(%NEG;O-$(F=y_gDe;;$R)t-I z&m&jVt#3v_72`9=6>FLk6}XLz@vF^VAPih=P0(b5>VD&#xW6&%~G3i|Lv(0vgH|F?3Sbc(AjS~u1HFG>ru&ROmUcpLh_HPBN)-vn6B56_8 zUlo2NBk7k4mSVDgRIn15{Y=4Jt%BZ#m1DEtC_GoSYCWr9MbCbqV4jq+Ec&~GO9iH1 
zE0~r;zjk4^3fk9&xtdk}$hb(4u=G0xa}})C#R^udW%`kV`FPWXpQb%xArqrSBU33g8*10yg>;fSS9lM!y2<2rCG1b&BGw;?v|QDBu?-vR8E z=OMsx-G0=s6R^3c-)jayO!!ZO;I{|C9{{$Y4?UIo(SVH}*yrgO6l=fNvp4n}w9~%I(e=B&5#jk7fIvIiGlNx?k!*>Gy%U!tVLd^Q_ zN6a^W7=8}{KK~V;{}L_VPXU|$Q&#w1whQpfKlJ$@6@DewD}Z_3fx!CqX?T&?!?Hfs z@Q*ZpG=|KpQ6E6-@WP|4V*&F+-tz=sVx6jnRLkf)AFzY|bsS>Kdl}-Amwo;-z?X21 zroTvF_%h)1Q$D|{uf)1uVwFP?!Ish7Z$td9Ma=muh@gl#-6VOf9E@FN=kG2o~E?(<)u+iwJ>$X@i%h~{^! znli0jntlpk{tBIJ4##+P7GSSGUI3WifL|!|CD!G-{Iy!%DBx?ck)q^>KLDn^L5|V* zz3i6(^W*VGMxVO!#iZyiEUj3b42SUjn|xmG5Zl_nO}L+dnisU;F20 z8s<2}_9;cZW_}DnzheO(`4~2ipfKyN0L=9+WuQM!2h8gf1eWKI0~g<;#$VDe)%YK4 z{#OGoy3^^Y%7^?LHGGAJZw7qVPksKRz$Mll8egsP_iOm;8s4tqHVy9r zJmO*4U;Fc4H2!{#|4_qcYIr0h_uBVZz*`|NipzLD9`N&?@%#+HH+uZ*9KgJceU<|L z{jUIgw}*ZWVB=q9@UOTozZyJfkDE1Y#-rOb%sK)H-v{iK_vaeVl^e_d z3UG^C-%-}@G@k1S;{T#yB!%DmfEPaE^Q-<{VioH(c_r|qw`y^8AiSn2+8o9XQ)}F+ zZH;4(s=glEJK=Ek%4N&Lix;g}84d%Qmj_)??V?(+H^j3OjS(DG!0C)|xGoxA(->V{ z(-@Y=5aF6cn%+(mAg3j6=A}F%Nm*QeRUW zfz;916n$(0Ktq(9vvRHh5Vk<^H_}|!P}3Z)wXxA<)j{;ShBdH4El_xR?Ji(Rui5=ViyrbgKmRwZOBY{{HU!#qC_ULKscXnC-DG$&eC#Cbf$!o#Iz?4VRt zEm*v0Zgu!{$bqa{yVaYsAQE3L8_aWg2F0qc<2FN%U>Y_sqV+kr(n0}>E&J@cqVT-S zmd;tSsG98S5-ncpc@b=kTR0fdh@F3_z9|nc8*$zO^$pnZtc|vy#p_$b_H|Z$3zUiH ziBMnPn26clQX$f;;to*5O|ZB{JOh@%@` zR^f0>3>$~>Fb@fN5l}9|qch$-_-!m2#_nnz_G80g93ZL<*Txc38DP*iN5f_x%Uf{m zn((0GM($oId}V%XBr-2ri_KkZXhzK5Ry^7my)KehKX%aPH{b{k9SjB7n9*oFs1Dy) zVR`s>!GiGRaDec<;M`RU;6#m($14hoq8)3STF~FHZR|ziWKdIdUBsKUCKBf{oM=m) z(PHsB=<21bS&d^z-V8LxqGgseZ@2}|F=5VFB+nOk&c<6pk1uNDw;KIz;hbqybUCa7 z_g^B%maNq{4s+&o9=)lHSoIXo%R_oyxV5hS^h)$!wF}Og<9WgYZ1j4)>L{@mPe-V9 zcckZIcg}M8PRrxl3^V3c-x_V>lTG2n7D5$m+N-V}fA2j3&)rpl;OP%2` z9=vL!a4g6ZLiYYOM`0f(iC31exPzw)sJ7s?XMDXCk769jcP<{~800H3)SwZCPlUBh z=xe|yYU8TKWJFRu0e)25vfe8U9CuN3L)@w#GyqJq&@;G8>p6&LQEAkR<1_RdC`Peh zQ5@5w;XEpVtumMurp+6r!V!5|9$v6`+1xpc!^`H+U%~Nx&fLYpFuPj5uX9YY + * + * setup.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License 
version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef SETUP_H +#define SETUP_H + +int Config (void); +/* Self-referential record (next_ptr points to another struct entity) holding two strings and an int. NOTE(review): presumably an entity name, its converted text and the length of one of them -- confirm against the conversion code. */ +struct entity { + char * entity; + char * converted_entity; + int entity_length; + struct entity * next_ptr; +}; + +#endif diff --git a/rss2ctdl/xmlparse.c b/rss2ctdl/xmlparse.c new file mode 100644 index 000000000..6498d1f4e --- /dev/null +++ b/rss2ctdl/xmlparse.c @@ -0,0 +1,404 @@ +/* + * $Id$ + * + * Copyright 2003-2004 Rene Puls and + * Oliver Feiler + * + * http://kiza.kcore.de/software/snownews/ + * http://home.kcore.de/~kianga/study/c/xmlparse.c + * + * xmlparse.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + + +#include + +#include "config.h" +#include "xmlparse.h" +#include "conversions.h" + +int saverestore; /* set to 1 by DeXML while a saved copy of the previous item list exists, otherwise 0 */ +struct newsitem *copy; /* node DeXML is currently appending to the saved list */ +struct newsitem *firstcopy; /* head of the saved list; parse_rdf10_item reads it to restore readstatus */ + +/* Called during parsing whenever we meet a "channel" element. 
+ * The function returns a new Struct for the new feed. */ + +void parse_rdf10_channel(struct feed *feed, xmlDocPtr doc, xmlNodePtr node) { + xmlNodePtr cur; + + /* Free everything before we write to it again. */ + free (feed->title); + free (feed->link); + free (feed->description); + + if (feed->items != NULL) { + while (feed->items->next_ptr != NULL) { + feed->items = feed->items->next_ptr; + free (feed->items->prev_ptr->data->title); + free (feed->items->prev_ptr->data->link); + free (feed->items->prev_ptr->data->guid); + free (feed->items->prev_ptr->data->description); + free (feed->items->prev_ptr->data); + free (feed->items->prev_ptr); + } + free (feed->items->data->title); + free (feed->items->data->link); + free (feed->items->data->guid); + free (feed->items->data->description); + free (feed->items->data); + free (feed->items); + } + + /* At the moment we have still no Items, so set the list to null. */ + feed->items = NULL; + feed->title = NULL; + feed->link= NULL; + feed->description = NULL; + + /* Go through all tags and extract the information */ + for (cur = node; cur != NULL; cur = cur->next) { + if (cur->type != XML_ELEMENT_NODE) + continue; + if (xmlStrcmp(cur->name, "title") == 0) { + feed->title = xmlNodeListGetString(doc, cur->children, 1); + CleanupString (feed->title, 1); + /* Remove trailing newline */ + if (feed->title != NULL) { + if (strlen(feed->title) > 1) { + if (feed->title[strlen(feed->title)-1] == '\n') + feed->title[strlen(feed->title)-1] = '\0'; + } + } + } + else if (xmlStrcmp(cur->name, "link") == 0) { + feed->link = xmlNodeListGetString(doc, cur->children, 1); + /* Remove trailing newline */ + if (feed->link != NULL) { + if (strlen(feed->link) > 1) { + if (feed->link[strlen(feed->link)-1] == '\n') + feed->link[strlen(feed->link)-1] = '\0'; + } + } + } + else if (xmlStrcmp(cur->name, "description") == 0) { + feed->description = xmlNodeListGetString(doc, cur->children, 1); + CleanupString (feed->description, 0); + } + } +} + + 
+void parse_rdf20_channel(struct feed *feed, xmlDocPtr doc, xmlNodePtr node) +{ + xmlNodePtr cur; + /* Channel parser that DeXML uses for documents whose root element is "rss". It resets the feed the same way parse_rdf10_channel does and additionally passes every "item" child to parse_rdf10_item. */ + /* Free everything before we write to it again. */ + free (feed->title); + free (feed->link); + free (feed->description); + + if (feed->items != NULL) { + while (feed->items->next_ptr != NULL) { + feed->items = feed->items->next_ptr; + free (feed->items->prev_ptr->data->title); + free (feed->items->prev_ptr->data->link); + free (feed->items->prev_ptr->data->guid); + free (feed->items->prev_ptr->data->description); + free (feed->items->prev_ptr->data); + free (feed->items->prev_ptr); + } + free (feed->items->data->title); + free (feed->items->data->link); + free (feed->items->data->guid); + free (feed->items->data->description); + free (feed->items->data); + free (feed->items); + } + + /* At the moment we have no items yet, so set the list to NULL. */ + feed->items = NULL; + feed->title = NULL; + feed->link = NULL; + feed->description = NULL; + + /* Walk through all tags inside the channel tag and extract the information */ + for (cur = node; cur != NULL; cur = cur->next) { + if (cur->type != XML_ELEMENT_NODE) + continue; + if (xmlStrcmp(cur->name, "title") == 0) { + feed->title = xmlNodeListGetString(doc, cur->children, 1); + CleanupString (feed->title, 1); + /* Remove trailing newline */ + if (feed->title != NULL) { + if (strlen(feed->title) > 1) { + if (feed->title[strlen(feed->title)-1] == '\n') + feed->title[strlen(feed->title)-1] = '\0'; + } + } + } + else if (xmlStrcmp(cur->name, "link") == 0) { + feed->link = xmlNodeListGetString(doc, cur->children, 1); + /* Remove trailing newline */ + if (feed->link != NULL) { + if (strlen(feed->link) > 1) { + if (feed->link[strlen(feed->link)-1] == '\n') + feed->link[strlen(feed->link)-1] = '\0'; + } + } + } + else if (xmlStrcmp(cur->name, "description") == 0) { + feed->description = xmlNodeListGetString(doc, cur->children, 1); + CleanupString (feed->description, 0); + } else if (xmlStrcmp(cur->name, "item") == 0) { + 
parse_rdf10_item(feed, doc, cur->children); + } + } +} + +/* Called for every "item" element that is met. It takes the feed currently + * being filled in (struct feed *) together with the XML document and the + * first child node of the element, both of which come straight from libxml. + * The new item is appended to the end of feed->items. + */ + +void parse_rdf10_item(struct feed *feed, xmlDocPtr doc, xmlNodePtr node) +{ + xmlNodePtr cur; + xmlChar *readstatusstring; + + struct newsitem *item; + struct newsitem *current; + + /* Allocate memory for a new news item. NOTE(review): neither malloc result is checked before it is dereferenced. */ + item = malloc(sizeof (struct newsitem)); + item->data = malloc (sizeof (struct newsdata)); + + item->data->title = NULL; + item->data->link = NULL; + item->data->guid = NULL; + item->data->description = NULL; + item->data->readstatus = 0; + item->data->parent = feed; + + /* Walk through all tags inside the item tag and extract the information. + Same procedure as in the parse_channel() function */ + for (cur = node; cur != NULL; cur = cur->next) { + if (cur->type != XML_ELEMENT_NODE) + continue; + if (xmlStrcmp(cur->name, "title") == 0) { + item->data->title = xmlNodeListGetString(doc, cur->children, 1); + CleanupString (item->data->title, 1); + /* Remove trailing newline */ + if (item->data->title != NULL) { + if (strlen(item->data->title) > 1) { + if (item->data->title[strlen(item->data->title)-1] == '\n') + item->data->title[strlen(item->data->title)-1] = '\0'; + } + } + } + else if (xmlStrcmp(cur->name, "link") == 0) { + item->data->link = xmlNodeListGetString(doc, cur->children, 1); + if (item->data->link == NULL) { /* NOTE(review): the nested test below can never succeed, this branch is only reached when cur->name equals "link" */ + if (xmlStrcmp(cur->name, "guid") == 0) + item->data->link = xmlNodeListGetString(doc, cur->children, 1); + } + /* Remove trailing newline */ + if (item->data->link != NULL) { + if (strlen(item->data->link) > 1) { + if (item->data->link[strlen(item->data->link)-1] == '\n') + item->data->link[strlen(item->data->link)-1] = '\0'; + } + } + } + else if (xmlStrcmp(cur->name, "guid") == 0) { + item->data->guid = xmlNodeListGetString(doc, 
cur->children, 1); + if (item->data->guid == NULL) { /* NOTE(review): the nested test below is always true here, so this simply repeats the same lookup */ + if (xmlStrcmp(cur->name, "guid") == 0) + item->data->guid = xmlNodeListGetString(doc, cur->children, 1); + } + /* Remove trailing newline */ + if (item->data->guid != NULL) { + if (strlen(item->data->guid) > 1) { + if (item->data->guid[strlen(item->data->guid)-1] == '\n') + item->data->guid[strlen(item->data->guid)-1] = '\0'; + } + } + } + else if (xmlStrcmp(cur->name, "description") == 0) { + item->data->description = xmlNodeListGetString(doc, cur->children, 1); + CleanupString (item->data->description, 0); + } + else if (xmlStrcmp(cur->name, "readstatus") == 0) { + /* Will cause memory leak otherwise, xmlNodeListGetString must be freed. NOTE(review): readstatusstring goes to atoi without a NULL check -- confirm xmlNodeListGetString cannot return NULL here. */ + readstatusstring = xmlNodeListGetString(doc, cur->children, 1); + item->data->readstatus = atoi (readstatusstring); + xmlFree (readstatusstring); + } + } + + /* If saverestore == 1, restore readstatus from the saved list: an entry matches on link and title when both titles are set, otherwise on link alone. */ + if (saverestore == 1) { + for (current = firstcopy; current != NULL; current = current->next_ptr) { + if ((current->data->link != NULL) && (item->data->link != NULL)) { + if ((current->data->title != NULL) && (item->data->title != NULL)) { + if ((strcmp(item->data->link, current->data->link) == 0) && + (strcmp(item->data->title, current->data->title) == 0)) + item->data->readstatus = current->data->readstatus; + } else { + if (strcmp(item->data->link, current->data->link) == 0) + item->data->readstatus = current->data->readstatus; + } + } + } + } + + item->next_ptr = NULL; + if (feed->items == NULL) { + item->prev_ptr = NULL; + feed->items = item; + } else { + item->prev_ptr = feed->items; + while (item->prev_ptr->next_ptr != NULL) + item->prev_ptr = item->prev_ptr->next_ptr; + item->prev_ptr->next_ptr = item; + } +} + + +/* Parse the XML text in cur_ptr->feed and refill cur_ptr from it. + * Returns -1 if cur_ptr->feed is NULL, 2 if libxml yields no document or no + * root element, 3 if the root element is neither "RDF" nor "rss", 0 otherwise. + * If the feed already had items, their link, title and readstatus are copied + * first so that parse_rdf10_item can carry readstatus over to the new items. + * NOTE(review): the "return 2" and "return 3" paths leave the copied list + * (firstcopy) allocated. */ + +int DeXML (struct feed *cur_ptr) { + xmlDocPtr doc; + xmlNodePtr cur; + struct newsitem *cur_item; + + if (cur_ptr->feed == NULL) + return -1; + + saverestore = 0; + /* If cur_ptr->items != NULL we can stash each item's readstatus + 
for the duration of the parse. */ + if (cur_ptr->items != NULL) { + saverestore = 1; + + firstcopy = NULL; + + /* Copy current newsitem struct. NOTE(review): the malloc and strdup results below are not checked. */ + for (cur_item = cur_ptr->items; cur_item != NULL; cur_item = cur_item->next_ptr) { + copy = malloc (sizeof(struct newsitem)); + copy->data = malloc (sizeof (struct newsdata)); + copy->data->title = NULL; + copy->data->link = NULL; + copy->data->guid = NULL; + copy->data->description = NULL; + copy->data->readstatus = cur_item->data->readstatus; + if (cur_item->data->link != NULL) + copy->data->link = strdup (cur_item->data->link); + if (cur_item->data->title != NULL) + copy->data->title = strdup (cur_item->data->title); + + copy->next_ptr = NULL; + if (firstcopy == NULL) { + copy->prev_ptr = NULL; + firstcopy = copy; + } else { + copy->prev_ptr = firstcopy; + while (copy->prev_ptr->next_ptr != NULL) + copy->prev_ptr = copy->prev_ptr->next_ptr; + copy->prev_ptr->next_ptr = copy; + } + } + } + + /* xmlRecoverMemory: + parse an XML in-memory document and build a tree. + In case the document is not Well Formed, a tree is built anyway. */ + doc = xmlRecoverMemory(cur_ptr->feed, strlen(cur_ptr->feed)); + + if (doc == NULL) + return 2; + + /* Find the root element (the check below expects it to be named "RDF"). + The RDF: prefix is ignored for now, until the Jaguar + figures out how exactly it can be read (yup). */ + cur = xmlDocGetRootElement(doc); + + if (cur == NULL) { + xmlFreeDoc (doc); + return 2; + } + + /* Check that the element really is named RDF */ + if (xmlStrcmp(cur->name, "RDF") == 0) { + + /* Now we walk through all elements in the document. This loop + itself only visits the elements on the top level + (in HTML those would be just HEAD and BODY), so it does not descend + through the whole structure. That is the job of the functions + which we then call from inside the loop itself. 
*/ + for (cur = cur->children; cur != NULL; cur = cur->next) { + if (cur->type != XML_ELEMENT_NODE) + continue; + if (xmlStrcmp(cur->name, "channel") == 0) + parse_rdf10_channel(cur_ptr, doc, cur->children); + if (xmlStrcmp(cur->name, "item") == 0) + parse_rdf10_item(cur_ptr, doc, cur->children); + /* Last-Modified is only used when reading from internal feeds (disk cache). NOTE(review): a previous cur_ptr->lastmodified value is overwritten without being freed -- confirm who owns it. */ + if (xmlStrcmp(cur->name, "lastmodified") == 0) + cur_ptr->lastmodified = xmlNodeListGetString(doc, cur->children, 1); + } + } else if (xmlStrcmp(cur->name, "rss") == 0) { + for (cur = cur->children; cur != NULL; cur = cur->next) { + if (cur->type != XML_ELEMENT_NODE) + continue; + if (xmlStrcmp(cur->name, "channel") == 0) + parse_rdf20_channel(cur_ptr, doc, cur->children); + } + } else { + xmlFreeDoc(doc); + return 3; + } + + xmlFreeDoc(doc); + + if (saverestore == 1) { + /* Free the saved list (struct newsitem *copy nodes starting at firstcopy). */ + while (firstcopy->next_ptr != NULL) { + firstcopy = firstcopy->next_ptr; + free (firstcopy->prev_ptr->data->link); + free (firstcopy->prev_ptr->data->guid); + free (firstcopy->prev_ptr->data->title); + free (firstcopy->prev_ptr->data); + free (firstcopy->prev_ptr); + } + free (firstcopy->data->link); + free (firstcopy->data->guid); + free (firstcopy->data->title); + free (firstcopy->data); + free (firstcopy); + } + + if (cur_ptr->original != NULL) + free (cur_ptr->original); + + /* Set -> title to something if it's a NULL pointer to avoid crash with strdup below. 
*/ + if (cur_ptr->title == NULL) + cur_ptr->title = strdup (cur_ptr->feedurl); + cur_ptr->original = strdup (cur_ptr->title); + + return 0; +} diff --git a/rss2ctdl/xmlparse.h b/rss2ctdl/xmlparse.h new file mode 100644 index 000000000..f929f1465 --- /dev/null +++ b/rss2ctdl/xmlparse.h @@ -0,0 +1,33 @@ +/* + * $Id$ + * + * Copyright 2003-2004 Rene Puls + * + * xmlparse.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef XMLPARSE_H +#define XMLPARSE_H + +#include + +void parse_rdf10_item(struct feed *feed, xmlDocPtr doc, xmlNodePtr node); +void parse_rdf10_channel(struct feed * feed, xmlDocPtr doc, xmlNodePtr node); +void parse_rdf20_channel(struct feed * feed, xmlDocPtr doc, xmlNodePtr node); +int DeXML (struct feed * cur_ptr); + +#endif diff --git a/rss2ctdl/zlib_interface.c b/rss2ctdl/zlib_interface.c new file mode 100644 index 000000000..6979d330d --- /dev/null +++ b/rss2ctdl/zlib_interface.c @@ -0,0 +1,165 @@ +/* Low-level function, decompresses deflate compressed data. Used by gzip_uncompress below. 
*/ + +#include "zlib_interface.h" +#include +#include +#include +#include + +int JG_ZLIB_DEBUG = 0; + +struct gzip_header { + unsigned char magic[2]; + unsigned char method; + unsigned char flags; + unsigned char mtime[4]; + unsigned char xfl; + unsigned char os; +}; + +struct gzip_footer { + unsigned char crc32[4]; + unsigned char size[4]; +}; + +int jg_zlib_uncompress(void * const in_buf, int in_size, + void **out_buf_ptr, int *out_size, + int gzip) +{ + char tmpstring[1024]; + z_stream stream; + char *out_buf = NULL; + int out_buf_bytes = 0; + char tmp_buf[4096]; + int result; + int new_bytes; + + /* Prepare the stream structure. */ + stream.zalloc = NULL; + stream.zfree = NULL; + stream.opaque = NULL; + stream.next_in = in_buf; + stream.avail_in = in_size; + stream.next_out = tmp_buf; + stream.avail_out = sizeof tmp_buf; + + if (out_size != NULL) + *out_size = 0; + + if (gzip) + result = inflateInit2(&stream, MAX_WBITS + 32); /* UNTESTED */ + else + result = inflateInit2(&stream, -MAX_WBITS); + + if (result != 0) { + if (JG_ZLIB_DEBUG) + fprintf(stderr, "inflateInit2 failed: %d\n", result); + return JG_ZLIB_ERROR_OLDVERSION; + } + + do { + /* Should be Z_FINISH? */ + result = inflate(&stream, Z_NO_FLUSH); + switch (result) { + case Z_BUF_ERROR: + if (stream.avail_in == 0) + goto DONE; /* zlib bug */ + case Z_ERRNO: + case Z_NEED_DICT: + case Z_MEM_ERROR: + case Z_DATA_ERROR: + case Z_VERSION_ERROR: + inflateEnd(&stream); + free(out_buf); + if (JG_ZLIB_DEBUG) { + snprintf (tmpstring, sizeof(tmpstring), "ERROR: zlib_uncompress: %d %s\n", result, stream.msg); + fprintf(stderr, tmpstring); + } + return JG_ZLIB_ERROR_UNCOMPRESS; + } + if (stream.avail_out < sizeof tmp_buf) { + /* Add the new uncompressed data to our output buffer. 
*/ + new_bytes = sizeof tmp_buf - stream.avail_out; + out_buf = realloc(out_buf, out_buf_bytes + new_bytes); + memcpy(out_buf + out_buf_bytes, tmp_buf, new_bytes); + out_buf_bytes += new_bytes; + stream.next_out = tmp_buf; + stream.avail_out = sizeof tmp_buf; + } else { + /* For some reason, inflate() didn't write out a single byte. */ + inflateEnd(&stream); + free(out_buf); + if (JG_ZLIB_DEBUG) + fprintf(stderr, "ERROR: No output during decompression\n"); + return JG_ZLIB_ERROR_NODATA; + } + } while (result != Z_STREAM_END); + +DONE: + + inflateEnd(&stream); + + /* Null-terminate the output buffer so it can be handled like a string. */ + out_buf = realloc(out_buf, out_buf_bytes + 1); + out_buf[out_buf_bytes] = 0; + + /* The returned size does NOT include the additionall null byte! */ + if (out_size != NULL) + *out_size = out_buf_bytes; + + *out_buf_ptr = out_buf; + + return 0; +} + +/* Decompressed gzip,deflate compressed data. This is what the webservers usually send. */ + +int jg_gzip_uncompress(void *in_buf, int in_size, + void **out_buf_ptr, int *out_size) +{ + char tmpstring[1024]; + struct gzip_header *header; + char *data_start; + int offset = sizeof *header; + + header = in_buf; + + if (out_size != NULL) + *out_size = 0; + + if ((header->magic[0] != 0x1F) || (header->magic[1] != 0x8B)) { + if (JG_ZLIB_DEBUG) + fprintf(stderr, "ERROR: Invalid magic bytes for GZIP data\n"); + return JG_ZLIB_ERROR_BAD_MAGIC; + } + + if (header->method != 8) { + if (JG_ZLIB_DEBUG) + fprintf(stderr, "ERROR: Compression method is not deflate\n"); + return JG_ZLIB_ERROR_BAD_METHOD; + } + + if (header->flags != 0 && header->flags != 8) { + if (JG_ZLIB_DEBUG) { + snprintf (tmpstring, sizeof(tmpstring), "ERROR: Unsupported flags %d", header->flags); + fprintf(stderr, "ERROR: %s\n", tmpstring); + } + return JG_ZLIB_ERROR_BAD_FLAGS; + } + + if (header->flags & 8) { + /* skip the file name */ + while (offset < in_size) { + if (((char *)in_buf)[offset] == 0) { + offset++; + break; + } + 
offset++; + } + } + + data_start = (char *)in_buf + offset; + + return jg_zlib_uncompress(data_start, in_size - offset - 8, + out_buf_ptr, out_size, 0); +} diff --git a/rss2ctdl/zlib_interface.h b/rss2ctdl/zlib_interface.h new file mode 100644 index 000000000..1ba72c1a8 --- /dev/null +++ b/rss2ctdl/zlib_interface.h @@ -0,0 +1,22 @@ +#ifndef JG_ZLIB_INTERFACE +#define JG_ZLIB_INTERFACE + +enum JG_ZLIB_ERROR { + JG_ZLIB_ERROR_OLDVERSION = -1, + JG_ZLIB_ERROR_UNCOMPRESS = -2, + JG_ZLIB_ERROR_NODATA = -3, + JG_ZLIB_ERROR_BAD_MAGIC = -4, + JG_ZLIB_ERROR_BAD_METHOD = -5, + JG_ZLIB_ERROR_BAD_FLAGS = -6 +}; + +extern int JG_ZLIB_DEBUG; + +int jg_zlib_uncompress(void *in_buf, int in_size, + void **out_buf_ptr, int *out_size, + int gzip); + +int jg_gzip_uncompress(void *in_buf, int in_size, + void **out_buf_ptr, int *out_size); + +#endif -- 2.39.2