4 * rss2ctdl -- a utility to pull RSS feeds into Citadel rooms.
6 * Main program is (c)2004 by Art Cancro
7 * RSS parser is (c)2003-2004 by Oliver Feiler
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 #include <sys/types.h>
37 #include "io-internal.h"
38 #include "conversions.h"
42 struct feed *first_ptr = NULL;
43 struct entity *first_entity = NULL;
46 * If you want to use a proxy server, you can hack the following two lines.
49 unsigned short proxyport = 0;
52 * Main function of program.
54 int main (int argc, char *argv[]) {
58 struct newsitem *itemptr;
62 HASHHEX md5context_hex;
65 setlocale (LC_ALL, "");
66 bindtextdomain ("rss2ctdl", LOCALEPATH);
67 textdomain ("rss2ctdl");
72 "%s: usage:\n %s <feedurl> <roomname> <nodefqdn> <ctdldir>\n",
77 /* Init the pRNG. See about.c for usages of rand() ;) */
80 url = strdup(argv[1]);
81 CleanupString(url, 0);
83 /* Support that stupid feed:// "protocol" */
84 if (strncasecmp (url, "feed://", 7) == 0)
85 memcpy (url, "http", 4);
87 /* If URL does not start with the protocol specification,
89 -> tmp[512] -> we can "only" use max 504 chars from url ("http://" == 7). */
90 if ((strncasecmp (url, "http://", 7) != 0) &&
91 (strncasecmp (url, "https://", 8) != 0)) {
92 if (strlen (url) < 504) {
93 strcpy (tmp, "http://");
94 strncat (tmp, url, 504);
103 new_ptr = malloc (sizeof(struct feed));
104 new_ptr->feedurl = strdup(url);
105 new_ptr->feed = NULL;
106 new_ptr->content_length = 0;
107 new_ptr->title = NULL;
108 new_ptr->link = NULL;
109 new_ptr->description = NULL;
110 new_ptr->lastmodified = NULL;
111 new_ptr->lasthttpstatus = 0;
112 new_ptr->content_type = NULL;
113 new_ptr->netio_error = NET_ERR_OK;
114 new_ptr->connectresult = 0;
115 new_ptr->cookies = NULL;
116 new_ptr->authinfo = NULL;
117 new_ptr->servauth = NULL;
118 new_ptr->items = NULL;
119 new_ptr->problem = 0;
120 new_ptr->original = NULL;
122 /* Don't need url text anymore. */
125 /* Download new feed and DeXMLize it. */
126 if ((UpdateFeed (new_ptr)) != 0) {
130 sprintf(tmp, "%s/network/spoolin/rssfeed.%08lx.%04x", argv[4], time(NULL), getpid());
131 fp = fopen(tmp, "w");
133 fprintf(stderr, "%s: cannot open %s: %s\n",
134 argv[0], tmp, strerror(errno));
138 for (itemptr = new_ptr->items; itemptr != NULL; itemptr = itemptr->next_ptr) {
140 /* Reject items more than 6 days old, because the loopzapper only keeps 7 days
143 if ( (itemptr->data->date != 0L) && (time(NULL) - itemptr->data->date >= 518400L) ) {
144 fprintf(stderr, "REJECTED: %s\n", itemptr->data->title);
147 if ( (itemptr->data->date == 0L) || (time(NULL) - itemptr->data->date < 518400L) ) {
149 fprintf(stderr, "ACCEPTED: %s\n", itemptr->data->title);
150 fprintf(fp, "%c", 255); /* Start of message */
151 fprintf(fp, "A"); /* Non-anonymous */
152 fprintf(fp, "%c", 4); /* MIME */
153 fprintf(fp, "Prss%c", 0); /* path */
155 /* The message ID will be an MD5 hash of the GUID.
156 * If there is no GUID present, we construct a message ID based
157 * on an MD5 hash of each item. Citadel's loopzapper will automatically
158 * reject items with message IDs which have already been submitted.
160 MD5Init(&md5context);
161 if (itemptr->data->guid != NULL) {
162 MD5Update(&md5context, itemptr->data->guid, strlen(itemptr->data->guid));
165 if (itemptr->data->title != NULL) {
166 MD5Update(&md5context, itemptr->data->title, strlen(itemptr->data->title));
168 //if (itemptr->data->description != NULL) {
169 //MD5Update(&md5context, itemptr->data->description, strlen(itemptr->data->description));
171 if (itemptr->data->link != NULL) {
172 MD5Update(&md5context, itemptr->data->link, strlen(itemptr->data->link));
175 MD5Final(md5msgid, &md5context);
176 CvtHex(md5msgid, md5context_hex);
178 fprintf(fp, "I%s@%s%c", md5context_hex, argv[3], 0); /* ID */
180 fprintf(fp, "T%ld%c", time(NULL), 0); /* time */
181 fprintf(fp, "Arss%c", 0); /* author */
182 fprintf(fp, "O%s%c", argv[2], 0); /* room */
183 fprintf(fp, "C%s%c", argv[2], 0); /* room */
184 fprintf(fp, "N%s%c", argv[3], 0); /* orig node */
185 if (itemptr->data->guid != NULL) {
186 fprintf(fp, "E%s%c", itemptr->data->guid, 0); /* guid=euid*/
188 if (itemptr->data->title != NULL) {
189 fprintf(fp, "U%s%c", itemptr->data->title, 0); /* subject */
192 fprintf(fp, "M"); /* msg text */
193 fprintf(fp, "Content-type: text/html\r\n\r\n");
194 fprintf(fp, "<HTML><BODY>\r\n");
195 fprintf(fp, "%s\n", itemptr->data->description);
196 if (itemptr->data->link != NULL) {
197 fprintf(fp, "<BR><BR>\r\n");
198 fprintf(fp, "<A HREF=\"%s\">%s</A>\n",
200 itemptr->data->link);
202 fprintf(fp, "</BODY></HTML>\r\n");
203 fprintf(fp, "%c", 0);
209 /* Be lazy and let the operating system free all the memory. */