code.citadel.org Git - citadel.git/commitdiff
* Reject stories more than 6 days old.
author Art Cancro <ajc@citadel.org>
Sat, 17 Sep 2005 19:59:37 +0000 (19:59 +0000)
committer Art Cancro <ajc@citadel.org>
Sat, 17 Sep 2005 19:59:37 +0000 (19:59 +0000)
rss2ctdl/Makefile
rss2ctdl/config.h
rss2ctdl/main.c
rss2ctdl/xmlparse.c

index 02652130742bae8fe34e5f83c4304b2ee0502ed5..b646fc83018938ec760ad32b44c98933c0294bde 100644 (file)
@@ -1,7 +1,7 @@
 CC=gcc
 OBJFILES=zlib_interface.o os-support.o io-internal.o md5.o digcalc.o \
        net-support.o conversions.o xmlparse.o netio.o \
-       main.o
+       main.o parsedate.o rdf_parsedate.o
 CFLAGS=`xml2-config --cflags`
 LDFLAGS=`xml2-config --libs`
 
index ccd2fb4a98fca2e739f5aa2ca191a2df42ff6be0..e5a0bac1685d443a8ba40df37c0053f67e0efd11 100644 (file)
@@ -23,6 +23,7 @@
 #ifndef CONFIG_H
 #define CONFIG_H
 
+#include <time.h>
 #include "netio.h"
 
 /* Set your charset here. ISO-8859-1 is default. */
@@ -63,7 +64,7 @@ struct newsdata {
        char *link;
        char *guid;                                                     /* Not always present */
        char *description;
-       char *date;                                                     /* not always present */
+       time_t date;                                                    /* not always present */
 };
 
 extern struct feed *first_ptr;
index 9799ce7790265b38e569c28b619e7d0e09cbbb49..d763942efc9b03892185a36b459bf48af1b2b0fc 100644 (file)
@@ -127,7 +127,7 @@ int main (int argc, char *argv[]) {
                exit(1);
        }
 
-       sprintf(tmp, "%s/network/spoolin/rssfeed.%ld", argv[4], time(NULL));
+       sprintf(tmp, "%s/network/spoolin/rssfeed.%08lx.%04x", argv[4], time(NULL), getpid());
        fp = fopen(tmp, "w");
        if (fp == NULL) {
                fprintf(stderr, "%s: cannot open %s: %s\n",
@@ -136,62 +136,72 @@ int main (int argc, char *argv[]) {
        }
 
        for (itemptr = new_ptr->items; itemptr != NULL; itemptr = itemptr->next_ptr) {
-               fprintf(stderr, "--> %s\n", itemptr->data->title);
-               fprintf(stderr, "    Date: %s\n", itemptr->data->date);
-               fprintf(fp, "%c", 255);                 /* Start of message */
-               fprintf(fp, "A");                       /* Non-anonymous */
-               fprintf(fp, "%c", 4);                   /* MIME */
-               fprintf(fp, "Prss%c", 0);               /* path */
-
-               /* The message ID will be an MD5 hash of the GUID.
-                * If there is no GUID present, we construct a message ID based
-                * on an MD5 hash of each item.  Citadel's loopzapper will automatically
-                * reject items with message ID's which have already been submitted.
+       
+               /* Reject items more than 6 days old, because the loopzapper only keeps 7 days
+                * worth of data.
                 */
-               MD5Init(&md5context);
-               if (itemptr->data->guid != NULL) {
-                       MD5Update(&md5context, itemptr->data->guid, strlen(itemptr->data->guid));
+               if ( (itemptr->data->date != 0L) && (time(NULL) - itemptr->data->date >= 518400L) ) {
+                       fprintf(stderr, "REJECTED: %s\n", itemptr->data->title);
                }
-               else {
+
+               if ( (itemptr->data->date == 0L) || (time(NULL) - itemptr->data->date < 518400L) ) {
+
+                       fprintf(stderr, "ACCEPTED: %s\n", itemptr->data->title);
+                       fprintf(fp, "%c", 255);                 /* Start of message */
+                       fprintf(fp, "A");                       /* Non-anonymous */
+                       fprintf(fp, "%c", 4);                   /* MIME */
+                       fprintf(fp, "Prss%c", 0);               /* path */
+       
+                       /* The message ID will be an MD5 hash of the GUID.
+                        * If there is no GUID present, we construct a message ID based
+                        * on an MD5 hash of each item.  Citadel's loopzapper will automatically
+                        * reject items with message ID's which have already been submitted.
+                        */
+                       MD5Init(&md5context);
+                       if (itemptr->data->guid != NULL) {
+                               MD5Update(&md5context, itemptr->data->guid, strlen(itemptr->data->guid));
+                       }
+                       else {
+                               if (itemptr->data->title != NULL) {
+                                       MD5Update(&md5context, itemptr->data->title, strlen(itemptr->data->title));
+                               }
+                               //if (itemptr->data->description != NULL) {
+                                       //MD5Update(&md5context, itemptr->data->description, strlen(itemptr->data->description));
+                               //}
+                               if (itemptr->data->link != NULL) {
+                                       MD5Update(&md5context, itemptr->data->link, strlen(itemptr->data->link));
+                               }
+                       }
+                       MD5Final(md5msgid, &md5context);
+                       CvtHex(md5msgid, md5context_hex);
+       
+                       fprintf(fp, "I%s@%s%c", md5context_hex, argv[3], 0);    /* ID */ 
+       
+                       fprintf(fp, "T%ld%c",  time(NULL),  0); /* time */
+                       fprintf(fp, "Arss%c", 0);               /* author */
+                       fprintf(fp, "O%s%c", argv[2], 0);       /* room */
+                       fprintf(fp, "C%s%c", argv[2], 0);       /* room */
+                       fprintf(fp, "N%s%c", argv[3], 0);       /* orig node */
+                       if (itemptr->data->guid != NULL) {
+                               fprintf(fp, "E%s%c", itemptr->data->guid, 0);   /* guid=euid*/
+                       }
                        if (itemptr->data->title != NULL) {
-                               MD5Update(&md5context, itemptr->data->title, strlen(itemptr->data->title));
+                               fprintf(fp, "U%s%c", itemptr->data->title, 0);  /* subject */
                        }
-                       //if (itemptr->data->description != NULL) {
-                               //MD5Update(&md5context, itemptr->data->description, strlen(itemptr->data->description));
-                       //}
+       
+                       fprintf(fp, "M");                       /* msg text */
+                       fprintf(fp, "Content-type: text/html\r\n\r\n");
+                       fprintf(fp, "<HTML><BODY>\r\n");
+                       fprintf(fp, "%s\n", itemptr->data->description);
                        if (itemptr->data->link != NULL) {
-                               MD5Update(&md5context, itemptr->data->link, strlen(itemptr->data->link));
+                               fprintf(fp, "<BR><BR>\r\n");
+                               fprintf(fp, "<A HREF=\"%s\">%s</A>\n",
+                                       itemptr->data->link,
+                                       itemptr->data->link);
                        }
+                       fprintf(fp, "</BODY></HTML>\r\n");
+                       fprintf(fp, "%c", 0);
                }
-               MD5Final(md5msgid, &md5context);
-               CvtHex(md5msgid, md5context_hex);
-
-               fprintf(fp, "I%s@%s%c", md5context_hex, argv[3], 0);    /* ID */ 
-
-               fprintf(fp, "T%ld%c",  time(NULL),  0); /* time */
-               fprintf(fp, "Arss%c", 0);               /* author */
-               fprintf(fp, "O%s%c", argv[2], 0);       /* room */
-               fprintf(fp, "C%s%c", argv[2], 0);       /* room */
-               fprintf(fp, "N%s%c", argv[3], 0);       /* orig node */
-               if (itemptr->data->guid != NULL) {
-                       fprintf(fp, "E%s%c", itemptr->data->guid, 0);   /* guid=euid*/
-               }
-               if (itemptr->data->title != NULL) {
-                       fprintf(fp, "U%s%c", itemptr->data->title, 0);  /* subject */
-               }
-
-               fprintf(fp, "M");                       /* msg text */
-               fprintf(fp, "Content-type: text/html\r\n\r\n");
-               fprintf(fp, "<HTML><BODY>\r\n");
-               fprintf(fp, "%s\n", itemptr->data->description);
-               if (itemptr->data->link != NULL) {
-                       fprintf(fp, "<BR><BR>\r\n");
-                       fprintf(fp, "<A HREF=\"%s\">%s</A>\n",
-                               itemptr->data->link,
-                               itemptr->data->link);
-               }
-               fprintf(fp, "</BODY></HTML>\r\n");
-               fprintf(fp, "%c", 0);
        }
 
        fclose(fp);
index df512e4ea817ad678afe95314a9f83935adcc51f..e4fad3b029e17eaa3a0ab374a25bba5dbfbb1057 100644 (file)
@@ -31,6 +31,9 @@
 #include "xmlparse.h"
 #include "conversions.h"
 
+#include "parsedate.h"
+#include "rdf_parsedate.h"
+
 int saverestore;
 struct newsitem *copy;
 struct newsitem *firstcopy;
@@ -52,7 +55,6 @@ void parse_rdf10_channel(struct feed *feed, xmlDocPtr doc, xmlNodePtr node) {
                        free (feed->items->prev_ptr->data->link);
                        free (feed->items->prev_ptr->data->guid);
                        free (feed->items->prev_ptr->data->description);
-                       free (feed->items->prev_ptr->data->date);
                        free (feed->items->prev_ptr->data);
                        free (feed->items->prev_ptr);
                }
@@ -60,7 +62,6 @@ void parse_rdf10_channel(struct feed *feed, xmlDocPtr doc, xmlNodePtr node) {
                free (feed->items->data->link);
                free (feed->items->data->guid);
                free (feed->items->data->description);
-               free (feed->items->data->date);
                free (feed->items->data);
                free (feed->items);
        }
@@ -120,7 +121,6 @@ void parse_rdf20_channel(struct feed *feed, xmlDocPtr doc, xmlNodePtr node)
                        free (feed->items->prev_ptr->data->link);
                        free (feed->items->prev_ptr->data->guid);
                        free (feed->items->prev_ptr->data->description);
-                       free (feed->items->prev_ptr->data->date);
                        free (feed->items->prev_ptr->data);
                        free (feed->items->prev_ptr);
                }
@@ -128,7 +128,6 @@ void parse_rdf20_channel(struct feed *feed, xmlDocPtr doc, xmlNodePtr node)
                free (feed->items->data->link);
                free (feed->items->data->guid);
                free (feed->items->data->description);
-               free (feed->items->data->date);
                free (feed->items->data);
                free (feed->items);
        }
@@ -194,7 +193,7 @@ void parse_rdf10_item(struct feed *feed, xmlDocPtr doc, xmlNodePtr node)
        item->data->link = NULL;
        item->data->guid = NULL;
        item->data->description = NULL;
-       item->data->date = NULL;
+       item->data->date = 0L;
        item->data->readstatus = 0;
        item->data->parent = feed;
                
@@ -246,16 +245,18 @@ void parse_rdf10_item(struct feed *feed, xmlDocPtr doc, xmlNodePtr node)
                        item->data->description = xmlNodeListGetString(doc, cur->children, 1);
                        CleanupString (item->data->description, 0);
                }
-               else if (xmlStrcmp(cur->name, "date") == 0) {
-                       item->data->date = xmlNodeListGetString(doc, cur->children, 1);
-                       CleanupString (item->data->date, 0);
-               }
                /* pubDate will be in the form of: Thu, 15 Sep 2005 14:32:44 +0000 */
                else if (xmlStrcmp(cur->name, "pubDate") == 0) {
-                       item->data->date = xmlNodeListGetString(doc, cur->children, 1);
-                       CleanupString (item->data->date, 0);
+                       item->data->date = parsedate(xmlNodeListGetString(doc, cur->children, 1));
+               }
+               /* RSS style date will be in the form of: 2005-09-17T06:18:00+00:00
+                * Only use it if no pubDate was already found.
+                */
+               else if (xmlStrcmp(cur->name, "date") == 0) {
+                       if (item->data->date <= 0L) {
+                               item->data->date = rdf_parsedate(xmlNodeListGetString(doc, cur->children, 1));
+                       }
                }
-               /* RSS style date will be in the form of: 2005-09-17T06:18:00+00:00 */
                else if (xmlStrcmp(cur->name, "readstatus") == 0) {
                        /* Will cause memory leak otherwise, xmlNodeListGetString must be freed. */
                        readstatusstring = xmlNodeListGetString(doc, cur->children, 1);
@@ -319,7 +320,7 @@ int DeXML (struct feed *cur_ptr) {
                        copy->data->link = NULL;
                        copy->data->guid = NULL;
                        copy->data->description = NULL;
-                       copy->data->date = NULL;
+                       copy->data->date = 0L;
                        copy->data->readstatus = cur_item->data->readstatus;
                        if (cur_item->data->link != NULL)
                                copy->data->link = strdup (cur_item->data->link);