code.citadel.org Git - citadel.git/blobdiff - citadel/server/modules/rssclient/serv_rssclient.c
dammit, learn to spell
[citadel.git] / citadel / server / modules / rssclient / serv_rssclient.c
index 4f4db62fc07875fc0b9103cbd242a4aeb2780208..995bd87da898d8931c677215814ad568fd25d967 100644 (file)
@@ -19,7 +19,7 @@
 #include <expat.h>
 #include <curl/curl.h>
 #include <libcitadel.h>
-#include "../../citadel.h"
+#include "../../citadel_defs.h"
 #include "../../server.h"
 #include "../../citserver.h"
 #include "../../support.h"
 #include "../../context.h"
 #include "../../internet_addressing.h"
 
-struct rssroom {
-       struct rssroom *next;
-       char *room;
-};
-
-struct rssurl {
-       struct rssurl *next;
-       char *url;
-       struct rssroom *rooms;
+struct rssfeed {                       // one feed-to-room pairing; rssclient_scan_room() appends these to the feed list
+       char url[SIZ];                  // string containing the URL of an RSS or Atom feed
+       char room[ROOMNAMELEN];         // the name of the room which is pulling this feed
 };
 
 struct rssparser {
+       char url[SIZ];                  // URL of the feed being parsed (copied in by rss_parse_feed)
+       char room[ROOMNAMELEN];         // name of the room that new items are submitted to
        StrBuf *CData;
        struct CtdlMessage *msg;
        char *link;
        char *description;
        char *item_id;
-       struct rssroom *rooms;
 };
 
 time_t last_run = 0L;
-struct rssurl *rsstodo = NULL;
 
 
 // This handler is called whenever an XML tag opens.
@@ -99,18 +93,16 @@ void rss_start_element(void *data, const char *el, const char **attribute) {
 void rss_end_element(void *data, const char *el) {
        struct rssparser *r = (struct rssparser *)data;
        StrBuf *encoded_field;
+       long msgnum;
 
-       if (server_shutting_down) return;                       // shunt the whole operation if we're exiting
+       if (server_shutting_down) return;                                       // shunt the whole operation if we're exiting
 
-       if (StrLength(r->CData) > 0) {                          // strip leading/trailing whitespace from field
+       if (StrLength(r->CData) > 0) {                                          // strip leading/trailing whitespace from field
                StrBufTrim(r->CData);
        }
 
-       if (                                                    // end of a new item(rss) or entry(atom)
-               (!strcasecmp(el, "entry"))
-               || (!strcasecmp(el, "item"))
-       ) {
-               if (r->msg != NULL) {                           // Save the message to the rooms
+       if ((!strcasecmp(el, "entry")) || (!strcasecmp(el, "item"))) {          // end of a new item(rss) or entry(atom)
+               if (r->msg != NULL) {                                           // Save the message to the room
 
                        // use the link as an item id if nothing else is available
                        if ((r->item_id == NULL) && (r->link != NULL)) {
@@ -126,13 +118,9 @@ void rss_end_element(void *data, const char *el) {
                        if (already_seen == 0) {
 
                                // Compose the message text
-                               // FIXME ajc 2023jan06 - this can create lines longer than 1024 characters which chokes the client message parsers
+
                                StrBuf *TheMessage = NewStrBuf();
-                               StrBufAppendPrintf(TheMessage,
-                                       "Content-type: text/html\n\n"
-                                       "\n\n"
-                                       "<html><head></head><body>"
-                               );
+                               StrBufAppendPrintf(TheMessage, "<html><head></head><body>");
 
                                if (r->description != NULL) {
                                        StrBufAppendPrintf(TheMessage, "%s<br><br>\r\n", r->description);
@@ -147,29 +135,33 @@ void rss_end_element(void *data, const char *el) {
                                }
 
                                StrBufAppendPrintf(TheMessage, "</body></html>\r\n");
-                               CM_SetField(r->msg, eMesageText, ChrPtr(TheMessage), StrLength(TheMessage));
+
+                               // Quoted-Printable encode the HTML message, because RSS and Atom make no guarantee of line length limits.
+                               StrBuf *TheMessage_Encoded = StrBufQuotedPrintableEncode(TheMessage);
+
+                               // Now we reuse TheMessage -- this time it will contain the MIME headers concatenated with the encoded message.
+                               FlushStrBuf(TheMessage);
+                               StrBufAppendBufPlain(TheMessage, HKEY(
+                                       "Content-type: text/html; charset=UTF-8\r\n"
+                                       "Content-Transfer-Encoding: quoted-printable\r\n"
+                                       "\r\n"
+                                       ), 0
+                               );
+                               StrBufAppendBuf(TheMessage, TheMessage_Encoded, 0);
+                               FreeStrBuf(&TheMessage_Encoded);
+
+                               CM_SetField(r->msg, eMessageText, ChrPtr(TheMessage));
                                FreeStrBuf(&TheMessage);
 
                                if (CM_IsEmpty(r->msg, eAuthor)) {
-                                       CM_SetField(r->msg, eAuthor, HKEY("rss"));
+                                       CM_SetField(r->msg, eAuthor, "rss");
                                }
 
                                if (CM_IsEmpty(r->msg, eTimestamp)) {
                                        CM_SetFieldLONG(r->msg, eTimestamp, time(NULL));
                                }
 
-                               // Save it to the room(s)
-                               struct rssroom *rr = NULL;
-                               long msgnum = (-1);
-                               for (rr=r->rooms; rr!=NULL; rr=rr->next) {
-                                       if (rr == r->rooms) {
-                                               msgnum = CtdlSubmitMsg(r->msg, NULL, rr->room);         // in first room, save msg
-                                       }
-                                       else {
-                                               CtdlSaveMsgPointerInRoom(rr->room, msgnum, 0, NULL);    // elsewhere, save a pointer
-                                       }
-                                       syslog(LOG_DEBUG, "rssclient: saved message %ld to %s", msgnum, rr->room);
-                               }
+                               CtdlSubmitMsg(r->msg, NULL, r->room);
                        }
                        else {
                                syslog(LOG_DEBUG, "rssclient: already seen %s", r->item_id);
@@ -278,11 +270,14 @@ void rss_handle_data(void *data, const char *content, int length) {
 
 
 // Feed has been downloaded, now parse it.
-void rss_parse_feed(StrBuf *Feed, struct rssroom *rooms) {
+// `Feed` is the actual RSS or Atom document downloaded from the site.
+// `url` is a string containing the feed URL; `room` is the name of the room the feed's items are saved to.
+void rss_parse_feed(StrBuf *Feed, char *url, char *room) {
        struct rssparser r;
 
        memset(&r, 0, sizeof r);
-       r.rooms = rooms;
+       strcpy(r.url, url);
+       strcpy(r.room, room);
        XML_Parser p = XML_ParserCreate("UTF-8");
        XML_SetElementHandler(p, rss_start_element, rss_end_element);
        XML_SetCharacterDataHandler(p, rss_handle_data);
@@ -292,43 +287,12 @@ void rss_parse_feed(StrBuf *Feed, struct rssroom *rooms) {
 }
 
 
-// Add a feed/room pair into the todo list
-void rssclient_push_todo(char *rssurl, char *roomname) {
-       struct rssurl *r = NULL;
-       struct rssurl *thisone = NULL;
-       struct rssroom *newroom = NULL;
-
-       syslog(LOG_DEBUG, "rssclient: will fetch %s to %s", rssurl, roomname);
-
-       for (r=rsstodo; r!=NULL; r=r->next) {
-               if (!strcasecmp(r->url, rssurl)) {
-                       thisone = r;
-               }
-       }
-
-       if (thisone == NULL) {
-               thisone = malloc(sizeof(struct rssurl));
-               thisone->url = strdup(rssurl);
-               thisone->rooms = NULL;
-               thisone->next = rsstodo;
-               rsstodo = thisone;
-       }
-
-       newroom = malloc(sizeof(struct rssroom));
-       newroom->room = strdup(roomname);
-       newroom->next = thisone->rooms;
-       thisone->rooms = newroom;
-}
-
-
-// pull one feed (possibly multiple rooms)
-void rss_pull_one_feed(struct rssurl *url) {
+// pull one RSS feed and save it to a room
+void rss_pull_one_feed(char *url, char *room) {
        CURL *curl;
        CURLcode res;
        StrBuf *Downloaded = NULL;
 
-       syslog(LOG_DEBUG, "rssclient: fetching %s", url->url);
-
        curl = curl_easy_init();
        if (!curl) {
                return;
@@ -336,7 +300,8 @@ void rss_pull_one_feed(struct rssurl *url) {
 
        Downloaded = NewStrBuf();
 
-       curl_easy_setopt(curl, CURLOPT_URL, url->url);
+       syslog(LOG_DEBUG, "rssclient: fetching %s", url);
+       curl_easy_setopt(curl, CURLOPT_URL, url);
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
        curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);                     // Follow redirects
@@ -349,38 +314,19 @@ void rss_pull_one_feed(struct rssurl *url) {
        }
        curl_easy_cleanup(curl);
 
-       rss_parse_feed(Downloaded, url->rooms);                                 // parse the feed
+       rss_parse_feed(Downloaded, url, room);
        FreeStrBuf(&Downloaded);                                                // free the downloaded feed data
 }
 
 
-// We have a list, now download the feeds
-void rss_pull_feeds(void) {
-       struct rssurl *r;
-       struct rssroom *rr;
-
-       while ((rsstodo != NULL) && (!server_shutting_down)) {
-               rss_pull_one_feed(rsstodo);
-               r = rsstodo;
-               rsstodo = rsstodo->next;
-               while (r->rooms != NULL) {
-                       rr = r->rooms;
-                       r->rooms = r->rooms->next;
-                       free(rr->room);
-                       free(rr);
-               }
-               free(r->url);
-               free(r);
-       }
-}
-
-
 // Scan a room's netconfig looking for RSS feed parsing requests
 void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) {
        char *serialized_config = NULL;
        int num_configs = 0;
        char cfgline[SIZ];
+       struct rssfeed one_feed;
        int i = 0;
+       Array *feeds = (Array *)data;
 
        if (server_shutting_down) return;
 
@@ -398,7 +344,9 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) {
                        if (vbar != NULL) {
                                *vbar = 0;
                        }
-                       rssclient_push_todo(cfgline, qrbuf->QRname);
+                       safestrncpy(one_feed.url, cfgline, SIZ);
+                       safestrncpy(one_feed.room, qrbuf->QRname, ROOMNAMELEN);
+                       array_append(feeds, &one_feed);
                }
        }
 
@@ -421,8 +369,19 @@ void rssclient_scan(void) {
        }
 
        syslog(LOG_DEBUG, "rssclient: started");
-       CtdlForEachRoom(rssclient_scan_room, NULL);
-       rss_pull_feeds();
+       Array *feeds = array_new(sizeof(struct rssfeed));
+       if (feeds == NULL) {
+               syslog(LOG_DEBUG, "rssclient: cannot allocate memory for feed list");
+               return;
+       }
+       CtdlForEachRoom(rssclient_scan_room, feeds);
+
+       for (int i=0; i<array_len(feeds); ++i) {
+               struct rssfeed *r = (struct rssfeed *) array_get_element_at(feeds, i);
+               rss_pull_one_feed(r->url, r->room);
+       }
+
+       array_free(feeds);
        syslog(LOG_DEBUG, "rssclient: ended");
        last_run = time(NULL);
        return;