code.citadel.org Git - citadel.git/blobdiff - citadel/server/modules/rssclient/serv_rssclient.c
dammit, learn to spell
[citadel.git] / citadel / server / modules / rssclient / serv_rssclient.c
index 8021ebdba484e34e7836b84423d322c5a04e3374..995bd87da898d8931c677215814ad568fd25d967 100644 (file)
@@ -1,18 +1,11 @@
-/*
- * Bring external RSS and/or Atom feeds into rooms.  This module implements a
- * very loose parser that scrapes both kinds of feeds and is not picky about
- * the standards compliance of the source data.
- *
- * Copyright (c) 2007-2022 by the citadel.org team
- *
- * This program is open source software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 3.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
+// Bring external RSS and/or Atom feeds into rooms.  This module implements a
+// very loose parser that scrapes both kinds of feeds and is not picky about
+// the standards compliance of the source data.
+//
+// Copyright (c) 2007-2023 by the citadel.org team
+//
+// This program is open source software.  Use, duplication, or disclosure
+// is subject to the terms of the GNU General Public License, version 3.
 
 #include <stdlib.h>
 #include <unistd.h>
@@ -26,7 +19,7 @@
 #include <expat.h>
 #include <curl/curl.h>
 #include <libcitadel.h>
-#include "../../citadel.h"
+#include "../../citadel_defs.h"
 #include "../../server.h"
 #include "../../citserver.h"
 #include "../../support.h"
 #include "../../context.h"
 #include "../../internet_addressing.h"
 
-struct rssroom {
-       struct rssroom *next;
-       char *room;
-};
-
-struct rssurl {
-       struct rssurl *next;
-       char *url;
-       struct rssroom *rooms;
+struct rssfeed {
+       char url[SIZ];                  // string containing the URL of an RSS or Atom feed
+       char room[ROOMNAMELEN];         // the name of the room which is pulling this feed
 };
 
 struct rssparser {
+       char url[SIZ];
+       char room[ROOMNAMELEN];
        StrBuf *CData;
        struct CtdlMessage *msg;
        char *link;
        char *description;
        char *item_id;
-       struct rssroom *rooms;
 };
 
 time_t last_run = 0L;
-struct rssurl *rsstodo = NULL;
 
 
 // This handler is called whenever an XML tag opens.
-//
 void rss_start_element(void *data, const char *el, const char **attribute) {
        struct rssparser *r = (struct rssparser *)data;
        int i;
@@ -104,22 +90,19 @@ void rss_start_element(void *data, const char *el, const char **attribute) {
 
 
 // This handler is called whenever an XML tag closes.
-//
 void rss_end_element(void *data, const char *el) {
        struct rssparser *r = (struct rssparser *)data;
        StrBuf *encoded_field;
+       long msgnum;
 
-       if (server_shutting_down) return;                       // shunt the whole operation if we're exiting
+       if (server_shutting_down) return;                                       // shunt the whole operation if we're exiting
 
-       if (StrLength(r->CData) > 0) {                          // strip leading/trailing whitespace from field
+       if (StrLength(r->CData) > 0) {                                          // strip leading/trailing whitespace from field
                StrBufTrim(r->CData);
        }
 
-       if (                                                    // end of a new item(rss) or entry(atom)
-               (!strcasecmp(el, "entry"))
-               || (!strcasecmp(el, "item"))
-       ) {
-               if (r->msg != NULL) {                           // Save the message to the rooms
+       if ((!strcasecmp(el, "entry")) || (!strcasecmp(el, "item"))) {          // end of a new item(rss) or entry(atom)
+               if (r->msg != NULL) {                                           // Save the message to the room
 
                        // use the link as an item id if nothing else is available
                        if ((r->item_id == NULL) && (r->link != NULL)) {
@@ -135,54 +118,55 @@ void rss_end_element(void *data, const char *el) {
                        if (already_seen == 0) {
 
                                // Compose the message text
+
                                StrBuf *TheMessage = NewStrBuf();
-                               StrBufAppendPrintf(TheMessage,
-                                       "Content-type: text/html\n\n"
-                                       "\n\n"
-                                       "<html><head></head><body>"
-                               );
-               
+                               StrBufAppendPrintf(TheMessage, "<html><head></head><body>");
+
                                if (r->description != NULL) {
                                        StrBufAppendPrintf(TheMessage, "%s<br><br>\r\n", r->description);
                                        free(r->description);
                                        r->description = NULL;
                                }
-               
+
                                if (r->link != NULL) {
                                        StrBufAppendPrintf(TheMessage, "<a href=\"%s\">%s</a>\r\n", r->link, r->link);
                                        free(r->link);
                                        r->link = NULL;
                                }
-       
+
                                StrBufAppendPrintf(TheMessage, "</body></html>\r\n");
-                               CM_SetField(r->msg, eMesageText, ChrPtr(TheMessage), StrLength(TheMessage));
+
+                               // Quoted-Printable encode the HTML message, because RSS and Atom make no guarantee of line length limits.
+                               StrBuf *TheMessage_Encoded = StrBufQuotedPrintableEncode(TheMessage);
+
+                               // Now we reuse TheMessage -- this time it will contain the MIME headers concatenated with the encoded message.
+                               FlushStrBuf(TheMessage);
+                               StrBufAppendBufPlain(TheMessage, HKEY(
+                                       "Content-type: text/html; charset=UTF-8\r\n"
+                                       "Content-Transfer-Encoding: quoted-printable\r\n"
+                                       "\r\n"
+                                       ), 0
+                               );
+                               StrBufAppendBuf(TheMessage, TheMessage_Encoded, 0);
+                               FreeStrBuf(&TheMessage_Encoded);
+
+                               CM_SetField(r->msg, eMessageText, ChrPtr(TheMessage));
                                FreeStrBuf(&TheMessage);
-       
+
                                if (CM_IsEmpty(r->msg, eAuthor)) {
-                                       CM_SetField(r->msg, eAuthor, HKEY("rss"));
+                                       CM_SetField(r->msg, eAuthor, "rss");
                                }
-       
+
                                if (CM_IsEmpty(r->msg, eTimestamp)) {
                                        CM_SetFieldLONG(r->msg, eTimestamp, time(NULL));
                                }
-       
-                               // Save it to the room(s)
-                               struct rssroom *rr = NULL;
-                               long msgnum = (-1);
-                               for (rr=r->rooms; rr!=NULL; rr=rr->next) {
-                                       if (rr == r->rooms) {
-                                               msgnum = CtdlSubmitMsg(r->msg, NULL, rr->room);         // in first room, save msg
-                                       }
-                                       else {
-                                               CtdlSaveMsgPointerInRoom(rr->room, msgnum, 0, NULL);    // elsewhere, save a pointer
-                                       }
-                                       syslog(LOG_DEBUG, "rssclient: saved message %ld to %s", msgnum, rr->room);
-                               }
+
+                               CtdlSubmitMsg(r->msg, NULL, r->room);
                        }
                        else {
                                syslog(LOG_DEBUG, "rssclient: already seen %s", r->item_id);
                        }
-       
+
                        CM_Free(r->msg);
                        r->msg = NULL;
                }
@@ -274,9 +258,7 @@ void rss_end_element(void *data, const char *el) {
 
 
 // This handler is called whenever data appears between opening and closing tags.
-//
-void rss_handle_data(void *data, const char *content, int length)
-{
+void rss_handle_data(void *data, const char *content, int length) {
        struct rssparser *r = (struct rssparser *)data;
 
        if (r->CData == NULL) {
@@ -288,13 +270,14 @@ void rss_handle_data(void *data, const char *content, int length)
 
 
 // Feed has been downloaded, now parse it.
-//
-void rss_parse_feed(StrBuf *Feed, struct rssroom *rooms)
-{
+// `Feed` is the actual RSS downloaded from the site.
+// `url` is a string containing the feed URL
+void rss_parse_feed(StrBuf *Feed, char *url, char *room) {
        struct rssparser r;
 
        memset(&r, 0, sizeof r);
-       r.rooms = rooms;
+       strcpy(r.url, url);
+       strcpy(r.room, room);
        XML_Parser p = XML_ParserCreate("UTF-8");
        XML_SetElementHandler(p, rss_start_element, rss_end_element);
        XML_SetCharacterDataHandler(p, rss_handle_data);
@@ -304,47 +287,12 @@ void rss_parse_feed(StrBuf *Feed, struct rssroom *rooms)
 }
 
 
-// Add a feed/room pair into the todo list
-//
-void rssclient_push_todo(char *rssurl, char *roomname)
-{
-       struct rssurl *r = NULL;
-       struct rssurl *thisone = NULL;
-       struct rssroom *newroom = NULL;
-
-       syslog(LOG_DEBUG, "rssclient: will fetch %s to %s", rssurl, roomname);
-
-       for (r=rsstodo; r!=NULL; r=r->next) {
-               if (!strcasecmp(r->url, rssurl)) {
-                       thisone = r;
-               }
-       }
-
-       if (thisone == NULL) {
-               thisone = malloc(sizeof(struct rssurl));
-               thisone->url = strdup(rssurl);
-               thisone->rooms = NULL;
-               thisone->next = rsstodo;
-               rsstodo = thisone;
-       }
-
-       newroom = malloc(sizeof(struct rssroom));
-       newroom->room = strdup(roomname);
-       newroom->next = thisone->rooms;
-       thisone->rooms = newroom;
-}
-
-
-// pull one feed (possibly multiple rooms)
-//
-void rss_pull_one_feed(struct rssurl *url)
-{
+// pull one RSS feed and save it to a room
+void rss_pull_one_feed(char *url, char *room) {
        CURL *curl;
        CURLcode res;
        StrBuf *Downloaded = NULL;
 
-       syslog(LOG_DEBUG, "rssclient: fetching %s", url->url);
-
        curl = curl_easy_init();
        if (!curl) {
                return;
@@ -352,7 +300,8 @@ void rss_pull_one_feed(struct rssurl *url)
 
        Downloaded = NewStrBuf();
 
-       curl_easy_setopt(curl, CURLOPT_URL, url->url);
+       syslog(LOG_DEBUG, "rssclient: fetching %s", url);
+       curl_easy_setopt(curl, CURLOPT_URL, url);
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
        curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);                     // Follow redirects
@@ -365,42 +314,19 @@ void rss_pull_one_feed(struct rssurl *url)
        }
        curl_easy_cleanup(curl);
 
-       rss_parse_feed(Downloaded, url->rooms);                                 // parse the feed
+       rss_parse_feed(Downloaded, url, room);
        FreeStrBuf(&Downloaded);                                                // free the downloaded feed data
 }
 
 
-// We have a list, now download the feeds
-//
-void rss_pull_feeds(void)
-{
-       struct rssurl *r;
-       struct rssroom *rr;
-
-       while ((rsstodo != NULL) && (!server_shutting_down)) {
-               rss_pull_one_feed(rsstodo);
-               r = rsstodo;
-               rsstodo = rsstodo->next;
-               while (r->rooms != NULL) {
-                       rr = r->rooms;
-                       r->rooms = r->rooms->next;
-                       free(rr->room);
-                       free(rr);
-               }
-               free(r->url);
-               free(r);
-       }
-}
-
-
 // Scan a room's netconfig looking for RSS feed parsing requests
-//
-void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
-{
+void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) {
        char *serialized_config = NULL;
        int num_configs = 0;
        char cfgline[SIZ];
+       struct rssfeed one_feed;
        int i = 0;
+       Array *feeds = (Array *)data;
 
        if (server_shutting_down) return;
 
@@ -418,7 +344,9 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
                        if (vbar != NULL) {
                                *vbar = 0;
                        }
-                       rssclient_push_todo(cfgline, qrbuf->QRname);
+                       safestrncpy(one_feed.url, cfgline, SIZ);
+                       safestrncpy(one_feed.room, qrbuf->QRname, ROOMNAMELEN);
+                       array_append(feeds, &one_feed);
                }
        }
 
@@ -426,13 +354,11 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
 }
 
 
-/*
- * Scan for rooms that have RSS client requests configured
- */
+// Scan for rooms that have RSS client requests configured
 void rssclient_scan(void) {
        time_t now = time(NULL);
 
-       /* Run no more than once every 15 minutes. */
+       // Run no more than once every 15 minutes.
        if ((now - last_run) < 900) {
                syslog(LOG_DEBUG,
                        "rssclient: polling interval not yet reached; last run was %ldm%lds ago",
@@ -443,8 +369,19 @@ void rssclient_scan(void) {
        }
 
        syslog(LOG_DEBUG, "rssclient: started");
-       CtdlForEachRoom(rssclient_scan_room, NULL);
-       rss_pull_feeds();
+       Array *feeds = array_new(sizeof(struct rssfeed));
+       if (feeds == NULL) {
+               syslog(LOG_DEBUG, "rssclient: cannot allocate memory for feed list");
+               return;
+       }
+       CtdlForEachRoom(rssclient_scan_room, feeds);
+
+       for (int i=0; i<array_len(feeds); ++i) {
+               struct rssfeed *r = (struct rssfeed *) array_get_element_at(feeds, i);
+               rss_pull_one_feed(r->url, r->room);
+       }
+
+       array_free(feeds);
        syslog(LOG_DEBUG, "rssclient: ended");
        last_run = time(NULL);
        return;