-/*
- * Bring external RSS and/or Atom feeds into rooms. This module implements a
- * very loose parser that scrapes both kinds of feeds and is not picky about
- * the standards compliance of the source data.
- *
- * Copyright (c) 2007-2022 by the citadel.org team
- *
- * This program is open source software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 3.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
+// Bring external RSS and/or Atom feeds into rooms. This module implements a
+// very loose parser that scrapes both kinds of feeds and is not picky about
+// the standards compliance of the source data.
+//
+// Copyright (c) 2007-2023 by the citadel.org team
+//
+// This program is open source software. Use, duplication, or disclosure
+// is subject to the terms of the GNU General Public License, version 3.
#include <stdlib.h>
#include <unistd.h>
#include <expat.h>
#include <curl/curl.h>
#include <libcitadel.h>
-#include "../../citadel.h"
+#include "../../citadel_defs.h"
#include "../../server.h"
#include "../../citserver.h"
#include "../../support.h"
#include "../../context.h"
#include "../../internet_addressing.h"
-struct rssroom {
- struct rssroom *next;
- char *room;
-};
-
-struct rssurl {
- struct rssurl *next;
- char *url;
- struct rssroom *rooms;
+struct rssfeed {
+ char url[SIZ]; // string containing the URL of an RSS or Atom feed
+ char room[ROOMNAMELEN]; // the name of the room which is pulling this feed
};
struct rssparser {
+ char url[SIZ];
+ char room[ROOMNAMELEN];
StrBuf *CData;
struct CtdlMessage *msg;
char *link;
char *description;
char *item_id;
- struct rssroom *rooms;
};
time_t last_run = 0L;
-struct rssurl *rsstodo = NULL;
// This handler is called whenever an XML tag opens.
-//
void rss_start_element(void *data, const char *el, const char **attribute) {
struct rssparser *r = (struct rssparser *)data;
int i;
// This handler is called whenever an XML tag closes.
-//
void rss_end_element(void *data, const char *el) {
struct rssparser *r = (struct rssparser *)data;
StrBuf *encoded_field;
+ long msgnum;
- if (server_shutting_down) return; // shunt the whole operation if we're exiting
+ if (server_shutting_down) return; // shunt the whole operation if we're exiting
- if (StrLength(r->CData) > 0) { // strip leading/trailing whitespace from field
+ if (StrLength(r->CData) > 0) { // strip leading/trailing whitespace from field
StrBufTrim(r->CData);
}
- if ( // end of a new item(rss) or entry(atom)
- (!strcasecmp(el, "entry"))
- || (!strcasecmp(el, "item"))
- ) {
- if (r->msg != NULL) { // Save the message to the rooms
+ if ((!strcasecmp(el, "entry")) || (!strcasecmp(el, "item"))) { // end of a new item(rss) or entry(atom)
+ if (r->msg != NULL) { // Save the message to the room
// use the link as an item id if nothing else is available
if ((r->item_id == NULL) && (r->link != NULL)) {
if (already_seen == 0) {
// Compose the message text
+
StrBuf *TheMessage = NewStrBuf();
- StrBufAppendPrintf(TheMessage,
- "Content-type: text/html\n\n"
- "\n\n"
- "<html><head></head><body>"
- );
-
+ StrBufAppendPrintf(TheMessage, "<html><head></head><body>");
+
if (r->description != NULL) {
StrBufAppendPrintf(TheMessage, "%s<br><br>\r\n", r->description);
free(r->description);
r->description = NULL;
}
-
+
if (r->link != NULL) {
StrBufAppendPrintf(TheMessage, "<a href=\"%s\">%s</a>\r\n", r->link, r->link);
free(r->link);
r->link = NULL;
}
-
+
StrBufAppendPrintf(TheMessage, "</body></html>\r\n");
- CM_SetField(r->msg, eMesageText, ChrPtr(TheMessage), StrLength(TheMessage));
+
+ // Quoted-Printable encode the HTML message, because RSS and Atom make no guarantee of line length limits.
+ StrBuf *TheMessage_Encoded = StrBufQuotedPrintableEncode(TheMessage);
+
+ // Now we reuse TheMessage -- this time it will contain the MIME headers concatenated with the encoded message.
+ FlushStrBuf(TheMessage);
+ StrBufAppendBufPlain(TheMessage, HKEY(
+ "Content-type: text/html; charset=UTF-8\r\n"
+ "Content-Transfer-Encoding: quoted-printable\r\n"
+ "\r\n"
+ ), 0
+ );
+ StrBufAppendBuf(TheMessage, TheMessage_Encoded, 0);
+ FreeStrBuf(&TheMessage_Encoded);
+
+ CM_SetField(r->msg, eMessageText, ChrPtr(TheMessage));
FreeStrBuf(&TheMessage);
-
+
if (CM_IsEmpty(r->msg, eAuthor)) {
- CM_SetField(r->msg, eAuthor, HKEY("rss"));
+ CM_SetField(r->msg, eAuthor, "rss");
}
-
+
if (CM_IsEmpty(r->msg, eTimestamp)) {
CM_SetFieldLONG(r->msg, eTimestamp, time(NULL));
}
-
- // Save it to the room(s)
- struct rssroom *rr = NULL;
- long msgnum = (-1);
- for (rr=r->rooms; rr!=NULL; rr=rr->next) {
- if (rr == r->rooms) {
- msgnum = CtdlSubmitMsg(r->msg, NULL, rr->room); // in first room, save msg
- }
- else {
- CtdlSaveMsgPointerInRoom(rr->room, msgnum, 0, NULL); // elsewhere, save a pointer
- }
- syslog(LOG_DEBUG, "rssclient: saved message %ld to %s", msgnum, rr->room);
- }
+
+ CtdlSubmitMsg(r->msg, NULL, r->room);
}
else {
syslog(LOG_DEBUG, "rssclient: already seen %s", r->item_id);
}
-
+
CM_Free(r->msg);
r->msg = NULL;
}
// This handler is called whenever data appears between opening and closing tags.
-//
-void rss_handle_data(void *data, const char *content, int length)
-{
+void rss_handle_data(void *data, const char *content, int length) {
struct rssparser *r = (struct rssparser *)data;
if (r->CData == NULL) {
// Feed has been downloaded, now parse it.
-//
-void rss_parse_feed(StrBuf *Feed, struct rssroom *rooms)
-{
+// `Feed` is the downloaded RSS or Atom document.
+// `url` is a string containing the feed URL.
+// `room` is the name of the room that receives the feed's items.
+void rss_parse_feed(StrBuf *Feed, char *url, char *room) {
struct rssparser r;
memset(&r, 0, sizeof r);
- r.rooms = rooms;
+ // Bounded copies: the destination fields are fixed-size arrays, so do not rely on the caller's string length.
+ safestrncpy(r.url, url, sizeof r.url);
+ safestrncpy(r.room, room, sizeof r.room);
XML_Parser p = XML_ParserCreate("UTF-8");
XML_SetElementHandler(p, rss_start_element, rss_end_element);
XML_SetCharacterDataHandler(p, rss_handle_data);
}
-// Add a feed/room pair into the todo list
-//
-void rssclient_push_todo(char *rssurl, char *roomname)
-{
- struct rssurl *r = NULL;
- struct rssurl *thisone = NULL;
- struct rssroom *newroom = NULL;
-
- syslog(LOG_DEBUG, "rssclient: will fetch %s to %s", rssurl, roomname);
-
- for (r=rsstodo; r!=NULL; r=r->next) {
- if (!strcasecmp(r->url, rssurl)) {
- thisone = r;
- }
- }
-
- if (thisone == NULL) {
- thisone = malloc(sizeof(struct rssurl));
- thisone->url = strdup(rssurl);
- thisone->rooms = NULL;
- thisone->next = rsstodo;
- rsstodo = thisone;
- }
-
- newroom = malloc(sizeof(struct rssroom));
- newroom->room = strdup(roomname);
- newroom->next = thisone->rooms;
- thisone->rooms = newroom;
-}
-
-
-// pull one feed (possibly multiple rooms)
-//
-void rss_pull_one_feed(struct rssurl *url)
-{
+// Pull one RSS or Atom feed and save its items to a room
+void rss_pull_one_feed(char *url, char *room) {
CURL *curl;
CURLcode res;
StrBuf *Downloaded = NULL;
- syslog(LOG_DEBUG, "rssclient: fetching %s", url->url);
-
curl = curl_easy_init();
if (!curl) {
return;
Downloaded = NewStrBuf();
- curl_easy_setopt(curl, CURLOPT_URL, url->url);
+ syslog(LOG_DEBUG, "rssclient: fetching %s", url);
+ curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); // Follow redirects
}
curl_easy_cleanup(curl);
- rss_parse_feed(Downloaded, url->rooms); // parse the feed
+ rss_parse_feed(Downloaded, url, room);
FreeStrBuf(&Downloaded); // free the downloaded feed data
}
-// We have a list, now download the feeds
-//
-void rss_pull_feeds(void)
-{
- struct rssurl *r;
- struct rssroom *rr;
-
- while ((rsstodo != NULL) && (!server_shutting_down)) {
- rss_pull_one_feed(rsstodo);
- r = rsstodo;
- rsstodo = rsstodo->next;
- while (r->rooms != NULL) {
- rr = r->rooms;
- r->rooms = r->rooms->next;
- free(rr->room);
- free(rr);
- }
- free(r->url);
- free(r);
- }
-}
-
-
// Scan a room's netconfig looking for RSS feed parsing requests
-//
-void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
-{
+void rssclient_scan_room(struct ctdlroom *qrbuf, void *data) {
char *serialized_config = NULL;
int num_configs = 0;
char cfgline[SIZ];
+ struct rssfeed one_feed;
int i = 0;
+ Array *feeds = (Array *)data;
if (server_shutting_down) return;
if (vbar != NULL) {
*vbar = 0;
}
- rssclient_push_todo(cfgline, qrbuf->QRname);
+ safestrncpy(one_feed.url, cfgline, SIZ);
+ safestrncpy(one_feed.room, qrbuf->QRname, ROOMNAMELEN);
+ array_append(feeds, &one_feed);
}
}
}
-/*
- * Scan for rooms that have RSS client requests configured
- */
+// Scan for rooms that have RSS client requests configured
void rssclient_scan(void) {
time_t now = time(NULL);
- /* Run no more than once every 15 minutes. */
+ // Run no more than once every 15 minutes.
if ((now - last_run) < 900) {
syslog(LOG_DEBUG,
"rssclient: polling interval not yet reached; last run was %ldm%lds ago",
}
syslog(LOG_DEBUG, "rssclient: started");
- CtdlForEachRoom(rssclient_scan_room, NULL);
- rss_pull_feeds();
+ Array *feeds = array_new(sizeof(struct rssfeed));
+ if (feeds == NULL) {
+ syslog(LOG_DEBUG, "rssclient: cannot allocate memory for feed list");
+ return;
+ }
+ CtdlForEachRoom(rssclient_scan_room, feeds);
+
+ // Each pull downloads and parses a whole feed, so stop as soon as the server begins shutting down.
+ for (int i=0; (i<array_len(feeds)) && (!server_shutting_down); ++i) {
+ struct rssfeed *r = (struct rssfeed *) array_get_element_at(feeds, i);
+ rss_pull_one_feed(r->url, r->room);
+ }
+
+ array_free(feeds);
syslog(LOG_DEBUG, "rssclient: ended");
last_run = time(NULL);
return;