2 * Bring external RSS feeds into rooms.
4 * Copyright (c) 2007-2010 by the citadel.org team
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 3 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #if TIME_WITH_SYS_TIME
26 # include <sys/time.h>
30 # include <sys/time.h>
39 #include <sys/types.h>
42 #include <curl/curl.h>
43 #include <libcitadel.h>
46 #include "citserver.h"
50 #include "ctdl_module.h"
52 #include "parsedate.h"
54 #include "citadel_dirs.h"
57 #include "event_client.h"
58 #include "rss_atom_parser.h"
// Head of the singly linked list of feed requests. rssclient_scan_room walks it
// through the next pointers when looking for an existing entry for a URL, and
// rssclient_scan consumes it from the front.
61 struct rssnetcfg *rnclist = NULL;
// AppendLink: append an HTML anchor element for link to Message.
//   Message   - buffer that receives the generated markup
//   link      - target URL, appended between the href quotes
//   LinkTitle - preferred anchor text, used when it is non-empty
//   Title     - fallback anchor text (C string), used when LinkTitle is empty
//               and Title is neither NULL nor an empty string
// NOTE(review): this listing carries stray leading numbers and lacks the lines
// that held braces (and probably one else), so the nesting described below is
// inferred from the statement order - confirm against the upstream file.
62 void AppendLink(StrBuf *Message, StrBuf *link, StrBuf *LinkTitle, const char *Title)
// The statements below are presumably all guarded by this non-empty check.
64 if (StrLength(link) > 0)
// Opening tag. No escaping call is visible for the URL - verify that the
// StrBuf append helpers are safe for feed-supplied text.
66 StrBufAppendBufPlain(Message, HKEY("<a href=\""), 0);
67 StrBufAppendBuf(Message, link, 0);
68 StrBufAppendBufPlain(Message, HKEY("\">"), 0);
// Anchor text: LinkTitle first, then the caller-supplied Title.
69 if (StrLength(LinkTitle) > 0)
70 StrBufAppendBuf(Message, LinkTitle, 0);
71 else if ((Title != NULL) && !IsEmptyStr(Title))
72 StrBufAppendBufPlain(Message, Title, -1, 0);
// Last resort: the URL itself as anchor text. NOTE(review): the else that
// presumably introduces this line is absent from the listing - confirm.
74 StrBufAppendBuf(Message, link, 0);
// Close the anchor and end the line.
75 StrBufAppendBufPlain(Message, HKEY("</a><br>\n"), 0);
79 * Commit a fetched and parsed RSS item to disk
// rss_save_item: submit one parsed feed item as a message addressed to the
// rooms listed in ri->roomlist, unless its identifier is already present in
// CDB_USETABLE; the use-table record is written in either case.
//   ri - the parsed item. Several members are modified here: guid and title are
//        normalised in place, description and link are created when NULL, and
//        the author buffers are passed to SmashStrBuf.
// NOTE(review): this listing carries stray leading numbers and is missing
// lines (braces, else branches, several declarations such as ut, utmsgid, i,
// msglen, Message, From, FromAt, Sbj and len). Comments below describe only the
// visible statements; branch structure is inferred - confirm against upstream.
81 void rss_save_item(rss_item *ri)
84 struct MD5Context md5context;
85 u_char rawdigest[MD5_DIGEST_LEN];
88 struct cdbdata *cdbut;
90 struct CtdlMessage *msg;
91 struct recptypes *recp = NULL;
// Build a zeroed recipient record whose room list is a private copy of
// ri->roomlist; the room count is the number of tokens separated by the pipe
// character.
95 recp = (struct recptypes *) malloc(sizeof(struct recptypes));
96 if (recp == NULL) return;
97 memset(recp, 0, sizeof(struct recptypes));
98 memset(&ut, 0, sizeof(struct UseTable));
// NOTE(review): the strdup result is not checked for NULL.
99 recp->recp_room = strdup(ri->roomlist);
100 recp->num_room = num_tokens(ri->roomlist, '|');
101 recp->recptypes_magic = RECPTYPES_MAGIC;
103 /* Construct a GUID to use in the S_USETABLE table.
104 * If one is not present in the item itself, make one up.
106 if (ri->guid != NULL) {
107 StrBufSpaceToBlank(ri->guid);
108 StrBufTrim(ri->guid);
// Key for items that carry a guid: the prefix rss/ followed by the guid,
// truncated by snprintf to the size of utmsgid.
109 snprintf(utmsgid, sizeof utmsgid, "rss/%s", ChrPtr(ri->guid));
// No guid (presumably the else branch; the line introducing it is not in this
// listing): hash whichever of title and link are present with MD5.
112 MD5Init(&md5context);
113 if (ri->title != NULL) {
114 MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->title), StrLength(ri->title));
116 if (ri->link != NULL) {
117 MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->link), StrLength(ri->link));
119 MD5Final(rawdigest, &md5context);
// Hex-encode the digest two characters per byte, then force lower case.
120 for (i=0; i<MD5_DIGEST_LEN; i++) {
121 sprintf(&utmsgid[i*2], "%02X", (unsigned char) (rawdigest[i] & 0xff));
122 utmsgid[i*2] = tolower(utmsgid[i*2]);
123 utmsgid[(i*2)+1] = tolower(utmsgid[(i*2)+1]);
// Suffix appended to the hex digest to form the final key.
125 strcat(utmsgid, "_rss2ctdl");
128 /* Find out if we've already seen this item */
130 cdbut = cdb_fetch(CDB_USETABLE, utmsgid, strlen(utmsgid));
133 /* Item has already been seen */
134 CtdlLogPrintf(CTDL_DEBUG, "%s has already been seen\n", utmsgid);
137 /* rewrite the record anyway, to update the timestamp */
// NOTE(review): strcpy is unbounded; confirm ut.ut_msgid is at least as large
// as utmsgid.
138 strcpy(ut.ut_msgid, utmsgid);
139 ut.ut_timestamp = time(NULL);
140 cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) );
145 /* Item has not been seen, so save it. */
146 CtdlLogPrintf(CTDL_DEBUG, "RSS: saving item...\n");
// Guarantee a description buffer exists before it is normalised and measured.
147 if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY(""));
148 StrBufSpaceToBlank(ri->description);
// NOTE(review): the malloc result is passed to memset without a NULL check.
149 msg = malloc(sizeof(struct CtdlMessage));
150 memset(msg, 0, sizeof(struct CtdlMessage));
151 msg->cm_magic = CTDLMESSAGE_MAGIC;
152 msg->cm_anon_type = MES_NORMAL;
153 msg->cm_format_type = FMT_RFC822;
// Field E receives a copy of the item guid when there is one.
155 if (ri->guid != NULL) {
156 msg->cm_fields['E'] = strdup(ChrPtr(ri->guid));
// Author handling. The name is converted with html_to_ascii, written back
// into author_or_creator and trimmed. Fields A and P are then filled according
// to whether the name contains an @ sign and whether author_email is non-empty.
// NOTE(review): the lines separating the alternatives are missing from this
// listing; the three groups of assignments below are presumably mutually
// exclusive branches - TODO confirm. No free of From is visible either.
159 if (ri->author_or_creator != NULL) {
161 StrBuf *Encoded = NULL;
164 From = html_to_ascii(ChrPtr(ri->author_or_creator),
165 StrLength(ri->author_or_creator),
167 StrBufPlain(ri->author_or_creator, From, -1);
168 StrBufTrim(ri->author_or_creator);
171 FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL;
// Name without an @ sign plus a separate e-mail: encoded name goes to field A,
// the e-mail buffer to field P.
172 if (!FromAt && StrLength (ri->author_email) > 0)
174 StrBufRFC2047encode(&Encoded, ri->author_or_creator);
175 msg->cm_fields['A'] = SmashStrBuf(&Encoded);
176 msg->cm_fields['P'] = SmashStrBuf(&ri->author_email);
// Presumably the case where the name itself contains an @ sign: the buffer is
// used as field P.
181 msg->cm_fields['P'] = SmashStrBuf(&ri->author_or_creator);
// Presumably the remaining case: encoded name in field A and a fixed
// placeholder address in field P.
184 StrBufRFC2047encode(&Encoded, ri->author_or_creator);
185 msg->cm_fields['A'] = SmashStrBuf(&Encoded);
186 msg->cm_fields['P'] = strdup("rss@localhost");
// Presumably the fallback for items that name no author at all.
191 msg->cm_fields['A'] = strdup("rss");
// Field N is set to the local node name.
194 msg->cm_fields['N'] = strdup(NODENAME);
// Subject: the title is converted with html_to_ascii, RFC 2047 encoded and
// stored in field U.
195 if (ri->title != NULL) {
198 StrBuf *Encoded, *QPEncoded;
201 StrBufSpaceToBlank(ri->title);
202 len = StrLength(ri->title);
203 Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0);
// NOTE(review): as listed, len still holds the length of ri->title when it is
// used to index Sbj; a line recomputing it from Sbj may be missing. If len can
// be 0 here this reads one byte before the buffer - verify. The statements that
// strip the trailing newline, initialise QPEncoded and release Sbj are not
// visible in this listing.
205 if (Sbj[len - 1] == '\n')
210 Encoded = NewStrBufPlain(Sbj, len);
214 StrBufRFC2047encode(&QPEncoded, Encoded);
216 msg->cm_fields['U'] = SmashStrBuf(&QPEncoded);
217 FreeStrBuf(&Encoded);
// Field T holds ri->pubdate printed as a decimal number in a 64-byte buffer.
// NOTE(review): the malloc result is not checked before snprintf writes to it.
219 msg->cm_fields['T'] = malloc(64);
220 snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate);
// Field O receives the channel title when it is present and non-empty.
221 if (ri->channel_title != NULL) {
222 if (StrLength(ri->channel_title) > 0) {
223 msg->cm_fields['O'] = strdup(ChrPtr(ri->channel_title));
// Guarantee a link buffer exists before its length is taken.
226 if (ri->link == NULL)
227 ri->link = NewStrBufPlain(HKEY(""));
228 // TODO: reenable me ExpandShortUrls(ri->description);
// NOTE(review): msglen is accumulated here but no use of it is visible in this
// listing.
229 msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ;
// Assemble the message body: a content-type header, the description, a blank
// separator, the item link, the reply link and the closing tags. The buffer is
// created with the description length as its size hint.
231 Message = NewStrBufPlain(NULL, StrLength(ri->description));
233 StrBufPlain(Message, HKEY(
234 "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n"
237 StrBufAppendBuf(Message, ri->description, 0);
238 StrBufAppendBufPlain(Message, HKEY("<br><br>\n"), 0);
240 AppendLink(Message, ri->link, ri->linkTitle, NULL);
241 AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this");
242 StrBufAppendBufPlain(Message, HKEY("</body></html>\n"), 0);
// The finished body becomes field M of the message.
244 msg->cm_fields['M'] = SmashStrBuf(&Message);
// Submit to the recipient rooms, then release the message structure.
246 CtdlSubmitMsg(msg, recp, NULL, 0);
247 CtdlFreeMessage(msg);
249 /* write the uidl to the use table so we don't store this item again */
250 strcpy(ut.ut_msgid, utmsgid);
251 ut.ut_timestamp = time(NULL);
252 cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) );
// The recipient record is released before returning.
254 free_recipients(recp);
// rss_do_fetching: prepare and start an HTTP fetch of the feed described by
// Cfg.
//   Cfg - feed request; Url, rooms and next_poll are read here.
// NOTE(review): this listing carries stray leading numbers and is missing
// lines (declarations of now, ri, rssc, IO and chnd, the assignment of IO, the
// remaining evcurl_init arguments, and braces). Comments describe only the
// visible statements - confirm the rest against upstream.
264 void rss_do_fetching(rssnetcfg *Cfg) {
// Poll-time gate: the condition holds when a next_poll time is set and has not
// been reached yet. The guarded statement is not visible; presumably a return.
275 if ((Cfg->next_poll != 0) && (now < Cfg->next_poll))
// Allocate and zero the per-fetch item and collection records.
// NOTE(review): neither malloc result is checked before memset.
279 ri = (rss_item*) malloc(sizeof(rss_item));
280 rssc = (rsscollection*) malloc(sizeof(rsscollection));
281 memset(ri, 0, sizeof(rss_item));
282 memset(rssc, 0, sizeof(rsscollection));
286 IO->CitContext = CloneContext(CC);
// The item shares the room list pointer of Cfg; no copy is made.
// NOTE(review): rssclient_scan frees the rooms string of each entry right after
// calling this function. If the fetch started below completes later, this
// pointer may dangle - verify the lifetime.
288 ri->roomlist = Cfg->rooms;
291 CtdlLogPrintf(CTDL_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(Cfg->Url));
// The literal 80 is presumably the default port - TODO confirm against ParseURL.
292 ParseURL(&IO->ConnectMe, Cfg->Url, 80);
293 CurlPrepareURL(IO->ConnectMe);
// A false result from evcurl_init is logged as an alert. NOTE(review): no
// cleanup of ri or rssc is visible on this path.
295 if (! evcurl_init(IO,
298 "Citadel RSS Client",
301 CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n");
304 chnd = IO->HttpReq.chnd;
// Hand the prepared request to the event-driven curl layer.
306 evcurl_handle_start(IO);
313 * Scan a room's netconfig to determine whether it is requesting any RSS feeds
// rssclient_scan_room: read the netconfig file of one room and, for every line
// whose first field is rssclient, attach the room name to the request for that
// feed URL in rnclist, creating the request when no entry matches.
//   qrbuf - the room being scanned; its QRname is added to the room list
//   data  - not referenced in the visible lines
// NOTE(review): this listing carries stray leading numbers and is missing
// lines (declarations of buf, instr, feedurl, fp, ptr and len, the fopen
// failure check, the statement linking a new entry into rnclist, the separator
// append, fclose, and braces). Comments describe only the visible statements.
315 void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
317 char filename[PATH_MAX];
322 rssnetcfg *rncptr = NULL;
323 rssnetcfg *use_this_rncptr = NULL;
// Resolve the path of the netconfig file for this room.
327 assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir);
329 if (CtdlThreadCheckStop())
332 /* Only do net processing for rooms that have netconfigs */
333 fp = fopen(filename, "r");
// Read the file line by line until EOF or a thread stop request.
338 while (fgets(buf, sizeof buf, fp) != NULL && !CtdlThreadCheckStop()) {
// Drops the last character of the line, normally the newline kept by fgets.
// NOTE(review): a final line without a newline loses a real character, and a
// zero-length string would index buf at -1 - verify.
339 buf[strlen(buf)-1] = 0;
// Field 0 of the pipe-separated line is the directive name.
341 extract_token(instr, buf, 0, '|', sizeof instr);
342 if (!strcasecmp(instr, "rssclient")) {
344 use_this_rncptr = NULL;
// Field 1 is the feed URL.
346 extract_token(feedurl, buf, 1, '|', sizeof feedurl);
348 /* If any other rooms have requested the same feed, then we will just add this
349 * room to the target list for that client request.
351 for (rncptr=rnclist; rncptr!=NULL; rncptr=rncptr->next) {
352 if (!strcmp(ChrPtr(rncptr->Url), feedurl)) {
353 use_this_rncptr = rncptr;
357 /* Otherwise create a new client request */
358 if (use_this_rncptr == NULL) {
// NOTE(review): memset and the ItemType store dereference rncptr before the
// NULL check that follows, so a failed malloc is not actually guarded.
359 rncptr = (rssnetcfg *) malloc(sizeof(rssnetcfg));
360 memset(rncptr, 0, sizeof(rssnetcfg));
361 rncptr->ItemType = RSS_UNSET;
362 if (rncptr != NULL) {
// The new entry points at the current list head. The statement that makes it
// the new head is not visible in this listing.
363 rncptr->next = rnclist;
364 rncptr->Url = NewStrBufPlain(feedurl, -1);
365 rncptr->rooms = NULL;
367 use_this_rncptr = rncptr;
371 /* Add the room name to the request */
372 if (use_this_rncptr != NULL) {
373 if (use_this_rncptr->rooms == NULL) {
// NOTE(review): this stores through rncptr although the test above is on
// use_this_rncptr. The two are equal for a freshly created entry; for an entry
// found by the search loop rncptr may be NULL by now - verify.
374 rncptr->rooms = strdup(qrbuf->QRname);
// Existing list: grow the string and append this room name. The 5 extra
// bytes leave room for a separator and the terminator.
377 len = strlen(use_this_rncptr->rooms) + strlen(qrbuf->QRname) + 5;
378 ptr = realloc(use_this_rncptr->rooms, len);
// The realloc result check and the separator append are presumably on the
// missing lines between here and the name append - TODO confirm.
381 strcat(ptr, qrbuf->QRname);
382 use_this_rncptr->rooms = ptr;
395 * Scan for rooms that have RSS client requests configured
// rssclient_scan: timer-driven entry point. Scans every room for feed
// requests, then starts a fetch for each queued request while removing it
// from rnclist.
// NOTE(review): this listing carries stray leading numbers and is missing
// lines (the opening of the comment below, the assignments to doing_rssclient
// and rptr, the release of the list node, and braces). Comments describe only
// the visible statements - confirm the rest against upstream.
397 void rssclient_scan(void) {
// last_run is stamped at the end of each run; no read of it is visible here.
398 static time_t last_run = 0L;
399 static int doing_rssclient = 0;
400 rssnetcfg *rptr = NULL;
403 * This is a simple concurrency check to make sure only one rssclient run
404 * is done at a time. We could do this with a mutex, but since we
405 * don't really require extremely fine granularity here, we'll do it
406 * with a static variable instead.
// NOTE(review): no statement that sets or clears this flag is visible.
408 if (doing_rssclient) return;
411 CtdlLogPrintf(CTDL_DEBUG, "rssclient started\n");
// Populate rnclist from the netconfig of every room.
412 CtdlForEachRoom(rssclient_scan_room, NULL);
// Drain the list from the front until it is empty or a thread stop is
// requested.
414 while (rnclist != NULL && !CtdlThreadCheckStop()) {
415 rss_do_fetching(rnclist);
// NOTE(review): rptr is read below but its assignment is not visible;
// presumably it is set to the old head before the head advances. No release of
// the Url buffer of the entry is visible either - verify.
417 rnclist = rnclist->next;
418 if (rptr->rooms != NULL) free(rptr->rooms);
422 CtdlLogPrintf(CTDL_DEBUG, "rssclient ended\n");
423 last_run = time(NULL);
// Module initialisation for the RSS client: logs the libcurl version string
// and registers rssclient_scan as a session hook for EVT_TIMER events.
// NOTE(review): the braces, any guard around these calls and the return
// statement are missing from this listing.
429 CTDL_MODULE_INIT(rssclient)
433 CtdlLogPrintf(CTDL_INFO, "%s\n", curl_version());
434 CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER);