2 * Bring external RSS feeds into rooms.
4 * Copyright (c) 2007-2010 by the citadel.org team
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 3 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #if TIME_WITH_SYS_TIME
26 # include <sys/time.h>
30 # include <sys/time.h>
39 #include <sys/types.h>
42 #include <curl/curl.h>
43 #include <libcitadel.h>
46 #include "citserver.h"
50 #include "ctdl_module.h"
52 #include "parsedate.h"
54 #include "citadel_dirs.h"
57 #include "event_client.h"
58 #include "rss_atom_parser.h"
// Scratch indices into CtdlMessage.cm_fields, used by rss_save_item while a
// message is being assembled.
// TMP_MSGDATA: slot that receives the assembled Message buffer.
61 #define TMP_MSGDATA 0xFF
// TMP_SHORTER_URL_OFFSET: slot that receives StrLength(Message), taken after
// the Content-type header text is placed in Message and before the item
// description is appended.
62 #define TMP_SHORTER_URL_OFFSET 0xFE
// TMP_SHORTER_URLS: slot that receives the result of GetShorterUrls() applied
// to the item description.
63 #define TMP_SHORTER_URLS 0xFD
// Head of the singly linked list of pending feed requests. rssclient_scan_room
// links new entries in front of it (the head update itself is not visible in
// this listing) and rssclient_scan walks and consumes it.
66 struct rssnetcfg *rnclist = NULL;
// AppendLink: append an HTML anchor for link to Message.
//   Message   - buffer that receives the generated markup
//   link      - target URL; the visible guard tests that it is non-empty
//   LinkTitle - preferred anchor text when non-empty
//   Title     - plain C string used as anchor text when LinkTitle is empty
// NOTE(review): brace and else lines are missing from this listing, so the
// exact nesting is assumed from the statement order shown - confirm.
67 void AppendLink(StrBuf *Message, StrBuf *link, StrBuf *LinkTitle, const char *Title)
69 if (StrLength(link) > 0)
// Opening tag with the link as the href value.
71 StrBufAppendBufPlain(Message, HKEY("<a href=\""), 0);
72 StrBufAppendBuf(Message, link, 0);
73 StrBufAppendBufPlain(Message, HKEY("\">"), 0);
// Anchor text priority: LinkTitle, then Title, then (presumably through an
// else that is not visible here) the link itself.
74 if (StrLength(LinkTitle) > 0)
75 StrBufAppendBuf(Message, LinkTitle, 0);
76 else if ((Title != NULL) && !IsEmptyStr(Title))
77 StrBufAppendBufPlain(Message, Title, -1, 0);
79 StrBufAppendBuf(Message, link, 0);
// Closing tag followed by a line break.
80 StrBufAppendBufPlain(Message, HKEY("</a><br>\n"), 0);
// RSSSaveMessage: submit a prepared message and record its id in the use
// table so the same feed item is not stored again.
// NOTE(review): only fragments of this function are visible. The statements
// below refer to msg, recp and utmsgid, none of which appear in the visible
// parameter list (the message parameter is spelled Msg) - confirm against
// the full source.
84 void RSSSaveMessage(struct CtdlMessage *Msg, rss_item *ri, struct UseTable *ut)
87 CtdlSubmitMsg(msg, recp, NULL, 0);
90 /* write the uidl to the use table so we don't store this item again */
// NOTE(review): ut is already a pointer parameter, so &ut hands cdb_store the
// address of the pointer while the length is sizeof(struct UseTable). This
// looks like it should pass ut - confirm.
91 cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) );
// NOTE(review): it is unclear from this listing whether the next line is a
// call or the head of another definition; it has no terminating semicolon.
96 rss_save_msg(msg, recp)
103 * Commit a fetched and parsed RSS item to disk
// rss_save_item: turn one parsed feed item (ri) into a Citadel message.
// Visible steps, in order:
//   1. build a recptypes record whose room list is copied from ri->roomlist
//   2. derive the use-table key utmsgid: rss/<guid> when the item carries a
//      guid; otherwise (presumably, the else line is not visible) the hex MD5
//      of title and link followed by _rss2ctdl
//   3. fill the message fields E, A, P, N, U, T and O from the item
//   4. assemble an HTML body in Message and park it in the TMP_MSGDATA slot
//   5. hand msg and recp to rss_save_msg
// NOTE(review): many lines (declarations, braces, else branches) are missing
// from this listing; the comments below describe only what is shown.
105 void rss_save_item(rss_item *ri)
108 struct MD5Context md5context;
109 u_char rawdigest[MD5_DIGEST_LEN];
112 struct cdbdata *cdbut;
114 struct CtdlMessage *msg;
115 struct recptypes *recp = NULL;
// Recipient record: zeroed, then pointed at a private copy of the room list.
120 recp = (struct recptypes *) malloc(sizeof(struct recptypes));
121 if (recp == NULL) return;
122 memset(recp, 0, sizeof(struct recptypes));
123 memset(&ut, 0, sizeof(struct UseTable));
// NOTE(review): the strdup result is not checked in the visible lines.
124 recp->recp_room = strdup(ri->roomlist);
125 recp->num_room = num_tokens(ri->roomlist, '|');
126 recp->recptypes_magic = RECPTYPES_MAGIC;
128 /* Construct a GUID to use in the S_USETABLE table.
129 * If one is not present in the item itself, make one up.
131 if (ri->guid != NULL) {
132 StrBufSpaceToBlank(ri->guid);
133 StrBufTrim(ri->guid);
134 snprintf(utmsgid, sizeof utmsgid, "rss/%s", ChrPtr(ri->guid));
// Fallback key: MD5 over title and link, written as lowercase hex digits.
137 MD5Init(&md5context);
138 if (ri->title != NULL) {
139 MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->title), StrLength(ri->title));
141 if (ri->link != NULL) {
142 MD5Update(&md5context, (const unsigned char*)ChrPtr(ri->link), StrLength(ri->link));
144 MD5Final(rawdigest, &md5context);
// NOTE(review): the declaration of utmsgid is not visible; the sprintf and
// strcat below are unbounded and rely on it holding two hex digits per digest
// byte plus the suffix and terminator.
145 for (i=0; i<MD5_DIGEST_LEN; i++) {
146 sprintf(&utmsgid[i*2], "%02X", (unsigned char) (rawdigest[i] & 0xff));
147 utmsgid[i*2] = tolower(utmsgid[i*2]);
148 utmsgid[(i*2)+1] = tolower(utmsgid[(i*2)+1]);
150 strcat(utmsgid, "_rss2ctdl");
153 /* translate Item into message. */
154 CtdlLogPrintf(CTDL_DEBUG, "RSS: translating item...\n");
// A missing description is replaced by an empty buffer so later calls can
// use it unconditionally.
155 if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY(""));
156 StrBufSpaceToBlank(ri->description);
// NOTE(review): the malloc result goes to memset with no visible NULL check.
157 msg = malloc(sizeof(struct CtdlMessage));
158 memset(msg, 0, sizeof(struct CtdlMessage));
159 msg->cm_magic = CTDLMESSAGE_MAGIC;
160 msg->cm_anon_type = MES_NORMAL;
161 msg->cm_format_type = FMT_RFC822;
// Field E carries a copy of the item guid when one exists.
163 if (ri->guid != NULL) {
164 msg->cm_fields['E'] = strdup(ChrPtr(ri->guid));
// Author handling. The author text is first run through html_to_ascii and
// trimmed. FromAt records whether it contains an @ sign.
167 if (ri->author_or_creator != NULL) {
169 StrBuf *Encoded = NULL;
172 From = html_to_ascii(ChrPtr(ri->author_or_creator),
173 StrLength(ri->author_or_creator),
175 StrBufPlain(ri->author_or_creator, From, -1);
176 StrBufTrim(ri->author_or_creator);
179 FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL;
// No @ in the author but a separate e-mail is present: field A gets the
// RFC 2047 encoded author and field P gets the e-mail.
180 if (!FromAt && StrLength (ri->author_email) > 0)
182 StrBufRFC2047encode(&Encoded, ri->author_or_creator);
183 msg->cm_fields['A'] = SmashStrBuf(&Encoded);
184 msg->cm_fields['P'] = SmashStrBuf(&ri->author_email);
// NOTE(review): the conditions selecting the next three variants are not
// visible. Presumably: author containing an @ goes to field P as is; author
// without any address gets the placeholder rss@localhost; no author at all
// yields the literal rss in field A - confirm.
189 msg->cm_fields['P'] = SmashStrBuf(&ri->author_or_creator);
192 StrBufRFC2047encode(&Encoded, ri->author_or_creator);
193 msg->cm_fields['A'] = SmashStrBuf(&Encoded);
194 msg->cm_fields['P'] = strdup("rss@localhost");
199 msg->cm_fields['A'] = strdup("rss");
202 msg->cm_fields['N'] = strdup(NODENAME);
// Subject: title converted with html_to_ascii, then RFC 2047 encoded into
// field U.
203 if (ri->title != NULL) {
206 StrBuf *Encoded, *QPEncoded;
209 StrBufSpaceToBlank(ri->title);
210 len = StrLength(ri->title);
211 Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0);
// NOTE(review): len as shown is the length of the title before conversion.
// Unless a line not shown recomputes it from Sbj, a value of 0 would index
// before the start of Sbj here - confirm.
213 if (Sbj[len - 1] == '\n')
218 Encoded = NewStrBufPlain(Sbj, len);
222 StrBufRFC2047encode(&QPEncoded, Encoded);
224 msg->cm_fields['U'] = SmashStrBuf(&QPEncoded);
225 FreeStrBuf(&Encoded);
// Field T: publication date printed as a decimal long.
// NOTE(review): the malloc result is used without a visible NULL check.
227 msg->cm_fields['T'] = malloc(64);
228 snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate);
// Field O: channel title, only when non-empty.
229 if (ri->channel_title != NULL) {
230 if (StrLength(ri->channel_title) > 0) {
231 msg->cm_fields['O'] = strdup(ChrPtr(ri->channel_title));
234 if (ri->link == NULL)
235 ri->link = NewStrBufPlain(HKEY(""));
238 msg->cm_fields[TMP_SHORTER_URLS] = GetShorterUrls(ri->description);
// NOTE(review): ut is used through a pointer here but was cleared as an
// object (memset on &ut) near the top of the function; its declaration is
// not visible, so one of the two forms looks inconsistent - confirm. The
// strcpy is also unbounded.
240 strcpy(ut->ut_msgid, utmsgid);
241 ut->ut_timestamp = time(NULL);
243 msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ;
// Body assembly: header text, description, then the two optional links.
245 Message = NewStrBufPlain(NULL, StrLength(ri->description));
247 StrBufPlain(Message, HKEY(
248 "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n"
// Length of Message at this point, kept in the TMP_SHORTER_URL_OFFSET slot.
250 msg->cm_fields[TMP_SHORTER_URL_OFFSET] = StrLength(Message);
251 StrBufAppendBuf(Message, ri->description, 0);
252 StrBufAppendBufPlain(Message, HKEY("<br><br>\n"), 0);
254 AppendLink(Message, ri->link, ri->linkTitle, NULL);
255 AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this");
256 StrBufAppendBufPlain(Message, HKEY("</body></html>\n"), 0);
259 msg->cm_fields[TMP_MSGDATA] = Message;
// A fresh zeroed AsyncIO receives msg and recp.
// NOTE(review): the malloc result is used without a visible NULL check, and
// what later happens to OtherIO is not shown.
262 OtherIO = malloc(sizeof(AsyncIO));
263 memset(OtherIO, 0, sizeof(AsyncIO));
264 OtherIO->AsyncMsg = msg;
265 OtherIO->AsyncRcp = recp;
267 rss_save_msg(msg, recp);
268 // msg->cm_fields['M'] = SmashStrBuf(&Message);
270 // TODO: reenable me ExpandShortUrls(ri->description);
272 /// free_recipients(recp);
// NOTE(review): the remaining lines look like older use-table logic. Their
// comment terminators are written with a space, which suggests they sat
// inside a disabled comment block in the full source - confirm.
278 /* Find out if we've already seen this item * /
280 cdbut = cdb_fetch(CDB_USETABLE, utmsgid, strlen(utmsgid));
283 /* Item has already been seen * /
284 CtdlLogPrintf(CTDL_DEBUG, "%s has already been seen\n", utmsgid);
287 /* rewrite the record anyway, to update the timestamp * /
288 strcpy(ut.ut_msgid, utmsgid);
289 ut.ut_timestamp = time(NULL);
290 cdb_store(CDB_USETABLE, utmsgid, strlen(utmsgid), &ut, sizeof(struct UseTable) );
// rss_do_fetching: set up and start an HTTP fetch for one configured feed.
//   Cfg - feed request; the visible code reads its Url, rooms and next_poll
// NOTE(review): declarations and several statements are missing from this
// listing; the comments describe only the lines shown.
301 void rss_do_fetching(rssnetcfg *Cfg) {
// Poll-time gate: a non-zero next_poll that is still in the future is tested
// here; the action taken (presumably an early return) is not visible.
312 if ((Cfg->next_poll != 0) && (now < Cfg->next_poll))
// NOTE(review): both malloc results are passed to memset without a visible
// NULL check.
316 ri = (rss_item*) malloc(sizeof(rss_item));
317 rssc = (rsscollection*) malloc(sizeof(rsscollection));
318 memset(ri, 0, sizeof(rss_item));
319 memset(rssc, 0, sizeof(rsscollection));
323 IO->CitContext = CloneContext(CC);
// NOTE(review): roomlist aliases Cfg->rooms rather than copying it, and
// rssclient_scan frees rooms right after this function returns. If the fetch
// completes later, this pointer would dangle - confirm the lifetime.
325 ri->roomlist = Cfg->rooms;
328 CtdlLogPrintf(CTDL_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(Cfg->Url));
// The literal 80 is presumably the default port - confirm against ParseURL.
329 ParseURL(&IO->ConnectMe, Cfg->Url, 80);
330 CurlPrepareURL(IO->ConnectMe);
// On evcurl_init failure an alert is logged; the remaining arguments and the
// failure path are not visible here.
332 if (! evcurl_init(IO,
335 "Citadel RSS Client",
338 CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n");
341 chnd = IO->HttpReq.chnd;
343 evcurl_handle_start(IO);
350 * Scan a room's netconfig to determine whether it is requesting any RSS feeds
// rssclient_scan_room: CtdlForEachRoom callback (registered in rssclient_scan)
// that reads the netconfig file of one room and queues every rssclient entry
// found in it on rnclist.
//   qrbuf - room being examined; QRname is added to the matching request
//   data  - not referenced in the visible lines
// NOTE(review): braces, declarations and some statements (for example the
// fopen failure test and any fclose) are missing from this listing.
352 void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
354 char filename[PATH_MAX];
359 rssnetcfg *rncptr = NULL;
360 rssnetcfg *use_this_rncptr = NULL;
364 assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir);
366 if (CtdlThreadCheckStop())
369 /* Only do net processing for rooms that have netconfigs */
370 fp = fopen(filename, "r");
// Line loop; also ends when a thread stop is requested.
375 while (fgets(buf, sizeof buf, fp) != NULL && !CtdlThreadCheckStop()) {
// NOTE(review): this removes the last character unconditionally. A final line
// without a trailing newline loses a real character - confirm intent.
376 buf[strlen(buf)-1] = 0;
// Token 0 is the directive name; token 1 of an rssclient line is the feed URL.
378 extract_token(instr, buf, 0, '|', sizeof instr);
379 if (!strcasecmp(instr, "rssclient")) {
381 use_this_rncptr = NULL;
383 extract_token(feedurl, buf, 1, '|', sizeof feedurl);
385 /* If any other rooms have requested the same feed, then we will just add this
386 * room to the target list for that client request.
388 for (rncptr=rnclist; rncptr!=NULL; rncptr=rncptr->next) {
389 if (!strcmp(ChrPtr(rncptr->Url), feedurl)) {
390 use_this_rncptr = rncptr;
394 /* Otherwise create a new client request */
// NOTE(review): the malloc result is passed to memset and dereferenced before
// the NULL test that follows; the test should come first.
395 if (use_this_rncptr == NULL) {
396 rncptr = (rssnetcfg *) malloc(sizeof(rssnetcfg));
397 memset(rncptr, 0, sizeof(rssnetcfg));
398 rncptr->ItemType = RSS_UNSET;
399 if (rncptr != NULL) {
400 rncptr->next = rnclist;
401 rncptr->Url = NewStrBufPlain(feedurl, -1);
402 rncptr->rooms = NULL;
404 use_this_rncptr = rncptr;
408 /* Add the room name to the request */
// NOTE(review): the first-room case writes through rncptr, not
// use_this_rncptr. The two are equal for a freshly created request, but not
// necessarily when the entry came from the search loop above (whether that
// loop breaks is not visible) - looks like it should use use_this_rncptr.
409 if (use_this_rncptr != NULL) {
410 if (use_this_rncptr->rooms == NULL) {
411 rncptr->rooms = strdup(qrbuf->QRname);
// Otherwise the list is grown and the room name appended. The 5 extra bytes
// presumably cover a separator and the terminator; the separator append and
// any realloc failure test are not visible here.
414 len = strlen(use_this_rncptr->rooms) + strlen(qrbuf->QRname) + 5;
415 ptr = realloc(use_this_rncptr->rooms, len);
418 strcat(ptr, qrbuf->QRname);
419 use_this_rncptr->rooms = ptr;
432 * Scan for rooms that have RSS client requests configured
// rssclient_scan: hook (registered for EVT_TIMER in the module init) that
// collects feed requests from all rooms via rssclient_scan_room and then
// calls rss_do_fetching on each queued request, releasing the queue as it
// goes.
// NOTE(review): the lines that set and clear doing_rssclient, any test of
// last_run, and the assignment of rptr are not visible in this listing.
434 void rssclient_scan(void) {
435 static time_t last_run = 0L;
436 static int doing_rssclient = 0;
437 rssnetcfg *rptr = NULL;
440 * This is a simple concurrency check to make sure only one rssclient run
441 * is done at a time. We could do this with a mutex, but since we
442 * don't really require extremely fine granularity here, we'll do it
443 * with a static variable instead.
445 if (doing_rssclient) return;
448 CtdlLogPrintf(CTDL_DEBUG, "rssclient started\n");
449 CtdlForEachRoom(rssclient_scan_room, NULL);
// Drain the request list head first; also stops on a thread stop request.
451 while (rnclist != NULL && !CtdlThreadCheckStop()) {
452 rss_do_fetching(rnclist);
454 rnclist = rnclist->next;
// NOTE(review): free accepts NULL, so the guard is redundant. Release of the
// node itself and of its Url is not visible here - confirm.
455 if (rptr->rooms != NULL) free(rptr->rooms);
459 CtdlLogPrintf(CTDL_DEBUG, "rssclient ended\n");
460 last_run = time(NULL);
// Module entry point for rssclient: logs the libcurl version string and
// registers rssclient_scan as an EVT_TIMER session hook.
// NOTE(review): any condition guarding these calls, and the return value of
// the init function, are not visible in this listing.
466 CTDL_MODULE_INIT(rssclient)
470 CtdlLogPrintf(CTDL_INFO, "%s\n", curl_version());
471 CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER);