code.citadel.org Git - citadel.git/blobdiff - citadel/modules/rssclient/serv_rssclient.c
Completed the removal of $Id$ tags in the Citadel server. Also, since the strings...
[citadel.git] / citadel / modules / rssclient / serv_rssclient.c
index 0bdff60891e72418d7d44906fdb43b1612da8b8e..404d879e7cefbbed44c444d796a55b9686b59e16 100644 (file)
@@ -1,23 +1,21 @@
 /*
- * $Id$
- *
  * Bring external RSS feeds into rooms.
  *
- * Copyright (c) 2007-2009 by the citadel.org team
+ * Copyright (c) 2007-2010 by the citadel.org team
  *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 3 of the License, or
- *  (at your option) any later version.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
  *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 #include <stdlib.h>
@@ -85,6 +83,7 @@ typedef struct _rss_item {
        char channel_title[256];
        int item_tag_nesting;
        char *author_or_creator;
+       char *author_url;
 }rss_item;
 
 
@@ -288,10 +287,10 @@ void rss_save_item(rsscollection *rssc) {
        int msglen = 0;
        rss_item *ri = rssc->Item;
 
-       CtdlLogPrintf(0, "RSS: saving item...\n");
        recp = (struct recptypes *) malloc(sizeof(struct recptypes));
        if (recp == NULL) return;
        memset(recp, 0, sizeof(struct recptypes));
+       memset(&ut, 0, sizeof(struct UseTable));
        recp->recp_room = strdup(ri->roomlist);
        recp->num_room = num_tokens(ri->roomlist, '|');
        recp->recptypes_magic = RECPTYPES_MAGIC;
@@ -320,6 +319,7 @@ void rss_save_item(rsscollection *rssc) {
        }
 
        /* Find out if we've already seen this item */
+
        cdbut = cdb_fetch(CDB_USETABLE, utmsgid, strlen(utmsgid));
        if (cdbut != NULL) {
                /* Item has already been seen */
@@ -333,7 +333,7 @@ void rss_save_item(rsscollection *rssc) {
        }
        else {
                /* Item has not been seen, so save it. */
-
+               CtdlLogPrintf(CTDL_DEBUG, "RSS: saving item...\n");
                if (ri->description == NULL) ri->description = strdup("");
                for (i=strlen(ri->description); i>=0; --i) {
                        if (isspace(ri->description[i])) {
@@ -347,10 +347,36 @@ void rss_save_item(rsscollection *rssc) {
                msg->cm_anon_type = MES_NORMAL;
                msg->cm_format_type = FMT_RFC822;
 
+               if (ri->guid != NULL) {
+                       msg->cm_fields['E'] = strdup(ri->guid);
+               }
+
                if (ri->author_or_creator != NULL) {
-                       msg->cm_fields['A'] = html_to_ascii(ri->author_or_creator,
-                               strlen(ri->author_or_creator), 512, 0);
-                       striplt(msg->cm_fields['A']);
+                       char *From;
+                       StrBuf *Encoded, *QPEncoded;
+                       StrBuf *UserName;
+                       StrBuf *EmailAddress;
+                       StrBuf *EncBuf;
+                       
+                       UserName = NewStrBuf();
+                       EmailAddress = NewStrBuf();
+                       EncBuf = NewStrBuf();
+
+                       From = html_to_ascii(ri->author_or_creator,
+                                            strlen(ri->author_or_creator), 
+                                            512, 0);
+
+                       Encoded = NewStrBufPlain(From, -1);
+                       free(From);
+                       StrBufTrim(Encoded);
+                       QPEncoded = StrBufSanitizeEmailRecipientVector(Encoded, UserName, EmailAddress, EncBuf);
+                       msg->cm_fields['A'] = SmashStrBuf(&QPEncoded);
+
+                       FreeStrBuf(&Encoded);
+                       FreeStrBuf(&UserName);
+                       FreeStrBuf(&EmailAddress);
+                       FreeStrBuf(&EncBuf);
+
                }
                else {
                        msg->cm_fields['A'] = strdup("rss");
@@ -358,8 +384,21 @@ void rss_save_item(rsscollection *rssc) {
 
                msg->cm_fields['N'] = strdup(NODENAME);
                if (ri->title != NULL) {
-                       msg->cm_fields['U'] = html_to_ascii(ri->title, strlen(ri->title), 512, 0);
-                       striplt(msg->cm_fields['U']);
+                       long len;
+                       char *Sbj;
+                       StrBuf *Encoded, *QPEncoded;
+
+                       QPEncoded = NULL;
+                       len = strlen(ri->title);
+                       Sbj = html_to_ascii(ri->title, len, 512, 0);
+                       Encoded = NewStrBufPlain(Sbj, -1);
+                       free(Sbj);
+
+                       StrBufTrim(Encoded);
+                       StrBufRFC2047encode(&QPEncoded, Encoded);
+
+                       msg->cm_fields['U'] = SmashStrBuf(&QPEncoded);
+                       FreeStrBuf(&Encoded);
                }
                msg->cm_fields['T'] = malloc(64);
                snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate);
@@ -409,7 +448,12 @@ time_t rdf_parsedate(char *p)
 
        memset(&tm, 0, sizeof tm);
 
-       /* YYYY-MM-DDTHH:MM format...
+       /*
+        * If the timestamp appears to be in W3C datetime format, try to
+        * parse it.  See also: http://www.w3.org/TR/NOTE-datetime
+        *
+        * This code, along with parsedate.c, is a potential candidate for
+        * moving into libcitadel.
         */
        if ( (p[4] == '-') && (p[7] == '-') ) {
                tm.tm_year = atoi(&p[0]) - 1900;
@@ -446,6 +490,8 @@ void flush_rss_ite(rss_item *ri)
        ri->link = NULL;
        if (ri->author_or_creator != NULL) free(ri->author_or_creator);
        ri->author_or_creator = NULL;
+       if (ri->author_url != NULL) free(ri->author_url);
+       ri->author_url = NULL;
        if (ri->description != NULL) free(ri->description);
        ri->description = NULL;
        /* Throw away any existing character data */
@@ -469,20 +515,31 @@ void rss_xml_start(void *data, const char *supplied_el, const char **attr) {
                strcpy(el, ++sep);
        }
 
-       if (((rssc->Cfg->ItemType == RSS_UNSET) || 
-            (rssc->Cfg->ItemType == RSS_RSS)) &&
+       if ((rssc->Cfg->ItemType == RSS_UNSET) && !strcasecmp(el, "rss")) 
+       {
+               CtdlLogPrintf(9, "RSS: This is an RSS feed.\n");
+               rssc->Cfg->ItemType = RSS_RSS;
+       }
+       if ((rssc->Cfg->ItemType == RSS_UNSET) && !strcasecmp(el, "rdf")) 
+       {
+               CtdlLogPrintf(9, "RSS: This is an RDF feed.\n");
+               rssc->Cfg->ItemType = RSS_RSS;
+       }
+       else if ((rssc->Cfg->ItemType == RSS_UNSET) && !strcasecmp(el, "feed")) 
+       {
+               CtdlLogPrintf(9, "RSS: This is an ATOM feed.\n");
+               rssc->Cfg->ItemType = RSS_ATOM;
+       }
+       else if ((rssc->Cfg->ItemType == RSS_RSS) &&
            !strcasecmp(el, "item")) 
        {
                ri->item_tag_nesting ++ ;
-               rssc->Cfg->ItemType = RSS_RSS;
                flush_rss_ite(ri);
        }
-       else if (((rssc->Cfg->ItemType == RSS_UNSET) || 
-                 (rssc->Cfg->ItemType == RSS_ATOM)) &&
-                !strcasecmp(el, "entry")) { /* Atom feed... */
-               CtdlLogPrintf(0, "RSS: found atom...\n");
+       else if ( (rssc->Cfg->ItemType == RSS_ATOM) &&
+                !strcasecmp(el, "entry")) 
+       { /* Atom feed... */
                ++ri->item_tag_nesting;
-               rssc->Cfg->ItemType = RSS_ATOM;
                flush_rss_ite(ri);
        }
        else if ((rssc->Cfg->ItemType == RSS_ATOM) &&
@@ -538,6 +595,11 @@ void rss_xml_end(void *data, const char *supplied_el) {
                ri->guid = strdup(ri->chardata);
        }
 
+       else if ( (rssc->Cfg->ItemType == RSS_RSS) && (!strcasecmp(el, "link")) && (ri->chardata != NULL) ) {
+               if (ri->link != NULL) free(ri->link);
+               striplt(ri->chardata);
+               ri->link = strdup(ri->chardata);
+       }
 
        else if ( (!strcasecmp(el, "title")) && (ri->chardata != NULL) ) {
                if (ri->title != NULL) free(ri->title);
@@ -575,7 +637,7 @@ void rss_xml_end(void *data, const char *supplied_el) {
                ri->pubdate = rdf_parsedate(ri->chardata);
        }
 
-       else if ((rssc->Cfg->ItemType == RSS_ATOM) && 
+       else if ((rssc->Cfg->ItemType == RSS_RSS) && 
                 ((!strcasecmp(el, "author")) || 
                  (!strcasecmp(el, "creator"))) && 
                 (ri->chardata != NULL) ) {
@@ -584,6 +646,21 @@ void rss_xml_end(void *data, const char *supplied_el) {
                ri->author_or_creator = strdup(ri->chardata);
        }
 
+       else if ((rssc->Cfg->ItemType == RSS_ATOM) && 
+                (!strcasecmp(el, "name")) && 
+                (ri->chardata != NULL) ) {
+               if (ri->author_or_creator != NULL) free(ri->author_or_creator);
+               striplt(ri->chardata);
+               ri->author_or_creator = strdup(ri->chardata);
+       }
+       else if ((rssc->Cfg->ItemType == RSS_ATOM) && 
+                (!strcasecmp(el, "uri")) && 
+                (ri->chardata != NULL) ) {
+               if (ri->author_url != NULL) free(ri->author_url);
+               striplt(ri->chardata);
+               ri->author_url = strdup(ri->chardata);
+       }
+
        else if ((rssc->Cfg->ItemType == RSS_RSS) && 
                 !strcasecmp(el, "item")) {
                --ri->item_tag_nesting;
@@ -687,7 +764,12 @@ void rss_do_fetching(rssnetcfg *Cfg) {
 #endif
        curl_easy_setopt(curl, CURLOPT_USERAGENT, CITADEL);
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, 180);           /* die after 180 seconds */
-       if (!IsEmptyStr(config.c_ip_addr)) {
+       if (
+               (!IsEmptyStr(config.c_ip_addr))
+               && (strcmp(config.c_ip_addr, "*"))
+               && (strcmp(config.c_ip_addr, "::"))
+               && (strcmp(config.c_ip_addr, "0.0.0.0"))
+       ) {
                curl_easy_setopt(curl, CURLOPT_INTERFACE, config.c_ip_addr);
        }
 
@@ -831,8 +913,6 @@ void *rssclient_scan(void *args) {
        CtdlFillSystemContext(&rssclientCC, "rssclient");
        citthread_setspecific(MyConKey, (void *)&rssclientCC );
 
-       CtdlThreadAllocTSD();
-
        /*
         * This is a simple concurrency check to make sure only one rssclient run
         * is done at a time.  We could do this with a mutex, but since we
@@ -860,6 +940,7 @@ void *rssclient_scan(void *args) {
                CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, last_run + config.c_net_freq);
        else
                CtdlLogPrintf(CTDL_DEBUG, "rssclient: Task STOPPED.\n");
+       CtdlClearSystemContext();
        return NULL;
 }
 
@@ -872,5 +953,5 @@ CTDL_MODULE_INIT(rssclient)
                CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, 0);
        }
        /* return our Subversion id for the Log */
-        return "$Id$";
+        return "rssclient";
 }