code.citadel.org Git - citadel.git/blobdiff - citadel/modules/rssclient/serv_rssclient.c
* Don't tell libcurl to use an interface that does not exist
[citadel.git] / citadel / modules / rssclient / serv_rssclient.c
index b6517242b23eab2aa93e9d598c0afbad2d24a62f..7f8e9f383dd704d287de0023d2cd03bf864d13c9 100644 (file)
@@ -3,21 +3,21 @@
  *
  * Bring external RSS feeds into rooms.
  *
- * Copyright (c) 2007-2009 by the citadel.org team
+ * Copyright (c) 2007-2010 by the citadel.org team
  *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 3 of the License, or
- *  (at your option) any later version.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
  *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 #include <stdlib.h>
@@ -289,10 +289,10 @@ void rss_save_item(rsscollection *rssc) {
        int msglen = 0;
        rss_item *ri = rssc->Item;
 
-       CtdlLogPrintf(0, "RSS: saving item...\n");
        recp = (struct recptypes *) malloc(sizeof(struct recptypes));
        if (recp == NULL) return;
        memset(recp, 0, sizeof(struct recptypes));
+       memset(&ut, 0, sizeof(struct UseTable));
        recp->recp_room = strdup(ri->roomlist);
        recp->num_room = num_tokens(ri->roomlist, '|');
        recp->recptypes_magic = RECPTYPES_MAGIC;
@@ -321,6 +321,7 @@ void rss_save_item(rsscollection *rssc) {
        }
 
        /* Find out if we've already seen this item */
+
        cdbut = cdb_fetch(CDB_USETABLE, utmsgid, strlen(utmsgid));
        if (cdbut != NULL) {
                /* Item has already been seen */
@@ -334,7 +335,7 @@ void rss_save_item(rsscollection *rssc) {
        }
        else {
                /* Item has not been seen, so save it. */
-
+               CtdlLogPrintf(CTDL_DEBUG, "RSS: saving item...\n");
                if (ri->description == NULL) ri->description = strdup("");
                for (i=strlen(ri->description); i>=0; --i) {
                        if (isspace(ri->description[i])) {
@@ -348,10 +349,36 @@ void rss_save_item(rsscollection *rssc) {
                msg->cm_anon_type = MES_NORMAL;
                msg->cm_format_type = FMT_RFC822;
 
+               if (ri->guid != NULL) {
+                       msg->cm_fields['E'] = strdup(ri->guid);
+               }
+
                if (ri->author_or_creator != NULL) {
-                       msg->cm_fields['A'] = html_to_ascii(ri->author_or_creator,
-                               strlen(ri->author_or_creator), 512, 0);
-                       striplt(msg->cm_fields['A']);
+                       char *From;
+                       StrBuf *Encoded, *QPEncoded;
+                       StrBuf *UserName;
+                       StrBuf *EmailAddress;
+                       StrBuf *EncBuf;
+                       
+                       UserName = NewStrBuf();
+                       EmailAddress = NewStrBuf();
+                       EncBuf = NewStrBuf();
+
+                       From = html_to_ascii(ri->author_or_creator,
+                                            strlen(ri->author_or_creator), 
+                                            512, 0);
+
+                       Encoded = NewStrBufPlain(From, -1);
+                       free(From);
+                       StrBufTrim(Encoded);
+                       QPEncoded = StrBufSanitizeEmailRecipientVector(Encoded, UserName, EmailAddress, EncBuf);
+                       msg->cm_fields['A'] = SmashStrBuf(&QPEncoded);
+
+                       FreeStrBuf(&Encoded);
+                       FreeStrBuf(&UserName);
+                       FreeStrBuf(&EmailAddress);
+                       FreeStrBuf(&EncBuf);
+
                }
                else {
                        msg->cm_fields['A'] = strdup("rss");
@@ -359,8 +386,21 @@ void rss_save_item(rsscollection *rssc) {
 
                msg->cm_fields['N'] = strdup(NODENAME);
                if (ri->title != NULL) {
-                       msg->cm_fields['U'] = html_to_ascii(ri->title, strlen(ri->title), 512, 0);
-                       striplt(msg->cm_fields['U']);
+                       long len;
+                       char *Sbj;
+                       StrBuf *Encoded, *QPEncoded;
+
+                       QPEncoded = NULL;
+                       len = strlen(ri->title);
+                       Sbj = html_to_ascii(ri->title, len, 512, 0);
+                       Encoded = NewStrBufPlain(Sbj, -1);
+                       free(Sbj);
+
+                       StrBufTrim(Encoded);
+                       StrBufRFC2047encode(&QPEncoded, Encoded);
+
+                       msg->cm_fields['U'] = SmashStrBuf(&QPEncoded);
+                       FreeStrBuf(&Encoded);
                }
                msg->cm_fields['T'] = malloc(64);
                snprintf(msg->cm_fields['T'], 64, "%ld", ri->pubdate);
@@ -410,7 +450,12 @@ time_t rdf_parsedate(char *p)
 
        memset(&tm, 0, sizeof tm);
 
-       /* YYYY-MM-DDTHH:MM format...
+       /*
+        * If the timestamp appears to be in W3C datetime format, try to
+        * parse it.  See also: http://www.w3.org/TR/NOTE-datetime
+        *
+        * This code, along with parsedate.c, is a potential candidate for
+        * moving into libcitadel.
         */
        if ( (p[4] == '-') && (p[7] == '-') ) {
                tm.tm_year = atoi(&p[0]) - 1900;
@@ -447,6 +492,8 @@ void flush_rss_ite(rss_item *ri)
        ri->link = NULL;
        if (ri->author_or_creator != NULL) free(ri->author_or_creator);
        ri->author_or_creator = NULL;
+       if (ri->author_url != NULL) free(ri->author_url);
+       ri->author_url = NULL;
        if (ri->description != NULL) free(ri->description);
        ri->description = NULL;
        /* Throw away any existing character data */
@@ -470,20 +517,31 @@ void rss_xml_start(void *data, const char *supplied_el, const char **attr) {
                strcpy(el, ++sep);
        }
 
-       if (((rssc->Cfg->ItemType == RSS_UNSET) || 
-            (rssc->Cfg->ItemType == RSS_RSS)) &&
+       if ((rssc->Cfg->ItemType == RSS_UNSET) && !strcasecmp(el, "rss")) 
+       {
+               CtdlLogPrintf(9, "RSS: This is an RSS feed.\n");
+               rssc->Cfg->ItemType = RSS_RSS;
+       }
+       if ((rssc->Cfg->ItemType == RSS_UNSET) && !strcasecmp(el, "rdf")) 
+       {
+               CtdlLogPrintf(9, "RSS: This is an RDF feed.\n");
+               rssc->Cfg->ItemType = RSS_RSS;
+       }
+       else if ((rssc->Cfg->ItemType == RSS_UNSET) && !strcasecmp(el, "feed")) 
+       {
+               CtdlLogPrintf(9, "RSS: This is an ATOM feed.\n");
+               rssc->Cfg->ItemType = RSS_ATOM;
+       }
+       else if ((rssc->Cfg->ItemType == RSS_RSS) &&
            !strcasecmp(el, "item")) 
        {
                ri->item_tag_nesting ++ ;
-               rssc->Cfg->ItemType = RSS_RSS;
                flush_rss_ite(ri);
        }
-       else if (((rssc->Cfg->ItemType == RSS_UNSET) || 
-                 (rssc->Cfg->ItemType == RSS_ATOM)) &&
-                !strcasecmp(el, "entry")) { /* Atom feed... */
-               CtdlLogPrintf(0, "RSS: found atom...\n");
+       else if ( (rssc->Cfg->ItemType == RSS_ATOM) &&
+                !strcasecmp(el, "entry")) 
+       { /* Atom feed... */
                ++ri->item_tag_nesting;
-               rssc->Cfg->ItemType = RSS_ATOM;
                flush_rss_ite(ri);
        }
        else if ((rssc->Cfg->ItemType == RSS_ATOM) &&
@@ -708,7 +766,12 @@ void rss_do_fetching(rssnetcfg *Cfg) {
 #endif
        curl_easy_setopt(curl, CURLOPT_USERAGENT, CITADEL);
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, 180);           /* die after 180 seconds */
-       if (!IsEmptyStr(config.c_ip_addr)) {
+       if (
+               (!IsEmptyStr(config.c_ip_addr))
+               && (strcmp(config.c_ip_addr, "*"))
+               && (strcmp(config.c_ip_addr, "::"))
+               && (strcmp(config.c_ip_addr, "0.0.0.0"))
+       ) {
                curl_easy_setopt(curl, CURLOPT_INTERFACE, config.c_ip_addr);
        }
 
@@ -852,8 +915,6 @@ void *rssclient_scan(void *args) {
        CtdlFillSystemContext(&rssclientCC, "rssclient");
        citthread_setspecific(MyConKey, (void *)&rssclientCC );
 
-       CtdlThreadAllocTSD();
-
        /*
         * This is a simple concurrency check to make sure only one rssclient run
         * is done at a time.  We could do this with a mutex, but since we
@@ -881,6 +942,7 @@ void *rssclient_scan(void *args) {
                CtdlThreadSchedule ("RSS Client", CTDLTHREAD_BIGSTACK, rssclient_scan, NULL, last_run + config.c_net_freq);
        else
                CtdlLogPrintf(CTDL_DEBUG, "rssclient: Task STOPPED.\n");
+       CtdlClearSystemContext();
        return NULL;
 }