Work on RSS Feed
[citadel.git] / citadel / modules / rssclient / serv_rssclient.c
index db13c62cc30bd23eb4d5863d5bbae4aa62c755db..7e5cb481dc338636f25bcb8f322db873f8f6f92b 100644 (file)
 #define TMP_SHORTER_URL_OFFSET 0xFE
 #define TMP_SHORTER_URLS 0xFD
 
-citthread_mutex_t RSSQueueMutex; /* locks the access to the following vars: */
+time_t last_run = 0L;
+
+pthread_mutex_t RSSQueueMutex; /* locks the access to the following vars: */
 HashList *RSSQueueRooms = NULL; /* rss_room_counter */
 HashList *RSSFetchUrls = NULL; /* -> rss_aggregator; ->RefCount access to be locked too. */
 
 eNextState RSSAggregatorTerminate(AsyncIO *IO);
 
+struct CitContext rss_CC;
 
 struct rssnetcfg *rnclist = NULL;
 void AppendLink(StrBuf *Message, StrBuf *link, StrBuf *LinkTitle, const char *Title)
@@ -86,15 +89,6 @@ void AppendLink(StrBuf *Message, StrBuf *link, StrBuf *LinkTitle, const char *Ti
                StrBufAppendBufPlain(Message, HKEY("</a><br>\n"), 0);
        }
 }
-typedef struct __networker_save_message {
-       AsyncIO IO;
-       struct CtdlMessage *Msg;
-       struct recptypes *recp;
-       rss_aggregator *Cfg;
-       StrBuf *MsgGUID;
-       StrBuf *Message;
-       struct UseTable ut;
-} networker_save_message;
 
 
 void DeleteRoomReference(long QRnumber)
@@ -137,10 +131,12 @@ void UnlinkRooms(rss_aggregator *Cfg)
                        long *lData = (long*) vData;
                        DeleteRoomReference(*lData);
                }
+/*
+               if (server_shutting_down)
+                       break; / * TODO */
 
                DeleteHashPos(&At);
        }
-
 }
 
 void UnlinkRSSAggregator(rss_aggregator *Cfg)
@@ -155,13 +151,14 @@ void UnlinkRSSAggregator(rss_aggregator *Cfg)
                DeleteEntryFromHash(RSSFetchUrls, At);
        }
        DeleteHashPos(&At);
+       last_run = time(NULL);
 }
-
+/*
 eNextState FreeNetworkSaveMessage (AsyncIO *IO)
 {
        networker_save_message *Ctx = (networker_save_message *) IO->Data;
 
-       citthread_mutex_lock(&RSSQueueMutex);
+       pthread_mutex_lock(&RSSQueueMutex);
        Ctx->Cfg->RefCount --;
 
        if (Ctx->Cfg->RefCount == 0)
@@ -169,15 +166,28 @@ eNextState FreeNetworkSaveMessage (AsyncIO *IO)
                UnlinkRSSAggregator(Ctx->Cfg);
 
        }
-       citthread_mutex_unlock(&RSSQueueMutex);
+       pthread_mutex_unlock(&RSSQueueMutex);
 
        CtdlFreeMessage(Ctx->Msg);
        free_recipients(Ctx->recp);
        FreeStrBuf(&Ctx->Message);
        FreeStrBuf(&Ctx->MsgGUID);
+       ((struct CitContext*)IO->CitContext)->state = CON_IDLE;
+       ((struct CitContext*)IO->CitContext)->kill_me = 1;
        free(Ctx);
+       last_run = time(NULL);
        return eAbort;
 }
+*/
+void FreeNetworkSaveMessage (void *vMsg) // Destructor for one queued networker_save_message; rss_save_item registers it as the Put() destructor on Cfg->Messages.
+{
+       networker_save_message *Msg = (networker_save_message *) vMsg; // vMsg is the SaveMsg pointer stored in the hash
+
+       CtdlFreeMessage(Msg->Msg); // the CtdlMessage built in rss_save_item
+       FreeStrBuf(&Msg->Message); // NOTE(review): RSSSaveMessage hands this buffer to SmashStrBuf first; presumably it is NULL by then and FreeStrBuf tolerates that - confirm
+       FreeStrBuf(&Msg->MsgGUID);
+       free(Msg); // recipients are not released here: recp is now a member of the rss_aggregator and its recp_room is freed in RSSAggregatorTerminate
+}
 
 eNextState AbortNetworkSaveMessage (AsyncIO *IO)
 {
@@ -186,41 +196,51 @@ eNextState AbortNetworkSaveMessage (AsyncIO *IO)
 
 eNextState RSSSaveMessage(AsyncIO *IO)
 {
-       networker_save_message *Ctx = (networker_save_message *) IO->Data;
+       long len; // key length written by GetNextHashPos below; not read afterwards
+       const char *Key; // key pointer written by GetNextHashPos below; not read afterwards
+       rss_aggregator *Ctx = (rss_aggregator *) IO->Data; // IO->Data is now the aggregator; the item being saved is Ctx->ThisMsg
 
-       Ctx->Msg->cm_fields['M'] = SmashStrBuf(&Ctx->Message);
+       Ctx->ThisMsg->Msg->cm_fields['M'] = SmashStrBuf(&Ctx->ThisMsg->Message); // attach the assembled body to the message as its M field
 
-       CtdlSubmitMsg(Ctx->Msg, Ctx->recp, NULL, 0);
+       CtdlSubmitMsg(Ctx->ThisMsg->Msg, &Ctx->recp, NULL, 0); // recipients come from the recp struct embedded in the aggregator, so every queued item uses the same one
 
        /* write the uidl to the use table so we don't store this item again */
-       cdb_store(CDB_USETABLE, SKEY(Ctx->MsgGUID), &Ctx->ut, sizeof(struct UseTable) );
-
-       return eTerminateConnection;
+       cdb_store(CDB_USETABLE, SKEY(Ctx->ThisMsg->MsgGUID), &Ctx->ThisMsg->ut, sizeof(struct UseTable) );
+       // NOTE(review): the CtdlSubmitMsg result is ignored, so the use-table record is written even if the submit failed - confirm this is intended
+       if (GetNextHashPos(Ctx->Messages, Ctx->Pos, &len, &Key, (void**) &Ctx->ThisMsg)) // step Ctx->ThisMsg to the next queued item, if any
+               return QueueDBOperation(IO, RSS_FetchNetworkUsetableEntry); // another item: queue a DB operation that restarts at the use-table lookup
+       else
+               return eAbort; // no items left; replaces the old eTerminateConnection return (presumably ends this IO - confirm against the event queue)
 }
 
-// TODO: relink me:    ExpandShortUrls(ri->description);
-
 eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO)
 {
+       const char *Key;
+       long len;
        struct cdbdata *cdbut;
-       networker_save_message *Ctx = (networker_save_message *) IO->Data;
+       rss_aggregator *Ctx = (rss_aggregator *) IO->Data;
+
 
        /* Find out if we've already seen this item */
-       strcpy(Ctx->ut.ut_msgid, ChrPtr(Ctx->MsgGUID)); /// TODO
-       Ctx->ut.ut_timestamp = time(NULL);
+       strcpy(Ctx->ThisMsg->ut.ut_msgid, ChrPtr(Ctx->ThisMsg->MsgGUID)); /// TODO
+       Ctx->ThisMsg->ut.ut_timestamp = time(NULL);
 
-       cdbut = cdb_fetch(CDB_USETABLE, SKEY(Ctx->MsgGUID));
+       cdbut = cdb_fetch(CDB_USETABLE, SKEY(Ctx->ThisMsg->MsgGUID));
 #ifndef DEBUG_RSS
        if (cdbut != NULL) {
                /* Item has already been seen */
-               CtdlLogPrintf(CTDL_DEBUG, "%s has already been seen\n", ChrPtr(Ctx->MsgGUID));
+               syslog(LOG_DEBUG, "%s has already been seen\n", ChrPtr(Ctx->ThisMsg->MsgGUID));
                cdb_free(cdbut);
 
                /* rewrite the record anyway, to update the timestamp */
                cdb_store(CDB_USETABLE, 
-                         SKEY(Ctx->MsgGUID), 
-                         &Ctx->ut, sizeof(struct UseTable) );
-               return eAbort;
+                         SKEY(Ctx->ThisMsg->MsgGUID), 
+                         &Ctx->ThisMsg->ut, sizeof(struct UseTable) );
+
+               if (GetNextHashPos(Ctx->Messages, Ctx->Pos, &len, &Key, (void**) &Ctx->ThisMsg))
+                       return QueueDBOperation(IO, RSS_FetchNetworkUsetableEntry);
+               else
+                       return eAbort;
        }
        else
 #endif
@@ -229,10 +249,16 @@ eNextState RSS_FetchNetworkUsetableEntry(AsyncIO *IO)
                return eSendMore;
        }
 }
-void RSSQueueSaveMessage(struct CtdlMessage *Msg, struct recptypes *recp, StrBuf *MsgGUID, StrBuf *MessageBody, rss_aggregator *Cfg)
+/*
+void RSSAddSaveMessage(struct CtdlMessage *Msg, struct recptypes *recp, StrBuf *MsgGUID, StrBuf *MessageBody, rss_aggregat *Cfg)
 {
        networker_save_message *Ctx;
 
+       pthread_mutex_lock(&RSSQueueMutex);
+       Cfg->RefCount ++;
+       pthread_mutex_unlock(&RSSQueueMutex);
+
+
        Ctx = (networker_save_message *) malloc(sizeof(networker_save_message));
        memset(Ctx, 0, sizeof(networker_save_message));
        
@@ -242,12 +268,12 @@ void RSSQueueSaveMessage(struct CtdlMessage *Msg, struct recptypes *recp, StrBuf
        Ctx->Cfg = Cfg;
        Ctx->recp = recp;
        Ctx->IO.Data = Ctx;
-       Ctx->IO.CitContext = CloneContext(CC);
+       Ctx->IO.CitContext = CloneContext(&rss_CC);
        Ctx->IO.Terminate = FreeNetworkSaveMessage;
        Ctx->IO.ShutdownAbort = AbortNetworkSaveMessage;
        QueueDBOperation(&Ctx->IO, RSS_FetchNetworkUsetableEntry);
 }
-
+*/
 
 /*
  * Commit a fetched and parsed RSS item to disk
@@ -258,21 +284,12 @@ void rss_save_item(rss_item *ri, rss_aggregator *Cfg)
        struct MD5Context md5context;
        u_char rawdigest[MD5_DIGEST_LEN];
        struct CtdlMessage *msg;
-       struct recptypes *recp = NULL;
        int msglen = 0;
        StrBuf *Message;
        StrBuf *guid;
-       StrBuf *Buf;
-
-       recp = (struct recptypes *) malloc(sizeof(struct recptypes));
-       if (recp == NULL) return;
-       memset(recp, 0, sizeof(struct recptypes));
-       Buf = NewStrBufDup(Cfg->rooms);
-       recp->recp_room = SmashStrBuf(&Buf);
-       recp->num_room = Cfg->roomlist_parts;
-       recp->recptypes_magic = RECPTYPES_MAGIC;
+
+       int n;
    
-       Cfg->RefCount ++;
        /* Construct a GUID to use in the S_USETABLE table.
         * If one is not present in the item itself, make one up.
         */
@@ -297,7 +314,7 @@ void rss_save_item(rss_item *ri, rss_aggregator *Cfg)
        }
 
        /* translate Item into message. */
-       CtdlLogPrintf(CTDL_DEBUG, "RSS: translating item...\n");
+       syslog(LOG_DEBUG, "RSS: translating item...\n");
        if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY(""));
        StrBufSpaceToBlank(ri->description);
        msg = malloc(sizeof(struct CtdlMessage));
@@ -332,12 +349,19 @@ void rss_save_item(rss_item *ri, rss_aggregator *Cfg)
                else
                {
                        if (FromAt)
-                               msg->cm_fields['P'] = SmashStrBuf(&ri->author_or_creator);
+                       {
+                               msg->cm_fields['A'] = SmashStrBuf(&ri->author_or_creator);
+                               msg->cm_fields['P'] = strdup(msg->cm_fields['A']);
+                       }
                        else 
                        {
                                StrBufRFC2047encode(&Encoded, ri->author_or_creator);
                                msg->cm_fields['A'] = SmashStrBuf(&Encoded);
                                msg->cm_fields['P'] = strdup("rss@localhost");
+
+                       }
+                       if (ri->pubdate <= 0) {
+                               ri->pubdate = time(NULL);
                        }
                }
        }
@@ -401,7 +425,19 @@ void rss_save_item(rss_item *ri, rss_aggregator *Cfg)
        AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this");
        StrBufAppendBufPlain(Message, HKEY("</body></html>\n"), 0);
 
-       RSSQueueSaveMessage(msg, recp, guid, Message, Cfg);
+
+
+       networker_save_message *SaveMsg;
+
+       SaveMsg = (networker_save_message *) malloc(sizeof(networker_save_message));
+       memset(SaveMsg, 0, sizeof(networker_save_message));
+       
+       SaveMsg->MsgGUID = guid;
+       SaveMsg->Message = Message;
+       SaveMsg->Msg = msg;
+
+       n = GetCount(Cfg->Messages) + 1;
+       Put(Cfg->Messages, IKEY(n), SaveMsg, FreeNetworkSaveMessage);
 }
 
 
@@ -420,17 +456,17 @@ int rss_do_fetching(rss_aggregator *Cfg)
 
        if ((Cfg->next_poll != 0) && (now < Cfg->next_poll))
                return 0;
-       Cfg->RefCount = 1;
+       Cfg->RefCount++;
 
        ri = (rss_item*) malloc(sizeof(rss_item));
        memset(ri, 0, sizeof(rss_item));
        Cfg->Item = ri;
        IO = &Cfg->IO;
-       IO->CitContext = CloneContext(CC);
+       IO->CitContext = CloneContext(&rss_CC);
        IO->Data = Cfg;
 
 
-       CtdlLogPrintf(CTDL_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(Cfg->Url));
+       syslog(LOG_DEBUG, "Fetching RSS feed <%s>\n", ChrPtr(Cfg->Url));
        ParseURL(&IO->ConnectMe, Cfg->Url, 80);
        CurlPrepareURL(IO->ConnectMe);
 
@@ -441,16 +477,15 @@ int rss_do_fetching(rss_aggregator *Cfg)
                          ParseRSSReply, 
                          RSSAggregatorTerminate))
        {
-               CtdlLogPrintf(CTDL_ALERT, "Unable to initialize libcurl.\n");
+               syslog(LOG_DEBUG, "Unable to initialize libcurl.\n");
                return 0;
        }
 
-       evcurl_handle_start(IO);
+       QueueCurlContext(IO);
        return 1;
 }
 
 
-
 void DeleteRssCfg(void *vptr)
 {
        rss_aggregator *rncptr = (rss_aggregator *)vptr;
@@ -459,8 +494,9 @@ void DeleteRssCfg(void *vptr)
        FreeStrBuf(&rncptr->rooms);
        FreeStrBuf(&rncptr->CData);
        FreeStrBuf(&rncptr->Key);
-
+       FreeStrBuf(&rncptr->IO.HttpReq.ReplyData);
        DeleteHash(&rncptr->OtherQRnumbers);
+       FreeURL(&rncptr->IO.ConnectMe);
 
        if (rncptr->Item != NULL)
        {
@@ -484,30 +520,33 @@ void DeleteRssCfg(void *vptr)
 eNextState RSSAggregatorTerminate(AsyncIO *IO)
 {
        rss_aggregator *rncptr = (rss_aggregator *)IO->Data;
+       // Teardown for one aggregator: drops a reference, removes its RSSFetchUrls entry, frees per-run state. rss_do_fetching passes this function to its IO setup call.
        HashPos *At;
        long HKLen;
        const char *HK;
        void *vData;
 
-       citthread_mutex_lock(&RSSQueueMutex);
+       pthread_mutex_lock(&RSSQueueMutex); // RefCount is only touched while RSSQueueMutex is held
        rncptr->RefCount --;
        if (rncptr->RefCount == 0)
        {
                UnlinkRSSAggregator(rncptr);
 
        }
-       citthread_mutex_unlock(&RSSQueueMutex);
-/*
+       pthread_mutex_unlock(&RSSQueueMutex);
+       // This patch re-enables the lookup-and-delete sequence that was previously commented out.
        At = GetNewHashPos(RSSFetchUrls, 0);
 
-       citthread_mutex_lock(&RSSQueueMutex);
+       pthread_mutex_lock(&RSSQueueMutex); // NOTE(review): when RefCount reached 0, UnlinkRSSAggregator has already called DeleteEntryFromHash on RSSFetchUrls; this repeats the lookup and delete for the same Url - confirm
        GetHashPosFromKey(RSSFetchUrls, SKEY(rncptr->Url), At);
        GetHashPos(RSSFetchUrls, At, &HKLen, &HK, &vData);
        DeleteEntryFromHash(RSSFetchUrls, At);
-       citthread_mutex_unlock(&RSSQueueMutex);
-
+       pthread_mutex_unlock(&RSSQueueMutex); // NOTE(review): entries were Put with DeleteRssCfg as destructor; if the delete above ran it on rncptr, the rncptr accesses below touch freed memory - confirm
+       DeleteHashPos (&rncptr->Pos); // release the iterator over this aggregator's queued messages
+       DeleteHash (&rncptr->Messages); // drop the queued messages (presumably runs FreeNetworkSaveMessage, the destructor given to Put, on each entry)
+       if (rncptr->recp.recp_room != NULL) // recp is a struct member of the aggregator; recp_room is the part freed here
+               free(rncptr->recp.recp_room);
        DeleteHashPos(&At);
-*/
        return eAbort;
 }
 
@@ -516,7 +555,7 @@ eNextState RSSAggregatorTerminate(AsyncIO *IO)
  */
 void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
 {
-       StrBuf *CfgData;
+       StrBuf *CfgData=NULL;
        StrBuf *CfgType;
        StrBuf *Line;
        rss_room_counter *Count = NULL;
@@ -530,48 +569,53 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
        const char *CfgPtr, *lPtr;
        const char *Err;
 
-       citthread_mutex_lock(&RSSQueueMutex);
+       pthread_mutex_lock(&RSSQueueMutex);
        if (GetHash(RSSQueueRooms, LKEY(qrbuf->QRnumber), &vptr))
        {
-               CtdlLogPrintf(CTDL_DEBUG, 
+               syslog(LOG_DEBUG, 
                              "rssclient: [%ld] %s already in progress.\n", 
                              qrbuf->QRnumber, 
                              qrbuf->QRname);
-               citthread_mutex_unlock(&RSSQueueMutex);
+               pthread_mutex_unlock(&RSSQueueMutex);
                return;
        }
-       citthread_mutex_unlock(&RSSQueueMutex);
+       pthread_mutex_unlock(&RSSQueueMutex);
 
        assoc_file_name(filename, sizeof filename, qrbuf, ctdl_netcfg_dir);
 
-       if (CtdlThreadCheckStop())
+       if (server_shutting_down)
                return;
                
        /* Only do net processing for rooms that have netconfigs */
        fd = open(filename, 0);
        if (fd <= 0) {
-               //CtdlLogPrintf(CTDL_DEBUG, "rssclient: %s no config.\n", qrbuf->QRname);
+               //syslog(LOG_DEBUG, "rssclient: %s no config.\n", qrbuf->QRname);
                return;
        }
-       if (CtdlThreadCheckStop())
+
+       if (server_shutting_down)
                return;
+
        if (fstat(fd, &statbuf) == -1) {
-               CtdlLogPrintf(CTDL_DEBUG,  "ERROR: could not stat configfile '%s' - %s\n",
-                       filename, strerror(errno));
+               syslog(LOG_DEBUG, "ERROR: could not stat configfile '%s' - %s\n",
+                      filename, strerror(errno));
                return;
        }
-       if (CtdlThreadCheckStop())
+
+       if (server_shutting_down)
                return;
+
        CfgData = NewStrBufPlain(NULL, statbuf.st_size + 1);
+
        if (StrBufReadBLOB(CfgData, &fd, 1, statbuf.st_size, &Err) < 0) {
                close(fd);
                FreeStrBuf(&CfgData);
-               CtdlLogPrintf(CTDL_DEBUG,  "ERROR: reading config '%s' - %s<br>\n",
+               syslog(LOG_DEBUG, "ERROR: reading config '%s' - %s<br>\n",
                        filename, strerror(errno));
                return;
        }
        close(fd);
-       if (CtdlThreadCheckStop())
+       if (server_shutting_down)
                return;
        
        CfgPtr = NULL;
@@ -599,7 +643,7 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
                    rncptr->Url = NewStrBuf();
                    StrBufExtract_NextToken(rncptr->Url, Line, &lPtr, '|');
 
-                   citthread_mutex_lock(&RSSQueueMutex);
+                   pthread_mutex_lock(&RSSQueueMutex);
                    GetHash(RSSFetchUrls, SKEY(rncptr->Url), &vptr);
                    use_this_rncptr = (rss_aggregator *)vptr;
                    if (use_this_rncptr != NULL)
@@ -625,29 +669,34 @@ void rssclient_scan_room(struct ctdlroom *qrbuf, void *data)
                                    Put(use_this_rncptr->OtherQRnumbers, LKEY(qrbuf->QRnumber), QRnumber, NULL);
                                    use_this_rncptr->roomlist_parts++;
                            }
-                           citthread_mutex_unlock(&RSSQueueMutex);
+                           pthread_mutex_unlock(&RSSQueueMutex);
+
+
+                           FreeStrBuf(&rncptr->Url);
+                           free(rncptr);
+                           rncptr = NULL;
                            continue;
                    }
-                   citthread_mutex_unlock(&RSSQueueMutex);
+                   pthread_mutex_unlock(&RSSQueueMutex);
 
                    rncptr->ItemType = RSS_UNSET;
                                
                    rncptr->rooms = NewStrBufPlain(qrbuf->QRname, -1);
 
-                   citthread_mutex_lock(&RSSQueueMutex);
+                   pthread_mutex_lock(&RSSQueueMutex);
                    Put(RSSFetchUrls, SKEY(rncptr->Url), rncptr, DeleteRssCfg);
-                   citthread_mutex_unlock(&RSSQueueMutex);
+                   pthread_mutex_unlock(&RSSQueueMutex);
                }
            }
        }
        if (Count != NULL)
        {
                Count->QRnumber = qrbuf->QRnumber;
-               citthread_mutex_lock(&RSSQueueMutex);
-               CtdlLogPrintf(CTDL_DEBUG, "rssclient: [%ld] %s now starting.\n", 
+               pthread_mutex_lock(&RSSQueueMutex);
+               syslog(LOG_DEBUG, "rssclient: [%ld] %s now starting.\n", 
                              qrbuf->QRnumber, qrbuf->QRname);
                Put(RSSQueueRooms, LKEY(qrbuf->QRnumber), Count, NULL);
-               citthread_mutex_unlock(&RSSQueueMutex);
+               pthread_mutex_unlock(&RSSQueueMutex);
        }
        FreeStrBuf(&CfgData);
        FreeStrBuf(&CfgType);
@@ -665,6 +714,11 @@ void rssclient_scan(void) {
        long len;
        const char *Key;
 
+       /* Run no more than once every 15 minutes. */
+       if ((time(NULL) - last_run) < 900) {
+               return;
+       }
+
        /*
         * This is a simple concurrency check to make sure only one rssclient run
         * is done at a time.  We could do this with a mutex, but since we
@@ -674,13 +728,14 @@ void rssclient_scan(void) {
        if (doing_rssclient) return;
        doing_rssclient = 1;
 
-       CtdlLogPrintf(CTDL_DEBUG, "rssclient started\n");
+       syslog(LOG_DEBUG, "rssclient started\n");
        CtdlForEachRoom(rssclient_scan_room, NULL);
 
-       citthread_mutex_lock(&RSSQueueMutex);
+       pthread_mutex_lock(&RSSQueueMutex);
 
        it = GetNewHashPos(RSSFetchUrls, 0);
-       while (GetNextHashPos(RSSFetchUrls, it, &len, &Key, &vrptr) && 
+       while (!server_shutting_down &&
+              GetNextHashPos(RSSFetchUrls, it, &len, &Key, &vrptr) && 
               (vrptr != NULL)) {
                rptr = (rss_aggregator *)vrptr;
                if (rptr->RefCount == 0) 
@@ -688,16 +743,16 @@ void rssclient_scan(void) {
                                UnlinkRSSAggregator(rptr);
        }
        DeleteHashPos(&it);
-       citthread_mutex_unlock(&RSSQueueMutex);
+       pthread_mutex_unlock(&RSSQueueMutex);
 
-       CtdlLogPrintf(CTDL_DEBUG, "rssclientscheduler ended\n");
+       syslog(LOG_DEBUG, "rssclient ended\n");
        doing_rssclient = 0;
        return;
 }
 
 void rss_cleanup(void)
 {
-       citthread_mutex_destroy(&RSSQueueMutex);
+       /* TODO: RSSQueueMutex is now a pthread_mutex_t set up with pthread_mutex_init in the module init; the old citthread_mutex_destroy(&RSSQueueMutex) call was not ported, so the mutex is currently never destroyed. */
        DeleteHash(&RSSFetchUrls);
        DeleteHash(&RSSQueueRooms);
 }
@@ -707,10 +762,11 @@ CTDL_MODULE_INIT(rssclient)
 {
        if (threading)
        {
-               citthread_mutex_init(&RSSQueueMutex, NULL);
+               CtdlFillSystemContext(&rss_CC, "rssclient");
+               pthread_mutex_init(&RSSQueueMutex, NULL);
                RSSQueueRooms = NewHash(1, lFlathash);
                RSSFetchUrls = NewHash(1, NULL);
-               CtdlLogPrintf(CTDL_INFO, "%s\n", curl_version());
+               syslog(LOG_INFO, "%s\n", curl_version());
                CtdlRegisterSessionHook(rssclient_scan, EVT_TIMER);
                 CtdlRegisterCleanupHook(rss_cleanup);
        }