Add timeout facility for event contexts
author Wilfried Goesgens <dothebart@citadel.org>
Mon, 13 Jul 2015 15:32:49 +0000 (17:32 +0200)
committer Wilfried Goesgens <dothebart@citadel.org>
Mon, 13 Jul 2015 15:32:49 +0000 (17:32 +0200)
  - make sure now() is set correctly on the context for proper kill decisions

citadel/event_client.c
citadel/modules/eventclient/serv_eventclient.c
citadel/modules/rwho/serv_rwho.c

index 999c9e4546c1b989efc6fb93c6f6ab8df8394b8f..81221f813c0e8d9b3262a3825265d28efae6a76a 100644 (file)
@@ -156,7 +156,7 @@ DB_PerformNext(struct ev_loop *loop, ev_idle *watcher, int revents)
        AsyncIO *IO = watcher->data;
 
        SetEVState(IO, eDBNext);
-       IO->Now = ev_now(event_db);
+       IO->CitContext->lastcmd = IO->Now = ev_now(event_db);
        EV_syslog(LOG_DEBUG, "%s()", __FUNCTION__);
        become_session(IO->CitContext);
 
@@ -220,7 +220,7 @@ static void IO_abort_shutdown_callback(struct ev_loop *loop,
 
        SetEVState(IO, eIOAbort);
        EV_syslog(LOG_DEBUG, "EVENT Q: %s\n", __FUNCTION__);
-       IO->Now = ev_now(event_base);
+       IO->CitContext->lastcmd = IO->Now = ev_now(event_base);
        assert(IO->ShutdownAbort);
        IO->ShutdownAbort(IO);
 }
@@ -543,7 +543,7 @@ IO_send_callback(struct ev_loop *loop, ev_io *watcher, int revents)
        AsyncIO *IO = watcher->data;
        const char *errmsg = NULL;
 
-       IO->Now = ev_now(event_base);
+       IO->CitContext->lastcmd = IO->Now = ev_now(event_base);
        become_session(IO->CitContext);
 #ifdef BIGBAD_IODBG
        {
@@ -720,7 +720,7 @@ IO_Timeout_callback(struct ev_loop *loop, ev_timer *watcher, int revents)
        AsyncIO *IO = watcher->data;
 
        SetEVState(IO, eIOTimeout);
-       IO->Now = ev_now(event_base);
+       IO->CitContext->lastcmd = IO->Now = ev_now(event_base);
        ev_timer_stop (event_base, &IO->rw_timeout);
        become_session(IO->CitContext);
 
@@ -749,7 +749,7 @@ IO_connfail_callback(struct ev_loop *loop, ev_timer *watcher, int revents)
        AsyncIO *IO = watcher->data;
 
        SetEVState(IO, eIOConnfail);
-       IO->Now = ev_now(event_base);
+       IO->CitContext->lastcmd = IO->Now = ev_now(event_base);
        ev_timer_stop (event_base, &IO->conn_fail);
 
        if (IO->SendBuf.fd != 0)
@@ -782,7 +782,7 @@ IO_connfailimmediate_callback(struct ev_loop *loop,
        AsyncIO *IO = watcher->data;
 
        SetEVState(IO, eIOConnfailNow);
-       IO->Now = ev_now(event_base);
+       IO->CitContext->lastcmd = IO->Now = ev_now(event_base);
        ev_idle_stop (event_base, &IO->conn_fail_immediate);
 
        if (IO->SendBuf.fd != 0)
@@ -812,7 +812,7 @@ IO_connestd_callback(struct ev_loop *loop, ev_io *watcher, int revents)
         int             err;
 
        SetEVState(IO, eIOConnNow);
-        IO->Now = ev_now(event_base);
+        IO->CitContext->lastcmd = IO->Now = ev_now(event_base);
         EVM_syslog(LOG_DEBUG, "connect() succeeded.\n");
 
         ev_io_stop(loop, &IO->conn_event);
@@ -846,7 +846,7 @@ IO_recv_callback(struct ev_loop *loop, ev_io *watcher, int revents)
        ssize_t nbytes;
        AsyncIO *IO = watcher->data;
 
-       IO->Now = ev_now(event_base);
+       IO->CitContext->lastcmd = IO->Now = ev_now(event_base);
        switch (IO->NextState) {
        case eReadFile:
                nbytes = FileRecvChunked(&IO->IOB, &errmsg);
@@ -932,7 +932,7 @@ IO_postdns_callback(struct ev_loop *loop, ev_idle *watcher, int revents)
        AsyncIO *IO = watcher->data;
 
        SetEVState(IO, eCaresFinished);
-       IO->Now = ev_now(event_base);
+       IO->CitContext->lastcmd = IO->Now = ev_now(event_base);
        EV_syslog(LOG_DEBUG, "event: %s\n", __FUNCTION__);
        become_session(IO->CitContext);
        assert(IO->DNS.Query->PostDNS);
index f7e286a9ab14cf6188f352b52ac036224df99982..2260446a29671af6db5cb85f81065629f7817a0f 100644 (file)
@@ -154,7 +154,7 @@ gotstatus(int nnrun)
 
                        EVCURLM_syslog(LOG_DEBUG, "request complete\n");
 
-                       IO->Now = ev_now(event_base);
+                       IO->CitContext->lastcmd = IO->Now = ev_now(event_base);
 
                        ev_io_stop(event_base, &IO->recv_event);
                        ev_io_stop(event_base, &IO->send_event);
@@ -188,6 +188,7 @@ gotstatus(int nnrun)
 
 
                        curl_slist_free_all(IO->HttpReq.headers);
+                       IO->HttpReq.headers = NULL;
                        msta = curl_multi_remove_handle(global.mhnd, chnd);
                        if (msta)
                                EVCURL_syslog(LOG_ERR,
@@ -289,7 +290,7 @@ gotdata(void *data, size_t size, size_t nmemb, void *cglobal)
        {
                IO->HttpReq.ReplyData = NewStrBufPlain(NULL, SIZ);
        }
-       IO->Now = ev_now(event_base);
+       IO->CitContext->lastcmd = IO->Now = ev_now(event_base);
        return CurlFillStrBuf_callback(data,
                                       size,
                                       nmemb,
@@ -350,7 +351,7 @@ gotwatchsock(CURL *easy,
        }
 
        SetEVState(IO, eCurlGotIO);
-       IO->Now = ev_now(event_base);
+       IO->CitContext->lastcmd = IO->Now = ev_now(event_base);
 
        Action = "";
        switch (action)
@@ -509,10 +510,11 @@ static void IOcurl_abort_shutdown_callback(struct ev_loop *loop,
                return;
 
        SetEVState(IO, eCurlShutdown);
-       IO->Now = ev_now(event_base);
+       IO->CitContext->lastcmd = IO->Now = ev_now(event_base);
        EVCURL_syslog(LOG_DEBUG, "EVENT Curl: %s\n", __FUNCTION__);
 
        curl_slist_free_all(IO->HttpReq.headers);
+       IO->HttpReq.headers = NULL;
        msta = curl_multi_remove_handle(global.mhnd, IO->HttpReq.chnd);
        if (msta)
        {
@@ -530,6 +532,7 @@ static void IOcurl_abort_shutdown_callback(struct ev_loop *loop,
        assert(IO->ShutdownAbort);
        IO->ShutdownAbort(IO);
 }
+
 eNextState
 evcurl_handle_start(AsyncIO *IO)
 {
@@ -655,7 +658,7 @@ static void QueueEventAddCallback(EV_P_ ev_async *w, int revents)
                Ctx = h->IO->CitContext;
                become_session(Ctx);
 
-               h->IO->Now = Now;
+               h->IO->CitContext->lastcmd = h->IO->Now = Now;
                switch (h->EvAttch(h->IO))
                {
                case eReadMore:
@@ -805,7 +808,7 @@ static void DBQueueEventAddCallback(EV_P_ ev_async *w, int revents)
                IOID = h->IO->ID;
                if (h->IO->StartDB == 0.0)
                        h->IO->StartDB = Now;
-               h->IO->Now = Now;
+               h->IO->CitContext->lastcmd = h->IO->Now = Now;
 
                SetEVState(h->IO, eDBAttach);
                Ctx = h->IO->CitContext;
index 4c2e28cdc80495e92c7254bdb2d8d84eec588f0a..5329bc70aaf9ab2aec92cd287a86759dc862b87b 100644 (file)
@@ -214,6 +214,70 @@ void cmd_rwho(char *argbuf) {
        cprintf("000\n");
 }
 
+/* Check for async io jobs that are stuck: CON_SYS contexts with an IO attached, kill_me unset and lastcmd at least 600 seconds (10 minutes) old.
+ * Each one found is logged at LOG_WARNING and then ended via CtdlTerminateOtherSession().
+ * NOTE(review): idle is logged as minutes:seconds using "%d:%d", so the seconds are not zero padded. */
+void dead_io_check(void) {
+       struct CitContext *nptr;
+       int nContexts, i;
+       char real_room[ROOMNAMELEN];
+       
+       /* So that we don't keep the context list locked for a long time
+        * we work on the copy handed back by CtdlGetContextArray()
+        */
+       nptr = CtdlGetContextArray(&nContexts) ;
+       if (!nptr)
+       {
+               /* No context array was returned, so there is nothing to check; bail out */
+               return;
+       }
+
+       time_t now = time(NULL);
+       time_t idle;
+
+       for (i=0; i<nContexts; i++) 
+       {
+               if ((nptr[i].state != CON_SYS) || (nptr[i].IO == NULL) || (nptr[i].lastcmd == 0))
+                       continue;
+
+               if (nptr[i].kill_me != 0)
+                       continue;
+               idle = now - nptr[i].lastcmd;
+               if (idle < 600) 
+                       continue;
+
+               GenerateRoomDisplay(real_room, &nptr[i], CC);
+
+               syslog(LOG_WARNING,
+                      "Found stuck event context: CC[%d] "
+
+                      "Username: '%s' "
+                      "Room: '%s' "
+                      "while talking to host: '%s' "
+                      "Status: '%s' "
+                      "stuck in IO State: '%s' "
+
+                      "idle since: %d:%d "
+                      "Triggering context termination now!",
+
+                      nptr[i].cs_pid,
+
+                      nptr[i].curr_user,
+                      real_room,
+                      nptr[i].cs_host,
+                      nptr[i].cs_clientname,
+                      nptr[i].lastcmdname,
+
+                      (int) idle / 60,
+                      (int) idle % 60);
+
+               CtdlTerminateOtherSession(nptr[i].cs_pid);
+       }
+       
+       /* release our copy of the context list */
+       free(nptr);
+
+}
 
 /*
  * Masquerade roomname
@@ -317,6 +381,8 @@ CTDL_MODULE_INIT(rwho)
                CtdlRegisterProtoHook(cmd_rchg, "RCHG", "Masquerade roomname");
                CtdlRegisterProtoHook(cmd_uchg, "UCHG", "Masquerade username");
                CtdlRegisterProtoHook(cmd_stel, "STEL", "Enter/exit stealth mode");
+               CtdlRegisterSessionHook(dead_io_check, EVT_TIMER, PRIO_QUEUE + 50);
+
        }
        
        /* return our module name for the log */