From: Wilfried Goesgens Date: Mon, 13 Jul 2015 15:32:49 +0000 (+0200) Subject: Add timeout facility for event contexts X-Git-Tag: Release_902~156^2~1^2~18 X-Git-Url: https://code.citadel.org/?p=citadel.git;a=commitdiff_plain;h=7b5b6987e1077892e01f9ebdb1f27374011af6db Add timeout facility for event contexts - make sure now() is set correctly on the context for proper kill decisions --- diff --git a/citadel/event_client.c b/citadel/event_client.c index 999c9e454..81221f813 100644 --- a/citadel/event_client.c +++ b/citadel/event_client.c @@ -156,7 +156,7 @@ DB_PerformNext(struct ev_loop *loop, ev_idle *watcher, int revents) AsyncIO *IO = watcher->data; SetEVState(IO, eDBNext); - IO->Now = ev_now(event_db); + IO->CitContext->lastcmd = IO->Now = ev_now(event_db); EV_syslog(LOG_DEBUG, "%s()", __FUNCTION__); become_session(IO->CitContext); @@ -220,7 +220,7 @@ static void IO_abort_shutdown_callback(struct ev_loop *loop, SetEVState(IO, eIOAbort); EV_syslog(LOG_DEBUG, "EVENT Q: %s\n", __FUNCTION__); - IO->Now = ev_now(event_base); + IO->CitContext->lastcmd = IO->Now = ev_now(event_base); assert(IO->ShutdownAbort); IO->ShutdownAbort(IO); } @@ -543,7 +543,7 @@ IO_send_callback(struct ev_loop *loop, ev_io *watcher, int revents) AsyncIO *IO = watcher->data; const char *errmsg = NULL; - IO->Now = ev_now(event_base); + IO->CitContext->lastcmd = IO->Now = ev_now(event_base); become_session(IO->CitContext); #ifdef BIGBAD_IODBG { @@ -720,7 +720,7 @@ IO_Timeout_callback(struct ev_loop *loop, ev_timer *watcher, int revents) AsyncIO *IO = watcher->data; SetEVState(IO, eIOTimeout); - IO->Now = ev_now(event_base); + IO->CitContext->lastcmd = IO->Now = ev_now(event_base); ev_timer_stop (event_base, &IO->rw_timeout); become_session(IO->CitContext); @@ -749,7 +749,7 @@ IO_connfail_callback(struct ev_loop *loop, ev_timer *watcher, int revents) AsyncIO *IO = watcher->data; SetEVState(IO, eIOConnfail); - IO->Now = ev_now(event_base); + IO->CitContext->lastcmd = IO->Now = ev_now(event_base);
ev_timer_stop (event_base, &IO->conn_fail); if (IO->SendBuf.fd != 0) @@ -782,7 +782,7 @@ IO_connfailimmediate_callback(struct ev_loop *loop, AsyncIO *IO = watcher->data; SetEVState(IO, eIOConnfailNow); - IO->Now = ev_now(event_base); + IO->CitContext->lastcmd = IO->Now = ev_now(event_base); ev_idle_stop (event_base, &IO->conn_fail_immediate); if (IO->SendBuf.fd != 0) @@ -812,7 +812,7 @@ IO_connestd_callback(struct ev_loop *loop, ev_io *watcher, int revents) int err; SetEVState(IO, eIOConnNow); - IO->Now = ev_now(event_base); + IO->CitContext->lastcmd = IO->Now = ev_now(event_base); EVM_syslog(LOG_DEBUG, "connect() succeeded.\n"); ev_io_stop(loop, &IO->conn_event); @@ -846,7 +846,7 @@ IO_recv_callback(struct ev_loop *loop, ev_io *watcher, int revents) ssize_t nbytes; AsyncIO *IO = watcher->data; - IO->Now = ev_now(event_base); + IO->CitContext->lastcmd = IO->Now = ev_now(event_base); switch (IO->NextState) { case eReadFile: nbytes = FileRecvChunked(&IO->IOB, &errmsg); @@ -932,7 +932,7 @@ IO_postdns_callback(struct ev_loop *loop, ev_idle *watcher, int revents) AsyncIO *IO = watcher->data; SetEVState(IO, eCaresFinished); - IO->Now = ev_now(event_base); + IO->CitContext->lastcmd = IO->Now = ev_now(event_base); EV_syslog(LOG_DEBUG, "event: %s\n", __FUNCTION__); become_session(IO->CitContext); assert(IO->DNS.Query->PostDNS); diff --git a/citadel/modules/eventclient/serv_eventclient.c b/citadel/modules/eventclient/serv_eventclient.c index f7e286a9a..2260446a2 100644 --- a/citadel/modules/eventclient/serv_eventclient.c +++ b/citadel/modules/eventclient/serv_eventclient.c @@ -154,7 +154,7 @@ gotstatus(int nnrun) EVCURLM_syslog(LOG_DEBUG, "request complete\n"); - IO->Now = ev_now(event_base); + IO->CitContext->lastcmd = IO->Now = ev_now(event_base); ev_io_stop(event_base, &IO->recv_event); ev_io_stop(event_base, &IO->send_event); @@ -188,6 +188,7 @@ gotstatus(int nnrun) curl_slist_free_all(IO->HttpReq.headers); + IO->HttpReq.headers = NULL; msta = 
curl_multi_remove_handle(global.mhnd, chnd); if (msta) EVCURL_syslog(LOG_ERR, @@ -289,7 +290,7 @@ gotdata(void *data, size_t size, size_t nmemb, void *cglobal) { IO->HttpReq.ReplyData = NewStrBufPlain(NULL, SIZ); } - IO->Now = ev_now(event_base); + IO->CitContext->lastcmd = IO->Now = ev_now(event_base); return CurlFillStrBuf_callback(data, size, nmemb, @@ -350,7 +351,7 @@ gotwatchsock(CURL *easy, } SetEVState(IO, eCurlGotIO); - IO->Now = ev_now(event_base); + IO->CitContext->lastcmd = IO->Now = ev_now(event_base); Action = ""; switch (action) @@ -509,10 +510,11 @@ static void IOcurl_abort_shutdown_callback(struct ev_loop *loop, return; SetEVState(IO, eCurlShutdown); - IO->Now = ev_now(event_base); + IO->CitContext->lastcmd = IO->Now = ev_now(event_base); EVCURL_syslog(LOG_DEBUG, "EVENT Curl: %s\n", __FUNCTION__); curl_slist_free_all(IO->HttpReq.headers); + IO->HttpReq.headers = NULL; msta = curl_multi_remove_handle(global.mhnd, IO->HttpReq.chnd); if (msta) { @@ -530,6 +532,7 @@ static void IOcurl_abort_shutdown_callback(struct ev_loop *loop, assert(IO->ShutdownAbort); IO->ShutdownAbort(IO); } + eNextState evcurl_handle_start(AsyncIO *IO) { @@ -655,7 +658,7 @@ static void QueueEventAddCallback(EV_P_ ev_async *w, int revents) Ctx = h->IO->CitContext; become_session(Ctx); - h->IO->Now = Now; + h->IO->CitContext->lastcmd = h->IO->Now = Now; switch (h->EvAttch(h->IO)) { case eReadMore: @@ -805,7 +808,7 @@ static void DBQueueEventAddCallback(EV_P_ ev_async *w, int revents) IOID = h->IO->ID; if (h->IO->StartDB == 0.0) h->IO->StartDB = Now; - h->IO->Now = Now; + h->IO->CitContext->lastcmd = h->IO->Now = Now; SetEVState(h->IO, eDBAttach); Ctx = h->IO->CitContext; diff --git a/citadel/modules/rwho/serv_rwho.c b/citadel/modules/rwho/serv_rwho.c index 4c2e28cdc..5329bc70a 100644 --- a/citadel/modules/rwho/serv_rwho.c +++ b/citadel/modules/rwho/serv_rwho.c @@ -214,6 +214,70 @@ void cmd_rwho(char *argbuf) { cprintf("000\n"); } +/* + * check for async io jobs that are stuck 
(didn't ping back for 10 mins) + */ +void dead_io_check(void) { + struct CitContext *nptr; + int nContexts, i; + char real_room[ROOMNAMELEN]; + + /* So that we don't keep the context list locked for a long time + * we create a copy of it first + */ + nptr = CtdlGetContextArray(&nContexts) ; + if (!nptr) + { + /* Couldn't malloc so we have to bail but stick to the protocol */ + return; + } + + time_t now = time(NULL); + time_t idle; + + for (i=0; i