From e0288cfdf0bfeed17641a79257c0e92c32630483 Mon Sep 17 00:00:00 2001 From: Dave West Date: Sat, 31 Oct 2009 15:29:37 +0000 Subject: [PATCH] Fixed a bug in the threading code that would prevent new threads starting. It was possible for threads that are running (not idle) to still be marked as Blocked (idle). Also Identified another problem that could prevent threads from starting. If all existing worker threads were given a task during the same GC tick then the system load average would go to 100% and prevent new threads. This was due to a bad method of calculating the load average. We always wanted the machine load average to limit the creation of new threads so that the machine didn't become overloaded. Now we get the machine load and use that to limit new threads. We also log a WARNING in this event. --- citadel/sysdep.c | 3 +++ citadel/threads.c | 57 ++++++++++++++++++++++++++++++++++------------- citadel/threads.h | 1 + 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/citadel/sysdep.c b/citadel/sysdep.c index 732dbc420..433f3467b 100644 --- a/citadel/sysdep.c +++ b/citadel/sysdep.c @@ -1123,6 +1123,9 @@ void InitializeMasterCC(void) { * Bind a thread to a context. (It's inline merely to speed things up.) */ INLINE void become_session(struct CitContext *which_con) { + if (which_con) + ctdl_thread_internal_change_state(CT, CTDL_THREAD_RUNNING); + citthread_setspecific(MyConKey, (void *)which_con ); } diff --git a/citadel/threads.c b/citadel/threads.c index d67f7dbe9..4f9d1428c 100644 --- a/citadel/threads.c +++ b/citadel/threads.c @@ -407,12 +407,33 @@ double CtdlThreadGetWorkerAvg(void) double CtdlThreadGetLoadAvg(void) { - double ret; - + double load_avg[3] ; + + int ret; + int smp_num_cpus; + + /* Borrowed this straight from procps */ + smp_num_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if(smp_num_cpus<1) smp_num_cpus=1; /* SPARC glibc is buggy */ + + ret = getloadavg(load_avg, 3); + if (ret < 0) + return 0; + return load_avg[0] / smp_num_cpus; +/* + * This old chunk of code return a value that indicated the load on citserver + * This value could easily reach 100 % even when citserver was doing very little and + * hence the machine has much more spare capacity. + * Because this value was used to determine if the machine was under heavy load conditions + * from other processes in the system then citserver could be strangled un-necesarily + * What we are actually trying to achieve is to strangle citserver if the machine is heavily loaded. + * So we have changed this. + begin_critical_section(S_THREAD_LIST); ret = CtdlThreadLoadAvg; end_critical_section(S_THREAD_LIST); return ret; +*/ } @@ -623,7 +644,7 @@ void ctdl_thread_internal_calc_loadavg(void) CtdlThreadLoadAvg = load_avg/num_threads; CtdlThreadWorkerAvg = worker_avg/workers; #ifdef WITH_THREADLOG - CtdlLogPrintf(CTDL_INFO, "System load average %.2f, workers averag %.2f, threads %d, workers %d, sessions %d\n", CtdlThreadLoadAvg, CtdlThreadWorkerAvg, num_threads, num_workers, num_sessions); + CtdlLogPrintf(CTDL_INFO, "System load average %.2f, workers averag %.2f, threads %d, workers %d, sessions %d\n", CtdlThreadGetLoadAvg(), CtdlThreadWorkerAvg, num_threads, num_workers, num_sessions); #endif } @@ -1300,25 +1321,29 @@ void go_threading(void) #ifdef NEW_WORKER if ((((CtdlThreadGetWorkers() < config.c_max_workers) && (CtdlThreadGetWorkers() <= num_sessions) ) || CtdlThreadGetWorkers() < config.c_min_workers) && (CT->state > CTDL_THREAD_STOP_REQ)) #else - if ((((CtdlThreadGetWorkers() < config.c_max_workers) && (CtdlThreadGetWorkerAvg() > 60) && (CtdlThreadGetLoadAvg() < 90) ) || CtdlThreadGetWorkers() < config.c_min_workers) && (CT->state > CTDL_THREAD_STOP_REQ)) + if ((((CtdlThreadGetWorkers() < config.c_max_workers) && (CtdlThreadGetWorkerAvg() > 60)) || CtdlThreadGetWorkers() < config.c_min_workers) && (CT->state > CTDL_THREAD_STOP_REQ)) #endif /* NEW_WORKER */ { - for (i=0; i<5 ; i++) - { + /* Only start new threads if we are not going to overload the machine */ + if (CtdlThreadGetLoadAvg() < ((double)1.00)) { + for (i=0; i<5 ; i++) { #ifdef NEW_WORKER - CtdlThreadCreate("Worker Thread (new)", - CTDLTHREAD_BIGSTACK + CTDLTHREAD_WORKER, - new_worker_thread, - NULL - ); + CtdlThreadCreate("Worker Thread (new)", + CTDLTHREAD_BIGSTACK + CTDLTHREAD_WORKER, + new_worker_thread, + NULL + ); #else - CtdlThreadCreate("Worker Thread", - CTDLTHREAD_BIGSTACK + CTDLTHREAD_WORKER, - worker_thread, - NULL - ); + CtdlThreadCreate("Worker Thread", + CTDLTHREAD_BIGSTACK + CTDLTHREAD_WORKER, + worker_thread, + NULL + ); #endif /* NEW_WORKER */ + } } + else + CtdlLogPrintf (CTDL_WARNING, "Server strangled due to machine load average too high.\n"); } CtdlThreadGC(); diff --git a/citadel/threads.h b/citadel/threads.h index fb3378077..9efebbbbd 100644 --- a/citadel/threads.h +++ b/citadel/threads.h @@ -136,6 +136,7 @@ void ctdl_thread_internal_calc_loadavg(void); void ctdl_thread_internal_free_tsd(void); CtdlThreadNode *ctdl_internal_create_thread(char *name, long flags, void *(*thread_func) (void *arg), void *args); void ctdl_thread_internal_check_scheduled(void); +void ctdl_thread_internal_change_state (CtdlThreadNode *this_thread, enum CtdlThreadState new_state); void InitialiseSemaphores(void); int try_critical_section (int which_one); -- 2.30.2