Fixed the bug that prevented DOWN from working properly.
[citadel.git] / citadel / threads.c
index a1497d98c35907ce25c6198f95a1ec9e407cb146..9782d05fc005c5bf9a3f0567cb1c32dd5db65e90 100644 (file)
@@ -1,18 +1,22 @@
 /*
- * $Id: sysdep.c 5882 2007-12-13 19:46:05Z davew $
+ * $Id$
  *
  * Citadel "system dependent" stuff.
- * See copyright.txt for copyright information.
+ * See COPYING for copyright information.
  *
  * Here's where we have the Citadel thread implimentation
  *
  */
 
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
 #include <sys/types.h>
 #include <errno.h>
 #include <sys/socket.h>
 #include <unistd.h>
 #include <fcntl.h>
+#include <signal.h>
 
 #if TIME_WITH_SYS_TIME
 # include <sys/time.h>
@@ -25,6 +29,8 @@
 # endif
 #endif
 
+#include <libcitadel.h>
+
 #include "threads.h"
 #include "ctdl_module.h"
 #include "modules_init.h"
@@ -32,6 +38,7 @@
 #include "config.h"
 #include "citserver.h"
 #include "sysdep_decls.h"
+#include "context.h"
 
 /*
  * define this to use the new worker_thread method of handling connections
@@ -322,6 +329,9 @@ void ctdl_thread_internal_change_state (CtdlThreadNode *this_thread, enum CtdlTh
  */
 void CtdlThreadStopAll(void)
 {
+       /* First run any registered shutdown hooks.  This probably doesn't belong here. */
+       PerformSessionHooks(EVT_SHUTDOWN);
+
        //FIXME: The signalling of the condition should not be in the critical_section
        // We need to build a list of threads we are going to signal and then signal them afterwards
        
@@ -329,14 +339,18 @@ void CtdlThreadStopAll(void)
        
        begin_critical_section(S_THREAD_LIST);
        this_thread = CtdlThreadList;
+       // Ask the GC thread to stop first so everything knows we are shutting down.
+       GC_thread->state = CTDL_THREAD_STOP_REQ;
        while(this_thread)
        {
 #ifdef THREADS_USESIGNALS
-               citthread_killl(this_thread->tid, SIGHUP);
+               if (!citthread_equal(this_thread->tid, GC_thread->tid))
+                       citthread_kill(this_thread->tid, SIGHUP);
 #endif
                ctdl_thread_internal_change_state (this_thread, CTDL_THREAD_STOP_REQ);
                citthread_cond_signal(&this_thread->ThreadCond);
                citthread_cond_signal(&this_thread->SleepCond);
+               this_thread->stop_ticker = time(NULL);
                CtdlLogPrintf(CTDL_DEBUG, "Thread system stopping thread \"%s\" (0x%08lx).\n",
                        this_thread->name, this_thread->tid);
                this_thread = this_thread->next;
@@ -394,12 +408,33 @@ double CtdlThreadGetWorkerAvg(void)
 
 double CtdlThreadGetLoadAvg(void)
 {
-       double ret;
-       
+       double load_avg[3] ;
+
+       int ret;
+       int smp_num_cpus;
+
+       /* Borrowed this straight from procps */
+       smp_num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+       if(smp_num_cpus<1) smp_num_cpus=1; /* SPARC glibc is buggy */
+
+       ret = getloadavg(load_avg, 3);
+       if (ret < 0)
+               return 0;
+       return load_avg[0] / smp_num_cpus;
+/*
+ * This old chunk of code returned a value that indicated the load on citserver.
+ * This value could easily reach 100 % even when citserver was doing very little and
+ * hence the machine had much more spare capacity.
+ * Because this value was used to determine if the machine was under heavy load conditions
+ * from other processes in the system, citserver could be strangled unnecessarily.
+ * What we are actually trying to achieve is to strangle citserver if the machine is heavily loaded.
+ * So we have changed this.
+
        begin_critical_section(S_THREAD_LIST);
        ret =  CtdlThreadLoadAvg;
        end_critical_section(S_THREAD_LIST);
        return ret;
+*/
 }
 
 
@@ -473,7 +508,10 @@ int CtdlThreadCheckStop(void)
 
 #ifdef THREADS_USESIGNALS
        if (CT->signal)
+       {
                CtdlLogPrintf(CTDL_DEBUG, "Thread \"%s\" caught signal %d.\n", CT->name, CT->signal);
+               CT->signal = 0;
+       }
 #endif
        if(state == CTDL_THREAD_STOP_REQ)
        {
@@ -505,11 +543,13 @@ void CtdlThreadStop(CtdlThreadNode *thread)
        if (!(this_thread->thread_func))
                return;         // Don't stop garbage collector
 #ifdef THREADS_USESIGNALS
-       citthread_kill(this_thread->tid, SIGHUP);       
+       if (!citthread_equal(this_thread->tid, GC_thread->tid))
+               citthread_kill(this_thread->tid, SIGHUP);
 #endif
        ctdl_thread_internal_change_state (this_thread, CTDL_THREAD_STOP_REQ);
        citthread_cond_signal(&this_thread->ThreadCond);
        citthread_cond_signal(&this_thread->SleepCond);
+       this_thread->stop_ticker = time(NULL);
 }
 
 /*
@@ -605,7 +645,7 @@ void ctdl_thread_internal_calc_loadavg(void)
        CtdlThreadLoadAvg = load_avg/num_threads;
        CtdlThreadWorkerAvg = worker_avg/workers;
 #ifdef WITH_THREADLOG
-       CtdlLogPrintf(CTDL_INFO, "System load average %.2f, workers averag %.2f, threads %d, workers %d, sessions %d\n", CtdlThreadLoadAvg, CtdlThreadWorkerAvg, num_threads, num_workers, num_sessions);
+       CtdlLogPrintf(CTDL_INFO, "System load average %.2f, workers averag %.2f, threads %d, workers %d, sessions %d\n", CtdlThreadGetLoadAvg(), CtdlThreadWorkerAvg, num_threads, num_workers, num_sessions);
 #endif
 }
 
@@ -640,7 +680,7 @@ void CtdlThreadGC (void)
                
                if ((that_thread->state == CTDL_THREAD_STOP_REQ || that_thread->state == CTDL_THREAD_STOPPING)
                        && (!citthread_equal(that_thread->tid, citthread_self())))
-                               that_thread->stop_ticker++;
+                               CtdlLogPrintf(CTDL_DEBUG, "Waiting for thread %s (0x%08lx) to exit.\n", that_thread->name, that_thread->tid);
                else
                {
                        /**
@@ -650,7 +690,7 @@ void CtdlThreadGC (void)
                        that_thread->stop_ticker = 0;
                }
                
-               if (that_thread->stop_ticker == 5)
+               if (that_thread->stop_ticker + 5 == time(NULL))
                {
                        CtdlLogPrintf(CTDL_DEBUG, "Thread System: The thread \"%s\" (0x%08lx) failed to self terminate within 5 ticks. It would be cancelled now.\n", that_thread->name, that_thread->tid);
                        if ((that_thread->flags & CTDLTHREAD_WORKER) == 0)
@@ -1086,6 +1126,10 @@ void ctdl_thread_internal_check_scheduled(void)
        CtdlThreadNode *this_thread, *that_thread;
        time_t now;
        
+       /* Don't start scheduled threads if the system wants single user mode */
+       if (CtdlWantSingleUser())
+               return;
+       
        if (try_critical_section(S_SCHEDULE_LIST))
                return; /* If this list is locked we wait till the next chance */
        
@@ -1120,7 +1164,7 @@ void ctdl_thread_internal_check_scheduled(void)
                                if (ctdl_thread_internal_start_scheduled (that_thread))
                                {
 #ifdef WITH_THREADLOG
-                                       CtdlLogPrintf(CTDL_INFO, "Thread system, Started a scheduled thread \"%s\" (%ud).\n",
+                                       CtdlLogPrintf(CTDL_INFO, "Thread system, Started a scheduled thread \"%s\" (0x%08lx).\n",
                                                that_thread->name, that_thread->tid);
 #endif
                                }
@@ -1143,23 +1187,17 @@ void ctdl_thread_internal_check_scheduled(void)
  */
 int CtdlThreadSelect(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout)
 {
-       int ret;
+       int ret = 0;
        
        ctdl_thread_internal_change_state(CT, CTDL_THREAD_BLOCKED);
-       ret = select(n, readfds, writefds, exceptfds, timeout);
+       if (!CtdlThreadCheckStop())
+               ret = select(n, readfds, writefds, exceptfds, timeout);
        /**
         * If the select returned <= 0 then it failed due to an error
         * or timeout so this thread could stop if asked to do so.
         * Anything else means it needs to continue unless the system is shutting down
         */
-       if (ret <= 0)
-       {
-               /**
-                * select says nothing to do so we can change to running if we haven't been asked to stop.
-                */
-               ctdl_thread_internal_change_state(CT, CTDL_THREAD_RUNNING);
-       }
-       else
+       if (ret > 0)
        {
                /**
                 * The select says this thread needs to do something useful.
@@ -1168,15 +1206,23 @@ int CtdlThreadSelect(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds
                 * idle and select has given it a task to do so it must not stop
                 * In this condition we need to force it into the running state.
                 * CtdlThreadGC will clear its ticker for us.
+                *
+                * FIXME: there is still a small hole here. It is possible for the sequence of locking
+                * to allow the state to get changed to STOP_REQ just after this code: if the other thread
+                * has decided to change the state before this lock, it therefore has to wait till the lock
+                * completes, but it will then continue to change the state. We need something a bit better here.
                 */
-               if (GC_thread->state > CTDL_THREAD_STOP_REQ)
+               citthread_mutex_lock(&CT->ThreadMutex); /* To prevent race condition of a sleeping thread */
+               if (GC_thread->state > CTDL_THREAD_STOP_REQ && CT->state <= CTDL_THREAD_STOP_REQ)
                {
-                       citthread_mutex_lock(&CT->ThreadMutex); /* To prevent race condition of a sleeping thread */
+                       CtdlLogPrintf(CTDL_DEBUG, "Thread %s (0x%08lx) refused stop request.\n", CT->name, CT->tid);
                        CT->state = CTDL_THREAD_RUNNING;
-                       citthread_mutex_unlock(&CT->ThreadMutex);
                }
+               citthread_mutex_unlock(&CT->ThreadMutex);
        }
 
+       ctdl_thread_internal_change_state(CT, CTDL_THREAD_RUNNING);
+
        return ret;
 }
 
@@ -1271,35 +1317,43 @@ void go_threading(void)
 #ifdef NEW_WORKER
                if ((((CtdlThreadGetWorkers() < config.c_max_workers) && (CtdlThreadGetWorkers() <= num_sessions) ) || CtdlThreadGetWorkers() < config.c_min_workers) && (CT->state > CTDL_THREAD_STOP_REQ))
 #else
-               if ((((CtdlThreadGetWorkers() < config.c_max_workers) && (CtdlThreadGetWorkerAvg() > 60) && (CtdlThreadGetLoadAvg() < 90) ) || CtdlThreadGetWorkers() < config.c_min_workers) && (CT->state > CTDL_THREAD_STOP_REQ))
+               if ((((CtdlThreadGetWorkers() < config.c_max_workers) && (CtdlThreadGetWorkerAvg() > 60)) || CtdlThreadGetWorkers() < config.c_min_workers) && (CT->state > CTDL_THREAD_STOP_REQ))
 #endif /* NEW_WORKER */
                {
-                       for (i=0; i<5 ; i++)
-                       {
+                       /* Only start new threads if we are not going to overload the machine */
+                       if (CtdlThreadGetLoadAvg() < ((double)1.00)) {
+                               for (i=0; i<5 ; i++) {
 #ifdef NEW_WORKER
-                               CtdlThreadCreate("Worker Thread (new)",
-                                       CTDLTHREAD_BIGSTACK + CTDLTHREAD_WORKER,
-                                       new_worker_thread,
-                                       NULL
-                                       );
+                                       CtdlThreadCreate("Worker Thread (new)",
+                                               CTDLTHREAD_BIGSTACK + CTDLTHREAD_WORKER,
+                                               new_worker_thread,
+                                               NULL
+                                               );
 #else
-                               CtdlThreadCreate("Worker Thread",
-                                       CTDLTHREAD_BIGSTACK + CTDLTHREAD_WORKER,
-                                       worker_thread,
-                                       NULL
-                                       );
+                                       CtdlThreadCreate("Worker Thread",
+                                               CTDLTHREAD_BIGSTACK + CTDLTHREAD_WORKER,
+                                               worker_thread,
+                                               NULL
+                                               );
 #endif /* NEW_WORKER */
+                               }
                        }
+                       else
+                               CtdlLogPrintf (CTDL_WARNING, "Server strangled due to machine load average too high.\n");
                }
                
                CtdlThreadGC();
-               
+
                if (CtdlThreadGetCount() <= 1) // Shutting down clean up the garbage collector
                {
                        CtdlThreadGC();
                }
                
+#ifdef THREADS_USESIGNALS
+               if (CtdlThreadGetCount() && CT->state > CTDL_THREAD_STOP_REQ)
+#else
                if (CtdlThreadGetCount())
+#endif
                        CtdlThreadSleep(1);
        }
        /*
@@ -1516,7 +1570,6 @@ int execute_session(struct CitContext *bind_me)
 
 
 
-extern void dead_session_purge(int force);
 
 /*
  * A new worker_thread loop.