]> code.citadel.org Git - citadel.git/blobdiff - citadel/threads.c
Fixed the bug that prevented DOWN from working properly.
[citadel.git] / citadel / threads.c
index cd1e999d08f738ea3c6c19370e68ac09320f0511..9782d05fc005c5bf9a3f0567cb1c32dd5db65e90 100644 (file)
@@ -1,18 +1,22 @@
 /*
- * $Id: sysdep.c 5882 2007-12-13 19:46:05Z davew $
+ * $Id$
  *
  * Citadel "system dependent" stuff.
- * See copyright.txt for copyright information.
+ * See COPYING for copyright information.
  *
  * Here's where we have the Citadel thread implimentation
  *
  */
 
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
 #include <sys/types.h>
 #include <errno.h>
 #include <sys/socket.h>
 #include <unistd.h>
 #include <fcntl.h>
+#include <signal.h>
 
 #if TIME_WITH_SYS_TIME
 # include <sys/time.h>
@@ -25,6 +29,8 @@
 # endif
 #endif
 
+#include <libcitadel.h>
+
 #include "threads.h"
 #include "ctdl_module.h"
 #include "modules_init.h"
@@ -32,6 +38,7 @@
 #include "config.h"
 #include "citserver.h"
 #include "sysdep_decls.h"
+#include "context.h"
 
 /*
  * define this to use the new worker_thread method of handling connections
@@ -57,7 +64,7 @@ static int num_workers = 0;                   /* Current number of worker threads */
 CtdlThreadNode *CtdlThreadList = NULL;
 CtdlThreadNode *CtdlThreadSchedList = NULL;
 
-static citthread_t GC_thread;
+static CtdlThreadNode *GC_thread = NULL;
 static char *CtdlThreadStates[CTDL_THREAD_LAST_STATE];
 double CtdlThreadLoadAvg = 0;
 double CtdlThreadWorkerAvg = 0;
@@ -153,7 +160,7 @@ void ctdl_thread_internal_init_tsd(void)
        int ret;
        
        if ((ret = citthread_key_create(&ThreadKey, ctdl_thread_internal_dest_tsd))) {
-               lprintf(CTDL_EMERG, "citthread_key_create: %s\n", strerror(ret));
+               CtdlLogPrintf(CTDL_EMERG, "citthread_key_create: %s\n", strerror(ret));
                exit(CTDLEXIT_DB);
        }
 }
@@ -220,7 +227,6 @@ void ctdl_thread_internal_init(void)
        CtdlThreadNode *this_thread;
        int ret = 0;
        
-       GC_thread = citthread_self();
        CtdlThreadStates[CTDL_THREAD_INVALID] = strdup ("Invalid Thread");
        CtdlThreadStates[CTDL_THREAD_VALID] = strdup("Valid Thread");
        CtdlThreadStates[CTDL_THREAD_CREATE] = strdup("Thread being Created");
@@ -257,7 +263,8 @@ void ctdl_thread_internal_init(void)
 
        this_thread->name = "Garbage Collection Thread";
        
-       this_thread->tid = GC_thread;
+       this_thread->tid = citthread_self();
+       GC_thread = this_thread;
        CT = this_thread;
        
        num_threads++;  // Increase the count of threads in the system.
@@ -322,6 +329,9 @@ void ctdl_thread_internal_change_state (CtdlThreadNode *this_thread, enum CtdlTh
  */
 void CtdlThreadStopAll(void)
 {
+       /* First run any registered shutdown hooks.  This probably doesn't belong here. */
+       PerformSessionHooks(EVT_SHUTDOWN);
+
        //FIXME: The signalling of the condition should not be in the critical_section
        // We need to build a list of threads we are going to signal and then signal them afterwards
        
@@ -329,14 +339,18 @@ void CtdlThreadStopAll(void)
        
        begin_critical_section(S_THREAD_LIST);
        this_thread = CtdlThreadList;
+       // Ask the GC thread to stop first so everything knows we are shutting down.
+       GC_thread->state = CTDL_THREAD_STOP_REQ;
        while(this_thread)
        {
 #ifdef THREADS_USESIGNALS
-               citthread_killl(this_thread->tid, SIGHUP);
+               if (!citthread_equal(this_thread->tid, GC_thread->tid))
+                       citthread_kill(this_thread->tid, SIGHUP);
 #endif
                ctdl_thread_internal_change_state (this_thread, CTDL_THREAD_STOP_REQ);
                citthread_cond_signal(&this_thread->ThreadCond);
                citthread_cond_signal(&this_thread->SleepCond);
+               this_thread->stop_ticker = time(NULL);
                CtdlLogPrintf(CTDL_DEBUG, "Thread system stopping thread \"%s\" (0x%08lx).\n",
                        this_thread->name, this_thread->tid);
                this_thread = this_thread->next;
@@ -394,12 +408,33 @@ double CtdlThreadGetWorkerAvg(void)
 
 double CtdlThreadGetLoadAvg(void)
 {
-       double ret;
-       
+       double load_avg[3] ;
+
+       int ret;
+       int smp_num_cpus;
+
+       /* Borrowed this straight from procps */
+       smp_num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+       if(smp_num_cpus<1) smp_num_cpus=1; /* SPARC glibc is buggy */
+
+       ret = getloadavg(load_avg, 3);
+       if (ret < 0)
+               return 0;
+       return load_avg[0] / smp_num_cpus;
+/*
+ * This old chunk of code returned a value that indicated the load on citserver.
+ * This value could easily reach 100 % even when citserver was doing very little and
+ * hence the machine had much more spare capacity.
+ * Because this value was used to determine if the machine was under heavy load conditions
+ * from other processes in the system, citserver could be strangled unnecessarily.
+ * What we are actually trying to achieve is to strangle citserver if the machine is heavily loaded.
+ * So we have changed this.
+
        begin_critical_section(S_THREAD_LIST);
        ret =  CtdlThreadLoadAvg;
        end_critical_section(S_THREAD_LIST);
        return ret;
+*/
 }
 
 
@@ -471,9 +506,12 @@ int CtdlThreadCheckStop(void)
        
        state = CT->state;
 
-#ifdef THREADS_USERSIGNALS
+#ifdef THREADS_USESIGNALS
        if (CT->signal)
+       {
                CtdlLogPrintf(CTDL_DEBUG, "Thread \"%s\" caught signal %d.\n", CT->name, CT->signal);
+               CT->signal = 0;
+       }
 #endif
        if(state == CTDL_THREAD_STOP_REQ)
        {
@@ -505,11 +543,13 @@ void CtdlThreadStop(CtdlThreadNode *thread)
        if (!(this_thread->thread_func))
                return;         // Don't stop garbage collector
 #ifdef THREADS_USESIGNALS
-       citthread_kill(this_thread->tid, SIGHUP);       
+       if (!citthread_equal(this_thread->tid, GC_thread->tid))
+               citthread_kill(this_thread->tid, SIGHUP);
 #endif
        ctdl_thread_internal_change_state (this_thread, CTDL_THREAD_STOP_REQ);
        citthread_cond_signal(&this_thread->ThreadCond);
        citthread_cond_signal(&this_thread->SleepCond);
+       this_thread->stop_ticker = time(NULL);
 }
 
 /*
@@ -605,7 +645,7 @@ void ctdl_thread_internal_calc_loadavg(void)
        CtdlThreadLoadAvg = load_avg/num_threads;
        CtdlThreadWorkerAvg = worker_avg/workers;
 #ifdef WITH_THREADLOG
-       CtdlLogPrintf(CTDL_INFO, "System load average %.2f, workers averag %.2f, threads %d, workers %d, sessions %d\n", CtdlThreadLoadAvg, CtdlThreadWorkerAvg, num_threads, num_workers, num_sessions);
+       CtdlLogPrintf(CTDL_INFO, "System load average %.2f, workers averag %.2f, threads %d, workers %d, sessions %d\n", CtdlThreadGetLoadAvg(), CtdlThreadWorkerAvg, num_threads, num_workers, num_sessions);
 #endif
 }
 
@@ -640,17 +680,25 @@ void CtdlThreadGC (void)
                
                if ((that_thread->state == CTDL_THREAD_STOP_REQ || that_thread->state == CTDL_THREAD_STOPPING)
                        && (!citthread_equal(that_thread->tid, citthread_self())))
-                               that_thread->stop_ticker++;
+                               CtdlLogPrintf(CTDL_DEBUG, "Waiting for thread %s (0x%08lx) to exit.\n", that_thread->name, that_thread->tid);
+               else
+               {
+                       /**
+                        * Catch the situation where a worker was asked to stop but couldn't and we are not
+                        * shutting down.
+                        */
+                       that_thread->stop_ticker = 0;
+               }
                
-               if (that_thread->stop_ticker == 5)
+               if (that_thread->stop_ticker + 5 == time(NULL))
                {
-                       CtdlLogPrintf(CTDL_DEBUG, "Thread System: The thread \"%s\" (0x%08lx) failed to self terminate withing 5 ticks. Canceling it.\n", that_thread->name, that_thread->tid);
+                       CtdlLogPrintf(CTDL_DEBUG, "Thread System: The thread \"%s\" (0x%08lx) failed to self terminate within 5 ticks. It would be cancelled now.\n", that_thread->name, that_thread->tid);
                        if ((that_thread->flags & CTDLTHREAD_WORKER) == 0)
-                               CtdlLogPrintf(CTDL_INFO, "Thread System: A non worker thread was canceled this may cause message loss.\n");
-                       that_thread->state = CTDL_THREAD_CANCELLED;
+                               CtdlLogPrintf(CTDL_INFO, "Thread System: A non worker thread would have been canceled this may cause message loss.\n");
+//                     that_thread->state = CTDL_THREAD_CANCELLED;
                        that_thread->stop_ticker++;
-                       citthread_cancel(that_thread->tid);
-                       continue;
+//                     citthread_cancel(that_thread->tid);
+//                     continue;
                }
                
                /* Do we need to clean up this thread? */
@@ -1078,6 +1126,10 @@ void ctdl_thread_internal_check_scheduled(void)
        CtdlThreadNode *this_thread, *that_thread;
        time_t now;
        
+       /* Don't start scheduled threads if the system wants single user mode */
+       if (CtdlWantSingleUser())
+               return;
+       
        if (try_critical_section(S_SCHEDULE_LIST))
                return; /* If this list is locked we wait till the next chance */
        
@@ -1112,7 +1164,7 @@ void ctdl_thread_internal_check_scheduled(void)
                                if (ctdl_thread_internal_start_scheduled (that_thread))
                                {
 #ifdef WITH_THREADLOG
-                                       CtdlLogPrintf(CTDL_INFO, "Thread system, Started a scheduled thread \"%s\" (%ud).\n",
+                                       CtdlLogPrintf(CTDL_INFO, "Thread system, Started a scheduled thread \"%s\" (0x%08lx).\n",
                                                that_thread->name, that_thread->tid);
 #endif
                                }
@@ -1135,11 +1187,42 @@ void ctdl_thread_internal_check_scheduled(void)
  */
 int CtdlThreadSelect(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout)
 {
-       int ret;
+       int ret = 0;
        
        ctdl_thread_internal_change_state(CT, CTDL_THREAD_BLOCKED);
-       ret = select(n, readfds, writefds, exceptfds, timeout);
+       if (!CtdlThreadCheckStop())
+               ret = select(n, readfds, writefds, exceptfds, timeout);
+       /**
+        * If the select returned <= 0 then it failed due to an error
+        * or timeout so this thread could stop if asked to do so.
+        * Anything else means it needs to continue unless the system is shutting down
+        */
+       if (ret > 0)
+       {
+               /**
+                * The select says this thread needs to do something useful.
+                * This thread was in an idle state so it may have been asked to stop
+                * but if the system isn't shutting down this thread is no longer
+                * idle and select has given it a task to do so it must not stop
+                * In this condition we need to force it into the running state.
+                * CtdlThreadGC will clear its ticker for us.
+                *
+                * FIXME: there is still a small hole here. It is possible for the sequence of locking
+                * to allow the state to get changed to STOP_REQ just after this code if the other thread
+                * has decided to change the state before this lock. It therefore has to wait till the lock
+                * completes, but it will continue to change the state. We need something a bit better here.
+                */
+               citthread_mutex_lock(&CT->ThreadMutex); /* To prevent race condition of a sleeping thread */
+               if (GC_thread->state > CTDL_THREAD_STOP_REQ && CT->state <= CTDL_THREAD_STOP_REQ)
+               {
+                       CtdlLogPrintf(CTDL_DEBUG, "Thread %s (0x%08lx) refused stop request.\n", CT->name, CT->tid);
+                       CT->state = CTDL_THREAD_RUNNING;
+               }
+               citthread_mutex_unlock(&CT->ThreadMutex);
+       }
+
        ctdl_thread_internal_change_state(CT, CTDL_THREAD_RUNNING);
+
        return ret;
 }
 
@@ -1234,35 +1317,43 @@ void go_threading(void)
 #ifdef NEW_WORKER
                if ((((CtdlThreadGetWorkers() < config.c_max_workers) && (CtdlThreadGetWorkers() <= num_sessions) ) || CtdlThreadGetWorkers() < config.c_min_workers) && (CT->state > CTDL_THREAD_STOP_REQ))
 #else
-               if ((((CtdlThreadGetWorkers() < config.c_max_workers) && (CtdlThreadGetWorkerAvg() > 60) && (CtdlThreadGetLoadAvg() < 90) ) || CtdlThreadGetWorkers() < config.c_min_workers) && (CT->state > CTDL_THREAD_STOP_REQ))
+               if ((((CtdlThreadGetWorkers() < config.c_max_workers) && (CtdlThreadGetWorkerAvg() > 60)) || CtdlThreadGetWorkers() < config.c_min_workers) && (CT->state > CTDL_THREAD_STOP_REQ))
 #endif /* NEW_WORKER */
                {
-                       for (i=0; i<5 ; i++)
-                       {
+                       /* Only start new threads if we are not going to overload the machine */
+                       if (CtdlThreadGetLoadAvg() < ((double)1.00)) {
+                               for (i=0; i<5 ; i++) {
 #ifdef NEW_WORKER
-                               CtdlThreadCreate("Worker Thread (new)",
-                                       CTDLTHREAD_BIGSTACK + CTDLTHREAD_WORKER,
-                                       new_worker_thread,
-                                       NULL
-                                       );
+                                       CtdlThreadCreate("Worker Thread (new)",
+                                               CTDLTHREAD_BIGSTACK + CTDLTHREAD_WORKER,
+                                               new_worker_thread,
+                                               NULL
+                                               );
 #else
-                               CtdlThreadCreate("Worker Thread",
-                                       CTDLTHREAD_BIGSTACK + CTDLTHREAD_WORKER,
-                                       worker_thread,
-                                       NULL
-                                       );
+                                       CtdlThreadCreate("Worker Thread",
+                                               CTDLTHREAD_BIGSTACK + CTDLTHREAD_WORKER,
+                                               worker_thread,
+                                               NULL
+                                               );
 #endif /* NEW_WORKER */
+                               }
                        }
+                       else
+                               CtdlLogPrintf (CTDL_WARNING, "Server strangled due to machine load average too high.\n");
                }
                
                CtdlThreadGC();
-               
+
                if (CtdlThreadGetCount() <= 1) // Shutting down clean up the garbage collector
                {
                        CtdlThreadGC();
                }
                
+#ifdef THREADS_USESIGNALS
+               if (CtdlThreadGetCount() && CT->state > CTDL_THREAD_STOP_REQ)
+#else
                if (CtdlThreadGetCount())
+#endif
                        CtdlThreadSleep(1);
        }
        /*
@@ -1479,7 +1570,6 @@ int execute_session(struct CitContext *bind_me)
 
 
 
-extern void dead_session_purge(int force);
 
 /*
  * A new worker_thread loop.