Removed the DEBUG_MEMORY_LEAKS framework because we do this with Valgrind now.
[citadel.git] / citadel / threads.c
1 /*
2  * Thread handling stuff for Citadel server
3  *
4  * Copyright (c) 1987-2011 by the citadel.org team
5  *
6  * This program is open source software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20
21 #include <stdlib.h>
22 #include <unistd.h>
23 #include <stdio.h>
24 #include <sys/types.h>
25 #include <errno.h>
26 #include <sys/socket.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 #include <signal.h>
30 #include <syslog.h>
31
32 #include "sysdep.h"
33 #if TIME_WITH_SYS_TIME
34 # include <sys/time.h>
35 # include <time.h>
36 #else
37 # if HAVE_SYS_TIME_H
38 #  include <sys/time.h>
39 # else
40 #  include <time.h>
41 # endif
42 #endif
43
44 #ifdef HAVE_SYSCALL_H
45 # include <syscall.h>
46 #else 
47 # if HAVE_SYS_SYSCALL_H
48 #  include <sys/syscall.h>
49 # endif
50 #endif
51
52 #include <libcitadel.h>
53
54 #include "threads.h"
55 #include "ctdl_module.h"
56 #include "modules_init.h"
57 #include "housekeeping.h"
58 #include "config.h"
59 #include "citserver.h"
60 #include "sysdep_decls.h"
61 #include "context.h"
62 #include "event_client.h"
63
64
65 /*
66  * To create a thread you must call one of the create thread functions.
67  * You must pass it the address of (a pointer to a CtdlThreadNode initialised to NULL) like this
68  * struct CtdlThreadNode *node = NULL;
69  * pass in &node
70  * If the thread is created *node will point to the thread control structure for the created thread.
71  * If the thread creation fails *node remains NULL
72  * Do not free the memory pointed to by *node, it doesn't belong to you.
73  * This new interface duplicates much of the eCrash stuff. We should go for closer integration since that would
74  * remove the need for the calls to eCrashRegisterThread and friends
75  */
76
/* Counters for the thread system; exposed via CtdlThreadGetCount() and CtdlThreadGetWorkers(). */
static int num_threads = 0;                     /* Current number of threads */
static int num_workers = 0;                     /* Current number of worker threads */

/* Head of the list of thread nodes; nodes are linked through their next/prev members. */
CtdlThreadNode *CtdlThreadList = NULL;
/* Head of the scheduled-thread list; its nodes are released by ctdl_thread_internal_cleanup(). */
CtdlThreadNode *CtdlThreadSchedList = NULL;

/* Node describing the garbage collector thread; set by ctdl_thread_internal_init(). */
static CtdlThreadNode *GC_thread = NULL;
/* One strdup()'d description per thread state, filled in by ctdl_thread_internal_init(). */
static char *CtdlThreadStates[CTDL_THREAD_LAST_STATE];
double CtdlThreadLoadAvg = 0;
double CtdlThreadWorkerAvg = 0;
/* Key for the thread-specific data; created by ctdl_thread_internal_init_tsd(). */
citthread_key_t ThreadKey;

citthread_mutex_t Critters[MAX_SEMAPHORES];     /* Things needing locking */
90
91
92
93 void InitialiseSemaphores(void)
94 {
95         int i;
96
97         /* Set up a bunch of semaphores to be used for critical sections */
98         for (i=0; i<MAX_SEMAPHORES; ++i) {
99                 citthread_mutex_init(&Critters[i], NULL);
100         }
101 }
102
103
104
105
106 /*
107  * Obtain a semaphore lock to begin a critical section.
108  * but only if no one else has one
109  */
110 int try_critical_section(int which_one)
111 {
112         /* For all types of critical sections except those listed here,
113          * ensure nobody ever tries to do a critical section within a
114          * transaction; this could lead to deadlock.
115          */
116         if (    (which_one != S_FLOORCACHE)
117                 && (which_one != S_RPLIST)
118         ) {
119                 cdb_check_handles();
120         }
121         return (citthread_mutex_trylock(&Critters[which_one]));
122 }
123
124
125 /*
126  * Obtain a semaphore lock to begin a critical section.
127  */
128 void begin_critical_section(int which_one)
129 {
130         /* syslog(LOG_DEBUG, "begin_critical_section(%d)\n", which_one); */
131
132         /* For all types of critical sections except those listed here,
133          * ensure nobody ever tries to do a critical section within a
134          * transaction; this could lead to deadlock.
135          */
136         if (    (which_one != S_FLOORCACHE)
137                 && (which_one != S_RPLIST)
138         ) {
139                 cdb_check_handles();
140         }
141         citthread_mutex_lock(&Critters[which_one]);
142 }
143
144 /*
145  * Release a semaphore lock to end a critical section.
146  */
147 void end_critical_section(int which_one)
148 {
149         citthread_mutex_unlock(&Critters[which_one]);
150 }
151
152
/*
 * Destructor for the thread-specific data block.
 * Registered with the TSD key in ctdl_thread_internal_init_tsd(); a NULL
 * argument is ignored.
 */
static void ctdl_thread_internal_dest_tsd(void *arg)
{
        if (arg == NULL) {
                return;
        }
        check_handles(arg);
        free(arg);
}
163
164
165 /*
166  * A function to initialise the thread TSD
167  */
168 void ctdl_thread_internal_init_tsd(void)
169 {
170         int ret;
171         
172         if ((ret = citthread_key_create(&ThreadKey, ctdl_thread_internal_dest_tsd))) {
173                 syslog(LOG_EMERG, "citthread_key_create: %s\n", strerror(ret));
174                 exit(CTDLEXIT_DB);
175         }
176 }
177
178 /*
179  * Ensure that we have a key for thread-specific data. 
180  *
181  * This should be called immediately after startup by any thread 
182  * 
183  */
184 void CtdlThreadAllocTSD(void)
185 {
186         ThreadTSD *tsd;
187
188         if (citthread_getspecific(ThreadKey) != NULL)
189                 return;
190
191         tsd = malloc(sizeof(ThreadTSD));
192
193         tsd->tid = NULL;
194
195         memset(tsd->cursors, 0, sizeof tsd->cursors);
196         tsd->self = NULL;
197         
198         citthread_setspecific(ThreadKey, tsd);
199 }
200
201
202 void ctdl_thread_internal_free_tsd(void)
203 {
204         ctdl_thread_internal_dest_tsd(citthread_getspecific(ThreadKey));
205         citthread_setspecific(ThreadKey, NULL);
206 }
207
208
209 void ctdl_thread_internal_cleanup(void)
210 {
211         int i;
212         CtdlThreadNode *this_thread, *that_thread;
213         
214         for (i=0; i<CTDL_THREAD_LAST_STATE; i++)
215         {
216                 free (CtdlThreadStates[i]);
217         }
218         
219         /* Clean up the scheduled thread list */
220         this_thread = CtdlThreadSchedList;
221         while (this_thread)
222         {
223                 that_thread = this_thread;
224                 this_thread = this_thread->next;
225                 citthread_mutex_destroy(&that_thread->ThreadMutex);
226                 citthread_cond_destroy(&that_thread->ThreadCond);
227                 citthread_mutex_destroy(&that_thread->SleepMutex);
228                 citthread_cond_destroy(&that_thread->SleepCond);
229                 citthread_attr_destroy(&that_thread->attr);
230                 free(that_thread);
231         }
232         ctdl_thread_internal_free_tsd();
233 }
234
235 void ctdl_thread_internal_init(void)
236 {
237         CtdlThreadNode *this_thread;
238         int ret = 0;
239         
240         CtdlThreadStates[CTDL_THREAD_INVALID] = strdup ("Invalid Thread");
241         CtdlThreadStates[CTDL_THREAD_VALID] = strdup("Valid Thread");
242         CtdlThreadStates[CTDL_THREAD_CREATE] = strdup("Thread being Created");
243         CtdlThreadStates[CTDL_THREAD_CANCELLED] = strdup("Thread Cancelled");
244         CtdlThreadStates[CTDL_THREAD_EXITED] = strdup("Thread Exited");
245         CtdlThreadStates[CTDL_THREAD_STOPPING] = strdup("Thread Stopping");
246         CtdlThreadStates[CTDL_THREAD_STOP_REQ] = strdup("Thread Stop Requested");
247         CtdlThreadStates[CTDL_THREAD_SLEEPING] = strdup("Thread Sleeping");
248         CtdlThreadStates[CTDL_THREAD_RUNNING] = strdup("Thread Running");
249         CtdlThreadStates[CTDL_THREAD_BLOCKED] = strdup("Thread Blocked");
250         
251         /* Get ourself a thread entry */
252         this_thread = malloc(sizeof(CtdlThreadNode));
253         if (this_thread == NULL) {
254                 syslog(LOG_EMERG, "Thread system, can't allocate CtdlThreadNode, exiting\n");
255                 return;
256         }
257         // Ensuring this is zero'd means we make sure the thread doesn't start doing its thing until we are ready.
258         memset (this_thread, 0, sizeof(CtdlThreadNode));
259         
260         citthread_mutex_init (&(this_thread->ThreadMutex), NULL);
261         citthread_cond_init (&(this_thread->ThreadCond), NULL);
262         citthread_mutex_init (&(this_thread->SleepMutex), NULL);
263         citthread_cond_init (&(this_thread->SleepCond), NULL);
264         
265         /* We are garbage collector so create us as running */
266         this_thread->state = CTDL_THREAD_RUNNING;
267         
268         if ((ret = citthread_attr_init(&this_thread->attr))) {
269                 syslog(LOG_EMERG, "Thread system, citthread_attr_init: %s\n", strerror(ret));
270                 free(this_thread);
271                 return;
272         }
273
274         this_thread->name = "Garbage Collection Thread";
275         
276         this_thread->tid = citthread_self();
277         GC_thread = this_thread;
278         CT = this_thread;
279         
280         num_threads++;  // Increase the count of threads in the system.
281
282         this_thread->next = CtdlThreadList;
283         CtdlThreadList = this_thread;
284         if (this_thread->next)
285                 this_thread->next->prev = this_thread;
286         /* Set up start times */
287         gettimeofday(&this_thread->start_time, NULL);           /* Time this thread started */
288         memcpy(&this_thread->last_state_change, &this_thread->start_time, sizeof (struct timeval));     /* Changed state so mark it. */
289 }
290
291
292 /*
293  * A function to chenge the state of a thread
294  */
295 void ctdl_thread_internal_change_state (CtdlThreadNode *this_thread, enum CtdlThreadState new_state)
296 {
297         /*
298          * Wether we change state or not we need update the load values
299          */
300         /* This mutex not needed here? */
301         citthread_mutex_lock(&this_thread->ThreadMutex); /* To prevent race condition of a sleeping thread */
302         if ((new_state == CTDL_THREAD_STOP_REQ) && (this_thread->state > CTDL_THREAD_STOP_REQ))
303                 this_thread->state = new_state;
304         if (((new_state == CTDL_THREAD_SLEEPING) || (new_state == CTDL_THREAD_BLOCKED)) && (this_thread->state == CTDL_THREAD_RUNNING))
305                 this_thread->state = new_state;
306         if ((new_state == CTDL_THREAD_RUNNING) && ((this_thread->state == CTDL_THREAD_SLEEPING) || (this_thread->state == CTDL_THREAD_BLOCKED)))
307                 this_thread->state = new_state;
308         citthread_mutex_unlock(&this_thread->ThreadMutex);
309 }
310
311
312 /*
313  * A function to tell all threads to exit
314  */
315 void CtdlThreadStopAll(void)
316 {
317         /* First run any registered shutdown hooks.  This probably doesn't belong here. */
318         PerformSessionHooks(EVT_SHUTDOWN);
319         
320         /* then close all tcp ports so nobody else can talk to us anymore. */
321         CtdlShutdownServiceHooks();
322         //FIXME: The signalling of the condition should not be in the critical_section
323         // We need to build a list of threads we are going to signal and then signal them afterwards
324         
325         ShutDownEventQueue();
326
327         CtdlThreadNode *this_thread;
328         
329         begin_critical_section(S_THREAD_LIST);
330         this_thread = CtdlThreadList;
331         // Ask the GC thread to stop first so everything knows we are shutting down.
332         GC_thread->state = CTDL_THREAD_STOP_REQ;
333         while(this_thread)
334         {
335                 if (!citthread_equal(this_thread->tid, GC_thread->tid))
336                         citthread_kill(this_thread->tid, SIGHUP);
337
338                 ctdl_thread_internal_change_state (this_thread, CTDL_THREAD_STOP_REQ);
339                 citthread_cond_signal(&this_thread->ThreadCond);
340                 citthread_cond_signal(&this_thread->SleepCond);
341                 this_thread->stop_ticker = time(NULL);
342                 syslog(LOG_DEBUG, "Thread system stopping thread \"%s\" (0x%08lx).\n",
343                         this_thread->name, this_thread->tid);
344                 this_thread = this_thread->next;
345         }
346         end_critical_section(S_THREAD_LIST);
347 }
348
349
350 /*
351  * A function to wake up all sleeping threads
352  */
353 void CtdlThreadWakeAll(void)
354 {
355         CtdlThreadNode *this_thread;
356         
357         syslog(LOG_DEBUG, "Thread system waking all threads.\n");
358         
359         begin_critical_section(S_THREAD_LIST);
360         this_thread = CtdlThreadList;
361         while(this_thread)
362         {
363                 if (!this_thread->thread_func)
364                 {
365                         citthread_cond_signal(&this_thread->ThreadCond);
366                         citthread_cond_signal(&this_thread->SleepCond);
367                 }
368                 this_thread = this_thread->next;
369         }
370         end_critical_section(S_THREAD_LIST);
371 }
372
373
374 /*
375  * A function to return the number of threads running in the system
376  */
377 int CtdlThreadGetCount(void)
378 {
379         return  num_threads;
380 }
381
382 int CtdlThreadGetWorkers(void)
383 {
384         return  num_workers;
385 }
386
387 double CtdlThreadGetWorkerAvg(void)
388 {
389         double ret;
390         
391         begin_critical_section(S_THREAD_LIST);
392         ret =  CtdlThreadWorkerAvg;
393         end_critical_section(S_THREAD_LIST);
394         return ret;
395 }
396
/*
 * Return the first load average sample divided by the number of online CPUs.
 *
 * When HAVE_GETLOADAVG is not defined the samples stay at zero, so the
 * result is 0. A negative return from getloadavg() also yields 0.
 *
 * This replaced an older calculation based on CtdlThreadLoadAvg, which
 * described the load on citserver itself and could reach 100% even when
 * citserver was doing very little. The aim is to strangle citserver when the
 * machine as a whole is heavily loaded, so the machine's load is used instead.
 */
double CtdlThreadGetLoadAvg(void)
{
        double samples[3] = {0.0, 0.0, 0.0};
        int nsamples = 0;
        /* Borrowed this straight from procps */
        int cpus = sysconf(_SC_NPROCESSORS_ONLN);

        if (cpus < 1) {
                cpus = 1;       /* SPARC glibc is buggy */
        }

#ifdef HAVE_GETLOADAVG
        nsamples = getloadavg(samples, 3);
#endif
        if (nsamples < 0) {
                return 0;
        }
        return samples[0] / cpus;
}
429
430
431
432
433 /*
434  * A function to rename a thread
435  * Returns a const char *
436  */
437 const char *CtdlThreadName(const char *name)
438 {
439         const char *old_name;
440         
441         if (!CT)
442         {
443                 syslog(LOG_WARNING, "Thread system WARNING. Attempt to CtdlThreadRename() a non thread. %s\n", name);
444                 return NULL;
445         }
446         old_name = CT->name;
447         if (name)
448                 CT->name = name;
449         return (old_name);
450 }       
451
452
453 /*
454  * A function to force a thread to exit
455  */
456 void CtdlThreadCancel(CtdlThreadNode *thread)
457 {
458         CtdlThreadNode *this_thread;
459         
460         if (!thread)
461                 this_thread = CT;
462         else
463                 this_thread = thread;
464         if (!this_thread)
465         {
466                 syslog(LOG_EMERG, "Thread system PANIC. Attempt to CtdlThreadCancel() a non thread.\n");
467                 CtdlThreadStopAll();
468                 return;
469         }
470         
471         if (!this_thread->thread_func)
472         {
473                 syslog(LOG_EMERG, "Thread system PANIC. Attempt to CtdlThreadCancel() the garbage collector.\n");
474                 CtdlThreadStopAll();
475                 return;
476         }
477         
478         ctdl_thread_internal_change_state (this_thread, CTDL_THREAD_CANCELLED);
479         citthread_cancel(this_thread->tid);
480 }
481
482
483 /*
484  * A function for a thread to check if it has been asked to stop
485  */
486 int CtdlThreadCheckStop(void)
487 {
488         int state;
489         
490         if (!CT)
491         {
492                 syslog(LOG_EMERG, "Thread system PANIC, CtdlThreadCheckStop() called by a non thread.\n");
493                 CtdlThreadStopAll();
494                 return -1;
495         }
496         
497         state = CT->state;
498
499         if (CT->signal)
500         {
501                 syslog(LOG_DEBUG, "Thread \"%s\" caught signal %d.\n", CT->name, CT->signal);
502                 if (CT->signal == SIGHUP)
503                         CT->state = CTDL_THREAD_STOP_REQ;
504                 CT->signal = 0;
505         }
506         if(state == CTDL_THREAD_STOP_REQ)
507         {
508                 CT->state = CTDL_THREAD_STOPPING;
509                 return -1;
510         }
511         else if((state < CTDL_THREAD_STOP_REQ) && (state > CTDL_THREAD_CREATE))
512         {
513                 return -1;
514         }
515         return 0;
516 }
517
518
519 /*
520  * A function to ask a thread to exit
521  * The thread must call CtdlThreadCheckStop() periodically to determine if it should exit
522  */
523 void CtdlThreadStop(CtdlThreadNode *thread)
524 {
525         CtdlThreadNode *this_thread;
526         
527         if (!thread)
528                 this_thread = CT;
529         else
530                 this_thread = thread;
531         if (!this_thread)
532                 return;
533         if (!(this_thread->thread_func))
534                 return;         // Don't stop garbage collector
535
536         if (!citthread_equal(this_thread->tid, GC_thread->tid))
537                 citthread_kill(this_thread->tid, SIGHUP);
538
539         ctdl_thread_internal_change_state (this_thread, CTDL_THREAD_STOP_REQ);
540         citthread_cond_signal(&this_thread->ThreadCond);
541         citthread_cond_signal(&this_thread->SleepCond);
542         this_thread->stop_ticker = time(NULL);
543 }
544
545 /*
546  * So we now have a sleep command that works with threads but it is in seconds
547  */
548 void CtdlThreadSleep(int secs)
549 {
550         struct timespec wake_time;
551         struct timeval time_now;
552         
553         
554         if (!CT)
555         {
556                 syslog(LOG_WARNING, "CtdlThreadSleep() called by something that is not a thread. Should we die?\n");
557                 return;
558         }
559         
560         memset (&wake_time, 0, sizeof(struct timespec));
561         gettimeofday(&time_now, NULL);
562         wake_time.tv_sec = time_now.tv_sec + secs;
563         wake_time.tv_nsec = time_now.tv_usec * 10;
564
565         ctdl_thread_internal_change_state (CT, CTDL_THREAD_SLEEPING);
566         
567         citthread_mutex_lock(&CT->ThreadMutex); /* Prevent something asking us to awaken before we've gone to sleep */
568         citthread_cond_timedwait(&CT->SleepCond, &CT->ThreadMutex, &wake_time);
569         citthread_mutex_unlock(&CT->ThreadMutex);
570         
571         ctdl_thread_internal_change_state (CT, CTDL_THREAD_RUNNING);
572 }
573
574
575 /*
576  * Routine to clean up our thread function on exit
577  */
578 static void ctdl_internal_thread_cleanup(void *arg)
579 {
580         /*
581          * In here we were called by the current thread because it is exiting
582          * NB. WE ARE THE CURRENT THREAD
583          */
584         if (CT)
585         {
586                 const char *name = CT->name;
587                 const pid_t tid = CT->tid;
588
589                 syslog(LOG_NOTICE, "Thread \"%s\" (0x%08lx) exited.\n", name, (unsigned long) tid);
590         }
591         else 
592         {
593                 syslog(LOG_NOTICE, "some ((unknown ? ? ?) Thread exited.\n");
594         }
595         
596         #ifdef HAVE_BACKTRACE
597 ///     eCrash_UnregisterThread();
598         #endif
599         
600         citthread_mutex_lock(&CT->ThreadMutex);
601         CT->state = CTDL_THREAD_EXITED; // needs to be last thing else house keeping will unlink us too early
602         citthread_mutex_unlock(&CT->ThreadMutex);
603 }
604
605 /*
606  * Garbage collection routine.
607  * Gets called by main() in a loop to clean up the thread list periodically.
608  */
609 void CtdlThreadGC (void)
610 {
611
612
613         return;
614         /* FIXME this is a big deal, but I think it's causing corruption */
615
616
617         CtdlThreadNode *this_thread, *that_thread;
618         int workers = 0, sys_workers;
619         int ret=0;
620
621         begin_critical_section(S_THREAD_LIST);
622         
623         /* Handle exiting of garbage collector thread */
624         if(num_threads == 1)
625                 CtdlThreadList->state = CTDL_THREAD_EXITED;
626         
627 #ifdef WITH_THREADLOG
628         syslog(LOG_DEBUG, "Thread system running garbage collection.\n");
629 #endif
630         /*
631          * Woke up to do garbage collection
632          */
633         this_thread = CtdlThreadList;
634         while(this_thread)
635         {
636                 that_thread = this_thread;
637                 this_thread = this_thread->next;
638                 
639                 if ((that_thread->state == CTDL_THREAD_STOP_REQ || that_thread->state == CTDL_THREAD_STOPPING)
640                         && (!citthread_equal(that_thread->tid, citthread_self())))
641                 {
642                         syslog(LOG_DEBUG, "Waiting for thread %s (0x%08lx) to exit.\n", that_thread->name, that_thread->tid);
643                         terminate_stuck_sessions();
644                 }
645                 else
646                 {
647                         /**
648                          * Catch the situation where a worker was asked to stop but couldn't and we are not
649                          * shutting down.
650                          */
651                         that_thread->stop_ticker = 0;
652                 }
653                 
654                 if (that_thread->stop_ticker + 5 == time(NULL))
655                 {
656                         syslog(LOG_DEBUG, "Thread System: The thread \"%s\" (0x%08lx) failed to self terminate within 5 ticks. It would be cancelled now.\n", that_thread->name, that_thread->tid);
657                         if ((that_thread->flags & CTDLTHREAD_WORKER) == 0)
658                                 syslog(LOG_INFO, "Thread System: A non worker thread would have been canceled this may cause message loss.\n");
659 //                      that_thread->state = CTDL_THREAD_CANCELLED;
660                         that_thread->stop_ticker++;
661 //                      citthread_cancel(that_thread->tid);
662 //                      continue;
663                 }
664                 
665                 /* Do we need to clean up this thread? */
666                 if ((that_thread->state != CTDL_THREAD_EXITED) && (that_thread->state != CTDL_THREAD_CANCELLED))
667                 {
668                         if(that_thread->flags & CTDLTHREAD_WORKER)
669                                 workers++;      /* Sanity check on number of worker threads */
670                         continue;
671                 }
672                 
673                 if (citthread_equal(that_thread->tid, citthread_self()) && that_thread->thread_func)
674                 {       /* Sanity check */
675                         end_critical_section(S_THREAD_LIST);
676                         syslog(LOG_EMERG, "Thread system PANIC, a thread is trying to clean up after itself.\n");
677                         abort();
678                         return;
679                 }
680                 
681                 if (num_threads <= 0)
682                 {       /* Sanity check */
683                         end_critical_section(S_THREAD_LIST);
684                         syslog(LOG_EMERG, "Thread system PANIC, num_threads <= 0 and trying to do Garbage Collection.\n");
685                         abort();
686                         return;
687                 }
688
689                 if(that_thread->flags & CTDLTHREAD_WORKER)
690                         num_workers--;  /* This is a wroker thread so reduce the count. */
691                 num_threads--;
692                 /* If we are unlinking the list head then the next becomes the list head */
693                 if(that_thread->prev)
694                         that_thread->prev->next = that_thread->next;
695                 else
696                         CtdlThreadList = that_thread->next;
697                 if(that_thread->next)
698                         that_thread->next->prev = that_thread->prev;
699                 
700                 citthread_cond_signal(&that_thread->ThreadCond);
701                 citthread_cond_signal(&that_thread->SleepCond); // Make sure this thread is awake
702                 citthread_mutex_lock(&that_thread->ThreadMutex);        // Make sure it has done what its doing
703                 citthread_mutex_unlock(&that_thread->ThreadMutex);
704                 /*
705                  * Join on the thread to do clean up and prevent memory leaks
706                  * Also makes sure the thread has cleaned up after itself before we remove it from the list
707                  * We can join on the garbage collector thread the join should just return EDEADLCK
708                  */
709                 ret = citthread_join (that_thread->tid, NULL);
710                 if (ret == EDEADLK)
711                         syslog(LOG_DEBUG, "Garbage collection on own thread.\n");
712                 else if (ret == EINVAL)
713                         syslog(LOG_DEBUG, "Garbage collection, that thread already joined on.\n");
714                 else if (ret == ESRCH)
715                         syslog(LOG_DEBUG, "Garbage collection, no thread to join on.\n");
716                 else if (ret != 0)
717                         syslog(LOG_DEBUG, "Garbage collection, citthread_join returned an unknown error(%d).\n", ret);
718                 /*
719                  * Now we own that thread entry
720                  */
721                 syslog(LOG_INFO, "Garbage Collection for thread \"%s\" (0x%08lx).\n",
722                         that_thread->name, that_thread->tid);
723                 citthread_mutex_destroy(&that_thread->ThreadMutex);
724                 citthread_cond_destroy(&that_thread->ThreadCond);
725                 citthread_mutex_destroy(&that_thread->SleepMutex);
726                 citthread_cond_destroy(&that_thread->SleepCond);
727                 citthread_attr_destroy(&that_thread->attr);
728                 free(that_thread);
729         }
730         sys_workers = num_workers;
731         end_critical_section(S_THREAD_LIST);
732         
733         /* Sanity check number of worker threads */
734         if (workers != sys_workers)
735         {
736                 syslog(LOG_EMERG,
737                         "Thread system PANIC, discrepancy in number of worker threads. Counted %d, should be %d.\n",
738                         workers, sys_workers
739                         );
740                 abort();
741         }
742 }
743
744
745
746  
747 /*
748  * Runtime function for a Citadel Thread.
749  * This initialises the threads environment and then calls the user supplied thread function
750  * Note that this is the REAL thread function and wraps the users thread function.
751  */ 
752 static void *ctdl_internal_thread_func (void *arg)
753 {
754         CtdlThreadNode *this_thread;
755         void *ret = NULL;
756
757         /* lock and unlock the thread list.
758          * This causes this thread to wait until all its creation stuff has finished before it
759          * can continue its execution.
760          */
761         begin_critical_section(S_THREAD_LIST);
762         this_thread = (CtdlThreadNode *) arg;
763         gettimeofday(&this_thread->start_time, NULL);           /* Time this thread started */
764         
765         // Register the cleanup function to take care of when we exit.
766         citthread_cleanup_push(ctdl_internal_thread_cleanup, NULL);
767         // Get our thread data structure
768         CtdlThreadAllocTSD();
769         CT = this_thread;
770         this_thread->pid = getpid();
771         memcpy(&this_thread->last_state_change, &this_thread->start_time, sizeof (struct timeval));     /* Changed state so mark it. */
772         /* Only change to running state if we weren't asked to stop during the create cycle
773          * Other wise there is a window to allow this threads creation to continue to full grown and
774          * therby prevent a shutdown of the server.
775          */
776         if (!CtdlThreadCheckStop())
777         {
778                 citthread_mutex_lock(&this_thread->ThreadMutex);
779                 this_thread->state = CTDL_THREAD_RUNNING;
780                 citthread_mutex_unlock(&this_thread->ThreadMutex);
781         }
782         end_critical_section(S_THREAD_LIST);
783         
784         // Register for tracing
785         #ifdef HAVE_BACKTRACE
786 ///     eCrash_RegisterThread(this_thread->name, 0);
787         #endif
788         
789         // Tell the world we are here
790 #if defined(HAVE_SYSCALL_H) && defined (SYS_gettid)
791         this_thread->reltid = syscall(SYS_gettid);
792 #endif
793         syslog(LOG_NOTICE, "Created a new thread \"%s\" (0x%08lx).\n",
794                 this_thread->name, this_thread->tid);
795         
796         /*
797          * run the thread to do the work but only if we haven't been asked to stop
798          */
799         if (!CtdlThreadCheckStop())
800                 ret = (this_thread->thread_func)(this_thread->user_args);
801         
802         /*
803          * Our thread is exiting either because it wanted to end or because the server is stopping
804          * We need to clean up
805          */
806         citthread_cleanup_pop(1);       // Execute our cleanup routine and remove it
807         
808         return(ret);
809 }
810
811
812
813
814 /*
815  * Function to initialise an empty thread structure
816  */
817 CtdlThreadNode *ctdl_internal_init_thread_struct(CtdlThreadNode *this_thread, long flags)
818 {
819         int ret = 0;
820         
821         // Ensuring this is zero'd means we make sure the thread doesn't start doing its thing until we are ready.
822         memset (this_thread, 0, sizeof(CtdlThreadNode));
823         
824         /* Create the mutex's early so we can use them */
825         citthread_mutex_init (&(this_thread->ThreadMutex), NULL);
826         citthread_cond_init (&(this_thread->ThreadCond), NULL);
827         citthread_mutex_init (&(this_thread->SleepMutex), NULL);
828         citthread_cond_init (&(this_thread->SleepCond), NULL);
829         
830         this_thread->state = CTDL_THREAD_CREATE;
831         
832         if ((ret = citthread_attr_init(&this_thread->attr))) {
833                 citthread_mutex_unlock(&this_thread->ThreadMutex);
834                 citthread_mutex_destroy(&(this_thread->ThreadMutex));
835                 citthread_cond_destroy(&(this_thread->ThreadCond));
836                 citthread_mutex_destroy(&(this_thread->SleepMutex));
837                 citthread_cond_destroy(&(this_thread->SleepCond));
838                 syslog(LOG_EMERG, "Thread system, citthread_attr_init: %s\n", strerror(ret));
839                 free(this_thread);
840                 return NULL;
841         }
842
843         /* Our per-thread stacks need to be bigger than the default size,
844          * otherwise the MIME parser crashes on FreeBSD, and the IMAP service
845          * crashes on 64-bit Linux.
846          */
847         if (flags & CTDLTHREAD_BIGSTACK)
848         {
849 #ifdef WITH_THREADLOG
850                 syslog(LOG_INFO, "Thread system. Creating BIG STACK thread.\n");
851 #endif
852                 if ((ret = citthread_attr_setstacksize(&this_thread->attr, THREADSTACKSIZE))) {
853                         citthread_mutex_unlock(&this_thread->ThreadMutex);
854                         citthread_mutex_destroy(&(this_thread->ThreadMutex));
855                         citthread_cond_destroy(&(this_thread->ThreadCond));
856                         citthread_mutex_destroy(&(this_thread->SleepMutex));
857                         citthread_cond_destroy(&(this_thread->SleepCond));
858                         citthread_attr_destroy(&this_thread->attr);
859                         syslog(LOG_EMERG, "Thread system, citthread_attr_setstacksize: %s\n",
860                                 strerror(ret));
861                         free(this_thread);
862                         return NULL;
863                 }
864         }
865
866         /* Set this new thread with an avg_blocked of 2. We do this so that its creation affects the
867          * load average for the system. If we don't do this then we create a mass of threads at the same time 
868          * because the creation didn't affect the load average.
869          */
870         this_thread->avg_blocked = 2;
871         
872         return (this_thread);
873 }
874
875
876
877  
878 /*
879  * Internal function to create a thread.
880  */ 
881 CtdlThreadNode *ctdl_internal_create_thread(char *name, long flags, void *(*thread_func) (void *arg), void *args)
882 {
883         int ret = 0;
884         CtdlThreadNode *this_thread;
885
886         if (num_threads >= 32767)
887         {
888                 syslog(LOG_EMERG, "Thread system. Thread list full.\n");
889                 return NULL;
890         }
891                 
892         this_thread = malloc(sizeof(CtdlThreadNode));
893         if (this_thread == NULL) {
894                 syslog(LOG_EMERG, "Thread system, can't allocate CtdlThreadNode, exiting\n");
895                 return NULL;
896         }
897         
898         /* Initialise the thread structure */
899         if (ctdl_internal_init_thread_struct(this_thread, flags) == NULL)
900         {
901                 free(this_thread);
902                 syslog(LOG_EMERG, "Thread system, can't initialise CtdlThreadNode, exiting\n");
903                 return NULL;
904         }
905         /*
906          * If we got here we are going to create the thread so we must initilise the structure
907          * first because most implimentations of threading can't create it in a stopped state
908          * and it might want to do things with its structure that aren't initialised otherwise.
909          */
910         if(name)
911         {
912                 this_thread->name = name;
913         }
914         else
915         {
916                 this_thread->name = "Un-named Thread";
917         }
918         
919         this_thread->flags = flags;
920         this_thread->thread_func = thread_func;
921         this_thread->user_args = args;
922         
923         begin_critical_section(S_THREAD_LIST);
924         /*
925          * We pass this_thread into the thread as its args so that it can find out information
926          * about itself and it has a bit of storage space for itself, not to mention that the REAL
927          * thread function needs to finish off the setup of the structure
928          */
929         if ((ret = citthread_create(&this_thread->tid, &this_thread->attr, ctdl_internal_thread_func, this_thread) != 0))
930         {
931                 end_critical_section(S_THREAD_LIST);
932                 syslog(LOG_ALERT, "Thread system, Can't create thread: %s\n",
933                         strerror(ret));
934                 citthread_mutex_unlock(&this_thread->ThreadMutex);
935                 citthread_mutex_destroy(&(this_thread->ThreadMutex));
936                 citthread_cond_destroy(&(this_thread->ThreadCond));
937                 citthread_mutex_destroy(&(this_thread->SleepMutex));
938                 citthread_cond_destroy(&(this_thread->SleepCond));
939                 citthread_attr_destroy(&this_thread->attr);
940                 free(this_thread);
941                 return NULL;
942         }
943         num_threads++;  // Increase the count of threads in the system.
944         if(this_thread->flags & CTDLTHREAD_WORKER)
945                 num_workers++;
946
947         this_thread->next = CtdlThreadList;
948         CtdlThreadList = this_thread;
949         if (this_thread->next)
950                 this_thread->next->prev = this_thread;
951         
952         end_critical_section(S_THREAD_LIST);
953         
954         return this_thread;
955 }
956
957 /*
958  * Wrapper function to create a thread
959  * ensures the critical section and other protections are in place.
960  * char *name = name to give to thread, if NULL, use generic name
961  * int flags = flags to determine type of thread and standard facilities
962  */
963 CtdlThreadNode *CtdlThreadCreate(char *name, long flags, void *(*thread_func) (void *arg), void *args)
964 {
965         CtdlThreadNode *ret = NULL;
966         
967         ret = ctdl_internal_create_thread(name, flags, thread_func, args);
968         return ret;
969 }
970
971
972
973 CtdlThreadNode *ctdl_thread_internal_start_scheduled (CtdlThreadNode *this_thread)
974 {
975         int ret = 0;
976         
977         begin_critical_section(S_THREAD_LIST);
978         /*
979          * We pass this_thread into the thread as its args so that it can find out information
980          * about itself and it has a bit of storage space for itself, not to mention that the REAL
981          * thread function needs to finish off the setup of the structure
982          */
983         if ((ret = citthread_create(&this_thread->tid, &this_thread->attr, ctdl_internal_thread_func, this_thread) != 0))
984         {
985                 end_critical_section(S_THREAD_LIST);
986                 syslog(LOG_DEBUG, "Failed to start scheduled thread \"%s\": %s\n", this_thread->name, strerror(ret));
987                 citthread_mutex_destroy(&(this_thread->ThreadMutex));
988                 citthread_cond_destroy(&(this_thread->ThreadCond));
989                 citthread_mutex_destroy(&(this_thread->SleepMutex));
990                 citthread_cond_destroy(&(this_thread->SleepCond));
991                 citthread_attr_destroy(&this_thread->attr);
992                 free(this_thread);
993                 return NULL;
994         }
995         
996         
997         num_threads++;  // Increase the count of threads in the system.
998         if(this_thread->flags & CTDLTHREAD_WORKER)
999                 num_workers++;
1000
1001         this_thread->next = CtdlThreadList;
1002         CtdlThreadList = this_thread;
1003         if (this_thread->next)
1004                 this_thread->next->prev = this_thread;
1005         
1006         end_critical_section(S_THREAD_LIST);
1007         
1008         
1009         return this_thread;
1010 }
1011
1012
1013
1014 void ctdl_thread_internal_check_scheduled(void)
1015 {
1016         CtdlThreadNode *this_thread, *that_thread;
1017         time_t now;
1018         
1019         /* Don't start scheduled threads if the system wants single user mode */
1020         if (CtdlWantSingleUser())
1021                 return;
1022         
1023         if (try_critical_section(S_SCHEDULE_LIST))
1024                 return; /* If this list is locked we wait till the next chance */
1025         
1026         now = time(NULL);
1027         
1028 #ifdef WITH_THREADLOG
1029         syslog(LOG_DEBUG, "Checking for scheduled threads to start.\n");
1030 #endif
1031
1032         this_thread = CtdlThreadSchedList;
1033         while(this_thread)
1034         {
1035                 that_thread = this_thread;
1036                 this_thread = this_thread->next;
1037                 
1038                 if (now > that_thread->when)
1039                 {
1040                         /* Unlink from schedule list */
1041                         if (that_thread->prev)
1042                                 that_thread->prev->next = that_thread->next;
1043                         else
1044                                 CtdlThreadSchedList = that_thread->next;
1045                         if (that_thread->next)
1046                                 that_thread->next->prev = that_thread->prev;
1047                                 
1048                         that_thread->next = that_thread->prev = NULL;
1049 #ifdef WITH_THREADLOG
1050                         syslog(LOG_DEBUG, "About to start scheduled thread \"%s\".\n", that_thread->name);
1051 #endif
1052                         if (CT->state > CTDL_THREAD_STOP_REQ)
1053                         {       /* Only start it if the system is not stopping */
1054                                 if (ctdl_thread_internal_start_scheduled (that_thread))
1055                                 {
1056 #ifdef WITH_THREADLOG
1057                                         syslog(LOG_INFO, "Thread system, Started a scheduled thread \"%s\" (0x%08lx).\n",
1058                                                 that_thread->name, that_thread->tid);
1059 #endif
1060                                 }
1061                         }
1062                 }
1063 #ifdef WITH_THREADLOG
1064                 else
1065                 {
1066                         syslog(LOG_DEBUG, "Thread \"%s\" will start in %ld seconds.\n",
1067                                 that_thread->name, that_thread->when - time(NULL));
1068                 }
1069 #endif
1070         }
1071         end_critical_section(S_SCHEDULE_LIST);
1072 }
1073
1074
1075 /*
1076  * A warapper function for select so we can show a thread as blocked
1077  */
1078 int CtdlThreadSelect(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout)
1079 {
1080         int ret = 0;
1081         
1082         ctdl_thread_internal_change_state(CT, CTDL_THREAD_BLOCKED);
1083         if (!CtdlThreadCheckStop())
1084                 ret = select(n, readfds, writefds, exceptfds, timeout);
1085         /**
1086          * If the select returned <= 0 then it failed due to an error
1087          * or timeout so this thread could stop if asked to do so.
1088          * Anything else means it needs to continue unless the system is shutting down
1089          */
1090         if (ret > 0)
1091         {
1092                 /**
1093                  * The select says this thread needs to do something useful.
1094                  * This thread was in an idle state so it may have been asked to stop
1095                  * but if the system isn't shutting down this thread is no longer
1096                  * idle and select has given it a task to do so it must not stop
1097                  * In this condition we need to force it into the running state.
1098                  * CtdlThreadGC will clear its ticker for us.
1099                  *
1100                  * FIXME: there is still a small hole here. It is possible for the sequence of locking
1101                  * to allow the state to get changed to STOP_REQ just after this code if the other thread
1102                  * has decided to change the state before this lock, it there fore has to wait till the lock
1103                  * completes but it will continue to change the state. We need something a bit better here.
1104                  */
1105                 citthread_mutex_lock(&CT->ThreadMutex); /* To prevent race condition of a sleeping thread */
1106                 if (GC_thread->state > CTDL_THREAD_STOP_REQ && CT->state <= CTDL_THREAD_STOP_REQ)
1107                 {
1108                         syslog(LOG_DEBUG, "Thread %s (0x%08lx) refused stop request.\n", CT->name, CT->tid);
1109                         CT->state = CTDL_THREAD_RUNNING;
1110                 }
1111                 citthread_mutex_unlock(&CT->ThreadMutex);
1112         }
1113
1114         ctdl_thread_internal_change_state(CT, CTDL_THREAD_RUNNING);
1115
1116         return ret;
1117 }
1118
1119
1120
1121 void *new_worker_thread(void *arg);
1122 extern void close_masters (void);
1123
1124
1125
1126 void go_threading(void)
1127 {
1128         int i;
1129         CtdlThreadNode *last_worker;
1130
1131         /*
1132          * Initialise the thread system
1133          */
1134         ctdl_thread_internal_init();
1135
1136         /* Second call to module init functions now that threading is up */
1137         initialise_modules(1);
1138         CtdlThreadCreate("select_on_master", CTDLTHREAD_BIGSTACK, select_on_master, NULL);
1139
1140         /*
1141          * This thread is now used for garbage collection of other threads in the thread list
1142          */
1143         syslog(LOG_INFO, "Startup thread %ld becoming garbage collector,\n", (long) citthread_self());
1144
1145         /*
1146          * We do a lot of locking and unlocking of the thread list in here.
1147          * We do this so that we can repeatedly release time for other threads
1148          * that may be waiting on the thread list.
1149          * We are a low priority thread so we can afford to do this
1150          */
1151         
1152         while (CtdlThreadGetCount())
1153         {
1154                 if (CT->signal)
1155                         exit_signal = CT->signal;
1156                 if (exit_signal)
1157                 {
1158                         CtdlThreadStopAll();
1159                 }
1160                 check_sched_shutdown();
1161                 if (CT->state > CTDL_THREAD_STOP_REQ)
1162                 {
1163                         begin_critical_section(S_THREAD_LIST);
1164                         end_critical_section(S_THREAD_LIST);
1165                         
1166                         ctdl_thread_internal_check_scheduled(); /* start scheduled threads */
1167                 }
1168                 
1169                 /* Reduce the size of the worker thread pool if necessary. */
1170                 if ((CtdlThreadGetWorkers() > config.c_min_workers + 1) && (CtdlThreadWorkerAvg < 20) && (CT->state > CTDL_THREAD_STOP_REQ))
1171                 {
1172                         /* Ask a worker thread to stop as we no longer need it */
1173                         begin_critical_section(S_THREAD_LIST);
1174                         last_worker = CtdlThreadList;
1175                         while (last_worker)
1176                         {
1177                                 citthread_mutex_lock(&last_worker->ThreadMutex);
1178                                 if (last_worker->flags & CTDLTHREAD_WORKER && (last_worker->state > CTDL_THREAD_STOPPING) && (last_worker->Context == NULL))
1179                                 {
1180                                         citthread_mutex_unlock(&last_worker->ThreadMutex);
1181                                         break;
1182                                 }
1183                                 citthread_mutex_unlock(&last_worker->ThreadMutex);
1184                                 last_worker = last_worker->next;
1185                         }
1186                         end_critical_section(S_THREAD_LIST);
1187                         if (last_worker)
1188                         {
1189 #ifdef WITH_THREADLOG
1190                                 syslog(LOG_DEBUG, "Thread system, stopping excess worker thread \"%s\" (0x%08lx).\n",
1191                                         last_worker->name,
1192                                         last_worker->tid
1193                                         );
1194 #endif
1195                                 CtdlThreadStop(last_worker);
1196                         }
1197                 }
1198         
1199                 /*
1200                  * If all our workers are working hard, start some more to help out
1201                  * with things
1202                  */
1203                 /* FIXME: come up with a better way to dynamically alter the number of threads
1204                  * based on the system load
1205                  */
1206                 if (    (((CtdlThreadGetWorkers() < config.c_max_workers)
1207                         && (CtdlThreadGetWorkerAvg() > 60))
1208                         || CtdlThreadGetWorkers() < config.c_min_workers)
1209                         && (CT->state > CTDL_THREAD_STOP_REQ)
1210                 )
1211                 {
1212                         /* Only start new threads if we are not going to overload the machine */
1213                         /* Temporarily set to 10 should be enough to make sure we don't stranglew the server
1214                          * at least until we make this a config option */
1215                         if (CtdlThreadGetLoadAvg() < ((double)10.00)) {
1216                                 for (i=0; i<5 ; i++) {
1217                                         CtdlThreadCreate("Worker Thread",
1218                                                 CTDLTHREAD_BIGSTACK + CTDLTHREAD_WORKER,
1219                                                 worker_thread,
1220                                                 NULL
1221                                                 );
1222                                 }
1223                         }
1224                         else
1225                                 syslog(LOG_WARNING, "Server strangled due to machine load average too high.\n");
1226                 }
1227
1228                 CtdlThreadGC();
1229
1230                 if (CtdlThreadGetCount() <= 1) // Shutting down clean up the garbage collector
1231                 {
1232                         CtdlThreadGC();
1233                 }
1234                 
1235 #ifdef THREADS_USESIGNALS
1236                 if (CtdlThreadGetCount() && CT->state > CTDL_THREAD_STOP_REQ)
1237 #else
1238                 if (CtdlThreadGetCount())
1239 #endif
1240                         CtdlThreadSleep(1);
1241         }
1242         /*
1243          * If the above loop exits we must be shutting down since we obviously have no threads
1244          */
1245         ctdl_thread_internal_cleanup();
1246 }
1247
1248
1249
1250