When doing shutdown we allow each thread up to 5 seconds to exit on its
authorDave West <davew@uncensored.citadel.org>
Tue, 18 Mar 2008 23:28:55 +0000 (23:28 +0000)
committerDave West <davew@uncensored.citadel.org>
Tue, 18 Mar 2008 23:28:55 +0000 (23:28 +0000)
own after asking it nicely to do so. After 5 seconds we will cancel()
the thread. It's possible that we may lose messages in this manner
since it's similar to a SIGTERM. Because of this we write messages to the
log about this event. If we are seeing these entries in the log we need
to track down why the thread is struggling to exit and fix it.

citadel/threads.c
citadel/threads.h

index 15fc0d5dee0e55f350c02a757d50b4181970d008..cd1e999d08f738ea3c6c19370e68ac09320f0511 100644 (file)
@@ -638,8 +638,23 @@ void CtdlThreadGC (void)
                that_thread = this_thread;
                this_thread = this_thread->next;
                
+               if ((that_thread->state == CTDL_THREAD_STOP_REQ || that_thread->state == CTDL_THREAD_STOPPING)
+                       && (!citthread_equal(that_thread->tid, citthread_self())))
+                               that_thread->stop_ticker++;
+               
+               if (that_thread->stop_ticker == 5)
+               {
+                       CtdlLogPrintf(CTDL_DEBUG, "Thread System: The thread \"%s\" (0x%08lx) failed to self terminate withing 5 ticks. Canceling it.\n", that_thread->name, that_thread->tid);
+                       if ((that_thread->flags & CTDLTHREAD_WORKER) == 0)
+                               CtdlLogPrintf(CTDL_INFO, "Thread System: A non worker thread was canceled this may cause message loss.\n");
+                       that_thread->state = CTDL_THREAD_CANCELLED;
+                       that_thread->stop_ticker++;
+                       citthread_cancel(that_thread->tid);
+                       continue;
+               }
+               
                /* Do we need to clean up this thread? */
-               if (that_thread->state != CTDL_THREAD_EXITED)
+               if ((that_thread->state != CTDL_THREAD_EXITED) && (that_thread->state != CTDL_THREAD_CANCELLED))
                {
                        if(that_thread->flags & CTDLTHREAD_WORKER)
                                workers++;      /* Sanity check on number of worker threads */
@@ -690,7 +705,7 @@ void CtdlThreadGC (void)
                else if (ret == ESRCH)
                        CtdlLogPrintf(CTDL_DEBUG, "Garbage collection, no thread to join on.\n");
                else if (ret != 0)
-                       CtdlLogPrintf(CTDL_DEBUG, "Garbage collection, citthread_join returned an unknown error.\n");
+                       CtdlLogPrintf(CTDL_DEBUG, "Garbage collection, citthread_join returned an unknown error(%d).\n", ret);
                /*
                 * Now we own that thread entry
                 */
index 5a38316189311828408744cd85c9ebebdd7d1bc8..c9e683cf524e2adcf98c67222449347262ea0ee5 100644 (file)
@@ -56,6 +56,7 @@ struct CtdlThreadNode{
        void *user_args;                        /* Arguments passed to this threads work function */
        long flags;                             /* Flags that describe this thread */
        enum CtdlThreadState state;             /* Flag to show state of this thread */
+       int stop_ticker;                        /* A counter to determine how long it has taken for this thread to exit */
        citthread_mutex_t ThreadMutex;          /* A mutex to sync this thread to others if this thread allows (also used for sleeping) */
        citthread_cond_t ThreadCond;            /* A condition variable to sync this thread with others */
        citthread_mutex_t SleepMutex;           /* A mutex for sleeping */