Began removing $Id$ tags. This will be an ongoing process.
[citadel.git] / citadel / ecrash.c
1 /*
2  * author: David Frascone
3  * 
4  * eCrash Implementation
5  *
6  * eCrash will allow you to capture stack traces in the
7  * event of a crash, and write those traces to disk, stdout,
8  * or any other file handle.
9  *
10  * modified to integrate closer into citadel by Wilfried Goesgens
11  *
12  * vim: ts=4
13  *
14  * This program is free software; you can redistribute it and/or modify
15  * it under the terms of the GNU General Public License as published by
16  * the Free Software Foundation; either version 3 of the License, or
17  * (at your option) any later version.
18  *
19  * This program is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22  * GNU General Public License for more details.
23  *
24  * You should have received a copy of the GNU General Public License
25  * along with this program; if not, write to the Free Software
26  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
27  */
28
#include "sysdep.h"
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <fcntl.h>
#include <signal.h>
#include <syslog.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <pthread.h>
#include <libcitadel.h>
#include "server.h"
#include "sysdep_decls.h"
#include "support.h"
#include "config.h"
#include "citserver.h"
#include "ecrash.h"
47
/* Print a "not implemented" notice naming the calling function (__func__ is the standard C99 identifier). */
#define NIY()   printf("%s: Not Implemented Yet!\n", __func__)
49 #ifdef HAVE_BACKTRACE
50 #include <execinfo.h>
51 static eCrashParameters gbl_params;
52
53 static int    gbl_backtraceEntries;
54 static void **gbl_backtraceBuffer;
55 static char **gbl_backtraceSymbols;
56 static int    gbl_backtraceDoneFlag = 0;
57
58 static void *stack_frames[50];
59 static size_t size, NThread;
60 static char **strings;
61 static char StaticBuf[SIZ];
62
63 /* 
64  * Private structures for our thread list
65  */
66 typedef struct thread_list_node{
67         char *threadName;
68         pthread_t thread;
69         int backtraceSignal;
70         sighandler_t oldHandler;
71         struct thread_list_node *Next;
72 } ThreadListNode;
73
74 static pthread_mutex_t ThreadListMutex = PTHREAD_MUTEX_INITIALIZER;
75 static ThreadListNode *ThreadList = NULL;
76
77 /*********************************************************************
78  *********************************************************************
79  **     P  R  I  V  A  T  E      F  U  N  C  T  I  O  N  S
80  *********************************************************************
81  ********************************************************************/
82
83
84 /*!
85  * Insert a node into our threadList
86  *
87  * @param name   Text string indicating our thread
88  * @param thread Our Thread Id
89  * @param signo  Signal to create backtrace with
90  * @param old_handler Our old handler for signo
91  *
92  * @returns zero on success
93  */
94 static int addThreadToList(char *name, pthread_t thread,int signo,
95                                            sighandler_t old_handler)
96 {
97         ThreadListNode *node;
98
99         node = malloc(sizeof(ThreadListNode));
100         if (!node) return -1;
101
102         DPRINTF(ECRASH_DEBUG_VERBOSE,
103                                         "Adding thread 0x%08x (%s)\n", (unsigned int)thread, name);
104         node->threadName = strdup(name);
105         node->thread = thread;
106         node->backtraceSignal = signo;
107         node->oldHandler = old_handler;
108
109         /* And, add it to the list */
110         pthread_mutex_lock(&ThreadListMutex);
111         node->Next = ThreadList;
112         ThreadList = node;
113         pthread_mutex_unlock(&ThreadListMutex);
114         
115         return 0;
116
117 } // addThreadToList
118
119 /*!
120  * Remove a node from our threadList
121  *
122  * @param thread Our Thread Id
123  *
124  * @returns zero on success
125  */
126 static int removeThreadFromList(pthread_t thread)
127 {
128         ThreadListNode *Probe, *Prev=NULL;
129         ThreadListNode *Removed = NULL;
130
131         DPRINTF(ECRASH_DEBUG_VERBOSE,
132                                         "Removing thread 0x%08x from list . . .\n", (unsigned int)thread);
133         pthread_mutex_lock(&ThreadListMutex);
134         for (Probe=ThreadList;Probe != NULL; Probe = Probe->Next) {
135                 if (Probe->thread == thread) {
136                         // We found it!  Unlink it and move on!
137                         Removed = Probe;
138                         if (Prev == NULL) { // head of list
139                                 ThreadList = Probe->Next;
140                         } else {
141                                 // Prev != null, so we need to link around ourselves.
142                                 Prev->Next = Probe->Next;
143                         }
144                         Removed->Next = NULL;
145                         break;
146                 }
147
148                 Prev = Probe;
149         }
150         pthread_mutex_unlock(&ThreadListMutex);
151
152         // Now, if something is in Removed, free it, and return success
153         if (Removed) {
154             DPRINTF(ECRASH_DEBUG_VERBOSE,
155                                                 "   Found %s -- removing\n", Removed->threadName);
156                 // Reset the signal handler
157                 signal(Removed->backtraceSignal, Removed->oldHandler);
158
159                 // And free the allocated memory
160                 free (Removed->threadName);
161                 free (Removed);
162
163                 return 0;
164         } else {
165             DPRINTF(ECRASH_DEBUG_VERBOSE,
166                                                 "   Not Found\n");
167                 return -1; // Not Found
168         }
169 } // removeThreadFromList
170
171 /*!
172  * Print out a line of output to all our destinations
173  *
174  * One by one, output a line of text to all of our output destinations.
175  *
176  * Return failure if we fail to output to any of them.
177  *
178  * @param format   Normal printf style vararg format
179  *
180  * @returns nothing// bytes written, or error on failure.
181  */
182 static void outputPrintf(char *format, ...)
183 {
184         va_list ap;
185
186         va_start(ap, format);
187
188         if (enable_syslog)
189         {
190                 snprintf (StaticBuf, SIZ, format, ap);
191                 syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StaticBuf);
192         }
193         else
194                 CtdlLogPrintf(CTDL_EMERG, format, ap);
195
196 } // outputPrintf
197
198
199
200 /*!
201  * Dump our backtrace into a global location
202  *
203  * This function will dump out our backtrace into our
204  * global holding area.
205  *
206  */
207 static void createGlobalBacktrace( void )
208 {
209
210         size = backtrace(stack_frames, sizeof(stack_frames) / sizeof(void*));
211         if (enable_syslog)
212                 for (NThread = 0; NThread < size; NThread++) 
213                 {
214                         snprintf (StaticBuf, SIZ, "RAW: %p  ", stack_frames[NThread]);
215                         syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StaticBuf);
216                 }
217         else 
218                 for (NThread = 0; NThread < size; NThread++) 
219                         CtdlLogPrintf(1, "RAW: %p\n", stack_frames[NThread]);
220         strings = backtrace_symbols(stack_frames, size);
221         for (NThread = 0; NThread < size; NThread++) {
222                 if (strings != NULL) {
223                         if (enable_syslog)
224                         {// vsyslogs printf compliance sucks.
225                                 snprintf (StaticBuf, SIZ, "RAW: %p  ", strings[NThread]);
226                                 syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StaticBuf);
227                         }
228                         else
229                                 CtdlLogPrintf(1, "%s\n", strings[NThread]);
230                 }
231         }
232 } /* createGlobalBacktrace */
233 static void outputRawtrace( void )
234 {
235
236         size = backtrace(stack_frames, sizeof(stack_frames) / sizeof(void*));
237         if (enable_syslog)
238                 for (NThread = 0; NThread < size; NThread++) 
239                 {
240                         snprintf (StaticBuf, SIZ, "RAW: %p  ", stack_frames[NThread]);
241                         syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StaticBuf);
242                 }
243         else 
244                 for (NThread = 0; NThread < size; NThread++) 
245                         CtdlLogPrintf(1, "RAW: %p\n", stack_frames[NThread]);
246 } /* createGlobalBacktrace */
247
248 /*!
249  * Print out (to all the fds, etc), or global backtrace
250  */
251 static void outputGlobalBacktrace ( void )
252 {
253         int i;
254
255         for (i=0; i < gbl_backtraceEntries; i++) {
256                 if (gbl_backtraceSymbols != FALSE) {
257                         outputPrintf("*      Frame %02x: %s\n",
258                                      i, gbl_backtraceSymbols[i]);
259                 } else {
260                         outputPrintf("*      Frame %02x: %p\n", i,
261                                      gbl_backtraceBuffer[i]);
262                 }
263         }
264 } // outputGlobalBacktrace
265
/*!
 * Output the current thread's backtrace
 *
 * Runs createGlobalBacktrace() for the calling thread and then
 * outputGlobalBacktrace(), in that order.
 */
static void outputBacktrace( void )
{
	createGlobalBacktrace();	/* capture (and log) this thread's stack */
	outputGlobalBacktrace();	/* then print the global backtrace area */
} /* outputBacktrace */
274
275 static void outputBacktraceThreads( void )
276 {
277         ThreadListNode *probe;
278         int i;
279
280         // When we're backtracing, don't worry about the mutex . . hopefully
281         // we're in a safe place.
282
283         for (probe=ThreadList; probe; probe=probe->Next) {
284                 gbl_backtraceDoneFlag = 0;
285                 pthread_kill(probe->thread, probe->backtraceSignal);
286                 for (i=0; i < gbl_params.threadWaitTime; i++) {
287                         if (gbl_backtraceDoneFlag)
288                                 break;
289                         sleep(1);
290                 }
291                 if (gbl_backtraceDoneFlag) {
292                         outputPrintf("*  Backtrace of \"%s\" (0x%08x)\n", 
293                                                  probe->threadName, (unsigned int)probe->thread);
294                         outputGlobalBacktrace();
295                 } else {
296                         outputPrintf("*  Error: unable to get backtrace of \"%s\" (0x%08x)\n", 
297                                                  probe->threadName, (unsigned int)probe->thread);
298                 }
299                 outputPrintf("*\n");
300         }
301 } // outputBacktraceThreads
302
303
304 /*!
305  * Handle signals (crash signals)
306  *
307  * This function will catch all crash signals, and will output the
308  * crash dump.  
309  *
310  * It will physically write (and sync) the current thread's information
311  * before it attempts to send signals to other threads.
312  * 
313  * @param signum Signal received.
314  */
315 static void crash_handler(int signo)
316 {
317         outputRawtrace();
318         outputPrintf("*********************************************************\n");
319         outputPrintf("*               eCrash Crash Handler\n");
320         outputPrintf("*********************************************************\n");
321         outputPrintf("*\n");
322         outputPrintf("*  Got a crash! signo=%d\n", signo);
323         outputPrintf("*\n");
324         outputPrintf("*  Offending Thread's Backtrace:\n");
325         outputPrintf("*\n");
326         outputBacktrace();
327         outputPrintf("*\n");
328
329         if (gbl_params.dumpAllThreads != FALSE) {
330                 outputBacktraceThreads();
331         }
332
333         outputPrintf("*\n");
334         outputPrintf("*********************************************************\n");
335         outputPrintf("*               eCrash Crash Handler\n");
336         outputPrintf("*********************************************************\n");
337
338         exit(signo);
339 } // crash_handler
340
341 /*!
342  * Handle signals (bt signals)
343  *
344  * This function shoudl be called to generate a crashdump into our
345  * global area.  Once the dump has been completed, this function will
346  * return after tickling a global.  Since mutexes are not async
347  * signal safe, the main thread, after signaling us to generate our
348  * own backtrace, will sleep for a few seconds waiting for us to complete.
349  *
350  * @param signum Signal received.
351  */
352 static void bt_handler(int signo)
353 {
354         createGlobalBacktrace();
355         gbl_backtraceDoneFlag=1;
356 } // bt_handler
357
358 /*!
359  * Validate a passed-in symbol table
360  *
361  * For now, just print it out (if verbose), and make sure it's
362  * sorted and none of the pointers are zero.
363  */
364 static int ValidateSymbolTable( void )
365 {
366         int i;
367         int rc=0;
368         unsigned long lastAddress =0;
369
370         // Get out of here if the table is empty
371         if (!gbl_params.symbolTable) return 0;
372
373         // Dump it in verbose mode
374         DPRINTF(ECRASH_DEBUG_VERBOSE,
375                                         "Symbol Table Provided with %d symbols\n",
376                                         gbl_params.symbolTable->numSymbols);
377         for (i=0; i < gbl_params.symbolTable->numSymbols; i++){
378                 // Dump it in verbose mode
379                 DPRINTF(ECRASH_DEBUG_VERBOSE, 
380                                 "%-30s %p\n",
381                                 gbl_params.symbolTable->symbols[i].function,
382                                 gbl_params.symbolTable->symbols[i].address);
383                 if (lastAddress >
384                     (unsigned long)gbl_params.symbolTable->symbols[i].address) {
385                         DPRINTF(ECRASH_DEBUG_ERROR,
386                                         "Error: symbol table is not sorted (last=%p, current=%p)\n",
387                                         (void *)lastAddress,
388                                         gbl_params.symbolTable->symbols[i].address);
389                         rc = -1;
390                 }
391
392         } // for
393
394         return rc;
395         
396 } // ValidateSymbolTable
397
398 /*********************************************************************
399  *********************************************************************
400  **      P  U  B  L  I  C      F  U  N  C  T  I  O  N  S
401  *********************************************************************
402  ********************************************************************/
403
404 /*!
405  * Initialize eCrash.
406  * 
407  * This function must be called before calling any other eCrash
408  * functions.  It sets up the global behavior of the system, and
409  * registers the calling thread for crash dumps.
410  *
411  * @param params Our input parameters.  The passed in structure will be copied.
412  *
413  * @return Zero on success.
414  */
415 int eCrash_Init(eCrashParameters *params)
416 {
417         int sigIndex;
418         int ret = 0;
419 #ifdef DO_SIGNALS_RIGHT
420         sigset_t blocked;
421         struct sigaction act;
422 #endif
423
424         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE,"Init Starting params = %p\n", params);
425
426         // Allocate our backtrace area
427         gbl_backtraceBuffer = malloc(sizeof(void *) * (params->maxStackDepth+5));
428
429 #ifdef DO_SIGNALS_RIGHT
430         sigemptyset(&blocked);
431         act.sa_sigaction = crash_handler;
432         act.sa_mask = blocked;
433         act.sa_flags = SA_SIGINFO;
434 #endif
435
436         if (params != NULL) {
437                 // Make ourselves a global copy of params.
438                 gbl_params = *params;
439                 gbl_params.filename = strdup(params->filename);
440
441                 // Set our defaults, if they weren't specified
442                 if (gbl_params.maxStackDepth == 0 )
443                         gbl_params.maxStackDepth = ECRASH_DEFAULT_STACK_DEPTH;
444
445                 if (gbl_params.defaultBacktraceSignal == 0 )
446                         gbl_params.defaultBacktraceSignal = ECRASH_DEFAULT_BACKTRACE_SIGNAL;
447
448                 if (gbl_params.threadWaitTime == 0 )
449                         gbl_params.threadWaitTime = ECRASH_DEFAULT_THREAD_WAIT_TIME;
450
451                 if (gbl_params.debugLevel == 0 )
452                         gbl_params.debugLevel = ECRASH_DEBUG_DEFAULT;
453
454                 // Copy our symbol table
455                 if (gbl_params.symbolTable) {
456                     DPRINTF(ECRASH_DEBUG_VERBOSE,
457                                                         "symbolTable @ %p -- %d symbols\n", gbl_params.symbolTable,
458                                                 gbl_params.symbolTable->numSymbols);
459                         // Make a copy of our symbol table
460                         gbl_params.symbolTable = malloc(sizeof(eCrashSymbolTable));
461                         memcpy(gbl_params.symbolTable, params->symbolTable,
462                                    sizeof(eCrashSymbolTable));
463
464                         // Now allocate / copy the actual table.
465                         gbl_params.symbolTable->symbols = malloc(sizeof(eCrashSymbol) *
466                                                                      gbl_params.symbolTable->numSymbols);
467                         memcpy(gbl_params.symbolTable->symbols,
468                                    params->symbolTable->symbols,
469                                    sizeof(eCrashSymbol) * gbl_params.symbolTable->numSymbols);
470
471                         ValidateSymbolTable();
472                 }
473         
474                 // And, finally, register for our signals
475                 for (sigIndex=0; gbl_params.signals[sigIndex] != 0; sigIndex++) {
476                         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE,
477                                                         "   Catching signal[%d] %d\n", sigIndex,
478                                         gbl_params.signals[sigIndex]);
479
480                         // I know there's a better way to catch signals with pthreads.
481                         // I'll do it later TODO
482                         signal(gbl_params.signals[sigIndex], crash_handler);
483                 }
484         } else {
485                 DPRINTF(ECRASH_DEBUG_ERROR, "   Error:  Null Params!\n");
486                 ret = -1;
487         }
488         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE, "Init Complete ret=%d\n", ret);
489         return ret;
490 } /* eCrash_Init */
491
/*!
 * UnInitialize eCrash.
 *
 * Intended to de-activate eCrash, release the signal handlers, and free
 * any memory allocated by eCrash.  Not implemented yet: it only prints
 * the NIY() notice.
 *
 * @return Always zero.
 */
int eCrash_Uninit( void )
{
	int status = 0;

	NIY();	/* placeholder until the teardown is written */

	return status;
} /* eCrash_Uninit */
506
507 /*!
508  * Register a thread for backtracing on crash.
509  * 
510  * This function must be called by any thread wanting it's stack
511  * dumped in the event of a crash.  The thread my specify what 
512  * signal should be used, or the default, SIGUSR1 will be used.
513  *
514  * @param signo Signal to use to generate dump (default: SIGUSR1)
515  *
516  * @return Zero on success.
517  */
518 int eCrash_RegisterThread(char *name, int signo)
519 {
520         sighandler_t old_handler;
521
522         // Register for our signal
523         if (signo == 0) {
524                 signo = gbl_params.defaultBacktraceSignal;
525         }
526
527         old_handler = signal(signo, bt_handler);
528         return addThreadToList(name, pthread_self(), signo, old_handler);
529
530 } /* eCrash_RegisterThread */
531
/*!
 * Un-register the calling thread for stack dumps.
 *
 * Removes the calling thread's entry from the thread list via
 * removeThreadFromList().
 *
 * @return Zero on success, -1 if the calling thread was not registered.
 */
int eCrash_UnregisterThread( void )
{
	pthread_t self = pthread_self();

	return removeThreadFromList(self);
} /* eCrash_UnregisterThread */
544
545 #endif