a030842cdda96451683abef5946aa0b4a359d240
[citadel.git] / citadel / ecrash.c
1 /*
2  * File: eCrash.c
3  * @author David Frascone
4  * 
5  *  eCrash Implementation
6  *
7  *  eCrash will allow you to capture stack traces in the
8  *  event of a crash, and write those traces to disk, stdout,
9  *  or any other file handle.
10  *
11  *  modified to integrate closer into citadel by Wilfried Goesgens
12  *
13  * vim: ts=4
14  */
15
16 #include <stdio.h>
17 #include <unistd.h>
18 #include <stdlib.h>
19 #include <stdarg.h>
20 #include <string.h>
21 #include <fcntl.h>
22 #include <syslog.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <pthread.h>
26 #include "sysdep_decls.h"
27 #include "ecrash.h"
28
29 #define NIY()   printf("%s: Not Implemented Yet!\n", __FUNCTION__)
30 #ifdef HAVE_BACKTRACE
31 #include <execinfo.h>
32 static eCrashParameters gbl_params;
33
34 static int    gbl_backtraceEntries;
35 static void **gbl_backtraceBuffer;
36 static char **gbl_backtraceSymbols;
37 static int    gbl_backtraceDoneFlag = 0;
38
39 static void *stack_frames[50];
40 static size_t size, NThread;
41 static char **strings;
42 static char StrBuf[SIZ];
43
44 /* 
45  * Private structures for our thread list
46  */
47 typedef struct thread_list_node{
48         char *threadName;
49         pthread_t thread;
50         int backtraceSignal;
51         sighandler_t oldHandler;
52         struct thread_list_node *Next;
53 } ThreadListNode;
54
55 static pthread_mutex_t ThreadListMutex = PTHREAD_MUTEX_INITIALIZER;
56 static ThreadListNode *ThreadList = NULL;
57
58 /*********************************************************************
59  *********************************************************************
60  **     P  R  I  V  A  T  E      F  U  N  C  T  I  O  N  S
61  *********************************************************************
62  ********************************************************************/
63
64
65 /*!
66  * Insert a node into our threadList
67  *
68  * @param name   Text string indicating our thread
69  * @param thread Our Thread Id
70  * @param signo  Signal to create backtrace with
71  * @param old_handler Our old handler for signo
72  *
73  * @returns zero on success
74  */
75 static int addThreadToList(char *name, pthread_t thread,int signo,
76                                            sighandler_t old_handler)
77 {
78         ThreadListNode *node;
79
80         node = malloc(sizeof(ThreadListNode));
81         if (!node) return -1;
82
83         DPRINTF(ECRASH_DEBUG_VERBOSE,
84                                         "Adding thread 0x%08x (%s)\n", (unsigned int)thread, name);
85         node->threadName = strdup(name);
86         node->thread = thread;
87         node->backtraceSignal = signo;
88         node->oldHandler = old_handler;
89
90         /* And, add it to the list */
91         pthread_mutex_lock(&ThreadListMutex);
92         node->Next = ThreadList;
93         ThreadList = node;
94         pthread_mutex_unlock(&ThreadListMutex);
95         
96         return 0;
97
98 } // addThreadToList
99
100 /*!
101  * Remove a node from our threadList
102  *
103  * @param thread Our Thread Id
104  *
105  * @returns zero on success
106  */
107 static int removeThreadFromList(pthread_t thread)
108 {
109         ThreadListNode *Probe, *Prev=NULL;
110         ThreadListNode *Removed = NULL;
111
112         DPRINTF(ECRASH_DEBUG_VERBOSE,
113                                         "Removing thread 0x%08x from list . . .\n", (unsigned int)thread);
114         pthread_mutex_lock(&ThreadListMutex);
115         for (Probe=ThreadList;Probe != NULL; Probe = Probe->Next) {
116                 if (Probe->thread == thread) {
117                         // We found it!  Unlink it and move on!
118                         Removed = Probe;
119                         if (Prev == NULL) { // head of list
120                                 ThreadList = Probe->Next;
121                         } else {
122                                 // Prev != null, so we need to link around ourselves.
123                                 Prev->Next = Probe->Next;
124                         }
125                         Removed->Next = NULL;
126                         break;
127                 }
128
129                 Prev = Probe;
130         }
131         pthread_mutex_unlock(&ThreadListMutex);
132
133         // Now, if something is in Removed, free it, and return success
134         if (Removed) {
135             DPRINTF(ECRASH_DEBUG_VERBOSE,
136                                                 "   Found %s -- removing\n", Removed->threadName);
137                 // Reset the signal handler
138                 signal(Removed->backtraceSignal, Removed->oldHandler);
139
140                 // And free the allocated memory
141                 free (Removed->threadName);
142                 free (Removed);
143
144                 return 0;
145         } else {
146             DPRINTF(ECRASH_DEBUG_VERBOSE,
147                                                 "   Not Found\n");
148                 return -1; // Not Found
149         }
150 } // removeThreadFromList
151
152 /*!
153  * Print out a line of output to all our destinations
154  *
155  * One by one, output a line of text to all of our output destinations.
156  *
157  * Return failure if we fail to output to any of them.
158  *
159  * @param format   Normal printf style vararg format
160  *
161  * @returns nothing// bytes written, or error on failure.
162  */
163 static void outputPrintf(char *format, ...)
164 {
165         va_list ap;
166
167         va_start(ap, format);
168
169         if (enable_syslog)
170         {
171                 snprintf (StrBuf, SIZ, format, ap);
172                 syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StrBuf);
173         }
174         else
175                 lprintf(CTDL_EMERG, format, ap);
176
177 } // outputPrintf
178
179
180
181 /*!
182  * Dump our backtrace into a global location
183  *
184  * This function will dump out our backtrace into our
185  * global holding area.
186  *
187  */
188 static void createGlobalBacktrace( void )
189 {
190
191         size = backtrace(stack_frames, sizeof(stack_frames) / sizeof(void*));
192         if (enable_syslog)
193                 for (NThread = 0; NThread < size; NThread++) 
194                 {
195                         snprintf (StrBuf, SIZ, "RAW: %p  ", stack_frames[NThread]);
196                         syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StrBuf);
197                 }
198         else 
199                 for (NThread = 0; NThread < size; NThread++) 
200                         lprintf(1, "RAW: %p\n", stack_frames[NThread]);
201         strings = backtrace_symbols(stack_frames, size);
202         for (NThread = 0; NThread < size; NThread++) {
203                 if (strings != NULL) {
204                         if (enable_syslog)
205                         {// vsyslogs printf compliance sucks.
206                                 snprintf (StrBuf, SIZ, "RAW: %p  ", strings[NThread]);
207                                 syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StrBuf);
208                         }
209                         else
210                                 lprintf(1, "%s\n", strings[NThread]);
211                 }
212         }
213 } /* createGlobalBacktrace */
214 static void outputRawtrace( void )
215 {
216
217         size = backtrace(stack_frames, sizeof(stack_frames) / sizeof(void*));
218         if (enable_syslog)
219                 for (NThread = 0; NThread < size; NThread++) 
220                 {
221                         snprintf (StrBuf, SIZ, "RAW: %p  ", stack_frames[NThread]);
222                         syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StrBuf);
223                 }
224         else 
225                 for (NThread = 0; NThread < size; NThread++) 
226                         lprintf(1, "RAW: %p\n", stack_frames[NThread]);
227 } /* createGlobalBacktrace */
228
229 /*!
230  * Print out (to all the fds, etc), or global backtrace
231  */
232 static void outputGlobalBacktrace ( void )
233 {
234         int i;
235
236         for (i=0; i < gbl_backtraceEntries; i++) {
237                 if (gbl_backtraceSymbols != FALSE) {
238                         outputPrintf("*      Frame %02x: %s\n",
239                                      i, gbl_backtraceSymbols[i]);
240                 } else {
241                         outputPrintf("*      Frame %02x: %p\n", i,
242                                      gbl_backtraceBuffer[i]);
243                 }
244         }
245 } // outputGlobalBacktrace
246
/*!
 * Output the backtrace of the calling thread
 *
 * Two steps: createGlobalBacktrace() captures and logs the current
 * stack, then outputGlobalBacktrace() prints whatever is held in the
 * global backtrace area.
 */
static void outputBacktrace( void )
{
        /* capture + log the current stack */
        createGlobalBacktrace();

        /* then print the global holding area */
        outputGlobalBacktrace();
} /* outputBacktrace */
255
256 static void outputBacktraceThreads( void )
257 {
258         ThreadListNode *probe;
259         int i;
260
261         // When we're backtracing, don't worry about the mutex . . hopefully
262         // we're in a safe place.
263
264         for (probe=ThreadList; probe; probe=probe->Next) {
265                 gbl_backtraceDoneFlag = 0;
266                 pthread_kill(probe->thread, probe->backtraceSignal);
267                 for (i=0; i < gbl_params.threadWaitTime; i++) {
268                         if (gbl_backtraceDoneFlag)
269                                 break;
270                         sleep(1);
271                 }
272                 if (gbl_backtraceDoneFlag) {
273                         outputPrintf("*  Backtrace of \"%s\" (0x%08x)\n", 
274                                                  probe->threadName, (unsigned int)probe->thread);
275                         outputGlobalBacktrace();
276                 } else {
277                         outputPrintf("*  Error: unable to get backtrace of \"%s\" (0x%08x)\n", 
278                                                  probe->threadName, (unsigned int)probe->thread);
279                 }
280                 outputPrintf("*\n");
281         }
282 } // outputBacktraceThreads
283
284
285 /*!
286  * Handle signals (crash signals)
287  *
288  * This function will catch all crash signals, and will output the
289  * crash dump.  
290  *
291  * It will physically write (and sync) the current thread's information
292  * before it attempts to send signals to other threads.
293  * 
294  * @param signum Signal received.
295  */
296 static void crash_handler(int signo)
297 {
298         outputRawtrace();
299         outputPrintf("*********************************************************\n");
300         outputPrintf("*               eCrash Crash Handler\n");
301         outputPrintf("*********************************************************\n");
302         outputPrintf("*\n");
303         outputPrintf("*  Got a crash! signo=%d\n", signo);
304         outputPrintf("*\n");
305         outputPrintf("*  Offending Thread's Backtrace:\n");
306         outputPrintf("*\n");
307         outputBacktrace();
308         outputPrintf("*\n");
309
310         if (gbl_params.dumpAllThreads != FALSE) {
311                 outputBacktraceThreads();
312         }
313
314         outputPrintf("*\n");
315         outputPrintf("*********************************************************\n");
316         outputPrintf("*               eCrash Crash Handler\n");
317         outputPrintf("*********************************************************\n");
318
319         exit(signo);
320 } // crash_handler
321
322 /*!
323  * Handle signals (bt signals)
324  *
325  * This function shoudl be called to generate a crashdump into our
326  * global area.  Once the dump has been completed, this function will
327  * return after tickling a global.  Since mutexes are not async
328  * signal safe, the main thread, after signaling us to generate our
329  * own backtrace, will sleep for a few seconds waiting for us to complete.
330  *
331  * @param signum Signal received.
332  */
333 static void bt_handler(int signo)
334 {
335         createGlobalBacktrace();
336         gbl_backtraceDoneFlag=1;
337 } // bt_handler
338
339 /*!
340  * Validate a passed-in symbol table
341  *
342  * For now, just print it out (if verbose), and make sure it's
343  * sorted and none of the pointers are zero.
344  */
345 static int ValidateSymbolTable( void )
346 {
347         int i;
348         int rc=0;
349         unsigned long lastAddress =0;
350
351         // Get out of here if the table is empty
352         if (!gbl_params.symbolTable) return 0;
353
354         // Dump it in verbose mode
355         DPRINTF(ECRASH_DEBUG_VERBOSE,
356                                         "Symbol Table Provided with %d symbols\n",
357                                         gbl_params.symbolTable->numSymbols);
358         for (i=0; i < gbl_params.symbolTable->numSymbols; i++){
359                 // Dump it in verbose mode
360                 DPRINTF(ECRASH_DEBUG_VERBOSE, 
361                                 "%-30s %p\n",
362                                 gbl_params.symbolTable->symbols[i].function,
363                                 gbl_params.symbolTable->symbols[i].address);
364                 if (lastAddress >
365                     (unsigned long)gbl_params.symbolTable->symbols[i].address) {
366                         DPRINTF(ECRASH_DEBUG_ERROR,
367                                         "Error: symbol table is not sorted (last=%p, current=%p)\n",
368                                         (void *)lastAddress,
369                                         gbl_params.symbolTable->symbols[i].address);
370                         rc = -1;
371                 }
372
373         } // for
374
375         return rc;
376         
377 } // ValidateSymbolTable
378
379 /*********************************************************************
380  *********************************************************************
381  **      P  U  B  L  I  C      F  U  N  C  T  I  O  N  S
382  *********************************************************************
383  ********************************************************************/
384
385 /*!
386  * Initialize eCrash.
387  * 
388  * This function must be called before calling any other eCrash
389  * functions.  It sets up the global behavior of the system, and
390  * registers the calling thread for crash dumps.
391  *
392  * @param params Our input parameters.  The passed in structure will be copied.
393  *
394  * @return Zero on success.
395  */
396 int eCrash_Init(eCrashParameters *params)
397 {
398         int sigIndex;
399         int ret = 0;
400 #ifdef DO_SIGNALS_RIGHT
401         sigset_t blocked;
402         struct sigaction act;
403 #endif
404
405         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE,"Init Starting params = %p\n", params);
406
407         // Allocate our backtrace area
408         gbl_backtraceBuffer = malloc(sizeof(void *) * (params->maxStackDepth+5));
409
410 #ifdef DO_SIGNALS_RIGHT
411         sigemptyset(&blocked);
412         act.sa_sigaction = crash_handler;
413         act.sa_mask = blocked;
414         act.sa_flags = SA_SIGINFO;
415 #endif
416
417         if (params != NULL) {
418                 // Make ourselves a global copy of params.
419                 gbl_params = *params;
420                 gbl_params.filename = strdup(params->filename);
421
422                 // Set our defaults, if they weren't specified
423                 if (gbl_params.maxStackDepth == 0 )
424                         gbl_params.maxStackDepth = ECRASH_DEFAULT_STACK_DEPTH;
425
426                 if (gbl_params.defaultBacktraceSignal == 0 )
427                         gbl_params.defaultBacktraceSignal = ECRASH_DEFAULT_BACKTRACE_SIGNAL;
428
429                 if (gbl_params.threadWaitTime == 0 )
430                         gbl_params.threadWaitTime = ECRASH_DEFAULT_THREAD_WAIT_TIME;
431
432                 if (gbl_params.debugLevel == 0 )
433                         gbl_params.debugLevel = ECRASH_DEBUG_DEFAULT;
434
435                 // Copy our symbol table
436                 if (gbl_params.symbolTable) {
437                     DPRINTF(ECRASH_DEBUG_VERBOSE,
438                                                         "symbolTable @ %p -- %d symbols\n", gbl_params.symbolTable,
439                                                 gbl_params.symbolTable->numSymbols);
440                         // Make a copy of our symbol table
441                         gbl_params.symbolTable = malloc(sizeof(eCrashSymbolTable));
442                         memcpy(gbl_params.symbolTable, params->symbolTable,
443                                    sizeof(eCrashSymbolTable));
444
445                         // Now allocate / copy the actual table.
446                         gbl_params.symbolTable->symbols = malloc(sizeof(eCrashSymbol) *
447                                                                      gbl_params.symbolTable->numSymbols);
448                         memcpy(gbl_params.symbolTable->symbols,
449                                    params->symbolTable->symbols,
450                                    sizeof(eCrashSymbol) * gbl_params.symbolTable->numSymbols);
451
452                         ValidateSymbolTable();
453                 }
454         
455                 // And, finally, register for our signals
456                 for (sigIndex=0; gbl_params.signals[sigIndex] != 0; sigIndex++) {
457                         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE,
458                                                         "   Catching signal[%d] %d\n", sigIndex,
459                                         gbl_params.signals[sigIndex]);
460
461                         // I know there's a better way to catch signals with pthreads.
462                         // I'll do it later TODO
463                         signal(gbl_params.signals[sigIndex], crash_handler);
464                 }
465         } else {
466                 DPRINTF(ECRASH_DEBUG_ERROR, "   Error:  Null Params!\n");
467                 ret = -1;
468         }
469         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE, "Init Complete ret=%d\n", ret);
470         return ret;
471 } /* eCrash_Init */
472
/*!
 * UnInitialize eCrash.
 *
 * Intended to de-activate eCrash, release the signal handlers, and free
 * any memory allocated by eCrash.  None of that is implemented yet: the
 * function only reports "Not Implemented Yet!" through NIY().
 *
 * @return Always zero.
 */
int eCrash_Uninit( void )
{
        /* Placeholder: nothing is released at the moment */
        NIY();
        return 0;
} /* eCrash_Uninit */
487
488 /*!
489  * Register a thread for backtracing on crash.
490  * 
491  * This function must be called by any thread wanting it's stack
492  * dumped in the event of a crash.  The thread my specify what 
493  * signal should be used, or the default, SIGUSR1 will be used.
494  *
495  * @param signo Signal to use to generate dump (default: SIGUSR1)
496  *
497  * @return Zero on success.
498  */
499 int eCrash_RegisterThread(char *name, int signo)
500 {
501         sighandler_t old_handler;
502
503         // Register for our signal
504         if (signo == 0) {
505                 signo = gbl_params.defaultBacktraceSignal;
506         }
507
508         old_handler = signal(signo, bt_handler);
509         return addThreadToList(name, pthread_self(), signo, old_handler);
510
511 } /* eCrash_RegisterThread */
512
/*!
 * Un-register a thread for stack dumps.
 *
 * Removes the calling thread from the thread list; the removal also
 * re-installs the signal handler that was saved at registration time.
 *
 * @return Zero on success, -1 if the calling thread was not registered.
 */
int eCrash_UnregisterThread( void )
{
        pthread_t self = pthread_self();

        return removeThreadFromList(self);
} /* eCrash_UnregisterThread */
525
526 #endif