6a8ef3a0c1ce905151df7664d5f85ed6ec514474
[citadel.git] / citadel / ecrash.c
1 /*
2  * File: eCrash.c
3  * @author David Frascone
4  * 
5  *  eCrash Implementation
6  *
7  *  eCrash will allow you to capture stack traces in the
8  *  event of a crash, and write those traces to disk, stdout,
9  *  or any other file handle.
10  *
11  *  modified to integrate closer into citadel by Wilfried Goesgens
12  *
13  * vim: ts=4
14  */
15
16 #include "sysdep.h"
17 #include <stdio.h>
18 #include <unistd.h>
19 #include <stdlib.h>
20 #include <stdarg.h>
21 #include <string.h>
22 #include <fcntl.h>
23 #include <syslog.h>
24 #include <sys/types.h>
25 #include <sys/stat.h>
26 #include <pthread.h>
27 #include <libcitadel.h>
28 #include "server.h"
29 #include "sysdep_decls.h"
30 #include "support.h"
31 #include "config.h"
32 #include "citserver.h"
33 #include "ecrash.h"
34
35 #define NIY()   printf("%s: Not Implemented Yet!\n", __FUNCTION__)
36 #ifdef HAVE_BACKTRACE
37 #include <execinfo.h>
38 static eCrashParameters gbl_params;
39
40 static int    gbl_backtraceEntries;
41 static void **gbl_backtraceBuffer;
42 static char **gbl_backtraceSymbols;
43 static int    gbl_backtraceDoneFlag = 0;
44
45 static void *stack_frames[50];
46 static size_t size, NThread;
47 static char **strings;
48 static char StaticBuf[SIZ];
49
50 /* 
51  * Private structures for our thread list
52  */
53 typedef struct thread_list_node{
54         char *threadName;
55         pthread_t thread;
56         int backtraceSignal;
57         sighandler_t oldHandler;
58         struct thread_list_node *Next;
59 } ThreadListNode;
60
61 static pthread_mutex_t ThreadListMutex = PTHREAD_MUTEX_INITIALIZER;
62 static ThreadListNode *ThreadList = NULL;
63
64 /*********************************************************************
65  *********************************************************************
66  **     P  R  I  V  A  T  E      F  U  N  C  T  I  O  N  S
67  *********************************************************************
68  ********************************************************************/
69
70
71 /*!
72  * Insert a node into our threadList
73  *
74  * @param name   Text string indicating our thread
75  * @param thread Our Thread Id
76  * @param signo  Signal to create backtrace with
77  * @param old_handler Our old handler for signo
78  *
79  * @returns zero on success
80  */
81 static int addThreadToList(char *name, pthread_t thread,int signo,
82                                            sighandler_t old_handler)
83 {
84         ThreadListNode *node;
85
86         node = malloc(sizeof(ThreadListNode));
87         if (!node) return -1;
88
89         DPRINTF(ECRASH_DEBUG_VERBOSE,
90                                         "Adding thread 0x%08x (%s)\n", (unsigned int)thread, name);
91         node->threadName = strdup(name);
92         node->thread = thread;
93         node->backtraceSignal = signo;
94         node->oldHandler = old_handler;
95
96         /* And, add it to the list */
97         pthread_mutex_lock(&ThreadListMutex);
98         node->Next = ThreadList;
99         ThreadList = node;
100         pthread_mutex_unlock(&ThreadListMutex);
101         
102         return 0;
103
104 } // addThreadToList
105
106 /*!
107  * Remove a node from our threadList
108  *
109  * @param thread Our Thread Id
110  *
111  * @returns zero on success
112  */
113 static int removeThreadFromList(pthread_t thread)
114 {
115         ThreadListNode *Probe, *Prev=NULL;
116         ThreadListNode *Removed = NULL;
117
118         DPRINTF(ECRASH_DEBUG_VERBOSE,
119                                         "Removing thread 0x%08x from list . . .\n", (unsigned int)thread);
120         pthread_mutex_lock(&ThreadListMutex);
121         for (Probe=ThreadList;Probe != NULL; Probe = Probe->Next) {
122                 if (Probe->thread == thread) {
123                         // We found it!  Unlink it and move on!
124                         Removed = Probe;
125                         if (Prev == NULL) { // head of list
126                                 ThreadList = Probe->Next;
127                         } else {
128                                 // Prev != null, so we need to link around ourselves.
129                                 Prev->Next = Probe->Next;
130                         }
131                         Removed->Next = NULL;
132                         break;
133                 }
134
135                 Prev = Probe;
136         }
137         pthread_mutex_unlock(&ThreadListMutex);
138
139         // Now, if something is in Removed, free it, and return success
140         if (Removed) {
141             DPRINTF(ECRASH_DEBUG_VERBOSE,
142                                                 "   Found %s -- removing\n", Removed->threadName);
143                 // Reset the signal handler
144                 signal(Removed->backtraceSignal, Removed->oldHandler);
145
146                 // And free the allocated memory
147                 free (Removed->threadName);
148                 free (Removed);
149
150                 return 0;
151         } else {
152             DPRINTF(ECRASH_DEBUG_VERBOSE,
153                                                 "   Not Found\n");
154                 return -1; // Not Found
155         }
156 } // removeThreadFromList
157
158 /*!
159  * Print out a line of output to all our destinations
160  *
161  * One by one, output a line of text to all of our output destinations.
162  *
163  * Return failure if we fail to output to any of them.
164  *
165  * @param format   Normal printf style vararg format
166  *
167  * @returns nothing// bytes written, or error on failure.
168  */
169 static void outputPrintf(char *format, ...)
170 {
171         va_list ap;
172
173         va_start(ap, format);
174
175         if (enable_syslog)
176         {
177                 snprintf (StaticBuf, SIZ, format, ap);
178                 syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StaticBuf);
179         }
180         else
181                 CtdlLogPrintf(CTDL_EMERG, format, ap);
182
183 } // outputPrintf
184
185
186
187 /*!
188  * Dump our backtrace into a global location
189  *
190  * This function will dump out our backtrace into our
191  * global holding area.
192  *
193  */
194 static void createGlobalBacktrace( void )
195 {
196
197         size = backtrace(stack_frames, sizeof(stack_frames) / sizeof(void*));
198         if (enable_syslog)
199                 for (NThread = 0; NThread < size; NThread++) 
200                 {
201                         snprintf (StaticBuf, SIZ, "RAW: %p  ", stack_frames[NThread]);
202                         syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StaticBuf);
203                 }
204         else 
205                 for (NThread = 0; NThread < size; NThread++) 
206                         CtdlLogPrintf(1, "RAW: %p\n", stack_frames[NThread]);
207         strings = backtrace_symbols(stack_frames, size);
208         for (NThread = 0; NThread < size; NThread++) {
209                 if (strings != NULL) {
210                         if (enable_syslog)
211                         {// vsyslogs printf compliance sucks.
212                                 snprintf (StaticBuf, SIZ, "RAW: %p  ", strings[NThread]);
213                                 syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StaticBuf);
214                         }
215                         else
216                                 CtdlLogPrintf(1, "%s\n", strings[NThread]);
217                 }
218         }
219 } /* createGlobalBacktrace */
220 static void outputRawtrace( void )
221 {
222
223         size = backtrace(stack_frames, sizeof(stack_frames) / sizeof(void*));
224         if (enable_syslog)
225                 for (NThread = 0; NThread < size; NThread++) 
226                 {
227                         snprintf (StaticBuf, SIZ, "RAW: %p  ", stack_frames[NThread]);
228                         syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StaticBuf);
229                 }
230         else 
231                 for (NThread = 0; NThread < size; NThread++) 
232                         CtdlLogPrintf(1, "RAW: %p\n", stack_frames[NThread]);
233 } /* createGlobalBacktrace */
234
235 /*!
236  * Print out (to all the fds, etc), or global backtrace
237  */
238 static void outputGlobalBacktrace ( void )
239 {
240         int i;
241
242         for (i=0; i < gbl_backtraceEntries; i++) {
243                 if (gbl_backtraceSymbols != FALSE) {
244                         outputPrintf("*      Frame %02x: %s\n",
245                                      i, gbl_backtraceSymbols[i]);
246                 } else {
247                         outputPrintf("*      Frame %02x: %p\n", i,
248                                      gbl_backtraceBuffer[i]);
249                 }
250         }
251 } // outputGlobalBacktrace
252
/*!
 * Output the backtrace of the current stack
 *
 * Runs the capture step (createGlobalBacktrace) and then the print step
 * (outputGlobalBacktrace), in that order.
 */
static void outputBacktrace( void )
{
        createGlobalBacktrace();        // capture
        outputGlobalBacktrace();        // print
} /* outputBacktrace */
261
262 static void outputBacktraceThreads( void )
263 {
264         ThreadListNode *probe;
265         int i;
266
267         // When we're backtracing, don't worry about the mutex . . hopefully
268         // we're in a safe place.
269
270         for (probe=ThreadList; probe; probe=probe->Next) {
271                 gbl_backtraceDoneFlag = 0;
272                 pthread_kill(probe->thread, probe->backtraceSignal);
273                 for (i=0; i < gbl_params.threadWaitTime; i++) {
274                         if (gbl_backtraceDoneFlag)
275                                 break;
276                         sleep(1);
277                 }
278                 if (gbl_backtraceDoneFlag) {
279                         outputPrintf("*  Backtrace of \"%s\" (0x%08x)\n", 
280                                                  probe->threadName, (unsigned int)probe->thread);
281                         outputGlobalBacktrace();
282                 } else {
283                         outputPrintf("*  Error: unable to get backtrace of \"%s\" (0x%08x)\n", 
284                                                  probe->threadName, (unsigned int)probe->thread);
285                 }
286                 outputPrintf("*\n");
287         }
288 } // outputBacktraceThreads
289
290
291 /*!
292  * Handle signals (crash signals)
293  *
294  * This function will catch all crash signals, and will output the
295  * crash dump.  
296  *
297  * It will physically write (and sync) the current thread's information
298  * before it attempts to send signals to other threads.
299  * 
300  * @param signum Signal received.
301  */
302 static void crash_handler(int signo)
303 {
304         outputRawtrace();
305         outputPrintf("*********************************************************\n");
306         outputPrintf("*               eCrash Crash Handler\n");
307         outputPrintf("*********************************************************\n");
308         outputPrintf("*\n");
309         outputPrintf("*  Got a crash! signo=%d\n", signo);
310         outputPrintf("*\n");
311         outputPrintf("*  Offending Thread's Backtrace:\n");
312         outputPrintf("*\n");
313         outputBacktrace();
314         outputPrintf("*\n");
315
316         if (gbl_params.dumpAllThreads != FALSE) {
317                 outputBacktraceThreads();
318         }
319
320         outputPrintf("*\n");
321         outputPrintf("*********************************************************\n");
322         outputPrintf("*               eCrash Crash Handler\n");
323         outputPrintf("*********************************************************\n");
324
325         exit(signo);
326 } // crash_handler
327
328 /*!
329  * Handle signals (bt signals)
330  *
331  * This function shoudl be called to generate a crashdump into our
332  * global area.  Once the dump has been completed, this function will
333  * return after tickling a global.  Since mutexes are not async
334  * signal safe, the main thread, after signaling us to generate our
335  * own backtrace, will sleep for a few seconds waiting for us to complete.
336  *
337  * @param signum Signal received.
338  */
339 static void bt_handler(int signo)
340 {
341         createGlobalBacktrace();
342         gbl_backtraceDoneFlag=1;
343 } // bt_handler
344
345 /*!
346  * Validate a passed-in symbol table
347  *
348  * For now, just print it out (if verbose), and make sure it's
349  * sorted and none of the pointers are zero.
350  */
351 static int ValidateSymbolTable( void )
352 {
353         int i;
354         int rc=0;
355         unsigned long lastAddress =0;
356
357         // Get out of here if the table is empty
358         if (!gbl_params.symbolTable) return 0;
359
360         // Dump it in verbose mode
361         DPRINTF(ECRASH_DEBUG_VERBOSE,
362                                         "Symbol Table Provided with %d symbols\n",
363                                         gbl_params.symbolTable->numSymbols);
364         for (i=0; i < gbl_params.symbolTable->numSymbols; i++){
365                 // Dump it in verbose mode
366                 DPRINTF(ECRASH_DEBUG_VERBOSE, 
367                                 "%-30s %p\n",
368                                 gbl_params.symbolTable->symbols[i].function,
369                                 gbl_params.symbolTable->symbols[i].address);
370                 if (lastAddress >
371                     (unsigned long)gbl_params.symbolTable->symbols[i].address) {
372                         DPRINTF(ECRASH_DEBUG_ERROR,
373                                         "Error: symbol table is not sorted (last=%p, current=%p)\n",
374                                         (void *)lastAddress,
375                                         gbl_params.symbolTable->symbols[i].address);
376                         rc = -1;
377                 }
378
379         } // for
380
381         return rc;
382         
383 } // ValidateSymbolTable
384
385 /*********************************************************************
386  *********************************************************************
387  **      P  U  B  L  I  C      F  U  N  C  T  I  O  N  S
388  *********************************************************************
389  ********************************************************************/
390
391 /*!
392  * Initialize eCrash.
393  * 
394  * This function must be called before calling any other eCrash
395  * functions.  It sets up the global behavior of the system, and
396  * registers the calling thread for crash dumps.
397  *
398  * @param params Our input parameters.  The passed in structure will be copied.
399  *
400  * @return Zero on success.
401  */
402 int eCrash_Init(eCrashParameters *params)
403 {
404         int sigIndex;
405         int ret = 0;
406 #ifdef DO_SIGNALS_RIGHT
407         sigset_t blocked;
408         struct sigaction act;
409 #endif
410
411         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE,"Init Starting params = %p\n", params);
412
413         // Allocate our backtrace area
414         gbl_backtraceBuffer = malloc(sizeof(void *) * (params->maxStackDepth+5));
415
416 #ifdef DO_SIGNALS_RIGHT
417         sigemptyset(&blocked);
418         act.sa_sigaction = crash_handler;
419         act.sa_mask = blocked;
420         act.sa_flags = SA_SIGINFO;
421 #endif
422
423         if (params != NULL) {
424                 // Make ourselves a global copy of params.
425                 gbl_params = *params;
426                 gbl_params.filename = strdup(params->filename);
427
428                 // Set our defaults, if they weren't specified
429                 if (gbl_params.maxStackDepth == 0 )
430                         gbl_params.maxStackDepth = ECRASH_DEFAULT_STACK_DEPTH;
431
432                 if (gbl_params.defaultBacktraceSignal == 0 )
433                         gbl_params.defaultBacktraceSignal = ECRASH_DEFAULT_BACKTRACE_SIGNAL;
434
435                 if (gbl_params.threadWaitTime == 0 )
436                         gbl_params.threadWaitTime = ECRASH_DEFAULT_THREAD_WAIT_TIME;
437
438                 if (gbl_params.debugLevel == 0 )
439                         gbl_params.debugLevel = ECRASH_DEBUG_DEFAULT;
440
441                 // Copy our symbol table
442                 if (gbl_params.symbolTable) {
443                     DPRINTF(ECRASH_DEBUG_VERBOSE,
444                                                         "symbolTable @ %p -- %d symbols\n", gbl_params.symbolTable,
445                                                 gbl_params.symbolTable->numSymbols);
446                         // Make a copy of our symbol table
447                         gbl_params.symbolTable = malloc(sizeof(eCrashSymbolTable));
448                         memcpy(gbl_params.symbolTable, params->symbolTable,
449                                    sizeof(eCrashSymbolTable));
450
451                         // Now allocate / copy the actual table.
452                         gbl_params.symbolTable->symbols = malloc(sizeof(eCrashSymbol) *
453                                                                      gbl_params.symbolTable->numSymbols);
454                         memcpy(gbl_params.symbolTable->symbols,
455                                    params->symbolTable->symbols,
456                                    sizeof(eCrashSymbol) * gbl_params.symbolTable->numSymbols);
457
458                         ValidateSymbolTable();
459                 }
460         
461                 // And, finally, register for our signals
462                 for (sigIndex=0; gbl_params.signals[sigIndex] != 0; sigIndex++) {
463                         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE,
464                                                         "   Catching signal[%d] %d\n", sigIndex,
465                                         gbl_params.signals[sigIndex]);
466
467                         // I know there's a better way to catch signals with pthreads.
468                         // I'll do it later TODO
469                         signal(gbl_params.signals[sigIndex], crash_handler);
470                 }
471         } else {
472                 DPRINTF(ECRASH_DEBUG_ERROR, "   Error:  Null Params!\n");
473                 ret = -1;
474         }
475         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE, "Init Complete ret=%d\n", ret);
476         return ret;
477 } /* eCrash_Init */
478
/*!
 * UnInitialize eCrash.
 * 
 * Intended to de-activate eCrash, release the signal handlers, and free
 * any memory allocated by eCrash.  None of that is implemented yet: the
 * function only prints the "Not Implemented Yet" notice.
 *
 * @return Always zero.
 */
int eCrash_Uninit( void )
{
        const int status = 0;

        NIY();

        return status;
} /* eCrash_Uninit */
493
494 /*!
495  * Register a thread for backtracing on crash.
496  * 
497  * This function must be called by any thread wanting it's stack
498  * dumped in the event of a crash.  The thread my specify what 
499  * signal should be used, or the default, SIGUSR1 will be used.
500  *
501  * @param signo Signal to use to generate dump (default: SIGUSR1)
502  *
503  * @return Zero on success.
504  */
505 int eCrash_RegisterThread(char *name, int signo)
506 {
507         sighandler_t old_handler;
508
509         // Register for our signal
510         if (signo == 0) {
511                 signo = gbl_params.defaultBacktraceSignal;
512         }
513
514         old_handler = signal(signo, bt_handler);
515         return addThreadToList(name, pthread_self(), signo, old_handler);
516
517 } /* eCrash_RegisterThread */
518
/*!
 * Un-register a thread for stack dumps.
 * 
 * Removes the calling thread from the list of registered threads.
 *
 * @return Zero on success, otherwise the failure code reported by
 *         removeThreadFromList().
 */
int eCrash_UnregisterThread( void )
{
        pthread_t self = pthread_self();

        return removeThreadFromList(self);
} /* eCrash_UnregisterThread */
531
532 #endif