ca7944415348c35993a9e3d73422f6a151a7563c
[citadel.git] / citadel / ecrash.c
1 /*
2  * $Id$
3  *
4  * author: David Frascone
5  * 
6  *  eCrash Implementation
7  *
8  *  eCrash will allow you to capture stack traces in the
9  *  event of a crash, and write those traces to disk, stdout,
10  *  or any other file handle.
11  *
12  *  modified to integrate closer into citadel by Wilfried Goesgens
13  *
14  * vim: ts=4
15  */
16
17 #include "sysdep.h"
18 #include <stdio.h>
19 #include <unistd.h>
20 #include <stdlib.h>
21 #include <stdarg.h>
22 #include <string.h>
23 #include <fcntl.h>
24 #include <syslog.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <pthread.h>
28 #include <libcitadel.h>
29 #include "server.h"
30 #include "sysdep_decls.h"
31 #include "support.h"
32 #include "config.h"
33 #include "citserver.h"
34 #include "ecrash.h"
35
36 #define NIY()   printf("%s: Not Implemented Yet!\n", __FUNCTION__)
37 #ifdef HAVE_BACKTRACE
38 #include <execinfo.h>
39 static eCrashParameters gbl_params;
40
41 static int    gbl_backtraceEntries;
42 static void **gbl_backtraceBuffer;
43 static char **gbl_backtraceSymbols;
44 static int    gbl_backtraceDoneFlag = 0;
45
46 static void *stack_frames[50];
47 static size_t size, NThread;
48 static char **strings;
49 static char StaticBuf[SIZ];
50
51 /* 
52  * Private structures for our thread list
53  */
54 typedef struct thread_list_node{
55         char *threadName;
56         pthread_t thread;
57         int backtraceSignal;
58         sighandler_t oldHandler;
59         struct thread_list_node *Next;
60 } ThreadListNode;
61
62 static pthread_mutex_t ThreadListMutex = PTHREAD_MUTEX_INITIALIZER;
63 static ThreadListNode *ThreadList = NULL;
64
65 /*********************************************************************
66  *********************************************************************
67  **     P  R  I  V  A  T  E      F  U  N  C  T  I  O  N  S
68  *********************************************************************
69  ********************************************************************/
70
71
72 /*!
73  * Insert a node into our threadList
74  *
75  * @param name   Text string indicating our thread
76  * @param thread Our Thread Id
77  * @param signo  Signal to create backtrace with
78  * @param old_handler Our old handler for signo
79  *
80  * @returns zero on success
81  */
82 static int addThreadToList(char *name, pthread_t thread,int signo,
83                                            sighandler_t old_handler)
84 {
85         ThreadListNode *node;
86
87         node = malloc(sizeof(ThreadListNode));
88         if (!node) return -1;
89
90         DPRINTF(ECRASH_DEBUG_VERBOSE,
91                                         "Adding thread 0x%08x (%s)\n", (unsigned int)thread, name);
92         node->threadName = strdup(name);
93         node->thread = thread;
94         node->backtraceSignal = signo;
95         node->oldHandler = old_handler;
96
97         /* And, add it to the list */
98         pthread_mutex_lock(&ThreadListMutex);
99         node->Next = ThreadList;
100         ThreadList = node;
101         pthread_mutex_unlock(&ThreadListMutex);
102         
103         return 0;
104
105 } // addThreadToList
106
107 /*!
108  * Remove a node from our threadList
109  *
110  * @param thread Our Thread Id
111  *
112  * @returns zero on success
113  */
114 static int removeThreadFromList(pthread_t thread)
115 {
116         ThreadListNode *Probe, *Prev=NULL;
117         ThreadListNode *Removed = NULL;
118
119         DPRINTF(ECRASH_DEBUG_VERBOSE,
120                                         "Removing thread 0x%08x from list . . .\n", (unsigned int)thread);
121         pthread_mutex_lock(&ThreadListMutex);
122         for (Probe=ThreadList;Probe != NULL; Probe = Probe->Next) {
123                 if (Probe->thread == thread) {
124                         // We found it!  Unlink it and move on!
125                         Removed = Probe;
126                         if (Prev == NULL) { // head of list
127                                 ThreadList = Probe->Next;
128                         } else {
129                                 // Prev != null, so we need to link around ourselves.
130                                 Prev->Next = Probe->Next;
131                         }
132                         Removed->Next = NULL;
133                         break;
134                 }
135
136                 Prev = Probe;
137         }
138         pthread_mutex_unlock(&ThreadListMutex);
139
140         // Now, if something is in Removed, free it, and return success
141         if (Removed) {
142             DPRINTF(ECRASH_DEBUG_VERBOSE,
143                                                 "   Found %s -- removing\n", Removed->threadName);
144                 // Reset the signal handler
145                 signal(Removed->backtraceSignal, Removed->oldHandler);
146
147                 // And free the allocated memory
148                 free (Removed->threadName);
149                 free (Removed);
150
151                 return 0;
152         } else {
153             DPRINTF(ECRASH_DEBUG_VERBOSE,
154                                                 "   Not Found\n");
155                 return -1; // Not Found
156         }
157 } // removeThreadFromList
158
159 /*!
160  * Print out a line of output to all our destinations
161  *
162  * One by one, output a line of text to all of our output destinations.
163  *
164  * Return failure if we fail to output to any of them.
165  *
166  * @param format   Normal printf style vararg format
167  *
168  * @returns nothing// bytes written, or error on failure.
169  */
170 static void outputPrintf(char *format, ...)
171 {
172         va_list ap;
173
174         va_start(ap, format);
175
176         if (enable_syslog)
177         {
178                 snprintf (StaticBuf, SIZ, format, ap);
179                 syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StaticBuf);
180         }
181         else
182                 CtdlLogPrintf(CTDL_EMERG, format, ap);
183
184 } // outputPrintf
185
186
187
188 /*!
189  * Dump our backtrace into a global location
190  *
191  * This function will dump out our backtrace into our
192  * global holding area.
193  *
194  */
195 static void createGlobalBacktrace( void )
196 {
197
198         size = backtrace(stack_frames, sizeof(stack_frames) / sizeof(void*));
199         if (enable_syslog)
200                 for (NThread = 0; NThread < size; NThread++) 
201                 {
202                         snprintf (StaticBuf, SIZ, "RAW: %p  ", stack_frames[NThread]);
203                         syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StaticBuf);
204                 }
205         else 
206                 for (NThread = 0; NThread < size; NThread++) 
207                         CtdlLogPrintf(1, "RAW: %p\n", stack_frames[NThread]);
208         strings = backtrace_symbols(stack_frames, size);
209         for (NThread = 0; NThread < size; NThread++) {
210                 if (strings != NULL) {
211                         if (enable_syslog)
212                         {// vsyslogs printf compliance sucks.
213                                 snprintf (StaticBuf, SIZ, "RAW: %p  ", strings[NThread]);
214                                 syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StaticBuf);
215                         }
216                         else
217                                 CtdlLogPrintf(1, "%s\n", strings[NThread]);
218                 }
219         }
220 } /* createGlobalBacktrace */
221 static void outputRawtrace( void )
222 {
223
224         size = backtrace(stack_frames, sizeof(stack_frames) / sizeof(void*));
225         if (enable_syslog)
226                 for (NThread = 0; NThread < size; NThread++) 
227                 {
228                         snprintf (StaticBuf, SIZ, "RAW: %p  ", stack_frames[NThread]);
229                         syslog( LOG_CRIT|LOG_NDELAY|LOG_MAIL, StaticBuf);
230                 }
231         else 
232                 for (NThread = 0; NThread < size; NThread++) 
233                         CtdlLogPrintf(1, "RAW: %p\n", stack_frames[NThread]);
234 } /* createGlobalBacktrace */
235
236 /*!
237  * Print out (to all the fds, etc), or global backtrace
238  */
239 static void outputGlobalBacktrace ( void )
240 {
241         int i;
242
243         for (i=0; i < gbl_backtraceEntries; i++) {
244                 if (gbl_backtraceSymbols != FALSE) {
245                         outputPrintf("*      Frame %02x: %s\n",
246                                      i, gbl_backtraceSymbols[i]);
247                 } else {
248                         outputPrintf("*      Frame %02x: %p\n", i,
249                                      gbl_backtraceBuffer[i]);
250                 }
251         }
252 } // outputGlobalBacktrace
253
/*!
 * Report the backtrace of the calling thread
 *
 * Captures the current stack with createGlobalBacktrace(), then prints
 * the global holding area with outputGlobalBacktrace().
 */
static void outputBacktrace( void )
{
        /* capture first ... */
        createGlobalBacktrace();

        /* ... then print whatever is in the holding area */
        outputGlobalBacktrace();
} /* outputBacktrace */
262
263 static void outputBacktraceThreads( void )
264 {
265         ThreadListNode *probe;
266         int i;
267
268         // When we're backtracing, don't worry about the mutex . . hopefully
269         // we're in a safe place.
270
271         for (probe=ThreadList; probe; probe=probe->Next) {
272                 gbl_backtraceDoneFlag = 0;
273                 pthread_kill(probe->thread, probe->backtraceSignal);
274                 for (i=0; i < gbl_params.threadWaitTime; i++) {
275                         if (gbl_backtraceDoneFlag)
276                                 break;
277                         sleep(1);
278                 }
279                 if (gbl_backtraceDoneFlag) {
280                         outputPrintf("*  Backtrace of \"%s\" (0x%08x)\n", 
281                                                  probe->threadName, (unsigned int)probe->thread);
282                         outputGlobalBacktrace();
283                 } else {
284                         outputPrintf("*  Error: unable to get backtrace of \"%s\" (0x%08x)\n", 
285                                                  probe->threadName, (unsigned int)probe->thread);
286                 }
287                 outputPrintf("*\n");
288         }
289 } // outputBacktraceThreads
290
291
292 /*!
293  * Handle signals (crash signals)
294  *
295  * This function will catch all crash signals, and will output the
296  * crash dump.  
297  *
298  * It will physically write (and sync) the current thread's information
299  * before it attempts to send signals to other threads.
300  * 
301  * @param signum Signal received.
302  */
303 static void crash_handler(int signo)
304 {
305         outputRawtrace();
306         outputPrintf("*********************************************************\n");
307         outputPrintf("*               eCrash Crash Handler\n");
308         outputPrintf("*********************************************************\n");
309         outputPrintf("*\n");
310         outputPrintf("*  Got a crash! signo=%d\n", signo);
311         outputPrintf("*\n");
312         outputPrintf("*  Offending Thread's Backtrace:\n");
313         outputPrintf("*\n");
314         outputBacktrace();
315         outputPrintf("*\n");
316
317         if (gbl_params.dumpAllThreads != FALSE) {
318                 outputBacktraceThreads();
319         }
320
321         outputPrintf("*\n");
322         outputPrintf("*********************************************************\n");
323         outputPrintf("*               eCrash Crash Handler\n");
324         outputPrintf("*********************************************************\n");
325
326         exit(signo);
327 } // crash_handler
328
329 /*!
330  * Handle signals (bt signals)
331  *
332  * This function shoudl be called to generate a crashdump into our
333  * global area.  Once the dump has been completed, this function will
334  * return after tickling a global.  Since mutexes are not async
335  * signal safe, the main thread, after signaling us to generate our
336  * own backtrace, will sleep for a few seconds waiting for us to complete.
337  *
338  * @param signum Signal received.
339  */
340 static void bt_handler(int signo)
341 {
342         createGlobalBacktrace();
343         gbl_backtraceDoneFlag=1;
344 } // bt_handler
345
346 /*!
347  * Validate a passed-in symbol table
348  *
349  * For now, just print it out (if verbose), and make sure it's
350  * sorted and none of the pointers are zero.
351  */
352 static int ValidateSymbolTable( void )
353 {
354         int i;
355         int rc=0;
356         unsigned long lastAddress =0;
357
358         // Get out of here if the table is empty
359         if (!gbl_params.symbolTable) return 0;
360
361         // Dump it in verbose mode
362         DPRINTF(ECRASH_DEBUG_VERBOSE,
363                                         "Symbol Table Provided with %d symbols\n",
364                                         gbl_params.symbolTable->numSymbols);
365         for (i=0; i < gbl_params.symbolTable->numSymbols; i++){
366                 // Dump it in verbose mode
367                 DPRINTF(ECRASH_DEBUG_VERBOSE, 
368                                 "%-30s %p\n",
369                                 gbl_params.symbolTable->symbols[i].function,
370                                 gbl_params.symbolTable->symbols[i].address);
371                 if (lastAddress >
372                     (unsigned long)gbl_params.symbolTable->symbols[i].address) {
373                         DPRINTF(ECRASH_DEBUG_ERROR,
374                                         "Error: symbol table is not sorted (last=%p, current=%p)\n",
375                                         (void *)lastAddress,
376                                         gbl_params.symbolTable->symbols[i].address);
377                         rc = -1;
378                 }
379
380         } // for
381
382         return rc;
383         
384 } // ValidateSymbolTable
385
386 /*********************************************************************
387  *********************************************************************
388  **      P  U  B  L  I  C      F  U  N  C  T  I  O  N  S
389  *********************************************************************
390  ********************************************************************/
391
392 /*!
393  * Initialize eCrash.
394  * 
395  * This function must be called before calling any other eCrash
396  * functions.  It sets up the global behavior of the system, and
397  * registers the calling thread for crash dumps.
398  *
399  * @param params Our input parameters.  The passed in structure will be copied.
400  *
401  * @return Zero on success.
402  */
403 int eCrash_Init(eCrashParameters *params)
404 {
405         int sigIndex;
406         int ret = 0;
407 #ifdef DO_SIGNALS_RIGHT
408         sigset_t blocked;
409         struct sigaction act;
410 #endif
411
412         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE,"Init Starting params = %p\n", params);
413
414         // Allocate our backtrace area
415         gbl_backtraceBuffer = malloc(sizeof(void *) * (params->maxStackDepth+5));
416
417 #ifdef DO_SIGNALS_RIGHT
418         sigemptyset(&blocked);
419         act.sa_sigaction = crash_handler;
420         act.sa_mask = blocked;
421         act.sa_flags = SA_SIGINFO;
422 #endif
423
424         if (params != NULL) {
425                 // Make ourselves a global copy of params.
426                 gbl_params = *params;
427                 gbl_params.filename = strdup(params->filename);
428
429                 // Set our defaults, if they weren't specified
430                 if (gbl_params.maxStackDepth == 0 )
431                         gbl_params.maxStackDepth = ECRASH_DEFAULT_STACK_DEPTH;
432
433                 if (gbl_params.defaultBacktraceSignal == 0 )
434                         gbl_params.defaultBacktraceSignal = ECRASH_DEFAULT_BACKTRACE_SIGNAL;
435
436                 if (gbl_params.threadWaitTime == 0 )
437                         gbl_params.threadWaitTime = ECRASH_DEFAULT_THREAD_WAIT_TIME;
438
439                 if (gbl_params.debugLevel == 0 )
440                         gbl_params.debugLevel = ECRASH_DEBUG_DEFAULT;
441
442                 // Copy our symbol table
443                 if (gbl_params.symbolTable) {
444                     DPRINTF(ECRASH_DEBUG_VERBOSE,
445                                                         "symbolTable @ %p -- %d symbols\n", gbl_params.symbolTable,
446                                                 gbl_params.symbolTable->numSymbols);
447                         // Make a copy of our symbol table
448                         gbl_params.symbolTable = malloc(sizeof(eCrashSymbolTable));
449                         memcpy(gbl_params.symbolTable, params->symbolTable,
450                                    sizeof(eCrashSymbolTable));
451
452                         // Now allocate / copy the actual table.
453                         gbl_params.symbolTable->symbols = malloc(sizeof(eCrashSymbol) *
454                                                                      gbl_params.symbolTable->numSymbols);
455                         memcpy(gbl_params.symbolTable->symbols,
456                                    params->symbolTable->symbols,
457                                    sizeof(eCrashSymbol) * gbl_params.symbolTable->numSymbols);
458
459                         ValidateSymbolTable();
460                 }
461         
462                 // And, finally, register for our signals
463                 for (sigIndex=0; gbl_params.signals[sigIndex] != 0; sigIndex++) {
464                         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE,
465                                                         "   Catching signal[%d] %d\n", sigIndex,
466                                         gbl_params.signals[sigIndex]);
467
468                         // I know there's a better way to catch signals with pthreads.
469                         // I'll do it later TODO
470                         signal(gbl_params.signals[sigIndex], crash_handler);
471                 }
472         } else {
473                 DPRINTF(ECRASH_DEBUG_ERROR, "   Error:  Null Params!\n");
474                 ret = -1;
475         }
476         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE, "Init Complete ret=%d\n", ret);
477         return ret;
478 } /* eCrash_Init */
479
/*!
 * UnInitialize eCrash.
 *
 * Intended to de-activate eCrash, release the signal handlers, and
 * free any memory allocated by eCrash.  This is not implemented yet:
 * the function only reports that fact via NIY() and leaves all state
 * untouched.
 *
 * @return Always zero.
 */
int eCrash_Uninit( void )
{
        /* placeholder until the real teardown is written */
        NIY();

        return 0;
} /* eCrash_Uninit */
494
495 /*!
496  * Register a thread for backtracing on crash.
497  * 
498  * This function must be called by any thread wanting it's stack
499  * dumped in the event of a crash.  The thread my specify what 
500  * signal should be used, or the default, SIGUSR1 will be used.
501  *
502  * @param signo Signal to use to generate dump (default: SIGUSR1)
503  *
504  * @return Zero on success.
505  */
506 int eCrash_RegisterThread(char *name, int signo)
507 {
508         sighandler_t old_handler;
509
510         // Register for our signal
511         if (signo == 0) {
512                 signo = gbl_params.defaultBacktraceSignal;
513         }
514
515         old_handler = signal(signo, bt_handler);
516         return addThreadToList(name, pthread_self(), signo, old_handler);
517
518 } /* eCrash_RegisterThread */
519
/*!
 * Un-register the calling thread for stack dumps.
 *
 * Removes the calling thread (as identified by pthread_self()) from
 * the list of registered threads.
 *
 * @return Zero on success, non-zero if the thread was not registered.
 */
int eCrash_UnregisterThread( void )
{
        pthread_t self = pthread_self();
        int status = removeThreadFromList(self);

        return status;
} /* eCrash_UnregisterThread */
532
533 #endif