b9decd1bfb6fac625e97cec6c7e309186118cebf
[citadel.git] / citadel / ecrash.c
1 /*
2  * author: David Frascone
3  * 
4  * eCrash Implementation
5  *
6  * eCrash will allow you to capture stack traces in the
7  * event of a crash, and write those traces to disk, stdout,
8  * or any other file handle.
9  *
10  * modified to integrate closer into citadel by Wilfried Goesgens
11  *
12  * vim: ts=4
13  *
14  * This program is open source software; you can redistribute it and/or modify
15  * it under the terms of the GNU General Public License, version 3.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  */
22
#include "sysdep.h"
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <fcntl.h>
#include <signal.h>
#include <syslog.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <pthread.h>
#include <libcitadel.h>
#include "server.h"
#include "sysdep_decls.h"
#include "support.h"
#include "config.h"
#include "citserver.h"
#include "ecrash.h"
41
/*
 * Placeholder for entry points that are not implemented yet.
 * Expands to a single expression without a trailing semicolon, so that
 * "NIY();" is exactly one statement and stays valid inside an unbraced
 * if/else.
 */
#define NIY()   printf("function not implemented yet!\n")
43 #ifdef HAVE_BACKTRACE
44 #include <execinfo.h>
45 static eCrashParameters gbl_params;
46
47 static int    gbl_backtraceEntries;
48 static void **gbl_backtraceBuffer;
49 static char **gbl_backtraceSymbols;
50 static int    gbl_backtraceDoneFlag = 0;
51
52 static void *stack_frames[50];
53 static size_t size, NThread;
54 static char **strings;
55
56 /* 
57  * Private structures for our thread list
58  */
59 typedef struct thread_list_node{
60         char *threadName;
61         pthread_t thread;
62         int backtraceSignal;
63         sighandler_t oldHandler;
64         struct thread_list_node *Next;
65 } ThreadListNode;
66
67 static pthread_mutex_t ThreadListMutex = PTHREAD_MUTEX_INITIALIZER;
68 static ThreadListNode *ThreadList = NULL;
69
70 /*********************************************************************
71  *********************************************************************
72  **     P  R  I  V  A  T  E      F  U  N  C  T  I  O  N  S
73  *********************************************************************
74  ********************************************************************/
75
76
77 /*!
78  * Insert a node into our threadList
79  *
80  * @param name   Text string indicating our thread
81  * @param thread Our Thread Id
82  * @param signo  Signal to create backtrace with
83  * @param old_handler Our old handler for signo
84  *
85  * @returns zero on success
86  */
87 static int addThreadToList(char *name, pthread_t thread,int signo,
88                                            sighandler_t old_handler)
89 {
90         ThreadListNode *node;
91
92         node = malloc(sizeof(ThreadListNode));
93         if (!node) return -1;
94
95         DPRINTF(ECRASH_DEBUG_VERBOSE,
96                                         "Adding thread 0x%08x (%s)\n", (unsigned int)thread, name);
97         node->threadName = strdup(name);
98         node->thread = thread;
99         node->backtraceSignal = signo;
100         node->oldHandler = old_handler;
101
102         /* And, add it to the list */
103         pthread_mutex_lock(&ThreadListMutex);
104         node->Next = ThreadList;
105         ThreadList = node;
106         pthread_mutex_unlock(&ThreadListMutex);
107         
108         return 0;
109
110 } // addThreadToList
111
112 /*!
113  * Remove a node from our threadList
114  *
115  * @param thread Our Thread Id
116  *
117  * @returns zero on success
118  */
119 static int removeThreadFromList(pthread_t thread)
120 {
121         ThreadListNode *Probe, *Prev=NULL;
122         ThreadListNode *Removed = NULL;
123
124         DPRINTF(ECRASH_DEBUG_VERBOSE,
125                                         "Removing thread 0x%08x from list . . .\n", (unsigned int)thread);
126         pthread_mutex_lock(&ThreadListMutex);
127         for (Probe=ThreadList;Probe != NULL; Probe = Probe->Next) {
128                 if (Probe->thread == thread) {
129                         // We found it!  Unlink it and move on!
130                         Removed = Probe;
131                         if (Prev == NULL) { // head of list
132                                 ThreadList = Probe->Next;
133                         } else {
134                                 // Prev != null, so we need to link around ourselves.
135                                 Prev->Next = Probe->Next;
136                         }
137                         Removed->Next = NULL;
138                         break;
139                 }
140
141                 Prev = Probe;
142         }
143         pthread_mutex_unlock(&ThreadListMutex);
144
145         // Now, if something is in Removed, free it, and return success
146         if (Removed) {
147             DPRINTF(ECRASH_DEBUG_VERBOSE,
148                                                 "   Found %s -- removing\n", Removed->threadName);
149                 // Reset the signal handler
150                 signal(Removed->backtraceSignal, Removed->oldHandler);
151
152                 // And free the allocated memory
153                 free (Removed->threadName);
154                 free (Removed);
155
156                 return 0;
157         } else {
158             DPRINTF(ECRASH_DEBUG_VERBOSE,
159                                                 "   Not Found\n");
160                 return -1; // Not Found
161         }
162 } // removeThreadFromList
163
/*!
 * Print out a line of output to syslog
 *
 * Formats the arguments printf-style and hands them to vsyslog().
 *
 * NOTE(review): LOG_NDELAY is documented as an openlog() option rather than
 * a priority/facility bit; the priority expression is left unchanged here so
 * that messages keep going where they went before.
 *
 * @param format   Normal printf style vararg format
 *
 * @returns nothing
 */
static void outputPrintf(char *format, ...)
{
        va_list ap;

        va_start(ap, format);
        vsyslog(LOG_CRIT|LOG_NDELAY|LOG_MAIL, format, ap);
        // Every va_start() must be paired with va_end() before returning
        va_end(ap);
} // outputPrintf
183
184
185
186 /*!
187  * Dump our backtrace into a global location
188  *
189  * This function will dump out our backtrace into our
190  * global holding area.
191  *
192  */
193 static void createGlobalBacktrace( void )
194 {
195
196         size = backtrace(stack_frames, sizeof(stack_frames) / sizeof(void*));
197         for (NThread = 0; NThread < size; NThread++) 
198         {
199                 syslog(LOG_CRIT|LOG_NDELAY|LOG_MAIL, "RAW: %p  ", stack_frames[NThread]);
200         }
201         strings = backtrace_symbols(stack_frames, size);
202         for (NThread = 0; NThread < size; NThread++) {
203                 if (strings != NULL) {
204                         syslog(LOG_CRIT|LOG_NDELAY|LOG_MAIL, "RAW: %p  ", strings[NThread]);
205                 }
206         }
207 } /* createGlobalBacktrace */
208 static void outputRawtrace( void )
209 {
210
211         size = backtrace(stack_frames, sizeof(stack_frames) / sizeof(void*));
212         for (NThread = 0; NThread < size; NThread++) 
213         {
214                 syslog(LOG_CRIT|LOG_NDELAY|LOG_MAIL, "RAW: %p  ", stack_frames[NThread]);
215         }
216 } /* createGlobalBacktrace */
217
218 /*!
219  * Print out (to all the fds, etc), or global backtrace
220  */
221 static void outputGlobalBacktrace ( void )
222 {
223         int i;
224
225         for (i=0; i < gbl_backtraceEntries; i++) {
226                 if (gbl_backtraceSymbols != FALSE) {
227                         outputPrintf("*      Frame %02x: %s\n",
228                                      i, gbl_backtraceSymbols[i]);
229                 } else {
230                         outputPrintf("*      Frame %02x: %p\n", i,
231                                      gbl_backtraceBuffer[i]);
232                 }
233         }
234 } // outputGlobalBacktrace
235
/*!
 * Output the backtrace of the calling thread's stack
 *
 * Two steps: capture (and log) the current stack, then print whatever
 * the global holding area contains.
 */
static void outputBacktrace( void )
{
        /* step 1: capture */
        createGlobalBacktrace();

        /* step 2: print the holding area */
        outputGlobalBacktrace();
} /* outputBacktrace */
244
245 static void outputBacktraceThreads( void )
246 {
247         ThreadListNode *probe;
248         int i;
249
250         // When we're backtracing, don't worry about the mutex . . hopefully
251         // we're in a safe place.
252
253         for (probe=ThreadList; probe; probe=probe->Next) {
254                 gbl_backtraceDoneFlag = 0;
255                 pthread_kill(probe->thread, probe->backtraceSignal);
256                 for (i=0; i < gbl_params.threadWaitTime; i++) {
257                         if (gbl_backtraceDoneFlag)
258                                 break;
259                         sleep(1);
260                 }
261                 if (gbl_backtraceDoneFlag) {
262                         outputPrintf("*  Backtrace of \"%s\" (0x%08x)\n", 
263                                                  probe->threadName, (unsigned int)probe->thread);
264                         outputGlobalBacktrace();
265                 } else {
266                         outputPrintf("*  Error: unable to get backtrace of \"%s\" (0x%08x)\n", 
267                                                  probe->threadName, (unsigned int)probe->thread);
268                 }
269                 outputPrintf("*\n");
270         }
271 } // outputBacktraceThreads
272
273
274 /*!
275  * Handle signals (crash signals)
276  *
277  * This function will catch all crash signals, and will output the
278  * crash dump.  
279  *
280  * It will physically write (and sync) the current thread's information
281  * before it attempts to send signals to other threads.
282  * 
283  * @param signum Signal received.
284  */
285 static void crash_handler(int signo)
286 {
287         outputRawtrace();
288         outputPrintf("*********************************************************\n");
289         outputPrintf("*               eCrash Crash Handler\n");
290         outputPrintf("*********************************************************\n");
291         outputPrintf("*\n");
292         outputPrintf("*  Got a crash! signo=%d\n", signo);
293         outputPrintf("*\n");
294         outputPrintf("*  Offending Thread's Backtrace:\n");
295         outputPrintf("*\n");
296         outputBacktrace();
297         outputPrintf("*\n");
298
299         if (gbl_params.dumpAllThreads != FALSE) {
300                 outputBacktraceThreads();
301         }
302
303         outputPrintf("*\n");
304         outputPrintf("*********************************************************\n");
305         outputPrintf("*               eCrash Crash Handler\n");
306         outputPrintf("*********************************************************\n");
307
308         exit(signo);
309 } // crash_handler
310
311 /*!
312  * Handle signals (bt signals)
313  *
314  * This function shoudl be called to generate a crashdump into our
315  * global area.  Once the dump has been completed, this function will
316  * return after tickling a global.  Since mutexes are not async
317  * signal safe, the main thread, after signaling us to generate our
318  * own backtrace, will sleep for a few seconds waiting for us to complete.
319  *
320  * @param signum Signal received.
321  */
322 static void bt_handler(int signo)
323 {
324         createGlobalBacktrace();
325         gbl_backtraceDoneFlag=1;
326 } // bt_handler
327
328 /*!
329  * Validate a passed-in symbol table
330  *
331  * For now, just print it out (if verbose), and make sure it's
332  * sorted and none of the pointers are zero.
333  */
334 static int ValidateSymbolTable( void )
335 {
336         int i;
337         int rc=0;
338         unsigned long lastAddress =0;
339
340         // Get out of here if the table is empty
341         if (!gbl_params.symbolTable) return 0;
342
343         // Dump it in verbose mode
344         DPRINTF(ECRASH_DEBUG_VERBOSE,
345                                         "Symbol Table Provided with %d symbols\n",
346                                         gbl_params.symbolTable->numSymbols);
347         for (i=0; i < gbl_params.symbolTable->numSymbols; i++){
348                 // Dump it in verbose mode
349                 DPRINTF(ECRASH_DEBUG_VERBOSE, 
350                                 "%-30s %p\n",
351                                 gbl_params.symbolTable->symbols[i].function,
352                                 gbl_params.symbolTable->symbols[i].address);
353                 if (lastAddress >
354                     (unsigned long)gbl_params.symbolTable->symbols[i].address) {
355                         DPRINTF(ECRASH_DEBUG_ERROR,
356                                         "Error: symbol table is not sorted (last=%p, current=%p)\n",
357                                         (void *)lastAddress,
358                                         gbl_params.symbolTable->symbols[i].address);
359                         rc = -1;
360                 }
361
362         } // for
363
364         return rc;
365         
366 } // ValidateSymbolTable
367
368 /*********************************************************************
369  *********************************************************************
370  **      P  U  B  L  I  C      F  U  N  C  T  I  O  N  S
371  *********************************************************************
372  ********************************************************************/
373
374 /*!
375  * Initialize eCrash.
376  * 
377  * This function must be called before calling any other eCrash
378  * functions.  It sets up the global behavior of the system, and
379  * registers the calling thread for crash dumps.
380  *
381  * @param params Our input parameters.  The passed in structure will be copied.
382  *
383  * @return Zero on success.
384  */
385 int eCrash_Init(eCrashParameters *params)
386 {
387         int sigIndex;
388         int ret = 0;
389 #ifdef DO_SIGNALS_RIGHT
390         sigset_t blocked;
391         struct sigaction act;
392 #endif
393
394         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE,"Init Starting params = %p\n", params);
395
396         // Allocate our backtrace area
397         gbl_backtraceBuffer = malloc(sizeof(void *) * (params->maxStackDepth+5));
398
399 #ifdef DO_SIGNALS_RIGHT
400         sigemptyset(&blocked);
401         act.sa_sigaction = crash_handler;
402         act.sa_mask = blocked;
403         act.sa_flags = SA_SIGINFO;
404 #endif
405
406         if (params != NULL) {
407                 // Make ourselves a global copy of params.
408                 gbl_params = *params;
409                 gbl_params.filename = strdup(params->filename);
410
411                 // Set our defaults, if they weren't specified
412                 if (gbl_params.maxStackDepth == 0 )
413                         gbl_params.maxStackDepth = ECRASH_DEFAULT_STACK_DEPTH;
414
415                 if (gbl_params.defaultBacktraceSignal == 0 )
416                         gbl_params.defaultBacktraceSignal = ECRASH_DEFAULT_BACKTRACE_SIGNAL;
417
418                 if (gbl_params.threadWaitTime == 0 )
419                         gbl_params.threadWaitTime = ECRASH_DEFAULT_THREAD_WAIT_TIME;
420
421                 if (gbl_params.debugLevel == 0 )
422                         gbl_params.debugLevel = ECRASH_DEBUG_DEFAULT;
423
424                 // Copy our symbol table
425                 if (gbl_params.symbolTable) {
426                     DPRINTF(ECRASH_DEBUG_VERBOSE,
427                                                         "symbolTable @ %p -- %d symbols\n", gbl_params.symbolTable,
428                                                 gbl_params.symbolTable->numSymbols);
429                         // Make a copy of our symbol table
430                         gbl_params.symbolTable = malloc(sizeof(eCrashSymbolTable));
431                         memcpy(gbl_params.symbolTable, params->symbolTable,
432                                    sizeof(eCrashSymbolTable));
433
434                         // Now allocate / copy the actual table.
435                         gbl_params.symbolTable->symbols = malloc(sizeof(eCrashSymbol) *
436                                                                      gbl_params.symbolTable->numSymbols);
437                         memcpy(gbl_params.symbolTable->symbols,
438                                    params->symbolTable->symbols,
439                                    sizeof(eCrashSymbol) * gbl_params.symbolTable->numSymbols);
440
441                         ValidateSymbolTable();
442                 }
443         
444                 // And, finally, register for our signals
445                 for (sigIndex=0; gbl_params.signals[sigIndex] != 0; sigIndex++) {
446                         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE,
447                                                         "   Catching signal[%d] %d\n", sigIndex,
448                                         gbl_params.signals[sigIndex]);
449
450                         // I know there's a better way to catch signals with pthreads.
451                         // I'll do it later TODO
452                         signal(gbl_params.signals[sigIndex], crash_handler);
453                 }
454         } else {
455                 DPRINTF(ECRASH_DEBUG_ERROR, "   Error:  Null Params!\n");
456                 ret = -1;
457         }
458         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE, "Init Complete ret=%d\n", ret);
459         return ret;
460 } /* eCrash_Init */
461
/*!
 * UnInitialize eCrash.
 *
 * Intended to de-activate eCrash, release the signal handlers, and free
 * any memory allocated by eCrash.  None of that is implemented yet: the
 * function only prints the "not implemented" notice.
 *
 * @return Always zero.
 */
int eCrash_Uninit( void )
{
        /* Nothing is torn down here; just announce the missing implementation. */
        NIY();

        return 0;
} /* eCrash_Uninit */
476
477 /*!
478  * Register a thread for backtracing on crash.
479  * 
480  * This function must be called by any thread wanting it's stack
481  * dumped in the event of a crash.  The thread my specify what 
482  * signal should be used, or the default, SIGUSR1 will be used.
483  *
484  * @param signo Signal to use to generate dump (default: SIGUSR1)
485  *
486  * @return Zero on success.
487  */
488 int eCrash_RegisterThread(char *name, int signo)
489 {
490         sighandler_t old_handler;
491
492         // Register for our signal
493         if (signo == 0) {
494                 signo = gbl_params.defaultBacktraceSignal;
495         }
496
497         old_handler = signal(signo, bt_handler);
498         return addThreadToList(name, pthread_self(), signo, old_handler);
499
500 } /* eCrash_RegisterThread */
501
/*!
 * Un-register a thread for stack dumps.
 *
 * Removes the calling thread from the list of registered threads; must
 * therefore be called on the thread that was registered.
 *
 * @return Zero on success, non-zero if the calling thread was not found.
 */
int eCrash_UnregisterThread( void )
{
        pthread_t self = pthread_self();

        return removeThreadFromList(self);
} /* eCrash_UnregisterThread */
514
515 #endif