* modules registering serverports now need to specify a plaintext name.
[citadel.git] / citadel / ecrash.c
1 /*
2  * File: eCrash.c
3  * @author David Frascone
4  * 
5  *  eCrash Implementation
6  *
7  *  eCrash will allow you to capture stack traces in the
8  *  event of a crash, and write those traces to disk, stdout,
9  *  or any other file handle.
10  *
11  *  modified to integrate closer into citadel by Wilfried Goesgens
12  *
13  * vim: ts=4
14  */
15
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <execinfo.h>
#include <pthread.h>
#include "sysdep_decls.h"
#include "ecrash.h"
28
/* Print "<function>: Not Implemented Yet!" on stdout; used by stub functions.
 * Uses the standard C99 __func__ rather than the GNU-only __FUNCTION__. */
#define NIY()   printf("%s: Not Implemented Yet!\n", __func__)
30
31 static eCrashParameters gbl_params;
32 static int gbl_fd=-1;
33
34 static int    gbl_backtraceEntries;
35 static void **gbl_backtraceBuffer;
36 static char **gbl_backtraceSymbols;
37 static int    gbl_backtraceDoneFlag = 0;
38
/* 
 * Private structures for our thread list
 *
 * One node is kept per registered thread, in a singly linked list whose
 * head is ThreadList.
 */
typedef struct thread_list_node{
        char *threadName;               /* heap copy of the registered name (strdup) */
        pthread_t thread;               /* id of the registered thread */
        int backtraceSignal;            /* signal sent to the thread to request a backtrace */
        sighandler_t oldHandler;        /* handler to restore when the thread is removed */
        struct thread_list_node *Next;  /* next node, or NULL at the end of the list */
} ThreadListNode;

/* Guards insertions into and removals from ThreadList. */
static pthread_mutex_t ThreadListMutex = PTHREAD_MUTEX_INITIALIZER;
/* Head of the registered-thread list; NULL when no thread is registered. */
static ThreadListNode *ThreadList = NULL;
52
53 /*********************************************************************
54  *********************************************************************
55  **     P  R  I  V  A  T  E      F  U  N  C  T  I  O  N  S
56  *********************************************************************
57  ********************************************************************/
58
59
60 /*!
61  * Insert a node into our threadList
62  *
63  * @param name   Text string indicating our thread
64  * @param thread Our Thread Id
65  * @param signo  Signal to create backtrace with
66  * @param old_handler Our old handler for signo
67  *
68  * @returns zero on success
69  */
70 static int addThreadToList(char *name, pthread_t thread,int signo,
71                                            sighandler_t old_handler)
72 {
73         ThreadListNode *node;
74
75         node = malloc(sizeof(ThreadListNode));
76         if (!node) return -1;
77
78         DPRINTF(ECRASH_DEBUG_VERBOSE,
79                                         "Adding thread 0x%08x (%s)\n", (unsigned int)thread, name);
80         node->threadName = strdup(name);
81         node->thread = thread;
82         node->backtraceSignal = signo;
83         node->oldHandler = old_handler;
84
85         /* And, add it to the list */
86         pthread_mutex_lock(&ThreadListMutex);
87         node->Next = ThreadList;
88         ThreadList = node;
89         pthread_mutex_unlock(&ThreadListMutex);
90         
91         return 0;
92
93 } // addThreadToList
94
95 /*!
96  * Remove a node from our threadList
97  *
98  * @param thread Our Thread Id
99  *
100  * @returns zero on success
101  */
102 static int removeThreadFromList(pthread_t thread)
103 {
104         ThreadListNode *Probe, *Prev=NULL;
105         ThreadListNode *Removed = NULL;
106
107         DPRINTF(ECRASH_DEBUG_VERBOSE,
108                                         "Removing thread 0x%08x from list . . .\n", (unsigned int)thread);
109         pthread_mutex_lock(&ThreadListMutex);
110         for (Probe=ThreadList;Probe != NULL; Probe = Probe->Next) {
111                 if (Probe->thread == thread) {
112                         // We found it!  Unlink it and move on!
113                         Removed = Probe;
114                         if (Prev == NULL) { // head of list
115                                 ThreadList = Probe->Next;
116                         } else {
117                                 // Prev != null, so we need to link around ourselves.
118                                 Prev->Next = Probe->Next;
119                         }
120                         Removed->Next = NULL;
121                         break;
122                 }
123
124                 Prev = Probe;
125         }
126         pthread_mutex_unlock(&ThreadListMutex);
127
128         // Now, if something is in Removed, free it, and return success
129         if (Removed) {
130             DPRINTF(ECRASH_DEBUG_VERBOSE,
131                                                 "   Found %s -- removing\n", Removed->threadName);
132                 // Reset the signal handler
133                 signal(Removed->backtraceSignal, Removed->oldHandler);
134
135                 // And free the allocated memory
136                 free (Removed->threadName);
137                 free (Removed);
138
139                 return 0;
140         } else {
141             DPRINTF(ECRASH_DEBUG_VERBOSE,
142                                                 "   Not Found\n");
143                 return -1; // Not Found
144         }
145 } // removeThreadFromList
146
/*!
 * Output text to a fd, looping to avoid being interrupted.
 *
 * write() may accept fewer bytes than requested, or fail with EINTR when a
 * signal arrives.  Keep writing from where the previous call stopped until
 * everything is out or a real error occurs.
 *
 * @param str   String to output
 * @param bytes String length
 * @param fd    File descriptor to write to
 *
 * @returns number of bytes actually written (less than bytes on error,
 *          zero if nothing could be written).
 */
static int blockingWrite(char *str, int bytes, int fd)
{
	int offset = 0;

	while (offset < bytes) {
		ssize_t bytesWritten = write(fd, &str[offset],
									 (size_t)(bytes - offset));

		// Interrupted before anything was written: just retry.
		if (bytesWritten < 0 && errno == EINTR)
			continue;
		if (bytesWritten < 1)
			break;

		// Advance past what was accepted so a short write resumes at the
		// right place instead of resending the start of the buffer.
		offset += (int)bytesWritten;
	}

	return offset;

} // blockingWrite
172 /*!
173  * Print out a line of output to all our destinations
174  *
175  * One by one, output a line of text to all of our output destinations.
176  *
177  * Return failure if we fail to output to any of them.
178  *
179  * @param format   Normal printf style vararg format
180  *
181  * @returns nothing// bytes written, or error on failure.
182  */
183 static void outputPrintf(char *format, ...)
184 {
185         // Our output line of text
186         static char outputLine[MAX_LINE_LEN];
187         //int bytesInLine;
188         va_list ap;
189         //int return_value=0;
190
191         va_start(ap, format);
192
193         return lprintf(CTDL_EMERG, format, ap);
194
195 /*
196         bytesInLine = vsnprintf(outputLine, MAX_LINE_LEN-1, format, ap);
197         if (bytesInLine > -1 && bytesInLine < (MAX_LINE_LEN-1)) {
198                 // We're a happy camper -- start printing
199                 if (gbl_params.filename) {
200                         // append to our file -- hopefully it's been opened
201                         if (gbl_fd != -1) {
202                            if (blockingWrite(outputLine, bytesInLine, gbl_fd)) {
203                                         return_value=-2;
204                            }
205                         }
206                 }
207
208                 // Write to our file pointer
209                 if (gbl_params.filep != NULL) {
210                         if (fwrite(outputLine, bytesInLine, 1, gbl_params.filep) != 1) {
211                                 return_value=-3;
212                         }
213                         fflush(gbl_params.filep);
214                 }
215
216                 // Write to our fd
217                 if (gbl_params.fd != -1) {
218                    if (blockingWrite(outputLine, bytesInLine, gbl_params.fd)) {
219                                 return_value=-4;
220                    }
221                 }
222         } else {
223                 // We overran our string.
224                 return_value=-1;
225         }
226 */
227 } // outputPrintf
228
229 /*!
230  * Initialize our output (open files, etc)
231  *
232  * This file initializes all output streams, since we're about
233  * to have output.
234  *
235  */
236 static void outputInit( void )
237 {
238         if (gbl_params.filename) {
239                 /* First try append */
240                 gbl_fd = open(gbl_params.filename, O_WRONLY|O_APPEND);
241                 if (gbl_fd < 0) {
242                         gbl_fd = open(gbl_params.filename, O_RDWR|O_CREAT,
243                                                   S_IREAD|S_IWRITE|S_IRGRP|S_IROTH); // 0644
244                         if (gbl_fd < 0) {
245                                 gbl_fd = -1;
246                         }
247                 }
248         }
249 } // outputInit
250
251
252 /*!
253  * Finalize our output (close files, etc)
254  *
255  * This file closes all output streams.
256  *
257  */
258 static void outputFini( void )
259 {
260 /* -> these seem to run into mutexes in the libc...
261         if (gbl_fd > -1)
262                 close(gbl_fd);
263
264         if (gbl_params.filep != NULL)
265                 fclose(gbl_params.filep);
266
267         if (gbl_params.fd > -1)
268                 close(gbl_params.fd);
269 */
270         // Just in case someone tries to call outputPrintf after outputFini
271         gbl_fd = gbl_params.fd = -1;
272         gbl_params.filep = NULL;
273
274         sync();
275
276 } // outputFini
277
278 static void *lookupClosestSymbol(eCrashSymbolTable *table,
279                                                  void *address)
280 {
281         int addr;
282         eCrashSymbol *last=NULL;
283
284         // For now, use a linear lookup.
285         DPRINTF(ECRASH_DEBUG_VERBOSE,
286                                         "Looking for %p in %d symbols\n", address, table->numSymbols);
287         for (addr=0; addr < table->numSymbols; addr++) {
288                 DPRINTF(ECRASH_DEBUG_VERBOSE,
289                         "  Examining [%d] %p\n", addr,
290                                                 table->symbols[addr].address);
291                 if (table->symbols[addr].address > address) {
292                         break;
293                 }
294                 last = &table->symbols[addr];
295         }
296
297         // last will either be NULL, or the last address less than the
298         // one we're looking for.
299         DPRINTF(ECRASH_DEBUG_VERBOSE,
300                                         "Returning %s (%p)\n", last?last->function:"(nil)",
301                     last?last->address:0);
302         return last;
303
304 } // lookupClosestSymbol
305
306 /*!
307  * Dump our backtrace into a global location
308  *
309  * This function will dump out our backtrace into our
310  * global holding area.
311  *
312  */
313 static void createGlobalBacktrace( void )
314 {
315
316         gbl_backtraceEntries = backtrace(gbl_backtraceBuffer,
317                                                          gbl_params.maxStackDepth);
318
319         /* This is NOT signal safe -- it calls malloc.  We need to
320            let the caller pass in a pointer to a symbol table inside of
321            our params. TODO */
322
323         if (!gbl_params.symbolTable) {
324                 if (gbl_params.useBacktraceSymbols != FALSE) {
325                         gbl_backtraceSymbols = backtrace_symbols(gbl_backtraceBuffer,
326                                                                                  gbl_backtraceEntries);
327                 }
328         }
329
330 } /* createGlobalBacktrace */
331
332 /*!
333  * Print out (to all the fds, etc), or global backtrace
334  */
335 static void outputGlobalBacktrace ( void )
336 {
337         int i;
338
339         for (i=0; i < gbl_backtraceEntries; i++) {
340                 if (gbl_params.symbolTable) {
341                         eCrashSymbol *symbol;
342
343                         symbol = lookupClosestSymbol(gbl_params.symbolTable,
344                                                                  gbl_backtraceBuffer[i]);
345                                 
346                         if (symbol) {
347                         outputPrintf("*      Frame %02d: %s+%u\n",
348                                                                 i, symbol->function,
349                                                                 gbl_backtraceBuffer[i] - symbol->address);
350                         } else {
351                                 outputPrintf("*      Frame %02d: %p\n", i,
352                                                                 gbl_backtraceBuffer[i]);
353                         }
354                 } else {
355                         if (gbl_backtraceSymbols != FALSE) {
356                         outputPrintf("*      Frame %02d: %s\n",
357                                                                 i, gbl_backtraceSymbols[i]);
358                         } else {
359                                 outputPrintf("*      Frame %02d: %p\n", i,
360                                                                 gbl_backtraceBuffer[i]);
361                         }
362                 } // symbolTable
363         }
364 } // outputGlobalBacktrace
365
/*!
 * Output our current stack's backtrace
 *
 * Captures the calling thread's stack into the global area, then prints it.
 */
static void outputBacktrace( void )
{
	createGlobalBacktrace();   // capture
	outputGlobalBacktrace();   // print
} /* outputBacktrace */
374
375 static void outputBacktraceThreads( void )
376 {
377         ThreadListNode *probe;
378         int i;
379
380         // When we're backtracing, don't worry about the mutex . . hopefully
381         // we're in a safe place.
382
383         for (probe=ThreadList; probe; probe=probe->Next) {
384                 gbl_backtraceDoneFlag = 0;
385                 pthread_kill(probe->thread, probe->backtraceSignal);
386                 for (i=0; i < gbl_params.threadWaitTime; i++) {
387                         if (gbl_backtraceDoneFlag)
388                                 break;
389                         sleep(1);
390                 }
391                 if (gbl_backtraceDoneFlag) {
392                         outputPrintf("*  Backtrace of \"%s\" (0x%08x)\n", 
393                                                  probe->threadName, (unsigned int)probe->thread);
394                         outputGlobalBacktrace();
395                 } else {
396                         outputPrintf("*  Error: unable to get backtrace of \"%s\" (0x%08x)\n", 
397                                                  probe->threadName, (unsigned int)probe->thread);
398                 }
399                 outputPrintf("*\n");
400         }
401 } // outputBacktraceThreads
402
403
404 /*!
405  * Handle signals (crash signals)
406  *
407  * This function will catch all crash signals, and will output the
408  * crash dump.  
409  *
410  * It will physically write (and sync) the current thread's information
411  * before it attempts to send signals to other threads.
412  * 
413  * @param signum Signal received.
414  */
415 static void crash_handler(int signo)
416 {
417         outputInit();
418         outputPrintf("*********************************************************\n");
419         outputPrintf("*               eCrash Crash Handler\n");
420         outputPrintf("*********************************************************\n");
421         outputPrintf("*\n");
422         outputPrintf("*  Got a crash! signo=%d\n", signo);
423         outputPrintf("*\n");
424         outputPrintf("*  Offending Thread's Backtrace:\n");
425         outputPrintf("*\n");
426         outputBacktrace();
427         outputPrintf("*\n");
428
429         if (gbl_params.dumpAllThreads != FALSE) {
430                 outputBacktraceThreads();
431         }
432
433         outputPrintf("*\n");
434         outputPrintf("*********************************************************\n");
435         outputPrintf("*               eCrash Crash Handler\n");
436         outputPrintf("*********************************************************\n");
437
438         outputFini();
439
440         exit(signo);
441 } // crash_handler
442
443 /*!
444  * Handle signals (bt signals)
445  *
446  * This function shoudl be called to generate a crashdump into our
447  * global area.  Once the dump has been completed, this function will
448  * return after tickling a global.  Since mutexes are not async
449  * signal safe, the main thread, after signaling us to generate our
450  * own backtrace, will sleep for a few seconds waiting for us to complete.
451  *
452  * @param signum Signal received.
453  */
454 static void bt_handler(int signo)
455 {
456         createGlobalBacktrace();
457         gbl_backtraceDoneFlag=1;
458 } // bt_handler
459
460 /*!
461  * Validate a passed-in symbol table
462  *
463  * For now, just print it out (if verbose), and make sure it's
464  * sorted and none of the pointers are zero.
465  */
466 static int ValidateSymbolTable( void )
467 {
468         int i;
469         int rc=0;
470         unsigned long lastAddress =0;
471
472         // Get out of here if the table is empty
473         if (!gbl_params.symbolTable) return 0;
474
475         // Dump it in verbose mode
476         DPRINTF(ECRASH_DEBUG_VERBOSE,
477                                         "Symbol Table Provided with %d symbols\n",
478                                         gbl_params.symbolTable->numSymbols);
479         for (i=0; i < gbl_params.symbolTable->numSymbols; i++){
480                 // Dump it in verbose mode
481                 DPRINTF(ECRASH_DEBUG_VERBOSE, 
482                                 "%-30s %p\n",
483                                 gbl_params.symbolTable->symbols[i].function,
484                                 gbl_params.symbolTable->symbols[i].address);
485                 if (lastAddress >
486                     (unsigned long)gbl_params.symbolTable->symbols[i].address) {
487                         DPRINTF(ECRASH_DEBUG_ERROR,
488                                         "Error: symbol table is not sorted (last=%p, current=%p)\n",
489                                         (void *)lastAddress,
490                                         gbl_params.symbolTable->symbols[i].address);
491                         rc = -1;
492                 }
493
494         } // for
495
496         return rc;
497         
498 } // ValidateSymbolTable
499
500 /*********************************************************************
501  *********************************************************************
502  **      P  U  B  L  I  C      F  U  N  C  T  I  O  N  S
503  *********************************************************************
504  ********************************************************************/
505
506 /*!
507  * Initialize eCrash.
508  * 
509  * This function must be called before calling any other eCrash
510  * functions.  It sets up the global behavior of the system, and
511  * registers the calling thread for crash dumps.
512  *
513  * @param params Our input parameters.  The passed in structure will be copied.
514  *
515  * @return Zero on success.
516  */
517 int eCrash_Init(eCrashParameters *params)
518 {
519         int sigIndex;
520         int ret = 0;
521 #ifdef DO_SIGNALS_RIGHT
522         sigset_t blocked;
523         struct sigaction act;
524 #endif
525
526         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE,"Init Starting params = %p\n", params);
527
528         // Allocate our backtrace area
529         gbl_backtraceBuffer = malloc(sizeof(void *) * (params->maxStackDepth+5));
530
531 #ifdef DO_SIGNALS_RIGHT
532         sigemptyset(&blocked);
533         act.sa_sigaction = crash_handler;
534         act.sa_mask = blocked;
535         act.sa_flags = SA_SIGINFO;
536 #endif
537
538         if (params != NULL) {
539                 // Make ourselves a global copy of params.
540                 gbl_params = *params;
541                 gbl_params.filename = strdup(params->filename);
542
543                 // Set our defaults, if they weren't specified
544                 if (gbl_params.maxStackDepth == 0 )
545                         gbl_params.maxStackDepth = ECRASH_DEFAULT_STACK_DEPTH;
546
547                 if (gbl_params.defaultBacktraceSignal == 0 )
548                         gbl_params.defaultBacktraceSignal = ECRASH_DEFAULT_BACKTRACE_SIGNAL;
549
550                 if (gbl_params.threadWaitTime == 0 )
551                         gbl_params.threadWaitTime = ECRASH_DEFAULT_THREAD_WAIT_TIME;
552
553                 if (gbl_params.debugLevel == 0 )
554                         gbl_params.debugLevel = ECRASH_DEBUG_DEFAULT;
555
556                 // Copy our symbol table
557                 if (gbl_params.symbolTable) {
558                     DPRINTF(ECRASH_DEBUG_VERBOSE,
559                                                         "symbolTable @ %p -- %d symbols\n", gbl_params.symbolTable,
560                                                 gbl_params.symbolTable->numSymbols);
561                         // Make a copy of our symbol table
562                         gbl_params.symbolTable = malloc(sizeof(eCrashSymbolTable));
563                         memcpy(gbl_params.symbolTable, params->symbolTable,
564                                    sizeof(eCrashSymbolTable));
565
566                         // Now allocate / copy the actual table.
567                         gbl_params.symbolTable->symbols = malloc(sizeof(eCrashSymbol) *
568                                                                      gbl_params.symbolTable->numSymbols);
569                         memcpy(gbl_params.symbolTable->symbols,
570                                    params->symbolTable->symbols,
571                                    sizeof(eCrashSymbol) * gbl_params.symbolTable->numSymbols);
572
573                         ValidateSymbolTable();
574                 }
575         
576                 // And, finally, register for our signals
577                 for (sigIndex=0; gbl_params.signals[sigIndex] != 0; sigIndex++) {
578                         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE,
579                                                         "   Catching signal[%d] %d\n", sigIndex,
580                                         gbl_params.signals[sigIndex]);
581
582                         // I know there's a better way to catch signals with pthreads.
583                         // I'll do it later TODO
584                         signal(gbl_params.signals[sigIndex], crash_handler);
585                 }
586         } else {
587                 DPRINTF(ECRASH_DEBUG_ERROR, "   Error:  Null Params!\n");
588                 ret = -1;
589         }
590         DPRINTF(ECRASH_DEBUG_VERY_VERBOSE, "Init Complete ret=%d\n", ret);
591         return ret;
592 } /* eCrash_Init */
593
/*!
 * UnInitialize eCrash.
 * 
 * Intended to de-activate eCrash, release the signal handlers, and free
 * any memory allocated by eCrash.  It is not implemented yet: it only
 * prints a "Not Implemented Yet" notice.
 *
 * @return Zero, always.
 */
int eCrash_Uninit( void )
{
	int status = 0;

	NIY();

	return status;
} /* eCrash_Uninit */
608
609 /*!
610  * Register a thread for backtracing on crash.
611  * 
612  * This function must be called by any thread wanting it's stack
613  * dumped in the event of a crash.  The thread my specify what 
614  * signal should be used, or the default, SIGUSR1 will be used.
615  *
616  * @param signo Signal to use to generate dump (default: SIGUSR1)
617  *
618  * @return Zero on success.
619  */
620 int eCrash_RegisterThread(char *name, int signo)
621 {
622         sighandler_t old_handler;
623
624         // Register for our signal
625         if (signo == 0) {
626                 signo = gbl_params.defaultBacktraceSignal;
627         }
628
629         old_handler = signal(signo, bt_handler);
630         return addThreadToList(name, pthread_self(), signo, old_handler);
631
632 } /* eCrash_RegisterThread */
633
/*!
 * Un-register a thread for stack dumps.
 * 
 * Removes the calling thread from the list of registered threads.
 *
 * @return Zero on success, non-zero if the calling thread was not
 *         registered.
 */
int eCrash_UnregisterThread( void )
{
	pthread_t self = pthread_self();

	return removeThreadFromList(self);
} /* eCrash_UnregisterThread */
646