Send crashmessage just once per day.
[citadel.git] / citadel / sysdep.c
index 8f71b7ea41ec3c4a09882d727ed3454ca417f308..787a93a3df8eae8cb16b3ab76232e1af009f8fea 100644 (file)
  * Copyright (c) 1987-2011 by the citadel.org team
  *
  * This program is open source software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
+ * it under the terms of the GNU General Public License, version 3.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #include "sysdep.h"
-#include <stdlib.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <fcntl.h>
-#include <ctype.h>
+
+#include <errno.h>
 #include <signal.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/wait.h>
-#include <sys/socket.h>
+#include <stdio.h>
 #include <syslog.h>
 #include <sys/syslog.h>
 
-#if TIME_WITH_SYS_TIME
-# include <sys/time.h>
-# include <time.h>
-#else
-# if HAVE_SYS_TIME_H
-#  include <sys/time.h>
-# else
-#  include <time.h>
-# endif
-#endif
 
-#include <limits.h>
-#include <sys/resource.h>
+#include <sys/un.h>
+#include <sys/types.h>
+#include <sys/socket.h>
 #include <netinet/in.h>
+#include <arpa/inet.h>
 #include <netinet/tcp.h>
 #include <arpa/inet.h>
-#include <netdb.h>
-#include <sys/un.h>
-#include <string.h>
-#include <pwd.h>
-#include <errno.h>
-#include <stdarg.h>
-#include <grp.h>
+
 #define SHOW_ME_VAPPEND_PRINTF
 #include <libcitadel.h>
-#include "citadel.h"
-#include "server.h"
-#include "sysdep_decls.h"
-#include "citserver.h"
-#include "support.h"
-#include "config.h"
-#include "database.h"
-#include "housekeeping.h"
-#include "modules/crypto/serv_crypto.h"        /* Needed for init_ssl, client_write_ssl, client_read_ssl, destruct_ssl */
-#include "ecrash.h"
-#include "context.h"
-
-#ifdef HAVE_SYS_SELECT_H
-#include <sys/select.h>
-#endif
-
-#ifndef HAVE_SNPRINTF
-#include "snprintf.h"
-#endif
 
+#include "citserver.h"
 #include "ctdl_module.h"
-#include "threads.h"
-#include "user_ops.h"
-#include "control.h"
 
+#include "sysdep_decls.h"
+#include "modules/crypto/serv_crypto.h"        /* Needed for init_ssl, client_write_ssl, client_read_ssl, destruct_ssl */
 
+#include "housekeeping.h"
+#include "context.h"
 /*
  * Signal handler to shut down the server.
  */
@@ -448,14 +405,16 @@ int client_write(const char *buf, int nbytes)
                snprintf(fn, SIZ, "/tmp/foolog_%s.%d", Ctx->ServiceName, Ctx->cs_pid);
                
                fd = fopen(fn, "a+");
-               if (fd)
-               {
-                   fprintf(fd, "Sending: BufSize: %d BufContent: [",
-                           nbytes);
-                   rv = fwrite(buf, nbytes, 1, fd);
-                   fprintf(fd, "]\n");
-                   fclose(fd);
+               if (fd == NULL) {
+                       syslog(LOG_EMERG, "failed to open file %s: %s", fn, strerror(errno));
+                       cit_backtrace();
+                       exit(1);
                }
+               fprintf(fd, "Sending: BufSize: %d BufContent: [",
+                       nbytes);
+               rv = fwrite(buf, nbytes, 1, fd);
+               fprintf(fd, "]\n");
+               fclose(fd);
        }
 #endif
 //     flush_client_inbuf();
@@ -573,6 +532,11 @@ int client_read_blob(StrBuf *Target, int bytes, int timeout)
                snprintf(fn, SIZ, "/tmp/foolog_%s.%d", CCC->ServiceName, CCC->cs_pid);
                        
                fd = fopen(fn, "a+");
+               if (fd == NULL) {
+                       syslog(LOG_EMERG, "failed to open file %s: %s", fn, strerror(errno));
+                       cit_backtrace();
+                       exit(1);
+               }
                fprintf(fd, "Reading BLOB: BufSize: %d ",
                        bytes);
                rv = fwrite(ChrPtr(Target), StrLength(Target), 1, fd);
@@ -589,6 +553,11 @@ int client_read_blob(StrBuf *Target, int bytes, int timeout)
                snprintf(fn, SIZ, "/tmp/foolog_%s.%d", CCC->ServiceName, CCC->cs_pid);
                
                fd = fopen(fn, "a+");
+               if (fd == NULL) {
+                       syslog(LOG_EMERG, "failed to open file %s: %s", fn, strerror(errno));
+                       cit_backtrace();
+                       exit(1);
+               }
                fprintf(fd, "Read: %d BufContent: [",
                        StrLength(Target));
                rv = fwrite(ChrPtr(Target), StrLength(Target), 1, fd);
@@ -609,6 +578,11 @@ int client_read_blob(StrBuf *Target, int bytes, int timeout)
                snprintf(fn, SIZ, "/tmp/foolog_%s.%d", CCC->ServiceName, CCC->cs_pid);
                        
                fd = fopen(fn, "a+");
+               if (fd == NULL) {
+                       syslog(LOG_EMERG, "failed to open file %s: %s", fn, strerror(errno));
+                       cit_backtrace();
+                       exit(1);
+               }
                fprintf(fd, "Reading BLOB: BufSize: %d ",
                        bytes);
                rv = fwrite(ChrPtr(Target), StrLength(Target), 1, fd);
@@ -634,6 +608,11 @@ int client_read_blob(StrBuf *Target, int bytes, int timeout)
                snprintf(fn, SIZ, "/tmp/foolog_%s.%d", CCC->ServiceName, CCC->cs_pid);
                
                fd = fopen(fn, "a+");
+               if (fd == NULL) {
+                       syslog(LOG_EMERG, "failed to open file %s: %s", fn, strerror(errno));
+                       cit_backtrace();
+                       exit(1);
+               }
                fprintf(fd, "Read: %d BufContent: [",
                        StrLength(Target));
                rv = fwrite(ChrPtr(Target), StrLength(Target), 1, fd);
@@ -688,6 +667,11 @@ int client_read_random_blob(StrBuf *Target, int timeout)
                                snprintf(fn, SIZ, "/tmp/foolog_%s.%d", CCC->ServiceName, CCC->cs_pid);
                        
                                fd = fopen(fn, "a+");
+                               if (fd == NULL) {
+                                       syslog(LOG_EMERG, "failed to open file %s: %s", fn, strerror(errno));
+                                       cit_backtrace();
+                                       exit(1);
+                               }
                                fprintf(fd, "Read: BufSize: %d BufContent: [",
                                        StrLength(Target));
                                rv = fwrite(ChrPtr(Target), StrLength(Target), 1, fd);
@@ -771,6 +755,11 @@ int CtdlClientGetLine(StrBuf *Target)
                snprintf(fn, SIZ, "/tmp/foolog_%s.%d", CCC->ServiceName, CCC->cs_pid);
 
                fd = fopen(fn, "a+");
+               if (fd == NULL) {
+                       syslog(LOG_EMERG, "failed to open file %s: %s", fn, strerror(errno));
+                       cit_backtrace();
+                       exit(1);
+               }
                pch = ChrPtr(CCC->RecvBuf.Buf);
                len = StrLength(CCC->RecvBuf.Buf);
                if (CCC->RecvBuf.ReadWritePointer != NULL)
@@ -824,6 +813,11 @@ int CtdlClientGetLine(StrBuf *Target)
                snprintf(fn, SIZ, "/tmp/foolog_%s.%d", CCC->ServiceName, CCC->cs_pid);
 
                fd = fopen(fn, "a+");
+               if (fd == NULL) {
+                       syslog(LOG_EMERG, "failed to open file %s: %s", fn, strerror(errno));
+                       cit_backtrace();
+                       exit(1);
+               }
                pch = ChrPtr(CCC->RecvBuf.Buf);
                len = StrLength(CCC->RecvBuf.Buf);
                if (CCC->RecvBuf.ReadWritePointer != NULL)
@@ -918,7 +912,8 @@ int client_getln(char *buf, int bufsize)
 void close_masters (void)
 {
        struct ServiceFunctionHook *serviceptr;
-       
+       const char *Text;
+
        /*
         * close all protocol master sockets
         */
@@ -927,18 +922,37 @@ void close_masters (void)
 
                if (serviceptr->tcp_port > 0)
                {
-                       syslog(LOG_INFO, "Closing %d listener on port %d\n",
+                       if (serviceptr->msock == -1)
+                               Text = "not closing again";
+                       else
+                               Text = "Closing";
+                                       
+                       syslog(LOG_INFO, "%s %d listener on port %d\n",
+                              Text,
                               serviceptr->msock,
                               serviceptr->tcp_port);
                        serviceptr->tcp_port = 0;
                }
                
                if (serviceptr->sockpath != NULL)
-                       syslog(LOG_INFO, "Closing %d listener on '%s'\n",
+               {
+                       if (serviceptr->msock == -1)
+                               Text = "not closing again";
+                       else
+                               Text = "Closing";
+
+                       syslog(LOG_INFO, "%s %d listener on '%s'\n",
+                              Text,
                               serviceptr->msock,
                               serviceptr->sockpath);
+               }
+
                 if (serviceptr->msock != -1)
+               {
                        close(serviceptr->msock);
+                       serviceptr->msock = -1;
+               }
+
                /* If it's a Unix domain socket, remove the file. */
                if (serviceptr->sockpath != NULL) {
                        unlink(serviceptr->sockpath);
@@ -969,9 +983,11 @@ void sysdep_master_cleanup(void) {
        CtdlDestroyCleanupHooks();
        CtdlDestroyFixedOutputHooks();  
        CtdlDestroySessionHooks();
+       CtdlDestroyTDAPVetoHooks();
        CtdlDestroyServiceHook();
        CtdlDestroyRoomHooks();
        CtdlDestroySearchHooks();
+       CtdlDestroyDebugTable();
        #ifdef HAVE_BACKTRACE
 ///    eCrash_Uninit();
        #endif
@@ -1051,7 +1067,7 @@ void start_daemon(int unused) {
                        }
                        waitpid(current_child, &status, 0);
                }
-               do_restart = 0;
+
                nFireUpsNonRestart = nFireUps;
                
                /* Exit code 0 means the watcher should exit */
@@ -1084,7 +1100,8 @@ void checkcrash(void)
        if (nFireUpsNonRestart != nFireUps)
        {
                StrBuf *CrashMail;
-
+               const char *msgs[1] = {"crash"};
+               const long lens[1] = {sizeof("crash") - 1};
                CrashMail = NewStrBuf();
                syslog(LOG_ALERT, "Posting crash message\n");
                StrBufPrintf(CrashMail, 
@@ -1095,13 +1112,17 @@ void checkcrash(void)
                        "factor.\n \n"
                        " You can obtain more information about this by enabling core dumps.\n \n"
                        " For more information, please see:\n \n"
-                       " http://citadel.org/doku.php/faq:mastering_your_os:gdb#how.do.i.make.my.system.produce.core-files"
+                       " http://citadel.org/doku.php?id=faq:mastering_your_os:gdb#how.do.i.make.my.system.produce.core-files"
                        "\n \n"
 
                        " If you have already done this, the core dump is likely to be found at %score.%d\n"
                        ,
                        ctdl_run_dir, ForkedPid);
-               CtdlAideMessage(ChrPtr(CrashMail), "Citadel server process terminated unexpectedly");
+               CtdlAideFPMessage(ChrPtr(CrashMail),
+                                 "Citadel server process terminated unexpectedly",
+                                 1, msgs, lens,
+                                 0, 0,
+                                 time(NULL));
                FreeStrBuf(&CrashMail);
        }
 }
@@ -1113,7 +1134,7 @@ void checkcrash(void)
  */
 int convert_login(char NameToConvert[]) {
        struct passwd *pw;
-       int a;
+       unsigned int a;
 
        pw = getpwnam(NameToConvert);
        if (pw == NULL) {
@@ -1130,6 +1151,87 @@ int convert_login(char NameToConvert[]) {
 
 
 
+/*
+ * Track down the descriptor that makes select() fail with EBADF
+ * (worker_thread calls this from its EBADF branch).
+ *
+ * Every candidate descriptor is probed on its own with a zero-timeout
+ * select().  The first idle client session whose socket is bad gets
+ * kill_me set and its client_socket reset to -1, which ends the session
+ * scan.  After that each master socket is probed; a bad one sets
+ * server_shutting_down.
+ */
+void HuntBadSession(void)
+{
+       int highest;
+       CitContext *ptr;
+       fd_set readfds;
+       struct timeval tv;
+       struct ServiceFunctionHook *serviceptr;
+
+       /* First, probe the client sockets, one at a time. */
+       begin_critical_section(S_SESSION_TABLE);
+       for (ptr = ContextList; ptr != NULL; ptr = ptr->next) {
+               if ((ptr->state == CON_SYS) && (ptr->client_socket == 0))
+                       continue;
+               /* Initialize the fdset. */
+               FD_ZERO(&readfds);
+               highest = 0;
+               tv.tv_sec = 0;          /* zero timeout: just poll, never block */
+               tv.tv_usec = 0;
+
+               /* Don't select on dead sessions, only truly idle ones */
+               if (    (ptr->state == CON_IDLE)
+                       && (ptr->kill_me == 0)
+                       && (ptr->client_socket > 0)
+                       ) {
+                       FD_SET(ptr->client_socket, &readfds);
+                       if (ptr->client_socket > highest)
+                               highest = ptr->client_socket;
+
+                       if ((select(highest + 1, &readfds, NULL, NULL, &tv) < 0) &&
+                           (errno == EBADF))
+                       {
+                               /* Gotcha! */
+                               syslog(LOG_EMERG,
+                                      "Killing Session CC[%d] bad FD: [%d:%d] User[%s] Host[%s:%s]\n",
+                                      ptr->cs_pid,
+                                      ptr->client_socket,
+                                      ptr->is_local_socket,
+                                      ptr->curr_user,
+                                      ptr->cs_host,ptr->cs_addr);
+
+                               ptr->kill_me = 1;
+                               ptr->client_socket = -1;
+                               break;
+                       }
+               }
+       }
+       end_critical_section(S_SESSION_TABLE);
+
+       /* Next, probe the master sockets, again one at a time. */
+       for (serviceptr = ServiceHookTable; serviceptr != NULL; serviceptr = serviceptr->next ) {
+
+               /* close_masters() leaves msock at -1; there is nothing to probe then,
+                * and FD_SET() on a negative descriptor is undefined. */
+               if (serviceptr->msock < 0)
+                       continue;
+
+               /*
+                * Start from an empty set on every pass.  Otherwise the set still
+                * holds whatever the probes above left in it (possibly the bad
+                * client descriptor that was just found), or is uninitialized when
+                * the session loop never got to FD_ZERO(), and a healthy listener
+                * would take the blame.
+                */
+               FD_ZERO(&readfds);
+               highest = 0;
+               tv.tv_sec = 0;          /* zero timeout: just poll, never block */
+               tv.tv_usec = 0;
+
+               FD_SET(serviceptr->msock, &readfds);
+               if (serviceptr->msock > highest) {
+                       highest = serviceptr->msock;
+               }
+               if ((select(highest + 1, &readfds, NULL, NULL, &tv) < 0) &&
+                   (errno == EBADF))
+               {
+                       /* Gotcha! server socket dead? commit suicide! */
+                       syslog(LOG_EMERG,
+                              "Found bad FD: %d and its a server socket! Shutting Down!\n",
+                              serviceptr->msock);
+
+                       server_shutting_down = 1;
+                       break;
+               }
+       }
+}
+
+const char *WorkerLogStr = "W";        /* worker_thread() stores this under evConKey via pthread_setspecific(); presumably a log tag marking worker threads -- confirm */
 /* 
  * This loop just keeps going and going and going...
  */
@@ -1146,7 +1248,11 @@ void *worker_thread(void *blah) {
        CitContext *con = NULL;         /* Temporary context pointer */
        int i;
 
+       pthread_mutex_lock(&ThreadCountMutex);
        ++num_workers;
+       pthread_mutex_unlock(&ThreadCountMutex);
+
+       pthread_setspecific(evConKey, WorkerLogStr);
 
        while (!server_shutting_down) {
 
@@ -1221,7 +1327,8 @@ do_select:        force_purge = 0;
                 */
                if (retval < 0) {
                        if (errno == EBADF) {
-                               syslog(LOG_NOTICE, "select() failed: (%s)\n", strerror(errno));
+                               syslog(LOG_EMERG, "select() failed: (%s)\n", strerror(errno));
+                               HuntBadSession ();
                                goto do_select;
                        }
                        if (errno != EINTR) {
@@ -1271,6 +1378,7 @@ do_select:        force_purge = 0;
                                        con = CreateNewContext();
 
                                        /* Assign our new socket number to it. */
+                                       con->tcp_port = serviceptr->tcp_port;
                                        con->client_socket = ssock;
                                        con->h_command_function = serviceptr->h_command_function;
                                        con->h_async_function = serviceptr->h_async_function;
@@ -1327,7 +1435,10 @@ do_select:       force_purge = 0;
 
 SKIP_SELECT:
                /* We're bound to a session */
+               pthread_mutex_lock(&ThreadCountMutex);
                ++active_workers;
+               pthread_mutex_unlock(&ThreadCountMutex);
+
                if (bind_me != NULL) {
                        become_session(bind_me);
 
@@ -1361,11 +1472,23 @@ SKIP_SELECT:
 
                dead_session_purge(force_purge);
                do_housekeeping();
+
+               pthread_mutex_lock(&ThreadCountMutex);
                --active_workers;
+               if ((active_workers + config.c_min_workers < num_workers) &&
+                   (num_workers > config.c_min_workers))
+               {
+                       num_workers--;
+                       pthread_mutex_unlock(&ThreadCountMutex);
+                       return (NULL);
+               }
+               pthread_mutex_unlock(&ThreadCountMutex);
        }
 
        /* If control reaches this point, the server is shutting down */
+       pthread_mutex_lock(&ThreadCountMutex);
        --num_workers;
+       pthread_mutex_unlock(&ThreadCountMutex);
        return(NULL);
 }