Add handler for recovery of bad file descriptors
[citadel.git] / citadel / sysdep.c
index 31a9c7ba150b732ffd594a8ae037aacd7dde9960..f8ff6df46eb0b0c06aa7132e6c80ecdfd12c0bd7 100644 (file)
  * Copyright (c) 1987-2011 by the citadel.org team
  *
  * This program is open source software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
+ * it under the terms of the GNU General Public License, version 3.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #include "sysdep.h"
 #include <sys/select.h>
 #endif
 
-#ifndef HAVE_SNPRINTF
-#include "snprintf.h"
-#endif
-
 #include "ctdl_module.h"
 #include "threads.h"
 #include "user_ops.h"
@@ -918,7 +908,8 @@ int client_getln(char *buf, int bufsize)
 void close_masters (void)
 {
        struct ServiceFunctionHook *serviceptr;
-       
+       const char *Text;
+
        /*
         * close all protocol master sockets
         */
@@ -927,16 +918,37 @@ void close_masters (void)
 
                if (serviceptr->tcp_port > 0)
                {
-                       syslog(LOG_INFO, "Closing listener on port %d\n",
-                               serviceptr->tcp_port);
+                       if (serviceptr->msock == -1)
+                               Text = "not closing again";
+                       else
+                               Text = "Closing";
+                                       
+                       syslog(LOG_INFO, "%s %d listener on port %d\n",
+                              Text,
+                              serviceptr->msock,
+                              serviceptr->tcp_port);
                        serviceptr->tcp_port = 0;
                }
                
                if (serviceptr->sockpath != NULL)
-                       syslog(LOG_INFO, "Closing listener on '%s'\n",
-                               serviceptr->sockpath);
+               {
+                       if (serviceptr->msock == -1)
+                               Text = "not closing again";
+                       else
+                               Text = "Closing";
+
+                       syslog(LOG_INFO, "%s %d listener on '%s'\n",
+                              Text,
+                              serviceptr->msock,
+                              serviceptr->sockpath);
+               }
+
+                if (serviceptr->msock != -1)
+               {
+                       close(serviceptr->msock);
+                       serviceptr->msock = -1;
+               }
 
-               close(serviceptr->msock);
                /* If it's a Unix domain socket, remove the file. */
                if (serviceptr->sockpath != NULL) {
                        unlink(serviceptr->sockpath);
@@ -967,9 +979,11 @@ void sysdep_master_cleanup(void) {
        CtdlDestroyCleanupHooks();
        CtdlDestroyFixedOutputHooks();  
        CtdlDestroySessionHooks();
+       CtdlDestroyTDAPVetoHooks();
        CtdlDestroyServiceHook();
        CtdlDestroyRoomHooks();
        CtdlDestroySearchHooks();
+       CtdlDestroyDebugTable();
        #ifdef HAVE_BACKTRACE
 ///    eCrash_Uninit();
        #endif
@@ -1049,7 +1063,7 @@ void start_daemon(int unused) {
                        }
                        waitpid(current_child, &status, 0);
                }
-               do_restart = 0;
+
                nFireUpsNonRestart = nFireUps;
                
                /* Exit code 0 means the watcher should exit */
@@ -1093,7 +1107,7 @@ void checkcrash(void)
                        "factor.\n \n"
                        " You can obtain more information about this by enabling core dumps.\n \n"
                        " For more information, please see:\n \n"
-                       " http://citadel.org/doku.php/faq:mastering_your_os:gdb#how.do.i.make.my.system.produce.core-files"
+                       " http://citadel.org/doku.php?id=faq:mastering_your_os:gdb#how.do.i.make.my.system.produce.core-files"
                        "\n \n"
 
                        " If you have already done this, the core dump is likely to be found at %score.%d\n"
@@ -1128,6 +1142,87 @@ int convert_login(char NameToConvert[]) {
 
 
 
+/* Probe idle client sockets and master sockets one at a time with a
+ * zero-timeout select() to find the descriptor that triggers EBADF.
+ */
+void HuntBadSession(void)
+{
+       int highest;
+       CitContext *ptr;
+       fd_set readfds;
+       struct timeval tv;
+       struct ServiceFunctionHook *serviceptr;
+
+       /* First, probe the client sockets, one session at a time. */
+       begin_critical_section(S_SESSION_TABLE);
+       for (ptr = ContextList; ptr != NULL; ptr = ptr->next) {
+               if ((ptr->state == CON_SYS) && (ptr->client_socket == 0))
+                       continue;
+               /* Empty the fdset so that only this one socket is tested. */
+               FD_ZERO(&readfds);
+               highest = 0;
+               tv.tv_sec = 0;          /* zero timeout: poll, never block */
+               tv.tv_usec = 0;
+
+               /* Don't select on dead sessions, only truly idle ones */
+               if (    (ptr->state == CON_IDLE)
+                       && (ptr->kill_me == 0)
+                       && (ptr->client_socket > 0)
+                       ) {
+                       FD_SET(ptr->client_socket, &readfds);
+                       if (ptr->client_socket > highest)
+                               highest = ptr->client_socket;
+                       if ((select(highest + 1, &readfds, NULL, NULL, &tv) < 0) &&
+                           (errno == EBADF))
+                       {
+                               /* Gotcha! Flag the session for removal. */
+                               syslog(LOG_EMERG,
+                                      "Killing Session CC[%d] bad FD: [%d:%d] User[%s] Host[%s:%s]\n",
+                                      ptr->cs_pid,
+                                      ptr->client_socket,
+                                      ptr->is_local_socket,
+                                      ptr->curr_user,
+                                      ptr->cs_host,ptr->cs_addr);
+                               ptr->kill_me = 1;
+                               ptr->client_socket = -1;
+                               break;  /* only one bad session is handled per call */
+                       }
+               }
+       }
+       end_critical_section(S_SESSION_TABLE);
+
+       /* Next, probe the master sockets the same way. */
+       for (serviceptr = ServiceHookTable; serviceptr != NULL; serviceptr = serviceptr->next ) {
+               /* Closed listeners are marked -1; FD_SET() on that is undefined. */
+               if (serviceptr->msock < 0)
+                       continue;
+
+               /* Empty the fdset so leftovers from the client scan can't
+                * make a healthy master socket look bad. */
+               FD_ZERO(&readfds);
+               highest = 0;
+               tv.tv_sec = 0;          /* zero timeout: poll, never block */
+               tv.tv_usec = 0;
+
+               FD_SET(serviceptr->msock, &readfds);
+               if (serviceptr->msock > highest) {
+                       highest = serviceptr->msock;
+               }
+               if ((select(highest + 1, &readfds, NULL, NULL, &tv) < 0) &&
+                   (errno == EBADF))
+               {
+                       /* Gotcha! server socket dead? commit suicide! */
+                       syslog(LOG_EMERG,
+                              "Found bad FD: %d and its a server socket! Shutting Down!\n",
+                              serviceptr->msock);
+
+                       server_shutting_down = 1;
+                       break;
+               }
+       }
+}
+
+
 /* 
  * This loop just keeps going and going and going...
  */
@@ -1139,8 +1234,14 @@ void *worker_thread(void *blah) {
        int retval = 0;
        struct timeval tv;
        int force_purge = 0;
+       struct ServiceFunctionHook *serviceptr;
+       int ssock;                      /* Descriptor for client socket */
+       CitContext *con = NULL;         /* Temporary context pointer */
+       int i;
 
+       pthread_mutex_lock(&ThreadCountMutex);
        ++num_workers;
+       pthread_mutex_unlock(&ThreadCountMutex);
 
        while (!server_shutting_down) {
 
@@ -1155,6 +1256,15 @@ do_select:       force_purge = 0;
                FD_ZERO(&readfds);
                highest = 0;
 
+               /* First, add the various master sockets to the fdset. */
+               for (serviceptr = ServiceHookTable; serviceptr != NULL; serviceptr = serviceptr->next ) {
+                       FD_SET(serviceptr->msock, &readfds);
+                       if (serviceptr->msock > highest) {
+                               highest = serviceptr->msock;
+                       }
+               }
+
+               /* Next, add all of the client sockets. */
                begin_critical_section(S_SESSION_TABLE);
                for (ptr = ContextList; ptr != NULL; ptr = ptr->next) {
                        if ((ptr->state == CON_SYS) && (ptr->client_socket == 0))
@@ -1206,7 +1316,8 @@ do_select:        force_purge = 0;
                 */
                if (retval < 0) {
                        if (errno == EBADF) {
-                               syslog(LOG_NOTICE, "select() failed: (%s)\n", strerror(errno));
+                               syslog(LOG_EMERG, "select() failed: (%s)\n", strerror(errno));
+                               HuntBadSession ();
                                goto do_select;
                        }
                        if (errno != EINTR) {
@@ -1231,6 +1342,54 @@ do_select:       force_purge = 0;
                        }
                }
 
+               /* Next, check to see if it's a new client connecting on a master socket. */
+
+               else if ((retval > 0) && (!server_shutting_down)) for (serviceptr = ServiceHookTable; serviceptr != NULL; serviceptr = serviceptr->next) {
+
+                       if (FD_ISSET(serviceptr->msock, &readfds)) {
+                               ssock = accept(serviceptr->msock, NULL, 0);
+                               if (ssock >= 0) {
+                                       syslog(LOG_DEBUG, "New client socket %d", ssock);
+
+                                       /* The master socket is non-blocking but the client
+                                        * sockets need to be blocking, otherwise certain
+                                        * operations barf on FreeBSD.  Not a fatal error.
+                                        */
+                                       if (fcntl(ssock, F_SETFL, 0) < 0) {
+                                               syslog(LOG_EMERG,
+                                                       "citserver: Can't set socket to blocking: %s\n",
+                                                       strerror(errno));
+                                       }
+
+                                       /* New context will be created already
+                                        * set up in the CON_EXECUTING state.
+                                        */
+                                       con = CreateNewContext();
+
+                                       /* Assign our new socket number to it. */
+                                       con->tcp_port = serviceptr->tcp_port;
+                                       con->client_socket = ssock;
+                                       con->h_command_function = serviceptr->h_command_function;
+                                       con->h_async_function = serviceptr->h_async_function;
+                                       con->h_greeting_function = serviceptr->h_greeting_function;
+                                       con->ServiceName = serviceptr->ServiceName;
+                                       
+                                       /* Determine whether it's a local socket */
+                                       if (serviceptr->sockpath != NULL) {
+                                               con->is_local_socket = 1;
+                                       }
+       
+                                       /* Set the SO_REUSEADDR socket option */
+                                       i = 1;
+                                       setsockopt(ssock, SOL_SOCKET, SO_REUSEADDR, &i, sizeof(i));
+                                       con->state = CON_GREETING;
+                                       retval--;
+                                       if (retval == 0)
+                                               break;
+                               }
+                       }
+               }
+
                /* It must be a client socket.  Find a context that has data
                 * waiting on its socket *and* is in the CON_IDLE state.  Any
                 * active sockets other than our chosen one are marked as
@@ -1265,7 +1424,10 @@ do_select:       force_purge = 0;
 
 SKIP_SELECT:
                /* We're bound to a session */
+               pthread_mutex_lock(&ThreadCountMutex);
                ++active_workers;
+               pthread_mutex_unlock(&ThreadCountMutex);
+
                if (bind_me != NULL) {
                        become_session(bind_me);
 
@@ -1299,132 +1461,28 @@ SKIP_SELECT:
 
                dead_session_purge(force_purge);
                do_housekeeping();
+
+               pthread_mutex_lock(&ThreadCountMutex);
                --active_workers;
+               if ((active_workers + config.c_min_workers < num_workers) &&
+                   (num_workers > config.c_min_workers))
+               {
+                       num_workers--;
+                       pthread_mutex_unlock(&ThreadCountMutex);
+                       return (NULL);
+               }
+               pthread_mutex_unlock(&ThreadCountMutex);
        }
 
        /* If control reaches this point, the server is shutting down */
+       pthread_mutex_lock(&ThreadCountMutex);
        --num_workers;
+       pthread_mutex_unlock(&ThreadCountMutex);
        return(NULL);
 }
 
 
 
-
-/*
- * A function to handle selecting on master sockets.
- * In other words it handles new connections.
- * It is a thread.
- */
-void *select_on_master(void *blah)
-{
-       struct ServiceFunctionHook *serviceptr;
-       fd_set master_fds;
-       int highest;
-       struct timeval tv;
-       int ssock;                      /* Descriptor for client socket */
-       CitContext *con = NULL;         /* Temporary context pointer */
-       int m;
-       int i;
-       int retval;
-
-       while (!server_shutting_down) {
-               /* Initialize the fdset. */
-               FD_ZERO(&master_fds);
-               highest = 0;
-
-               /* First, add the various master sockets to the fdset. */
-               for (serviceptr = ServiceHookTable; serviceptr != NULL;
-               serviceptr = serviceptr->next ) {
-                       m = serviceptr->msock;
-                       FD_SET(m, &master_fds);
-                       if (m > highest) {
-                               highest = m;
-                       }
-               }
-
-               if (!server_shutting_down) {
-                       tv.tv_sec = 60;         /* wake up every second if no input */
-                       tv.tv_usec = 0;
-                       retval = select(highest + 1, &master_fds, NULL, NULL, &tv);
-               }
-               else {
-                       retval = -1 ;
-               }
-
-               /* Now figure out who made this select() unblock.
-                * First, check for an error or exit condition.
-                */
-               if (retval < 0) {
-                       if (errno == EBADF) {
-                               syslog(LOG_NOTICE, "select() failed: (%s)\n",
-                                       strerror(errno));
-                               continue;
-                       }
-                       if (errno != EINTR) {
-                               syslog(LOG_EMERG, "Exiting (%s)\n", strerror(errno));
-                               server_shutting_down = 1;
-                       } else {
-#if 0
-                               syslog(LOG_DEBUG, "Interrupted CtdlThreadSelect.\n");
-#endif
-                               if (server_shutting_down) return(NULL);
-                               continue;
-                       }
-               }
-
-               /* Next, check to see if it's a new client connecting
-                * on a master socket.
-                */
-               else if ((retval > 0) && (!server_shutting_down)) for (serviceptr = ServiceHookTable; serviceptr != NULL; serviceptr = serviceptr->next) {
-
-                       if (FD_ISSET(serviceptr->msock, &master_fds)) {
-                               ssock = accept(serviceptr->msock, NULL, 0);
-                               if (ssock >= 0) {
-                                       syslog(LOG_DEBUG, "New client socket %d\n", ssock);
-
-                                       /* The master socket is non-blocking but the client
-                                        * sockets need to be blocking, otherwise certain
-                                        * operations barf on FreeBSD.  Not a fatal error.
-                                        */
-                                       if (fcntl(ssock, F_SETFL, 0) < 0) {
-                                               syslog(LOG_EMERG,
-                                                       "citserver: Can't set socket to blocking: %s\n",
-                                                       strerror(errno));
-                                       }
-
-                                       /* New context will be created already
-                                        * set up in the CON_EXECUTING state.
-                                        */
-                                       con = CreateNewContext();
-
-                                       /* Assign our new socket number to it. */
-                                       con->client_socket = ssock;
-                                       con->h_command_function = serviceptr->h_command_function;
-                                       con->h_async_function = serviceptr->h_async_function;
-                                       con->h_greeting_function = serviceptr->h_greeting_function;
-                                       con->ServiceName = serviceptr->ServiceName;
-                                       
-                                       /* Determine whether it's a local socket */
-                                       if (serviceptr->sockpath != NULL) {
-                                               con->is_local_socket = 1;
-                                       }
-       
-                                       /* Set the SO_REUSEADDR socket option */
-                                       i = 1;
-                                       setsockopt(ssock, SOL_SOCKET, SO_REUSEADDR, &i, sizeof(i));
-                                       con->state = CON_GREETING;
-                                       retval--;
-                                       if (retval == 0)
-                                               break;
-                               }
-                       }
-               }
-       }
-       return NULL;
-}
-
-
-
 /*
  * SyslogFacility()
  * Translate text facility name to syslog.h defined value.