Prevent the creation of new threads if system load climbs at shutdown
[citadel.git] / citadel / server_main.c
index 3c610aba915ae23e95de5a140df1216becc5f86d..27942365d7ba7cd90aa37c0e6dcd794de8b6fda8 100644 (file)
 #include <errno.h>
 #include <stdarg.h>
 #include <grp.h>
+#include <pwd.h>
 #ifdef HAVE_PTHREAD_H
 #include <pthread.h>
 #endif
 #ifdef HAVE_SYS_PRCTL_H
 #include <sys/prctl.h>
 #endif
+#include <libcitadel.h>
 #include "citadel.h"
 #include "server.h"
 #include "serv_extensions.h"
 #include "citserver.h"
 #include "support.h"
 #include "config.h"
+#include "control.h"
 #include "database.h"
+#include "user_ops.h"
 #include "housekeeping.h"
-#include "tools.h"
+#include "citadel_dirs.c"
+
+#include "modules_init.h"
+#include "ecrash.h"
 
 #ifdef HAVE_SYS_SELECT_H
 #include <sys/select.h>
 #ifndef HAVE_SNPRINTF
 #include "snprintf.h"
 #endif
+const char *CitadelServiceUDS="citadel-UDS";
+const char *CitadelServiceTCP="citadel-TCP";
+
+
+void go_threading(void);
 
-int running_as_daemon = 0;
 
 /*
  * Here's where it all begins.
@@ -70,22 +81,37 @@ int running_as_daemon = 0;
 int main(int argc, char **argv)
 {
        char facility[32];
-       int a, i;                       /* General-purpose variables */
-       struct passwd *pw;
+       int a;                  /* General-purpose variables */
+       struct passwd pw, *pwp = NULL;
+       char pwbuf[SIZ];
        int drop_root_perms = 1;
        size_t size;
-
+       int relh=0;
+       int home=0;
+       int dbg=0;
+       char relhome[PATH_MAX]="";
+       char ctdldir[PATH_MAX]=CTDLDIR;
+#ifdef HAVE_RUN_DIR
+       struct stat filestats;
+#endif
+#ifdef HAVE_BACKTRACE
+       eCrashParameters params;
+//     eCrashSymbolTable symbol_table;
+#endif
+       /* Initialise semaphores here (patch by Matt and davew).
+        * This is done first because they are needed by CtdlLogPrintf for thread safety.
+        */
+       CtdlInitBase64Table();
+       InitialiseSemaphores();
+       
        /* initialize the master context */
        InitializeMasterCC();
 
-       /* set default syslog facility */
-       syslog_facility = LOG_DAEMON;
-
        /* parse command-line arguments */
        for (a=1; a<argc; ++a) {
 
                if (!strncmp(argv[a], "-l", 2)) {
-                       safestrncpy(facility, argv[a], sizeof(facility));
+                       safestrncpy(facility, &argv[a][2], sizeof(facility));
                        syslog_facility = SyslogFacility(facility);
                        enable_syslog = 1;
                }
@@ -101,9 +127,14 @@ int main(int argc, char **argv)
                }
 
                else if (!strncmp(argv[a], "-h", 2)) {
-                       safestrncpy(ctdl_home_directory, &argv[a][2],
-                                   sizeof ctdl_home_directory);
+                       relh=argv[a][2]!='/';
+                       if (!relh) safestrncpy(ctdl_home_directory, &argv[a][2],
+                                                                  sizeof ctdl_home_directory);
+                       else
+                               safestrncpy(relhome, &argv[a][2],
+                                                       sizeof relhome);
                        home_specified = 1;
+                       home=1;
                }
 
                else if (!strncmp(argv[a], "-t", 2)) {
@@ -114,6 +145,10 @@ int main(int argc, char **argv)
                        do_defrag = 1;
                }
 
+               else if (!strncmp(argv[a], "-D", 2)) {
+                       dbg = 1;
+               }
+
                /* -r tells the server not to drop root permissions. don't use
                 * this unless you know what you're doing. this should be
                 * removed in the next release if it proves unnecessary. */
@@ -122,10 +157,10 @@ int main(int argc, char **argv)
 
                /* any other parameter makes it crash and burn */
                else {
-                       lprintf(CTDL_EMERG,     "citserver: usage: "
+                       CtdlLogPrintf(CTDL_EMERG,       "citserver: usage: "
                                        "citserver "
                                        "[-lLogFacility] "
-                                       "[-d] [-f]"
+                                       "[-d] [-f] [-D] "
                                        " [-tTraceFile]"
                                        " [-xLogLevel] [-hHomeDir]\n");
                        exit(1);
@@ -133,44 +168,81 @@ int main(int argc, char **argv)
 
        }
 
+       calc_dirs_n_files(relh, home, relhome, ctdldir, dbg);
        /* daemonize, if we were asked to */
        if (running_as_daemon) {
                start_daemon(0);
                drop_root_perms = 1;
        }
 
-       /* initialize the syslog facility */
+#ifdef HAVE_BACKTRACE
+       bzero(&params, sizeof(params));
+       params.filename = file_pid_paniclog;
+       panic_fd=open(file_pid_paniclog, O_APPEND|O_CREAT|O_DIRECT);
+       params.filep = fopen(file_pid_paniclog, "a+");
+       params.debugLevel = ECRASH_DEBUG_VERBOSE;
+       params.dumpAllThreads = TRUE;
+       params.useBacktraceSymbols = 1;
+///    BuildSymbolTable(&symbol_table);
+//     params.symbolTable = &symbol_table;
+       params.signals[0]=SIGSEGV;
+       params.signals[1]=SIGILL;
+       params.signals[2]=SIGBUS;
+       params.signals[3]=SIGABRT;
+
+       eCrash_Init(&params);
+               
+       eCrash_RegisterThread("MasterThread", 0);
+
+///    signal(SIGSEGV, cit_panic_backtrace);
+#endif
+       /* Initialize the syslogger.  Yes, we are really using 0 as the
+        * facility, because we are going to bitwise-OR the facility to
+        * the severity of each message, allowing us to write to other
+        * facilities when we need to...
+        */
        if (enable_syslog) {
-               if (running_as_daemon) {
-                       openlog("citadel", LOG_NDELAY, syslog_facility);
-               }
-               else {
-                       openlog("citadel", LOG_PERROR|LOG_NDELAY,
-                               syslog_facility);
-               }
+               openlog("citadel", LOG_NDELAY, 0);
                setlogmask(LOG_UPTO(verbosity));
        }
        
        /* Tell 'em who's in da house */
-       lprintf(CTDL_NOTICE, "\n");
-       lprintf(CTDL_NOTICE, "\n");
-       lprintf(CTDL_NOTICE,
+       CtdlLogPrintf(CTDL_NOTICE, "\n");
+       CtdlLogPrintf(CTDL_NOTICE, "\n");
+       CtdlLogPrintf(CTDL_NOTICE,
                "*** Citadel server engine v%d.%02d ***\n",
                (REV_LEVEL/100), (REV_LEVEL%100));
-       lprintf(CTDL_NOTICE,
-               "Copyright (C) 1987-2005 by the Citadel development team.\n");
-       lprintf(CTDL_NOTICE,
+       CtdlLogPrintf(CTDL_NOTICE,
+               "Copyright (C) 1987-2007 by the Citadel development team.\n");
+       CtdlLogPrintf(CTDL_NOTICE,
                "This program is distributed under the terms of the GNU "
                "General Public License.\n");
-       lprintf(CTDL_NOTICE, "\n");
-       lprintf(CTDL_DEBUG, "Called as: %s\n", argv[0]);
+       CtdlLogPrintf(CTDL_NOTICE, "\n");
+       CtdlLogPrintf(CTDL_DEBUG, "Called as: %s\n", argv[0]);
+       CtdlLogPrintf(CTDL_INFO, "%s\n", libcitadel_version_string());
 
        /* Load site-specific parameters, and set the ipgm secret */
-       lprintf(CTDL_INFO, "Loading citadel.config\n");
+       CtdlLogPrintf(CTDL_INFO, "Loading citadel.config\n");
        get_config();
        config.c_ipgm_secret = rand();
        put_config();
 
+#ifdef HAVE_RUN_DIR
+       /* On some distributions the run directory gets purged on startup, so we need to recreate it. */
+
+       if (stat(ctdl_run_dir, &filestats)==-1){
+#ifdef SOLARIS_GETPWUID
+               pwp = getpwuid_r(config.c_ctdluid, &pw, pwbuf, sizeof(pwbuf));
+#else
+               getpwuid_r(config.c_ctdluid, &pw, pwbuf, sizeof(pwbuf), &pwp);
+#endif
+               mkdir(ctdl_run_dir, 0755);
+               chown(ctdl_run_dir, config.c_ctdluid, (pwp==NULL)?-1:pw.pw_gid);
+       }
+                       
+
+#endif
+
        /* Initialize... */
        init_sysdep();
 
@@ -179,14 +251,18 @@ int main(int argc, char **argv)
         */
        master_startup();
 
+       CtdlLogPrintf(CTDL_INFO, "Acquiring control record\n");
+       get_control();
+
        /*
         * Bind the server to a Unix-domain socket.
         */
        CtdlRegisterServiceHook(0,
-                               "citadel.socket",
+                               file_citadel_socket,
                                citproto_begin_session,
                                do_command_loop,
-                               do_async_loop);
+                               do_async_loop,
+                               CitadelServiceUDS);
 
        /*
         * Bind the server to our favorite TCP port (usually 504).
@@ -195,33 +271,59 @@ int main(int argc, char **argv)
                                NULL,
                                citproto_begin_session,
                                do_command_loop,
-                               do_async_loop);
+                               do_async_loop,
+                               CitadelServiceTCP);
 
+                               
+       /*
+        * Run any upgrade entry points
+        */
+       CtdlLogPrintf(CTDL_INFO, "Upgrading modules.\n");
+       upgrade_modules();
+       
+       
        /*
         * Load any server-side extensions available here.
         */
-       lprintf(CTDL_INFO, "Initializing server extensions\n");
+       CtdlLogPrintf(CTDL_INFO, "Initializing server extensions\n");
        size = strlen(ctdl_home_directory) + 9;
-       initialize_server_extensions();
+       
+       initialise_modules(0);
+       
+       
+
+       /*
+        * If we need host auth, start our chkpwd daemon.
+        */
+       if (config.c_auth_mode == 1) {
+               start_chkpwd_daemon();
+       }
 
        /*
         * Now that we've bound the sockets, change to the Citadel user id and its
         * corresponding group ids
         */
        if (drop_root_perms) {
-               if ((pw = getpwuid(CTDLUID)) == NULL)
-                       lprintf(CTDL_CRIT, "WARNING: getpwuid(%ld): %s\n"
+               cdb_chmod_data();       /* make sure we own our data files */
+
+#ifdef SOLARIS_GETPWUID
+               pwp = getpwuid_r(config.c_ctdluid, &pw, pwbuf, sizeof(pwbuf));
+#else
+               getpwuid_r(config.c_ctdluid, &pw, pwbuf, sizeof(pwbuf), &pwp);
+#endif
+               if (pwp == NULL)
+                       CtdlLogPrintf(CTDL_CRIT, "WARNING: getpwuid(%ld): %s\n"
                                   "Group IDs will be incorrect.\n", (long)CTDLUID,
                                strerror(errno));
                else {
-                       initgroups(pw->pw_name, pw->pw_gid);
-                       if (setgid(pw->pw_gid))
-                               lprintf(CTDL_CRIT, "setgid(%ld): %s\n", (long)pw->pw_gid,
+                       initgroups(pw.pw_name, pw.pw_gid);
+                       if (setgid(pw.pw_gid))
+                               CtdlLogPrintf(CTDL_CRIT, "setgid(%ld): %s\n", (long)pw.pw_gid,
                                        strerror(errno));
                }
-               lprintf(CTDL_INFO, "Changing uid to %ld\n", (long)CTDLUID);
+               CtdlLogPrintf(CTDL_INFO, "Changing uid to %ld\n", (long)CTDLUID);
                if (setuid(CTDLUID) != 0) {
-                       lprintf(CTDL_CRIT, "setuid() failed: %s\n", strerror(errno));
+                       CtdlLogPrintf(CTDL_CRIT, "setuid() failed: %s\n", strerror(errno));
                }
 #if defined (HAVE_SYS_PRCTL_H) && defined (PR_SET_DUMPABLE)
                prctl(PR_SET_DUMPABLE, 1);
@@ -231,26 +333,127 @@ int main(int argc, char **argv)
        /* We want to check for idle sessions once per minute */
        CtdlRegisterSessionHook(terminate_idle_sessions, EVT_TIMER);
 
+       go_threading();
+       
+       
+       master_cleanup(exit_signal);
+       return(0);
+}
+/* go_threading() - initialise the thread system, start the worker threads, run the second module-init pass,
+ * then loop in the calling thread (resizing the worker pool and garbage-collecting threads) until
+ * CtdlThreadGetCount() returns 0, and finally call ctdl_thread_internal_cleanup().  No arguments, no return value. */
+void go_threading(void)
+{
+       int i;
+       struct CtdlThreadNode *last_worker;
+       
+       /*
+        * Initialise the thread system
+        */
+       ctdl_thread_internal_init();
        /*
         * Now create a bunch of worker threads.
         */
-       lprintf(CTDL_DEBUG, "Starting %d worker threads\n",
-               config.c_min_workers-1);
-       begin_critical_section(S_WORKER_LIST);
-       for (i=0; i<(config.c_min_workers-1); ++i) {
-               create_worker();
-       }
-       end_critical_section(S_WORKER_LIST);
+       CtdlLogPrintf(CTDL_DEBUG, "Starting %d worker threads\n", config.c_min_workers);
+       begin_critical_section(S_THREAD_LIST);
+       i=0;    /* Always start at least 1 worker thread: the do/while body runs once before the count is tested */
+       do
+       {
+               ctdl_internal_create_thread("Worker Thread", CTDLTHREAD_BIGSTACK + CTDLTHREAD_WORKER, worker_thread, NULL);
+       } while (++i < config.c_min_workers);
+       end_critical_section(S_THREAD_LIST);
+
+       /* Second call to module init functions now that threading is up */
+       initialise_modules(1);
 
-       /* Create the indexer thread. */
-       create_indexer_thread();
+       /* This thread is now used for garbage collection of other threads in the thread list.
+        * NOTE(review): the log call below prints pthread_self() with %d; pthread_t is not guaranteed to be an int - confirm.
+        */
+       CtdlLogPrintf(CTDL_INFO, "Startup thread %d becoming garbage collector,\n", pthread_self());
 
-       /* This thread is now useless.  It can't be turned into a worker
-        * thread because its stack is too small, but it can't be killed
-        * either because the whole server process would exit.  So we just
-        * join to the first worker thread and exit when it exits.
+       /*
+        * We do a lot of locking and unlocking of the thread list in here.
+        * We do this so that we can repeatedly release time for other threads
+        * that may be waiting on the thread list.
+        * We are a low priority thread so we can afford to do this
         */
-       pthread_join(worker_list->tid, NULL);
-       master_cleanup(0);
-       return(0);
+       
+       while (CtdlThreadGetCount())
+       {
+               if (exit_signal)
+                       CtdlThreadStopAll();
+               check_sched_shutdown();
+               begin_critical_section(S_THREAD_LIST);
+               ctdl_thread_internal_calc_loadavg();
+               end_critical_section(S_THREAD_LIST);
+       
+               /* Reduce the size of the worker thread pool if necessary. NOTE(review): this test reads CtdlThreadWorkerAvg directly while the grow test below calls CtdlThreadGetWorkerAvg() - confirm that is intended. */
+               if ((CtdlThreadGetWorkers() > config.c_min_workers) && (CtdlThreadWorkerAvg < 20))
+               {
+                       /* Pick the first list entry flagged CTDLTHREAD_WORKER whose state is above CTDL_THREAD_STOPPING and ask it to stop, as we no longer need it. NOTE(review): last_worker is used after the list lock is released - confirm this is safe. */
+                       begin_critical_section(S_THREAD_LIST);
+                       last_worker = CtdlThreadList;
+                       while (last_worker)
+                       {
+                               pthread_mutex_lock(&last_worker->ThreadMutex);
+                               if (last_worker->flags & CTDLTHREAD_WORKER && last_worker->state > CTDL_THREAD_STOPPING)
+                               {
+                                       pthread_mutex_unlock(&last_worker->ThreadMutex);
+                                       break;
+                               }
+                               pthread_mutex_unlock(&last_worker->ThreadMutex);
+                               last_worker = last_worker->next;
+                       }
+                       end_critical_section(S_THREAD_LIST);
+                       if (last_worker)
+                       {
+#ifdef WITH_THREADLOG
+                               CtdlLogPrintf(CTDL_DEBUG, "Thread system, stopping excess worker thread \"%s\" (%ld).\n",
+                                       last_worker->name,
+                                       last_worker->tid
+                                       );
+#endif
+                               CtdlThreadStop(last_worker);
+                       }
+               }
+       
+               /*
+                * If the worker average is above 60 and the load average is below 90, start five more workers (only while below config.c_max_workers).
+                * Nothing is started unless CT->state is above CTDL_THREAD_STOP_REQ (presumably: no stop has been requested of this thread - confirm).
+                */
+               /* FIXME: come up with a better way to dynamically alter the number of threads
+                * based on the system load
+                */
+//             if ((CtdlThreadGetWorkers() < config.c_max_workers) && (CtdlThreadGetWorkers() < num_sessions))
+               // && (CtdlThreadLoadAvg < 90) )
+               if ((CtdlThreadGetWorkers() < config.c_max_workers) && (CtdlThreadGetWorkerAvg() > 60) && (CtdlThreadGetLoadAvg() < 90) && (CT->state > CTDL_THREAD_STOP_REQ))
+               {
+                       for (i=0; i<5 ; i++)
+//                     for (i=0; i< (num_sessions - CtdlThreadGetWorkers()) ; i++)
+//                     for (i=0; i< (10 - (55 - CtdlThreadWorkerAvg) / CtdlThreadWorkerAvg / CtdlThreadGetWorkers()) ; i++)
+                       {
+//                             begin_critical_section(S_THREAD_LIST);
+                               CtdlThreadCreate("Worker Thread",
+                                       CTDLTHREAD_BIGSTACK + CTDLTHREAD_WORKER,
+                                       worker_thread,
+                                       NULL
+                                       );
+//                             end_critical_section(S_THREAD_LIST);
+                       }
+               }
+               
+               CtdlThreadGC();
+               
+               if (CtdlThreadGetCount() <= 1) // Shutting down: at most one thread is left, so run the garbage collector once more
+               {
+                       CtdlThreadGC();
+               }
+               
+               if (CtdlThreadGetCount())
+                       CtdlThreadSleep(1);
+       }
+       /*
+        * If the above loop exits we must be shutting down since we obviously have no threads
+        */
+       ctdl_thread_internal_cleanup();
+}