Log a warning message if housekeeping has not run in more than 5 minutes
[citadel.git] / citadel / housekeeping.c
index 09dbad4eb143aca6d3b3fb76b22dcb9706d6823c..dae17c3eab220770de5ad614244704063adb3f7d 100644 (file)
 /*
- * $Id$
- *
  * This file contains miscellaneous housekeeping tasks.
  *
+ * Copyright (c) 1987-2017 by the citadel.org team
+ *
+ * This program is open source software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 3.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  */
 
-#ifdef DLL_EXPORT
-#define IN_LIBCIT
-#endif
-
-#include "sysdep.h"
-#include <stdlib.h>
-#include <unistd.h>
 #include <stdio.h>
-#include <fcntl.h>
-
-#if TIME_WITH_SYS_TIME
-# include <sys/time.h>
-# include <time.h>
-#else
-# if HAVE_SYS_TIME_H
-#  include <sys/time.h>
-# else
-#  include <time.h>
-# endif
-#endif
-
-#include <ctype.h>
-#include <string.h>
-#include <errno.h>
-#include <limits.h>
-#include <sys/types.h>
-#ifdef HAVE_SYS_SELECT_H
-#include <sys/select.h>
-#endif
-#include "tools.h"
-#include "citadel.h"
-#include "server.h"
+#include <libcitadel.h>
+
+#include "ctdl_module.h"
 #include "serv_extensions.h"
-#include "citserver.h"
-#include "config.h"
-#include "housekeeping.h"
-#include "sysdep_decls.h"
 #include "room_ops.h"
-#include "database.h"
-
-
-
-
-/*
- * Terminate idle sessions.  This function pounds through the session table
- * comparing the current time to each session's time-of-last-command.  If an
- * idle session is found it is terminated, then the search restarts at the
- * beginning because the pointer to our place in the list becomes invalid.
- */
-void terminate_idle_sessions(void) {
-       struct CitContext *ccptr;
-       time_t now;
-       int session_to_kill;
-       int killed = 0;
-
-       now = time(NULL);
-       session_to_kill = 0;
-       begin_critical_section(S_SESSION_TABLE);
-       for (ccptr = ContextList; ccptr != NULL; ccptr = ccptr->next) {
-               if (  (ccptr!=CC)
-               && (config.c_sleeping > 0)
-               && (now - (ccptr->lastcmd) > config.c_sleeping) ) {
-                       ccptr->kill_me = 1;
-                       ++killed;
-               }
-       }
-       end_critical_section(S_SESSION_TABLE);
-       lprintf(9, "Terminated %d idle sessions\n", killed);
-}
-
-
+#include "internet_addressing.h"
+#include "journaling.h"
 
 void check_sched_shutdown(void) {
        if ((ScheduledShutdown == 1) && (ContextList == NULL)) {
-               lprintf(3, "Scheduled shutdown initiating.\n");
-               time_to_die = 1;
+               syslog(LOG_NOTICE, "housekeeping: scheduled shutdown initiating");
+               server_shutting_down = 1;
        }
 }
 
 
-
 /*
  * Check (and fix) floor reference counts.  This doesn't need to be done
  * very often, since the counts should remain correct during normal operation.
- * NOTE: this function pair should ONLY be called during startup.  It is NOT
- * thread safe.
  */
-void check_ref_counts_backend(struct room *qrbuf, void *data) {
-       struct floor flbuf;
+void check_ref_counts_backend(struct ctdlroom *qrbuf, void *data) {
 
-       getfloor(&flbuf, qrbuf->QRfloor);
-       ++flbuf.f_ref_count;
-       flbuf.f_flags = flbuf.f_flags | QR_INUSE;
-       putfloor(&flbuf, qrbuf->QRfloor);
+       int *new_refcounts;
+
+       new_refcounts = (int *) data;
+
+       ++new_refcounts[(int)qrbuf->QRfloor];
 }
 
+
 void check_ref_counts(void) {
        struct floor flbuf;
        int a;
 
-       lprintf(7, "Checking floor reference counts\n");
+       int new_refcounts[MAXFLOORS];
+
+       syslog(LOG_DEBUG, "housekeeping: checking floor reference counts");
        for (a=0; a<MAXFLOORS; ++a) {
-               getfloor(&flbuf, a);
-               flbuf.f_ref_count = 0;
-               flbuf.f_flags = flbuf.f_flags & ~QR_INUSE;
-               putfloor(&flbuf, a);
+               new_refcounts[a] = 0;
        }
 
        cdb_begin_transaction();
-       ForEachRoom(check_ref_counts_backend, NULL);
+       CtdlForEachRoom(check_ref_counts_backend, (void *)new_refcounts );
        cdb_end_transaction();
+
+       for (a=0; a<MAXFLOORS; ++a) {
+               lgetfloor(&flbuf, a);
+               flbuf.f_ref_count = new_refcounts[a];
+               if (new_refcounts[a] > 0) {
+                       flbuf.f_flags = flbuf.f_flags | QR_INUSE;
+               }
+               else {
+                       flbuf.f_flags = flbuf.f_flags & ~QR_INUSE;
+               }
+               lputfloor(&flbuf, a);
+               syslog(LOG_DEBUG, "housekeeping: floor %d has %d rooms", a, new_refcounts[a]);
+       }
 }      
 
+
 /*
  * This is the housekeeping loop.  Worker threads come through here after
  * processing client requests but before jumping back into the pool.  Only
  * one instance may run at a time.  Some tasks run on every pass; the "once
  * per minute" tasks run only if over 60 seconds passed since their last run.
  */
+static int housekeeping_in_progress = 0;
+static time_t last_timer = 0L;
 void do_housekeeping(void) {
-       static int housekeeping_in_progress = 0;
-       static time_t last_timer = 0L;
        int do_housekeeping_now = 0;
+       int do_perminute_housekeeping_now = 0;
        time_t now;
 
        /*
@@ -136,17 +92,20 @@ void do_housekeeping(void) {
         * potentially have multiple concurrent mutexes in progress.
         */
        begin_critical_section(S_HOUSEKEEPING);
-       now = time(NULL);
-       if ( (now - last_timer) > (time_t)60 ) {
-               if (housekeeping_in_progress == 0) {
-                       do_housekeeping_now = 1;
-                       housekeeping_in_progress = 1;
-                       last_timer = time(NULL);
-               }
+       if (housekeeping_in_progress == 0) {
+               do_housekeeping_now = 1;
+               housekeeping_in_progress = 1;
        }
        end_critical_section(S_HOUSEKEEPING);
 
+       now = time(NULL);
        if (do_housekeeping_now == 0) {
+               if ( (now - last_timer) > (time_t)300 ) {
+                       syslog(LOG_WARNING,
+                               "housekeeping: WARNING: housekeeping loop has not run for %ld minutes.  Is something stuck?",
+                               ((now - last_timer) / 60)
+                       );
+               }
                return;
        }
 
@@ -155,11 +114,84 @@ void do_housekeeping(void) {
         * loop.  Everything below this point is real work.
         */
 
-       cdb_check_handles();                    /* suggested by Justin Case */
-       PerformSessionHooks(EVT_TIMER);         /* Run any timer hooks */
+       if ( (now - last_timer) > (time_t)60 ) {
+               do_perminute_housekeeping_now = 1;
+               last_timer = time(NULL);
+       }
+
+       /* First, do the "as often as needed" stuff... */
+       JournalRunQueue();
+       PerformSessionHooks(EVT_HOUSE);
+
+       /* Then, do the "once per minute" stuff... */
+       if (do_perminute_housekeeping_now) {
+               cdb_check_handles();                    /* suggested by Justin Case */
+               PerformSessionHooks(EVT_TIMER);         /* Run any timer hooks */
+       }
 
        /*
         * All done.
         */
+       begin_critical_section(S_HOUSEKEEPING);
+       housekeeping_in_progress = 0;
+       end_critical_section(S_HOUSEKEEPING);
+}
+
+
+void CtdlDisableHouseKeeping(void)
+{
+       int ActiveBackgroundJobs;
+       int do_housekeeping_now = 0;
+       struct CitContext *nptr;
+       int nContexts, i;
+
+retry_block_housekeeping:
+       syslog(LOG_INFO, "housekeeping: trying to disable services");
+       begin_critical_section(S_HOUSEKEEPING);
+       if (housekeeping_in_progress == 0) {
+               do_housekeeping_now = 1;
+               housekeeping_in_progress = 1;
+       }
+       end_critical_section(S_HOUSEKEEPING);
+       if (do_housekeeping_now == 0) {
+               usleep(1000000);
+               goto retry_block_housekeeping;
+       }
+       
+       syslog(LOG_INFO, "housekeeping: checking for running server jobs");
+
+retry_wait_for_contexts:
+       /* So that we don't keep the context list locked for a long time
+        * we create a copy of it first
+        */
+       ActiveBackgroundJobs = 0;
+       nptr = CtdlGetContextArray(&nContexts) ;
+       if (nptr)
+       {
+               for (i=0; i<nContexts; i++) 
+               {
+                       if ((nptr[i].state != CON_SYS) || (nptr[i].lastcmd == 0))
+                               continue;
+                       ActiveBackgroundJobs ++;
+                       syslog(LOG_INFO, "housekeeping: job CC[%d] active; use TERM if you don't want to wait for it", nptr[i].cs_pid);
+               
+               }
+       
+               free(nptr);
+
+       }
+       if (ActiveBackgroundJobs != 0) {
+               syslog(LOG_INFO, "housekeeping: found %d running jobs, need to wait", ActiveBackgroundJobs);
+               usleep(5000000);
+               goto retry_wait_for_contexts;
+       }
+       syslog(LOG_INFO, "housekeeping: disabled now.");
+}
+
+
+void CtdlEnableHouseKeeping(void)
+{
+       begin_critical_section(S_HOUSEKEEPING);
        housekeeping_in_progress = 0;
+       end_critical_section(S_HOUSEKEEPING);
 }