Log a warning message if housekeeping has not run in more than 5 minutes
[citadel.git] / citadel / housekeeping.c
index 108410af29a79a7924526d2f214ba5dea868ffea..dae17c3eab220770de5ad614244704063adb3f7d 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file contains miscellaneous housekeeping tasks.
  *
- * Copyright (c) 1987-2011 by the citadel.org team
+ * Copyright (c) 1987-2017 by the citadel.org team
  *
  * This program is open source software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, version 3.
  * GNU General Public License for more details.
  */
 
-#include "sysdep.h"
-#include <stdlib.h>
-#include <unistd.h>
 #include <stdio.h>
-#include <fcntl.h>
-
-#if TIME_WITH_SYS_TIME
-# include <sys/time.h>
-# include <time.h>
-#else
-# if HAVE_SYS_TIME_H
-#  include <sys/time.h>
-# else
-#  include <time.h>
-# endif
-#endif
-
-#include <ctype.h>
-#include <string.h>
-#include <errno.h>
-#include <limits.h>
-#include <sys/types.h>
-#ifdef HAVE_SYS_SELECT_H
-#include <sys/select.h>
-#endif
-#include <syslog.h>
 #include <libcitadel.h>
-#include "citadel.h"
-#include "server.h"
+
+#include "ctdl_module.h"
 #include "serv_extensions.h"
-#include "citserver.h"
-#include "config.h"
-#include "housekeeping.h"
-#include "sysdep_decls.h"
 #include "room_ops.h"
-#include "database.h"
-#include "msgbase.h"
+#include "internet_addressing.h"
 #include "journaling.h"
 
-#include "ctdl_module.h"
-#include "threads.h"
-
 void check_sched_shutdown(void) {
+       /* Flag the server as shutting down only once a shutdown is scheduled and ContextList is empty. */
        if ((ScheduledShutdown == 1) && (ContextList == NULL)) {
-               syslog(LOG_NOTICE, "Scheduled shutdown initiating.\n");
+               syslog(LOG_NOTICE, "housekeeping: scheduled shutdown initiating");
                server_shutting_down = 1;
        }
 }
 
 
-
 /*
  * Check (and fix) floor reference counts.  This doesn't need to be done
  * very often, since the counts should remain correct during normal operation.
@@ -76,13 +42,14 @@ void check_ref_counts_backend(struct ctdlroom *qrbuf, void *data) {
        ++new_refcounts[(int)qrbuf->QRfloor];
 }
 
+
 void check_ref_counts(void) {
        struct floor flbuf;
        int a;
 
        int new_refcounts[MAXFLOORS];
 
-       syslog(LOG_DEBUG, "Checking floor reference counts\n");
+       syslog(LOG_DEBUG, "housekeeping: checking floor reference counts");
        for (a=0; a<MAXFLOORS; ++a) {
                new_refcounts[a] = 0;
        }
@@ -101,10 +68,11 @@ void check_ref_counts(void) {
                        flbuf.f_flags = flbuf.f_flags & ~QR_INUSE;
                }
                lputfloor(&flbuf, a);
-               syslog(LOG_DEBUG, "Floor %d: %d rooms\n", a, new_refcounts[a]);
+               syslog(LOG_DEBUG, "housekeeping: floor %d has %d rooms", a, new_refcounts[a]);
        }
 }      
 
+
 /*
  * This is the housekeeping loop.  Worker threads come through here after
  * processing client requests but before jumping back into the pool.  We
@@ -130,7 +98,14 @@ void do_housekeeping(void) {
        }
        end_critical_section(S_HOUSEKEEPING);
 
+       now = time(NULL);
        if (do_housekeeping_now == 0) {
+               if ( (now - last_timer) > (time_t)300 ) {
+                       syslog(LOG_WARNING,
+                               "housekeeping: WARNING: housekeeping loop has not run for %ld minutes.  Is something stuck?",
+                               ((now - last_timer) / 60)
+                       );
+               }
                return;
        }
 
@@ -139,7 +114,6 @@ void do_housekeeping(void) {
         * loop.  Everything below this point is real work.
         */
 
-       now = time(NULL);
        if ( (now - last_timer) > (time_t)60 ) {
                do_perminute_housekeeping_now = 1;
                last_timer = time(NULL);
@@ -162,3 +136,62 @@ void do_housekeeping(void) {
        housekeeping_in_progress = 0;
        end_critical_section(S_HOUSEKEEPING);
 }
+
+
+/*
+ * Set housekeeping_in_progress (retrying every second while it is already set),
+ * then wait for active background jobs.  CtdlEnableHouseKeeping() clears it.
+ */
+void CtdlDisableHouseKeeping(void)
+{
+       int ActiveBackgroundJobs;
+       int do_housekeeping_now = 0;
+       struct CitContext *nptr;
+       int nContexts, i;
+
+retry_block_housekeeping:
+       syslog(LOG_INFO, "housekeeping: trying to disable services");
+       begin_critical_section(S_HOUSEKEEPING);
+       if (housekeeping_in_progress == 0) {
+               do_housekeeping_now = 1;
+               housekeeping_in_progress = 1;
+       }
+       end_critical_section(S_HOUSEKEEPING);
+       if (do_housekeeping_now == 0) {
+               /* The flag was already set by someone else; try again in one second. */
+               usleep(1000000);
+               goto retry_block_housekeeping;
+       }
+
+       syslog(LOG_INFO, "housekeeping: checking for running server jobs");
+
+retry_wait_for_contexts:
+       /* Work on a copy so that the context list is not kept locked for a long time. */
+       ActiveBackgroundJobs = 0;
+       nptr = CtdlGetContextArray(&nContexts);
+       if (nptr) {
+               for (i=0; i<nContexts; i++) {
+                       /* Only CON_SYS contexts with a nonzero lastcmd count as running jobs. */
+                       if ((nptr[i].state != CON_SYS) || (nptr[i].lastcmd == 0)) {
+                               continue;
+                       }
+                       ActiveBackgroundJobs++;
+                       syslog(LOG_INFO, "housekeeping: job CC[%d] active; use TERM if you don't want to wait for it", nptr[i].cs_pid);
+               }
+               free(nptr);
+       }
+       if (ActiveBackgroundJobs != 0) {
+               syslog(LOG_INFO, "housekeeping: found %d running jobs, need to wait", ActiveBackgroundJobs);
+               usleep(5000000);
+               goto retry_wait_for_contexts;
+       }
+       syslog(LOG_INFO, "housekeeping: disabled now.");
+}
+
+
+/* Clear housekeeping_in_progress inside the S_HOUSEKEEPING critical section. */
+void CtdlEnableHouseKeeping(void) {
+       begin_critical_section(S_HOUSEKEEPING);
+       housekeeping_in_progress = 0;
+       end_critical_section(S_HOUSEKEEPING);
+}