Log a warning message if housekeeping has not run in more than 5 minutes
[citadel.git] / citadel / housekeeping.c
index ac6d8882e2cb50cc0cc865f570d49b3f6291b2dc..dae17c3eab220770de5ad614244704063adb3f7d 100644 (file)
 /*
- * This file contains housekeeping tasks which periodically
- * need to be executed.  It keeps a nice little queue...
+ * This file contains miscellaneous housekeeping tasks.
  *
- * $Id$
+ * Copyright (c) 1987-2017 by the citadel.org team
+ *
+ * This program is open source software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 3.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  */
 
-#include "sysdep.h"
-#include <stdlib.h>
-#include <unistd.h>
 #include <stdio.h>
-#include <fcntl.h>
-#include <time.h>
-#include <ctype.h>
-#include <string.h>
-#include <errno.h>
-#include <limits.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#ifdef HAVE_SYS_SELECT_H
-#include <sys/select.h>
-#endif
-#ifdef HAVE_PTHREAD_H
-#include <pthread.h>
-#endif
-#include "tools.h"
-#include "citadel.h"
-#include "server.h"
-#include "citserver.h"
-#include "config.h"
-#include "housekeeping.h"
-#include "sysdep_decls.h"
-#include "room_ops.h"
+#include <libcitadel.h>
 
+#include "ctdl_module.h"
+#include "serv_extensions.h"
+#include "room_ops.h"
+#include "internet_addressing.h"
+#include "journaling.h"
 
-int housepipe[2];      /* This is the queue for housekeeping tasks */
+/*
+ * If a scheduled shutdown has been requested and the context list is empty,
+ * log the event and raise the server_shutting_down flag.
+ */
+void check_sched_shutdown(void) {
+       if (ScheduledShutdown != 1) {
+               return;         /* no scheduled shutdown is pending */
+       }
+       if (ContextList != NULL) {
+               return;         /* the context list is not empty yet */
+       }
+
+       syslog(LOG_NOTICE, "housekeeping: scheduled shutdown initiating");
+       server_shutting_down = 1;
+}
 
 
 /*
- * Terminate idle sessions.  This function pounds through the session table
- * comparing the current time to each session's time-of-last-command.  If an
- * idle session is found it is terminated, then the search restarts at the
- * beginning because the pointer to our place in the list becomes invalid.
+ * Check (and fix) floor reference counts.  This doesn't need to be done
+ * very often, since the counts should remain correct during normal operation.
  */
-void terminate_idle_sessions(void) {
-       struct CitContext *ccptr;
-       time_t now;
-       int session_to_kill;
-
-       do {
-               now = time(NULL);
-               session_to_kill = 0;
-               begin_critical_section(S_SESSION_TABLE);
-               for (ccptr = ContextList; ccptr != NULL; ccptr = ccptr->next) {
-                       if (  (ccptr!=CC)
-                       && (config.c_sleeping > 0)
-                       && (now - (ccptr->lastcmd) > config.c_sleeping) ) {
-                               session_to_kill = ccptr->cs_pid;
-                               }
-                       }
-               end_critical_section(S_SESSION_TABLE);
-               if (session_to_kill > 0) {
-                       lprintf(3, "Session %d timed out.  Terminating it...\n",
-                               session_to_kill);
-                       kill_session(session_to_kill);
-                       }
-               } while(session_to_kill > 0);
-       }
+/*
+ * Per-room callback used by check_ref_counts().  'data' points to an array
+ * of MAXFLOORS ints; the element for this room's floor is incremented.
+ * A room whose floor number falls outside that array is logged and skipped,
+ * so that a damaged room record cannot cause a write outside the array.
+ */
+void check_ref_counts_backend(struct ctdlroom *qrbuf, void *data) {
 
+       int *new_refcounts;
+       int floor_num;
 
+       new_refcounts = (int *) data;
+       floor_num = (int) qrbuf->QRfloor;
 
-void check_sched_shutdown(void) {
-       if ((ScheduledShutdown == 1) && (ContextList == NULL)) {
-               lprintf(3, "Scheduled shutdown initiating.\n");
-               master_cleanup();
-       }
+       /* The caller's array has exactly MAXFLOORS slots. */
+       if ((floor_num < 0) || (floor_num >= MAXFLOORS)) {
+               syslog(LOG_WARNING, "housekeeping: room has invalid floor number %d; not counted", floor_num);
+               return;
+       }
+       ++new_refcounts[floor_num];
 }
 
 
+void check_ref_counts(void) {  /* recount the rooms on each floor and rewrite every floor record */
+       struct floor flbuf;     /* floor record currently being rewritten */
+       int a;                  /* floor number, 0 .. MAXFLOORS-1 */
+
+       int new_refcounts[MAXFLOORS];   /* rooms found on each floor, indexed by floor number */
 
-/*
- * This is the main loop for the housekeeping thread.  It remains active
- * during the entire run of the server.
- */
-void housekeeping_loop(void) {
-       long flags;
-        struct timeval tv;
-        fd_set readfds;
-        int did_something;
-       char house_cmd[256];    /* Housekeep cmds are always 256 bytes long */
-
-       if (pipe(housepipe) != 0) {
-               lprintf(1, "FATAL ERROR: can't create housekeeping pipe: %s\n",
-                       strerror(errno));
-               exit(0);
+       syslog(LOG_DEBUG, "housekeeping: checking floor reference counts");
+       for (a=0; a<MAXFLOORS; ++a) {   /* start every floor at zero */
+               new_refcounts[a] = 0;
        }
 
-       flags = (long) fcntl(housepipe[1], F_GETFL);
-       flags |= O_NONBLOCK;
-       fcntl(housepipe[1], F_SETFL, flags);
-
-       while(1) {
-               do {
-                       did_something = 0;
-                       tv.tv_sec = HOUSEKEEPING_WAKEUP;
-                       tv.tv_usec = 0;
-                       FD_ZERO(&readfds);
-                       FD_SET(housepipe[0], &readfds);
-                       select(housepipe[0] + 1, &readfds, 0L, 0L, &tv);
-                       if (FD_ISSET(housepipe[0], &readfds)) {
-                               did_something = 1;
-                       }
-
-                       if (did_something) {
-                               read(housepipe[0], house_cmd, 256);
-                       }
-                       else {
-                               memset(house_cmd, 0, 256);
-                               strcpy(house_cmd, "MINUTE");
-                       }
-
-
-                       /* Do whatever this cmd requires */
-                       if (!strcmp(house_cmd, "MINUTE")) {
-                               terminate_idle_sessions();
-                       }
-
-                       else if (!strcmp(house_cmd, "SCHED_SHUTDOWN")) {
-                               check_sched_shutdown();
-                       }
-
-                       else {
-                               lprintf(7, "Unknown housekeeping command\n");
-                       }
-
-               } while (did_something);
+       cdb_begin_transaction();
+       CtdlForEachRoom(check_ref_counts_backend, (void *)new_refcounts );      /* the callback adds one to the slot for each room's floor */
+       cdb_end_transaction();
+
+       for (a=0; a<MAXFLOORS; ++a) {
+               lgetfloor(&flbuf, a);   /* NOTE(review): presumably loads the record and holds a lock until lputfloor() -- confirm */
+               flbuf.f_ref_count = new_refcounts[a];   /* replace the stored count with the fresh one */
+               if (new_refcounts[a] > 0) {     /* a floor is flagged in use only while it has rooms */
+                       flbuf.f_flags = flbuf.f_flags | QR_INUSE;
+               }
+               else {
+                       flbuf.f_flags = flbuf.f_flags & ~QR_INUSE;
+               }
+               lputfloor(&flbuf, a);   /* store the corrected record */
+               syslog(LOG_DEBUG, "housekeeping: floor %d has %d rooms", a, new_refcounts[a]);
        }
-}
+}      
 
 
+/*
+ * This is the housekeeping loop.  Worker threads come through here after
+ * processing client requests but before jumping back into the pool.  Only
+ * one instance may run at a time: a caller that finds housekeeping already
+ * in progress returns immediately, after logging a warning if last_timer
+ * is more than five minutes old.
+ *
+ * The "as often as needed" tasks run on every pass.  The "once per minute"
+ * tasks run only when more than 60 seconds have passed since last_timer
+ * was last reset.
+ */
+static int housekeeping_in_progress = 0;       /* nonzero while housekeeping is claimed; changed only inside S_HOUSEKEEPING */
+static time_t last_timer = 0L;                 /* when the per-minute tasks were last started; 0 until the first time */
+void do_housekeeping(void) {
+       int do_housekeeping_now = 0;
+       int do_perminute_housekeeping_now = 0;
+       time_t now;
 
+       /*
+        * We do it this way instead of wrapping the whole loop in an
+        * S_HOUSEKEEPING critical section because it eliminates the need to
+        * potentially have multiple concurrent mutexes in progress.
+        */
+       begin_critical_section(S_HOUSEKEEPING);
+       if (housekeeping_in_progress == 0) {
+               do_housekeeping_now = 1;
+               housekeeping_in_progress = 1;
+       }
+       end_critical_section(S_HOUSEKEEPING);
+
+       now = time(NULL);
+       if (do_housekeeping_now == 0) {
+               if ( (now - last_timer) > (time_t)300 ) {
+                       /* time_t need not be a long, so convert explicitly to match %ld */
+                       syslog(LOG_WARNING,
+                               "housekeeping: WARNING: housekeeping loop has not run for %ld minutes.  Is something stuck?",
+                               (long)((now - last_timer) / 60)
+                       );
+               }
+               return;
+       }
 
+       /*
+        * Ok, at this point we've made the decision to run the housekeeping
+        * loop.  Everything below this point is real work.
+        */
 
+       if ( (now - last_timer) > (time_t)60 ) {
+               do_perminute_housekeeping_now = 1;
+               last_timer = time(NULL);
+       }
+
+       /* First, do the "as often as needed" stuff... */
+       JournalRunQueue();
+       PerformSessionHooks(EVT_HOUSE);
 
-void enter_housekeeping_cmd(char *cmd) {
-       char cmdbuf[256];
+       /* Then, do the "once per minute" stuff... */
+       if (do_perminute_housekeeping_now) {
+               cdb_check_handles();                    /* suggested by Justin Case */
+               PerformSessionHooks(EVT_TIMER);         /* Run any timer hooks */
+       }
 
-       safestrncpy(cmdbuf, cmd, 256);
+       /*
+        * All done.  Release the claim so the next caller can run.
+        */
        begin_critical_section(S_HOUSEKEEPING);
-       write(housepipe[1], cmdbuf, 256);
+       housekeeping_in_progress = 0;
        end_critical_section(S_HOUSEKEEPING);
 }
-       
 
 
-/*
- * Check (and fix) floor reference counts.  This doesn't need to be done
- * very often, since the counts should remain correct during normal operation.
- * NOTE: this function pair should ONLY be called during startup.  It is NOT
- * thread safe.
- */
-void check_ref_counts_backend(struct quickroom *qrbuf) {
-       struct floor flbuf;
+/*
+ * Block until housekeeping has been claimed and no background jobs remain.
+ *
+ * First the housekeeping_in_progress flag is taken, retrying once a second
+ * while it is already set.  While the flag is held, do_housekeeping()
+ * returns without doing any work; CtdlEnableHouseKeeping() clears it again.
+ * Then the context list is polled every five seconds until it contains no
+ * CON_SYS context with a nonzero lastcmd.
+ */
+void CtdlDisableHouseKeeping(void)
+{
+       int ActiveBackgroundJobs;
+       int do_housekeeping_now = 0;
+       struct CitContext *nptr;
+       int nContexts, i;
 
-       getfloor(&flbuf, qrbuf->QRfloor);
-       ++flbuf.f_ref_count;
-       flbuf.f_flags = flbuf.f_flags | QR_INUSE;
-       putfloor(&flbuf, qrbuf->QRfloor);
+retry_block_housekeeping:
+       syslog(LOG_INFO, "housekeeping: trying to disable services");
+       begin_critical_section(S_HOUSEKEEPING);
+       if (housekeeping_in_progress == 0) {
+               do_housekeeping_now = 1;
+               housekeeping_in_progress = 1;
        }
-
-void check_ref_counts(void) {
-       struct floor flbuf;
-       int a;
-
-       for (a=0; a<MAXFLOORS; ++a) {
-               getfloor(&flbuf, a);
-               flbuf.f_ref_count = 0;
-               flbuf.f_flags = flbuf.f_flags & ~QR_INUSE;
-               putfloor(&flbuf, a);
+       end_critical_section(S_HOUSEKEEPING);
+       if (do_housekeeping_now == 0) {
+               /* sleep() rather than usleep(): usleep() is only specified for arguments below one million */
+               sleep(1);
+               goto retry_block_housekeeping;
+       }
+       
+       syslog(LOG_INFO, "housekeeping: checking for running server jobs");
+
+retry_wait_for_contexts:
+       /* So that we don't keep the context list locked for a long time
+        * we create a copy of it first.  The copy is freed below.
+        */
+       ActiveBackgroundJobs = 0;
+       nptr = CtdlGetContextArray(&nContexts) ;
+       if (nptr)
+       {
+               for (i=0; i<nContexts; i++) 
+               {
+                       /* only CON_SYS contexts with a nonzero lastcmd count as running jobs */
+                       if ((nptr[i].state != CON_SYS) || (nptr[i].lastcmd == 0))
+                               continue;
+                       ActiveBackgroundJobs ++;
+                       syslog(LOG_INFO, "housekeeping: job CC[%d] active; use TERM if you don't want to wait for it", nptr[i].cs_pid);
+               
                }
+       
+               free(nptr);
+
+       }
+       if (ActiveBackgroundJobs != 0) {
+               syslog(LOG_INFO, "housekeeping: found %d running jobs, need to wait", ActiveBackgroundJobs);
+               sleep(5);
+               goto retry_wait_for_contexts;
+       }
+       syslog(LOG_INFO, "housekeeping: disabled now.");
+}
 
-       ForEachRoom(check_ref_counts_backend);
-       }       
 
+void CtdlEnableHouseKeeping(void)      /* clear the housekeeping flag so that do_housekeeping() can claim it again */
+{
+       begin_critical_section(S_HOUSEKEEPING);        /* the flag is only changed inside this critical section */
+       housekeeping_in_progress = 0;   /* cleared unconditionally, whoever set it */
+       end_critical_section(S_HOUSEKEEPING);
+}