[rrd-developers] [PATCH] fast shutdown mode when journaling
kevin brintnall
kbrint at rufus.net
Sat Sep 27 00:28:32 CEST 2008
This patch introduces "fast shutdown" mode and two new signals.
---
diff --git a/doc/rrdcached.pod b/doc/rrdcached.pod
index d8d88c5..c75b9e0 100644
--- a/doc/rrdcached.pod
+++ b/doc/rrdcached.pod
@@ -6,7 +6,7 @@ rrdcached - Data caching daemon for rrdtool
=head1 SYNOPSIS
-B<rrdcached> [B<-l> I<address>] [B<-w> I<timeout>] [B<-z> I<delay>] [B<-f> I<timeout>] [B<-j> I<dir>]
+B<rrdcached> [B<-l> I<address>] [B<-w> I<timeout>] [B<-z> I<delay>] [B<-f> I<timeout>] [B<-j> I<dir>] [B<-F>]
=head1 DESCRIPTION
@@ -87,7 +87,20 @@ found, all updates therein will be read into memory before the daemon
starts accepting new connections.
The journal will be rotated with the same frequency as the flush timer
-given by B<-f>. On clean shutdown, the journal files are removed.
+given by B<-f>.
+
+When journaling is enabled, the daemon will use a fast shutdown procedure.
+Rather than flushing all files to disk, it will make sure the journal is
+properly written and exit immediately. Although the RRD data files are
+not fully up-to-date, no information is lost; all pending updates will be
+replayed from the journal next time the daemon starts up.
+
+To disable fast shutdown, use the B<-F> option.
+
+=item B<-F>
+
+ALWAYS flush all updates to the RRD data files when the daemon is shut
+down, regardless of journal setting.
=item B<-b> I<dir>
@@ -421,7 +434,20 @@ Number of times the journal has been rotated since startup.
=item SIGINT and SIGTERM
-The daemon exits normally on receipt of either of these signals.
+The daemon exits normally on receipt of either of these signals. Pending
+updates are handled in accordance with the B<-j> and B<-F> options.
+
+=item SIGUSR1
+
+The daemon exits AFTER flushing all updates out to disk. This may take a
+while.
+
+=item SIGUSR2
+
+The daemon exits immediately, without flushing updates out to disk.
+Pending updates will be replayed from the journal when the daemon starts
+up again. B<WARNING: if journaling (-j) is NOT enabled, any pending
+updates WILL BE LOST>.
=back
diff --git a/src/rrd_daemon.c b/src/rrd_daemon.c
index 79425d2..b091734 100644
--- a/src/rrd_daemon.c
+++ b/src/rrd_daemon.c
@@ -168,6 +168,7 @@ static pthread_cond_t cache_cond = PTHREAD_COND_INITIALIZER;
static int config_write_interval = 300;
static int config_write_jitter = 0;
static int config_flush_interval = 3600;
+static int config_flush_at_shutdown = 0;
static char *config_pid_file = NULL;
static char *config_base_dir = NULL;
@@ -212,6 +213,18 @@ static void sig_term_handler (int s __attribute__((unused))) /* {{{ */
sig_common("TERM");
} /* }}} void sig_term_handler */
+static void sig_usr1_handler (int s __attribute__((unused))) /* {{{ */
+{
+ config_flush_at_shutdown = 1;
+ sig_common("USR1");
+} /* }}} void sig_usr1_handler */
+
+static void sig_usr2_handler (int s __attribute__((unused))) /* {{{ */
+{
+ config_flush_at_shutdown = 0;
+ sig_common("USR2");
+} /* }}} void sig_usr2_handler */
+
static void install_signal_handlers(void) /* {{{ */
{
/* These structures are static, because `sigaction' behaves weird if the are
@@ -219,6 +232,8 @@ static void install_signal_handlers(void) /* {{{ */
static struct sigaction sa_int;
static struct sigaction sa_term;
static struct sigaction sa_pipe;
+ static struct sigaction sa_usr1;
+ static struct sigaction sa_usr2;
/* Install signal handlers */
memset (&sa_int, 0, sizeof (sa_int));
@@ -233,6 +248,14 @@ static void install_signal_handlers(void) /* {{{ */
sa_pipe.sa_handler = SIG_IGN;
sigaction (SIGPIPE, &sa_pipe, NULL);
+ memset (&sa_usr1, 0, sizeof (sa_usr1));
+ sa_usr1.sa_handler = sig_usr1_handler;
+ sigaction (SIGUSR1, &sa_usr1, NULL);
+
+ memset (&sa_usr2, 0, sizeof (sa_usr2));
+ sa_usr2.sa_handler = sig_usr2_handler;
+ sigaction (SIGUSR2, &sa_usr2, NULL);
+
} /* }}} void install_signal_handlers */
static int open_pidfile(void) /* {{{ */
@@ -571,6 +594,7 @@ static void *queue_thread_main (void *args __attribute__((unused))) /* {{{ */
{
struct timeval now;
struct timespec next_flush;
+ int final_flush = 0; /* make sure we only flush once on shutdown */
gettimeofday (&now, NULL);
next_flush.tv_sec = now.tv_sec + config_flush_interval;
@@ -608,8 +632,9 @@ static void *queue_thread_main (void *args __attribute__((unused))) /* {{{ */
}
/* Now, check if there's something to store away. If not, wait until
- * something comes in or it's time to do the cache flush. */
- if (cache_queue_head == NULL)
+ * something comes in or it's time to do the cache flush. If we are
+ * shutting down, do not wait around. */
+ if (cache_queue_head == NULL && !do_shutdown)
{
status = pthread_cond_timedwait (&cache_cond, &cache_lock, &next_flush);
if ((status != 0) && (status != ETIMEDOUT))
@@ -619,9 +644,14 @@ static void *queue_thread_main (void *args __attribute__((unused))) /* {{{ */
}
}
- /* We're about to shut down, so lets flush the entire tree. */
- if ((do_shutdown != 0) && (cache_queue_head == NULL))
- flush_old_values (/* max age = */ -1);
+ /* We're about to shut down */
+ if (do_shutdown != 0 && !final_flush++)
+ {
+ if (config_flush_at_shutdown)
+ flush_old_values (-1); /* flush everything */
+ else
+ break;
+ }
/* Check if a value has arrived. This may be NULL if we timed out or there
* was an interrupt such as a signal. */
@@ -686,14 +716,23 @@ static void *queue_thread_main (void *args __attribute__((unused))) /* {{{ */
pthread_mutex_lock (&cache_lock);
- /* We're about to shut down, so lets flush the entire tree. */
- if ((do_shutdown != 0) && (cache_queue_head == NULL))
- flush_old_values (/* max age = */ -1);
+ /* We're about to shut down */
+ if (do_shutdown != 0 && !final_flush++)
+ {
+ if (config_flush_at_shutdown)
+ flush_old_values (-1); /* flush everything */
+ else
+ break;
+ }
} /* while ((do_shutdown == 0) || (cache_queue_head != NULL)) */
pthread_mutex_unlock (&cache_lock);
- assert(cache_queue_head == NULL);
- RRDD_LOG(LOG_INFO, "clean shutdown; all RRDs flushed");
+ if (config_flush_at_shutdown)
+ {
+ assert(cache_queue_head == NULL);
+ RRDD_LOG(LOG_INFO, "clean shutdown; all RRDs flushed");
+ }
+
journal_done();
return (NULL);
@@ -1346,10 +1385,16 @@ static void journal_rotate(void) /* {{{ */
fclose(old_fh);
if (journal_fh == NULL)
+ {
RRDD_LOG(LOG_CRIT,
"JOURNALING DISABLED: Cannot open journal file '%s' : (%s)",
journal_cur, rrd_strerror(errno));
+ RRDD_LOG(LOG_ERR,
+ "JOURNALING DISABLED: All values will be flushed at shutdown");
+ config_flush_at_shutdown = 1;
+ }
+
} /* }}} static void journal_rotate */
static void journal_done(void) /* {{{ */
@@ -1364,10 +1409,18 @@ static void journal_done(void) /* {{{ */
journal_fh = NULL;
}
- RRDD_LOG(LOG_INFO, "removing journals");
+ if (config_flush_at_shutdown)
+ {
+ RRDD_LOG(LOG_INFO, "removing journals");
+ unlink(journal_old);
+ unlink(journal_cur);
+ }
+ else
+ {
+ RRDD_LOG(LOG_INFO, "expedited shutdown; "
+ "journals will be used at next startup");
+ }
- unlink(journal_old);
- unlink(journal_cur);
pthread_mutex_unlock(&journal_lock);
} /* }}} static void journal_done */
@@ -1493,7 +1546,9 @@ static void *connection_thread_main (void *args) /* {{{ */
pollfd.revents = 0;
status = poll (&pollfd, 1, /* timeout = */ 500);
- if (status == 0) /* timeout */
+ if (do_shutdown)
+ break;
+ else if (status == 0) /* timeout */
continue;
else if (status < 0) /* error */
{
@@ -1800,11 +1855,11 @@ static void *listen_thread_main (void *args __attribute__((unused))) /* {{{ */
}
status = poll (pollfds, pollfds_num, /* timeout = */ 1000);
- if (status == 0)
- {
- continue; /* timeout */
- }
- else if (status < 0)
+ if (do_shutdown)
+ break;
+ else if (status == 0) /* timeout */
+ continue;
+ else if (status < 0) /* error */
{
status = errno;
if (status != EINTR)
@@ -1968,7 +2023,7 @@ static int read_options (int argc, char **argv) /* {{{ */
int option;
int status = 0;
- while ((option = getopt(argc, argv, "gl:f:w:b:z:p:j:h?")) != -1)
+ while ((option = getopt(argc, argv, "gl:f:w:b:z:p:j:h?F")) != -1)
{
switch (option)
{
@@ -2086,6 +2141,10 @@ static int read_options (int argc, char **argv) /* {{{ */
}
break;
+ case 'F':
+ config_flush_at_shutdown = 1;
+ break;
+
case 'j':
{
struct stat statbuf;
@@ -2136,6 +2195,7 @@ static int read_options (int argc, char **argv) /* {{{ */
" -b <dir> Base directory to change to.\n"
" -g Do not fork and run in the foreground.\n"
" -j <dir> Directory in which to create the journal files.\n"
+ " -F Always flush all updates at shutdown\n"
"\n"
"For more information and a detailed description of all options "
"please refer\n"
@@ -2154,6 +2214,9 @@ static int read_options (int argc, char **argv) /* {{{ */
fprintf(stderr, "WARNING: write delay (-z) should NOT be larger than"
" write interval (-w) !\n");
+ if (journal_cur == NULL)
+ config_flush_at_shutdown = 1;
+
return (status);
} /* }}} int read_options */
More information about the rrd-developers
mailing list