From 1b5565cbaeadc8e96d7c94e683fa2d2fe9671f6f Mon Sep 17 00:00:00 2001 From: Frank Vissing Date: Tue, 15 May 2018 12:40:54 +0200 Subject: [PATCH 1/3] adding option to force reboot, ignoring active alerts --- README.md | 19 ++++++++++--------- cmd/kured/main.go | 38 ++++++++++++++++++++++++++++---------- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 74169e6e5..bf58675f3 100644 --- a/README.md +++ b/README.md @@ -61,15 +61,16 @@ The following arguments can be passed to kured via the daemonset pod template: ``` Flags: - --alert-filter-regexp value alert names to ignore when checking for active alerts - --ds-name string namespace containing daemonset on which to place lock (default "kube-system") - --ds-namespace string name of daemonset on which to place lock (default "kured") - --lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock") - --period duration reboot check period (default 1h0m0s) - --prometheus-url string Prometheus instance to probe for active alerts - --reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required") - --slack-hook-url string slack hook URL for reboot notfications - --slack-username string slack username for reboot notfications (default "kured") + --alert-filter-regexp value alert names to ignore when checking for active alerts + --ds-name string namespace containing daemonset on which to place lock (default "kube-system") + --ds-namespace string name of daemonset on which to place lock (default "kured") + --lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock") + --period duration reboot check period (default 1h0m0s) + --prometheus-url string Prometheus instance to probe for active alerts + --reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required") + --force-reboot-sentinel string path to 
file whose existence signals need to force reboot, i.e. ignore active Prometheus alerts (default "/var/run/force-reboot-required") + --slack-hook-url string slack hook URL for reboot notfications + --slack-username string slack username for reboot notfications (default "kured") ``` ### Reboot Sentinel File & Period diff --git a/cmd/kured/main.go b/cmd/kured/main.go index 6adcbd587..d34031fc5 100644 --- a/cmd/kured/main.go +++ b/cmd/kured/main.go @@ -26,15 +26,16 @@ var ( version = "unreleased" // Command line flags - period time.Duration - dsNamespace string - dsName string - lockAnnotation string - prometheusURL string - alertFilter *regexp.Regexp - rebootSentinel string - slackHookURL string - slackUsername string + period time.Duration + dsNamespace string + dsName string + lockAnnotation string + prometheusURL string + alertFilter *regexp.Regexp + rebootSentinel string + forceRebootSentinel string + slackHookURL string + slackUsername string // Metrics rebootRequiredGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ @@ -68,7 +69,8 @@ func main() { "alert names to ignore when checking for active alerts") rootCmd.PersistentFlags().StringVar(&rebootSentinel, "reboot-sentinel", "/var/run/reboot-required", "path to file whose existence signals need to reboot") - + rootCmd.PersistentFlags().StringVar(&forceRebootSentinel, "force-reboot-sentinel", "/var/run/force-reboot-required", + "path to file whose existence signals need to force reboot") rootCmd.PersistentFlags().StringVar(&slackHookURL, "slack-hook-url", "", "slack hook URL for reboot notfications") rootCmd.PersistentFlags().StringVar(&slackUsername, "slack-username", "kured", @@ -108,6 +110,18 @@ func sentinelExists() bool { return false // unreachable; prevents compilation error } } +func forceRebootsentinelExists() bool { + _, err := os.Stat(forceRebootSentinel) + switch { + case err == nil: + return true + case os.IsNotExist(err): + return false + default: + log.Fatalf("Unable to determine existence of 
force reboot sentinel: %v", err) + return false // unreachable; prevents compilation error + } +} func rebootRequired() bool { if sentinelExists() { @@ -120,6 +134,10 @@ func rebootRequired() bool { } func rebootBlocked() bool { + if forceRebootsentinelExists() { + log.Infof("Force reebot sentinel %v exists, force reeboting activated",forceRebootSentinel) + return false + } if prometheusURL != "" { alertNames, err := alerts.PrometheusActiveAlerts(prometheusURL, alertFilter) if err != nil { From 989594acbf2495c3391eceaa95c59cc751f1ef9f Mon Sep 17 00:00:00 2001 From: Frank Vissing Date: Tue, 15 May 2018 14:19:08 +0200 Subject: [PATCH 2/3] Update main.go fix spelling --- cmd/kured/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/kured/main.go b/cmd/kured/main.go index d34031fc5..cd4707142 100644 --- a/cmd/kured/main.go +++ b/cmd/kured/main.go @@ -135,7 +135,7 @@ func rebootRequired() bool { func rebootBlocked() bool { if forceRebootsentinelExists() { - log.Infof("Force reebot sentinel %v exists, force reeboting activated",forceRebootSentinel) + log.Infof("Force reboot sentinel %v exists, force rebooting activated",forceRebootSentinel) return false } if prometheusURL != "" { From 6a4dcc8fb30d6c5867f9a8de10dc2d15b81382d8 Mon Sep 17 00:00:00 2001 From: Frank Vissing Date: Fri, 1 Jun 2018 13:29:49 +0200 Subject: [PATCH 3/3] reboot if forceRebootSentinel exists --- cmd/kured/main.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/kured/main.go b/cmd/kured/main.go index cd4707142..f2bb3f6ab 100644 --- a/cmd/kured/main.go +++ b/cmd/kured/main.go @@ -124,7 +124,7 @@ func forceRebootsentinelExists() bool { } func rebootRequired() bool { - if sentinelExists() { + if sentinelExists() || forceRebootsentinelExists() { log.Infof("Reboot required") return true } else { @@ -135,7 +135,7 @@ func rebootRequired() bool { func rebootBlocked() bool { if forceRebootsentinelExists() { - log.Infof("Force reboot sentinel %v exists, force 
rebooting activated",forceRebootSentinel) + log.Infof("Force reboot sentinel %v exists, force rebooting activated", forceRebootSentinel) return false } if prometheusURL != "" {