Skip to content

Commit

Permalink
Add waitExclude check plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
Nuckal777 committed Mar 16, 2022
1 parent e419f87 commit 2190ba6
Show file tree
Hide file tree
Showing 4 changed files with 190 additions and 1 deletion.
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -191,11 +191,17 @@ config:
weekdays: weekdays when the time window is valid as array, e.g. [monday, tuesday, wednesday, thursday, friday, saturday, sunday], required
exclude: month/day combinations as array, when maintenances are not allowed to occur, e.g. ["Dec 24", "Oct 31"], optional
```
__wait:__ Checks if a certain duration has passed since the last state transition
__wait:__ Checks if a certain duration has passed since the last state transition.
```yaml
config:
duration: a duration according to the rules of golangs time.ParseDuration(), required
```
__waitExclude:__ Checks if a certain duration has passed since the last state transition, while time does not progress on excluded days. This is likely to have some inaccuracies (e.g. around leap seconds) due to the math involved.
```yaml
config:
duration: a duration according to the rules of golangs time.ParseDuration(), required
exclude: weekdays when the time does not progress, e.g. [monday, tuesday, wednesday, thursday, friday, saturday, sunday], required
```
__affinity:__ Pods are rescheduled when a node is drained. While maintaining a whole cluster, it is possible that pods are rescheduled onto nodes which are subject to another drain soon.
This effect can be reduced by specifying a preferred node affinity towards nodes in the operational state.
The affinity check plugin prefers to send nodes into maintenance, which do not have pods matching exactly the node affinity below, so nodes with non-critical pods are maintained first to provide operational nodes for critical workloads.
Expand Down Expand Up @@ -292,6 +298,9 @@ config:
## Additional integrations
- Support for [VMware ESX maintenances](esx/README.md)
- Support for [Kubernikus](kubernikus/README.md)
- The maintenance controller exports a number of Prometheus metrics, most notably:
- `maintenance_controller_shuffle_count`: Counts pods in DaemonSets, Deployments and StatefulSets that were likely shuffled by a node sent into maintenance
- `maintenance_controller_shuffles_per_replica`: Count of pods in DaemonSets, Deployments and StatefulSets that were likely shuffled by a node sent into maintenance, divided by the replica count when the event occurred

## Example configuration for flatcar update agents
This example requires that the Flatcar-Linux-Update-Agent is present on the nodes.
Expand Down
1 change: 1 addition & 0 deletions controllers/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ func addPluginsToRegistry(registry *plugin.Registry) {
registry.CheckPlugins["stagger"] = &impl.Stagger{}
registry.CheckPlugins["timeWindow"] = &impl.TimeWindow{}
registry.CheckPlugins["wait"] = &impl.Wait{}
registry.CheckPlugins["waitExclude"] = &impl.WaitExclude{}

registry.NotificationPlugins["mail"] = &impl.Mail{}
registry.NotificationPlugins["slack"] = &impl.SlackWebhook{}
Expand Down
96 changes: 96 additions & 0 deletions plugin/impl/wait.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,12 @@ import (
"time"

"github.com/elastic/go-ucfg"
"github.com/sapcc/maintenance-controller/common"
"github.com/sapcc/maintenance-controller/plugin"
)

// day is a fixed span of 24 hours; it does not account for DST shifts or leap seconds.
const day = 24 * time.Hour

// Wait is the check plugin registered under the name "wait".
// According to the README it checks whether a certain duration has passed
// since the last state transition.
type Wait struct {
	// Duration is the configured waiting time.
	Duration time.Duration
}
Expand Down Expand Up @@ -54,3 +57,96 @@ func (w *Wait) Check(params plugin.Parameters) (bool, error) {
// AfterEval is a no-op for the wait plugin: both arguments are ignored
// and nil is always returned.
func (w *Wait) AfterEval(chainResult bool, params plugin.Parameters) error {
	return nil
}

// WaitExclude is the check plugin registered under the name "waitExclude".
// It checks whether a certain duration has passed since the last state
// transition, while time does not progress on excluded weekdays.
type WaitExclude struct {
	// Duration is the waiting time that has to accumulate on non-excluded days.
	Duration time.Duration
	// Exclude lists the weekdays on which time does not progress.
	Exclude []time.Weekday
}

// New builds a WaitExclude checker from the given plugin configuration.
//
// The configuration must provide "duration", which is parsed with
// time.ParseDuration, and "exclude", a list of weekday names that are
// resolved through common.WeekdayFromString. The first unpacking or parsing
// error is returned as-is and no checker is created.
func (we *WaitExclude) New(config *ucfg.Config) (plugin.Checker, error) {
	var raw struct {
		Duration string   `config:"duration" validate:"required"`
		Exclude  []string `config:"exclude" validate:"required"`
	}
	if err := config.Unpack(&raw); err != nil {
		return nil, err
	}
	wait, err := time.ParseDuration(raw.Duration)
	if err != nil {
		return nil, err
	}
	// One slot per configured name; a failed conversion aborts the whole construction.
	excluded := make([]time.Weekday, len(raw.Exclude))
	for i, name := range raw.Exclude {
		if excluded[i], err = common.WeekdayFromString(name); err != nil {
			return nil, err
		}
	}
	return &WaitExclude{Duration: wait, Exclude: excluded}, nil
}

// Check evaluates the plugin against the current wall-clock time in UTC.
// The returned error is always nil.
func (we *WaitExclude) Check(params plugin.Parameters) (bool, error) {
	now := time.Now().UTC()
	passed := we.checkInternal(&params, now)
	return passed, nil
}

// checkInternal reports whether more than we.Duration has elapsed between
// params.LastTransition and now, counting only time that falls on weekdays
// which are not listed in we.Exclude.
//
// Both instants are normalized to UTC (Check always supplies a UTC "now"), so
// weekday boundaries are evaluated consistently for the two timestamps and a
// calendar day is exactly 24 hours long.
//
// The interval is walked one calendar-day segment at a time. Each segment is
// clipped to [LastTransition, now], so the partial first and last days are
// counted exactly once. Subtracting full days from the total span instead
// counts an excluded "today" twice whenever the transition's time of day is
// earlier than now's.
//
// If LastTransition is not before now, nothing has elapsed and the result is
// false for any non-negative duration.
func (we *WaitExclude) checkInternal(params *plugin.Parameters, now time.Time) bool {
	start, end := params.LastTransition.UTC(), now.UTC()
	var elapsed time.Duration
	for cursor := start; cursor.Before(end); {
		// The segment ends at "now" on the final day, otherwise at the next midnight.
		next := end
		if !isSameDay(cursor, end) {
			year, month, dayOfMonth := cursor.Date()
			next = time.Date(year, month, dayOfMonth, 0, 0, 0, 0, time.UTC).Add(day)
		}
		if !we.isExcluded(cursor.Weekday()) {
			elapsed += next.Sub(cursor)
			// Stop as soon as the answer is known; this also keeps the sum
			// from overflowing for very old transition timestamps.
			if elapsed > we.Duration {
				return true
			}
		}
		cursor = next
	}
	return false
}

// isExcluded reports whether weekday is one of the weekdays in we.Exclude.
func (we *WaitExclude) isExcluded(weekday time.Weekday) bool {
	for i := range we.Exclude {
		if we.Exclude[i] == weekday {
			return true
		}
	}
	return false
}

// isSameDay reports whether t and u fall on the same calendar date.
// Each time is evaluated in its own location; the instants themselves are
// not compared.
func isSameDay(t, u time.Time) bool {
	if t.Year() != u.Year() {
		return false
	}
	// Within one year the day-of-year uniquely identifies month and day.
	return t.YearDay() == u.YearDay()
}

// AfterEval is a no-op for the waitExclude plugin: both arguments are ignored
// and nil is always returned.
func (we *WaitExclude) AfterEval(chainResult bool, params plugin.Parameters) error {
	return nil
}
83 changes: 83 additions & 0 deletions plugin/impl/wait_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,86 @@ var _ = Describe("The wait plugin", func() {
})

})

// Specs for the waitExclude check plugin: configuration parsing and the
// elapsed-time computation with and without excluded weekdays.
// All fixture dates are in March 2022 (UTC), where the 1st is a Tuesday.
var _ = Describe("The waitExclude plugin", func() {

	It("can parse its config", func() {
		base := WaitExclude{}
		configStr := "duration: 17m\nexclude: [\"tue\"]"
		config, err := yaml.NewConfig([]byte(configStr))
		Expect(err).To(Succeed())
		plugin, err := base.New(config)
		Expect(err).To(Succeed())
		Expect(plugin.(*WaitExclude).Duration).To(Equal(17 * time.Minute))
		Expect(plugin.(*WaitExclude).Exclude).To(ContainElement(time.Tuesday))
	})

	// checkWaitExclude runs the check with an explicit "now" so that the
	// specs do not depend on the wall clock.
	checkWaitExclude := func(we *WaitExclude, transition, now time.Time) bool {
		return we.checkInternal(&plugin.Parameters{LastTransition: transition}, now)
	}

	Context("with a duration of one hour and no exclusions", func() {

		It("fails between 10:00 and 10:30", func() {
			we := WaitExclude{Duration: 1 * time.Hour, Exclude: make([]time.Weekday, 0)}
			lastTransition := time.Date(2022, time.March, 15, 10, 00, 00, 00, time.UTC)
			now := time.Date(2022, time.March, 15, 10, 30, 00, 00, time.UTC)
			result := checkWaitExclude(&we, lastTransition, now)
			Expect(result).To(BeFalse())
		})

		It("passes between 10:00 and 11:30", func() {
			we := WaitExclude{Duration: 1 * time.Hour, Exclude: make([]time.Weekday, 0)}
			lastTransition := time.Date(2022, time.March, 15, 10, 00, 00, 00, time.UTC)
			now := time.Date(2022, time.March, 15, 11, 30, 00, 00, time.UTC)
			result := checkWaitExclude(&we, lastTransition, now)
			Expect(result).To(BeTrue())
		})

	})

	Context("with a duration of 30 hours and exclusions on monday and wednesday", func() {

		// Sunday 12h + Tuesday 17h = 29h of counted time.
		It("fails between sun 12:00 and tue 17:00", func() {
			we := WaitExclude{Duration: 30 * time.Hour, Exclude: []time.Weekday{time.Monday, time.Wednesday}}
			lastTransition := time.Date(2022, time.March, 6, 12, 00, 00, 00, time.UTC)
			now := time.Date(2022, time.March, 8, 17, 00, 00, 00, time.UTC)
			result := checkWaitExclude(&we, lastTransition, now)
			Expect(result).To(BeFalse())
		})

		// Sunday 12h + Tuesday 18h10m = 30h10m of counted time.
		It("passes between sun 12:00 and tue 18:10", func() {
			we := WaitExclude{Duration: 30 * time.Hour, Exclude: []time.Weekday{time.Monday, time.Wednesday}}
			lastTransition := time.Date(2022, time.March, 6, 12, 00, 00, 00, time.UTC)
			now := time.Date(2022, time.March, 8, 18, 10, 00, 00, time.UTC)
			result := checkWaitExclude(&we, lastTransition, now)
			Expect(result).To(BeTrue())
		})

		// The transition happens on an excluded day: Tuesday 24h + Thursday 5h = 29h.
		It("fails between mon 12:00 and thu 5:00", func() {
			we := WaitExclude{Duration: 30 * time.Hour, Exclude: []time.Weekday{time.Monday, time.Wednesday}}
			lastTransition := time.Date(2022, time.March, 7, 12, 00, 00, 00, time.UTC)
			now := time.Date(2022, time.March, 10, 5, 00, 00, 00, time.UTC)
			result := checkWaitExclude(&we, lastTransition, now)
			Expect(result).To(BeFalse())
		})

		// Tuesday 24h + Thursday 6h10m = 30h10m.
		It("passes between mon 12:00 and thu 6:10", func() {
			we := WaitExclude{Duration: 30 * time.Hour, Exclude: []time.Weekday{time.Monday, time.Wednesday}}
			lastTransition := time.Date(2022, time.March, 7, 12, 00, 00, 00, time.UTC)
			now := time.Date(2022, time.March, 10, 6, 10, 00, 00, time.UTC)
			result := checkWaitExclude(&we, lastTransition, now)
			Expect(result).To(BeTrue())
		})

		// "now" is on an excluded day: Saturday 2h + Sunday 24h = 26h.
		// March 5, 2022 is a Saturday.
		It("fails between sat 22:00 and mon 17:00", func() {
			we := WaitExclude{Duration: 30 * time.Hour, Exclude: []time.Weekday{time.Monday, time.Wednesday}}
			lastTransition := time.Date(2022, time.March, 5, 22, 00, 00, 00, time.UTC)
			now := time.Date(2022, time.March, 7, 17, 00, 00, 00, time.UTC)
			result := checkWaitExclude(&we, lastTransition, now)
			Expect(result).To(BeFalse())
		})

	})

})

0 comments on commit 2190ba6

Please sign in to comment.