From 74c107a41ecad1e290081aa88b2c1e3741f5bb53 Mon Sep 17 00:00:00 2001 From: Anand Rajagopal Date: Mon, 9 Oct 2023 18:21:45 +0000 Subject: [PATCH] Adding a query parameter to filter out active alerts Signed-off-by: Anand Rajagopal --- CHANGELOG.md | 1 + docs/querying/api.md | 3 +- web/api/v1/api.go | 19 +++- web/api/v1/api_test.go | 226 +++++++++++++++++++++++++++++++++++++---- 4 files changed, 225 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ad9c69dc96..7cec526353e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## 2.47.1 / 2023-10-04 * [BUGFIX] Fix duplicate sample detection at chunk size limit #12874 +* [ENHANCEMENT] Add ability to filter out active alerts in `/rules` API ## 2.47.0 / 2023-09-06 diff --git a/docs/querying/api.md b/docs/querying/api.md index 408d32cdab5..c0e0d39b61c 100644 --- a/docs/querying/api.md +++ b/docs/querying/api.md @@ -679,6 +679,7 @@ URL query parameters: - `rule_name[]=`: only return rules with the given rule name. If the parameter is repeated, rules with any of the provided names are returned. If we've filtered out all the rules of a group, the group is not returned. When the parameter is absent or empty, no filtering is done. - `rule_group[]=`: only return rules with the given rule group name. If the parameter is repeated, rules with any of the provided rule group names are returned. When the parameter is absent or empty, no filtering is done. - `file[]=`: only return rules with the given filepath. If the parameter is repeated, rules with any of the provided filepaths are returned. When the parameter is absent or empty, no filtering is done. +- `exclude_active_alerts`: boolean. When set to `true`, all matching rules are still returned, but the list of active alerts of each alerting rule is left empty. When this parameter is absent, empty, or not a valid boolean, active alerts are included in the response. ```json $ curl http://localhost:9090/api/v1/rules @@ -1307,4 +1308,4 @@ Enable the OTLP receiver by the feature flag `--enable-feature=otlp-write-receiver`. 
When enabled, the OTLP receiver endpoint is `/api/v1/otlp/v1/metrics`. -*New in v2.47* \ No newline at end of file +*New in v2.47* diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 1a54f23a61f..7c73327df1a 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -1373,6 +1373,17 @@ func (api *API) rules(r *http.Request) apiFuncResult { returnAlerts := typ == "" || typ == "alert" returnRecording := typ == "" || typ == "record" + excludeAlertsParam := strings.ToLower(r.URL.Query().Get("exclude_active_alerts")) + + if excludeAlertsParam == "" { + excludeAlertsParam = "false" + } + + excludeAlerts, err := strconv.ParseBool(excludeAlertsParam) + if err != nil { + excludeAlerts = false + } + rgs := make([]*RuleGroup, 0, len(ruleGroups)) for _, grp := range ruleGroups { if len(rgSet) > 0 { @@ -1414,6 +1425,12 @@ func (api *API) rules(r *http.Request) apiFuncResult { if !returnAlerts { break } + var activeAlerts []*Alert + if excludeAlerts { + activeAlerts = []*Alert{} + } else { + activeAlerts = rulesAlertsToAPIAlerts(rule.ActiveAlerts()) + } enrichedRule = AlertingRule{ State: rule.State().String(), Name: rule.Name(), @@ -1422,7 +1439,7 @@ func (api *API) rules(r *http.Request) apiFuncResult { KeepFiringFor: rule.KeepFiringFor().Seconds(), Labels: rule.Labels(), Annotations: rule.Annotations(), - Alerts: rulesAlertsToAPIAlerts(rule.ActiveAlerts()), + Alerts: activeAlerts, Health: rule.Health(), LastError: lastError, EvaluationTime: rule.GetEvaluationDuration().Seconds(), diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 475b4bab54f..dca651e4544 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -209,10 +209,12 @@ func (t testAlertmanagerRetriever) toFactory() func(context.Context) Alertmanage } type rulesRetrieverMock struct { - testing *testing.T + alertingRules []*rules.AlertingRule + ruleGroups []*rules.Group + testing *testing.T } -func (m rulesRetrieverMock) AlertingRules() []*rules.AlertingRule { +func (m 
*rulesRetrieverMock) CreateAlertingRules() { expr1, err := parser.ParseExpr(`absent(test_metric3) != 1`) if err != nil { m.testing.Fatalf("unable to parse alert expression: %s", err) @@ -222,6 +224,11 @@ func (m rulesRetrieverMock) AlertingRules() []*rules.AlertingRule { m.testing.Fatalf("Unable to parse alert expression: %s", err) } + expr3, err := parser.ParseExpr(`vector(1)`) + if err != nil { + m.testing.Fatalf("Unable to parse alert expression: %s", err) + } + rule1 := rules.NewAlertingRule( "test_metric3", expr1, @@ -246,15 +253,29 @@ func (m rulesRetrieverMock) AlertingRules() []*rules.AlertingRule { true, log.NewNopLogger(), ) + rule3 := rules.NewAlertingRule( + "test_metric5", + expr3, + time.Second, + 0, + labels.FromStrings("name", "tm5"), + labels.Labels{}, + labels.FromStrings("name", "tm5"), + "", + false, + log.NewNopLogger(), + ) + var r []*rules.AlertingRule r = append(r, rule1) r = append(r, rule2) - return r + r = append(r, rule3) + m.alertingRules = r } -func (m rulesRetrieverMock) RuleGroups() []*rules.Group { - var ar rulesRetrieverMock - arules := ar.AlertingRules() +func (m *rulesRetrieverMock) CreateRuleGroups() { + m.CreateAlertingRules() + arules := m.AlertingRules() storage := teststorage.New(m.testing) defer storage.Close() @@ -271,6 +292,7 @@ func (m rulesRetrieverMock) RuleGroups() []*rules.Group { Appendable: storage, Context: context.Background(), Logger: log.NewNopLogger(), + NotifyFunc: func(ctx context.Context, expr string, alerts ...*rules.Alert) {}, } var r []rules.Rule @@ -294,7 +316,15 @@ func (m rulesRetrieverMock) RuleGroups() []*rules.Group { ShouldRestore: false, Opts: opts, }) - return []*rules.Group{group} + m.ruleGroups = []*rules.Group{group} +} + +func (m rulesRetrieverMock) AlertingRules() []*rules.AlertingRule { + return m.alertingRules +} + +func (m rulesRetrieverMock) RuleGroups() []*rules.Group { + return m.ruleGroups } func (m rulesRetrieverMock) toFactory() func(context.Context) RulesRetriever { @@ -380,12 
+410,14 @@ func TestEndpoints(t *testing.T) { now := time.Now() t.Run("local", func(t *testing.T) { - var algr rulesRetrieverMock + algr := rulesRetrieverMock{} algr.testing = t - algr.AlertingRules() + algr.CreateAlertingRules() + algr.CreateRuleGroups() - algr.RuleGroups() + g := algr.RuleGroups() + g[0].Eval(context.Background(), time.Now()) testTargetRetriever := setupTestTargetRetriever(t) @@ -442,12 +474,14 @@ func TestEndpoints(t *testing.T) { }) require.NoError(t, err) - var algr rulesRetrieverMock + algr := rulesRetrieverMock{} algr.testing = t - algr.AlertingRules() + algr.CreateAlertingRules() + algr.CreateRuleGroups() - algr.RuleGroups() + g := algr.RuleGroups() + g[0].Eval(context.Background(), time.Now()) testTargetRetriever := setupTestTargetRetriever(t) @@ -1036,6 +1070,36 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E sorter func(interface{}) metadata []targetMetadata exemplars []exemplar.QueryResult + zeroFunc func(interface{}) + } + + rulesZeroFunc := func(i interface{}) { + if i != nil { + v := i.(*RuleDiscovery) + for _, ruleGroup := range v.RuleGroups { + ruleGroup.EvaluationTime = float64(0) + ruleGroup.LastEvaluation = time.Time{} + for k, rule := range ruleGroup.Rules { + switch r := rule.(type) { + case AlertingRule: + r.LastEvaluation = time.Time{} + r.EvaluationTime = float64(0) + r.LastError = "" + r.Health = "ok" + for _, alert := range r.Alerts { + alert.ActiveAt = nil + } + ruleGroup.Rules[k] = r + case RecordingRule: + r.LastEvaluation = time.Time{} + r.EvaluationTime = float64(0) + r.LastError = "" + r.Health = "ok" + ruleGroup.Rules[k] = r + } + } + } + } } tests := []test{ @@ -1988,7 +2052,22 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E { endpoint: api.alerts, response: &AlertDiscovery{ - Alerts: []*Alert{}, + Alerts: []*Alert{ + { + Labels: labels.FromStrings("alertname", "test_metric5", "name", "tm5"), + Annotations: labels.Labels{}, + State: "pending", 
+ Value: "1e+00", + }, + }, + }, + zeroFunc: func(i interface{}) { + if i != nil { + v := i.(*AlertDiscovery) + for _, alert := range v.Alerts { + alert.ActiveAt = nil + } + } }, }, { @@ -2009,7 +2088,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E Labels: labels.Labels{}, Annotations: labels.Labels{}, Alerts: []*Alert{}, - Health: "unknown", + Health: "ok", Type: "alerting", }, AlertingRule{ @@ -2020,20 +2099,98 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E Labels: labels.Labels{}, Annotations: labels.Labels{}, Alerts: []*Alert{}, - Health: "unknown", + Health: "ok", Type: "alerting", }, + AlertingRule{ + State: "pending", + Name: "test_metric5", + Query: "vector(1)", + Duration: 1, + Labels: labels.FromStrings("name", "tm5"), + Annotations: labels.Labels{}, + Alerts: []*Alert{ + { + Labels: labels.FromStrings("alertname", "test_metric5", "name", "tm5"), + Annotations: labels.Labels{}, + State: "pending", + Value: "1e+00", + }, + }, + Health: "ok", + Type: "alerting", + }, RecordingRule{ Name: "recording-rule-1", Query: "vector(1)", Labels: labels.Labels{}, - Health: "unknown", + Health: "ok", Type: "recording", }, }, }, }, }, + zeroFunc: rulesZeroFunc, + }, + { + endpoint: api.rules, + query: url.Values{ + "exclude_active_alerts": []string{"true"}, + }, + response: &RuleDiscovery{ + RuleGroups: []*RuleGroup{ + { + Name: "grp", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric4", + Query: "up == 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "pending", + Name: 
"test_metric5", + Query: "vector(1)", + Duration: 1, + Labels: labels.FromStrings("name", "tm5"), + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + RecordingRule{ + Name: "recording-rule-1", + Query: "vector(1)", + Labels: labels.Labels{}, + Health: "ok", + Type: "recording", + }, + }, + }, + }, + }, + zeroFunc: rulesZeroFunc, }, { endpoint: api.rules, @@ -2056,7 +2213,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E Labels: labels.Labels{}, Annotations: labels.Labels{}, Alerts: []*Alert{}, - Health: "unknown", + Health: "ok", Type: "alerting", }, AlertingRule{ @@ -2067,13 +2224,32 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E Labels: labels.Labels{}, Annotations: labels.Labels{}, Alerts: []*Alert{}, - Health: "unknown", + Health: "ok", Type: "alerting", }, + AlertingRule{ + State: "pending", + Name: "test_metric5", + Query: "vector(1)", + Duration: 1, + Labels: labels.FromStrings("name", "tm5"), + Annotations: labels.Labels{}, + Alerts: []*Alert{ + { + Labels: labels.FromStrings("alertname", "test_metric5", "name", "tm5"), + Annotations: labels.Labels{}, + State: "pending", + Value: "1e+00", + }, + }, + Health: "ok", + Type: "alerting", + }, }, }, }, }, + zeroFunc: rulesZeroFunc, }, { endpoint: api.rules, @@ -2092,13 +2268,14 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E Name: "recording-rule-1", Query: "vector(1)", Labels: labels.Labels{}, - Health: "unknown", + Health: "ok", Type: "recording", }, }, }, }, }, + zeroFunc: rulesZeroFunc, }, { endpoint: api.rules, @@ -2119,13 +2296,14 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E Labels: labels.Labels{}, Annotations: labels.Labels{}, Alerts: []*Alert{}, - Health: "unknown", + Health: "ok", Type: "alerting", }, }, }, }, }, + zeroFunc: rulesZeroFunc, }, { endpoint: api.rules, @@ -2151,13 +2329,14 @@ func testEndpoints(t 
*testing.T, api *API, tr *testTargetRetriever, es storage.E Labels: labels.Labels{}, Annotations: labels.Labels{}, Alerts: []*Alert{}, - Health: "unknown", + Health: "ok", Type: "alerting", }, }, }, }, }, + zeroFunc: rulesZeroFunc, }, { endpoint: api.queryExemplars, @@ -2696,6 +2875,9 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E assertAPIResponseMetadataLen(t, res.data, test.responseMetadataTotal) } } else { + if test.zeroFunc != nil { + test.zeroFunc(res.data) + } assertAPIResponse(t, res.data, test.response) } })