Skip to content

Commit

Permalink
Added support for custom alerts
Browse files Browse the repository at this point in the history
  • Loading branch information
bastianeicher committed Jul 7, 2023
1 parent b874aae commit 46838ca
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 0 deletions.
11 changes: 11 additions & 0 deletions charts/generic-service/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,17 @@ app:
| `alerting.grpc.maxErrorRatio` | `2.5` | The maximum gRPC error ratio increase in the sample interval compared to the reference interval |
| `alerting.grpc.maxCriticalErrors` | `0` | The maximum number of critical gRPC error responses in the sample interval |
| `alerting.grpc.criticalCodes` | `[Internal, Unimplemented]` | Which gRPC status codes are considered critical errors |
| `alerting.custom.*.metric` | __required if used__ | The name of the Prometheus metric exposed by the service |
| `alerting.custom.*.metricLabels` | `{}` | Labels to use for filtering the metric |
| `alerting.custom.*.aggregate` | __required if used__ | The aggregate function to use to combine metric values from multiple replicas (e.g., `max` or `sum`) |
| `alerting.custom.*.increaseOver` | | A sliding window in which to calculate the increase of the metric (e.g., `10m`) |
| `alerting.custom.*.averageOver` | | A sliding window in which to calculate the average value of the metric (e.g., `10m`) |
| `alerting.custom.*.round` | `false` | Round the result before evaluating the predicate |
| `alerting.custom.*.predicate` | __required if used__ | An expression that triggers the alert when the metric fulfills it |
| `alerting.custom.*.severity` | `warning` | The severity of the alert |
| `alerting.custom.*.topic` | | An optional topic label for the alert |
| `alerting.custom.*.summary` | __required if used__ | A short summary of the alert |
| `alerting.custom.*.description` | __required if used__ | A longer description of the alert; can include metric labels via templating |
| `sidecars` | `[]` | Additional sidecar containers to be added to the `Pod` |
| `sidecarTemplates` | `[]` | Strings to be templated providing additional sidecar containers to be added to the Pod |
| `rbac.roles` | `[]` | Namespace-specific Kubernetes RBAC Roles to assign to the service (supports templating) |
Expand Down
20 changes: 20 additions & 0 deletions charts/generic-service/ci/custom-alerts-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# custom alerts test
#
# CI values that exercise the custom alert rule: one alert with a label
# filter, a `sum` aggregate and a threshold predicate.

image:
  repository: jwilder/whoami
  tag: latest

# Custom alerts make the chart fail unless monitoring is enabled.
monitoring:
  enabled: true

alerting:
  enabled: true
  custom:
    # The map key becomes the name of the alert.
    MyCustomAlert:
      metric: some_metric
      metricLabels:
        someKey: someValue
      aggregate: sum
      # Quoted because a leading `>` would otherwise start a folded block scalar.
      predicate: '> 100'
      summary: some metric too high
      # Quoted so the braces are always read as literal text; the chart passes
      # the string through without rendering it.
      description: 'Some metric is {{ $value }}, which is too high.'
24 changes: 24 additions & 0 deletions charts/generic-service/templates/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -201,4 +201,28 @@ spec:
{{- end }}
{{- end }}

{{- /*
  Custom alerts: emits one alert rule per entry in .Values.alerting.custom,
  using the map key as the alert name. The rendered expression has the shape

    <aggregate>( [round(] [increase( | avg_over_time(] <metric>{<labels>} [<window>] [)] [)] ) <predicate>

  The metric is always filtered by the release namespace and release name;
  metricLabels add further matchers and their values are rendered with tpl.
  Rendering fails if monitoring is disabled, if a required property is missing,
  or if increaseOver and averageOver are both set.
*/}}
{{- range $name, $props := .Values.alerting.custom }}
{{- if not $.Values.monitoring.enabled }}{{ fail "Custom alerts can only be used when monitoring.enabled is true" }}{{ end }}
{{- /* Combining both would wrap a range selector around a function result, which PromQL rejects. */}}
{{- if and $props.increaseOver $props.averageOver }}{{ fail "Custom alerts cannot combine increaseOver and averageOver" }}{{ end }}
        - alert: {{ $name | quote }}
          expr: |
            {{ $props.aggregate | required "alert aggregate required" }}(
              {{- if $props.round }}round({{ end }}
              {{- if $props.increaseOver }}increase({{ end }}
              {{- if $props.averageOver }}avg_over_time({{ end }}
              {{ $props.metric | required "alert metric required" }}{
                namespace="{{ $.Release.Namespace }}", release="{{ $.Release.Name }}"{{ range $key, $val := $props.metricLabels }}, {{ $key }}={{ tpl $val $ | quote }}{{ end }}
              }
              {{- if $props.averageOver }}[{{ $props.averageOver }}]){{ end }}
              {{- if $props.increaseOver }}[{{ $props.increaseOver }}]){{ end }}
              {{- if $props.round }}){{ end }}
            )
            {{ $props.predicate | required "alert predicate required" }}
          labels: {{- include "generic-service.alert-labels" $ | nindent 12 }}
            severity: {{ $props.severity | default "warning" | quote }}
            {{- if $props.topic }}
            topic: {{ $props.topic | quote }}
            {{- end }}
          annotations: {{- include "generic-service.alert-annotations" $ | nindent 12 }}
            {{- /* Quoted like description so values containing ": " or " #" stay valid YAML. */}}
            summary: {{ $props.summary | required "alert summary required" | quote }}
            description: {{ $props.description | required "alert description required" | quote }}
{{- end }}

{{- end }}
58 changes: 58 additions & 0 deletions charts/generic-service/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -879,6 +879,64 @@
},
"additionalProperties": false
},
"custom": {
"type": "object",
"additionalProperties": {
"type": "object",
"properties": {
"metric": {
"type": "string",
"description": "The name of the Prometheus metric exposed by the service"
},
"metricLabels": {
"type": "object",
"additionalProperties": {"type": "string"},
"description": "Labels to use for filtering the metric"
},
"aggregate": {
"type": "string",
"description": "The aggregate function to use to combine metric values from multiple replicas (e.g., max or sum)"
},
"increaseOver": {
"type": "string",
"description": "A sliding window in which to calculate the increase of the metric (e.g., 10m)"
},
"averageOver": {
"type": "string",
"description": "A sliding window in which to calculate the average value of the metric (e.g., 10m)"
},
"round": {
"type": "boolean",
"default": false,
"description": "Round the result before evaluating the predicate"
},
"predicate": {
"type": "string",
"description": "An expression that triggers the alert when the metric fulfills it"
},
"severity": {
"type": "string",
"default": "warning",
"description": "The severity of the alert"
},
"topic": {
"type": "string",
"description": "An optional topic label for the alert"
},
"summary": {
"type": "string",
"description": "A short summary of the alert"
},
"description": {
"type": "string",
"description": "A longer description of the alert; can include metric labels via templating"
}
},
"required": ["metric", "aggregate", "predicate", "summary", "description"],
"additionalProperties": false
},
"description": "Additional alerts based on Prometheus metrics exposed by the service"
},
"additionalProperties": false
}
},
Expand Down
1 change: 1 addition & 0 deletions charts/generic-service/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ alerting:
maxErrorRatio: 2.5
maxCriticalErrors: 0
criticalCodes: [Internal, Unimplemented]
custom: {}

sidecars: []
sidecarTemplates: []
Expand Down

0 comments on commit 46838ca

Please sign in to comment.