From 51b8f371364e8ea8d35c2f7ebe53525dbef6281c Mon Sep 17 00:00:00 2001
From: Thomas <12561498+tn819@users.noreply.github.com>
Date: Fri, 19 Jul 2024 11:34:33 +0000
Subject: [PATCH 1/8] add a dedicated HTTP 499 alert and exclude 499 from the
 4xx alert

---
 charts/generic-service/templates/alerts.yaml | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/charts/generic-service/templates/alerts.yaml b/charts/generic-service/templates/alerts.yaml
index e694bca..fb23b28 100644
--- a/charts/generic-service/templates/alerts.yaml
+++ b/charts/generic-service/templates/alerts.yaml
@@ -165,15 +165,20 @@ spec:
             topic: ingress
           annotations: {{- include "generic-service.alert-annotations" . | nindent 12 }} HTTP responses slower than usual
             description: '{{ include "generic-service.fullname" . }} HTTP responses are {{"{{ $value }}"}}x slower in the last {{ .Values.alerting.http.sampleInterval }} than in the last {{ .Values.alerting.http.referenceInterval }}.'
-
         - alert: Http4xx
-          expr: |
-            (sum(rate({{ include "generic-service.request-code-count-metric" . }}"4.."}[{{ .Values.alerting.http.sampleInterval }}])) / sum(rate({{ include "generic-service.request-count-metric" . }}[{{ .Values.alerting.http.sampleInterval }}]))) /
-            (sum(rate({{ include "generic-service.request-code-count-metric" . }}"4.."}[{{ .Values.alerting.http.referenceInterval }}])) / sum(rate({{ include "generic-service.request-count-metric" . }}[{{ .Values.alerting.http.referenceInterval }}])))
-            > {{ .Values.alerting.http.max4xxRatio }}
+          expr: |  # 4xx-per-request ratio, sample vs. reference interval; the regex matches 400-498, i.e. every 4xx except 499
+            (sum(rate({{ include "generic-service.request-code-count-metric" . }}"4([0-8].|9[0-8])"}[{{ .Values.alerting.http.sampleInterval }}])) / sum(rate({{ include "generic-service.request-count-metric" . }}[{{ .Values.alerting.http.sampleInterval }}]))) /
+            (sum(rate({{ include "generic-service.request-code-count-metric" . }}"4([0-8].|9[0-8])"}[{{ .Values.alerting.http.referenceInterval }}])) / sum(rate({{ include "generic-service.request-count-metric" . }}[{{ .Values.alerting.http.referenceInterval }}]))) > {{ .Values.alerting.http.max4xxRatio }}
           labels: {{- include "generic-service.alert-labels" . | nindent 12 }} warning
           annotations: {{- include "generic-service.alert-annotations" . | nindent 12 }} higher HTTP 4xx rate
             description: '{{ include "generic-service.fullname" . }} gave {{"{{ $value }}"}}x more HTTP 4xx responses per request in the last {{ .Values.alerting.http.sampleInterval }} than in the last {{ .Values.alerting.http.referenceInterval }}.'
+        - alert: ClientConnectionClosed499
+          expr: |
+            (sum(rate({{ include "generic-service.request-code-count-metric" . }}"499"[{{ .Values.alerting.http.sampleInterval }}]))) / 
+            (sum(rate({{ include "generic-service.request-code-count-metric" . }}"499"[{{ .Values.alerting.http.referenceInterval }}])))) > {{ .Values.alerting.http.max4xxRatio }}
+          labels: {{- include "generic-service.alert-labels" . | nindent 12 }} warning
+          annotations: {{- include "generic-service.alert-annotations" . | nindent 12 }} higher HTTP 499 rate
+            description: '{{ include "generic-service.fullname" . }} gave {{"{{ $value }}"}}x more HTTP 499 responses per request in the last {{ .Values.alerting.http.sampleInterval }} than in the last {{ .Values.alerting.http.referenceInterval }}.'
         {{- end }}
 
         - alert: Http5xx

From 0a5f742eae382c034b8b5e9b4d7bdfc89d8479d9 Mon Sep 17 00:00:00 2001
From: Thomas <12561498+tn819@users.noreply.github.com>
Date: Wed, 24 Jul 2024 09:27:49 +0200
Subject: [PATCH 2/8] Update charts/generic-service/templates/alerts.yaml

Co-authored-by: Bastian Eicher <bastian@eicher.net>
---
 charts/generic-service/templates/alerts.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charts/generic-service/templates/alerts.yaml b/charts/generic-service/templates/alerts.yaml
index fb23b28..0cd1e38 100644
--- a/charts/generic-service/templates/alerts.yaml
+++ b/charts/generic-service/templates/alerts.yaml
@@ -172,7 +172,7 @@ spec:
           labels: {{- include "generic-service.alert-labels" . | nindent 12 }} warning
           annotations: {{- include "generic-service.alert-annotations" . | nindent 12 }} higher HTTP 4xx rate
             description: '{{ include "generic-service.fullname" . }} gave {{"{{ $value }}"}}x more HTTP 4xx responses per request in the last {{ .Values.alerting.http.sampleInterval }} than in the last {{ .Values.alerting.http.referenceInterval }}.'
-        - alert: ClientConnectionClosed499
+        - alert: HttpClientCancelled
           expr: |
             (sum(rate({{ include "generic-service.request-code-count-metric" . }}"499"[{{ .Values.alerting.http.sampleInterval }}]))) / 
             (sum(rate({{ include "generic-service.request-code-count-metric" . }}"499"[{{ .Values.alerting.http.referenceInterval }}])))) > {{ .Values.alerting.http.max4xxRatio }}

From 0df2cdf4b6e950e2f6139e909f2114454e32ab88 Mon Sep 17 00:00:00 2001
From: Thomas <12561498+tn819@users.noreply.github.com>
Date: Wed, 24 Jul 2024 09:28:17 +0200
Subject: [PATCH 3/8] Update charts/generic-service/templates/alerts.yaml

Co-authored-by: Bastian Eicher <bastian@eicher.net>
---
 charts/generic-service/templates/alerts.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/charts/generic-service/templates/alerts.yaml b/charts/generic-service/templates/alerts.yaml
index 0cd1e38..09858c7 100644
--- a/charts/generic-service/templates/alerts.yaml
+++ b/charts/generic-service/templates/alerts.yaml
@@ -177,8 +177,8 @@ spec:
             (sum(rate({{ include "generic-service.request-code-count-metric" . }}"499"[{{ .Values.alerting.http.sampleInterval }}]))) / 
             (sum(rate({{ include "generic-service.request-code-count-metric" . }}"499"[{{ .Values.alerting.http.referenceInterval }}])))) > {{ .Values.alerting.http.max4xxRatio }}
           labels: {{- include "generic-service.alert-labels" . | nindent 12 }} warning
-          annotations: {{- include "generic-service.alert-annotations" . | nindent 12 }} higher HTTP 499 rate
-            description: '{{ include "generic-service.fullname" . }} gave {{"{{ $value }}"}}x more HTTP 499 responses per request in the last {{ .Values.alerting.http.sampleInterval }} than in the last {{ .Values.alerting.http.referenceInterval }}.'
+          annotations: {{- include "generic-service.alert-annotations" . | nindent 12 }} higher HTTP client cancellation rate
+            description: '{{ include "generic-service.fullname" . }} gave a {{"{{ $value }}"}}x higher percentage of HTTP request cancelled by the client in the last {{ .Values.alerting.http.sampleInterval }} than in the last {{ .Values.alerting.http.referenceInterval }}.'
         {{- end }}
 
         - alert: Http5xx

From c841478ce0b03767b3800624eab87fd80026f951 Mon Sep 17 00:00:00 2001
From: Thomas <12561498+tn819@users.noreply.github.com>
Date: Wed, 24 Jul 2024 07:37:00 +0000
Subject: [PATCH 4/8] introduce maxClientCancellationRatio

---
 charts/generic-service/README.md             | 4 +++-
 charts/generic-service/templates/alerts.yaml | 2 +-
 charts/generic-service/values.schema.json    | 5 +++++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/charts/generic-service/README.md b/charts/generic-service/README.md
index def47a2..a972b47 100644
--- a/charts/generic-service/README.md
+++ b/charts/generic-service/README.md
@@ -156,7 +156,9 @@ app:
 | `alerting.http.sampleInterval`                  | `20m`                       | The time interval in which to measure HTTP responses for triggering alerts                               |
 | `alerting.http.referenceInterval`               | `1w`                        | The time interval to compare with the sample interval to detect changes                                  |
 | `alerting.http.maxSlowdown`                     | `2.5`                       | The maximum HTTP response slowdown in the sample interval compared to the reference interval             |
-| `alerting.http.max4xxRatio`                     | `2.5`                       | The maximum HTTP 4xx ratio increase in the sample interval compared to the reference interval            |
+| `alerting.http.max4xxRatio`                     | `2.5`                       | The maximum HTTP 4xx ratio increase (except 499) in the sample interval compared to the reference interval            |
+| `alerting.http.maxClientCancellationRatio`      | `0`                         | The maximum client cancellation (HTTP 499) ratio increase in the sample interval compared to the reference interval            |
+| `alerting.http.maxTimeoutCount`                 | `0`                         | The maximum number of HTTP gateway timeout responses (504) in the sample interval  
 | `alerting.http.max5xxCount`                     | `0`                         | The maximum number of HTTP 5xx responses (except 504) in the sample interval                             |
 | `alerting.http.maxTimeoutCount`                 | `0`                         | The maximum number of HTTP gateway timeout responses (504) in the sample interval                        |
 | `alerting.grpc.requestsMetric`                  | `grpc_server_handled_total` | The name of the Prometheus metric counting gRPC requests                                                 |
diff --git a/charts/generic-service/templates/alerts.yaml b/charts/generic-service/templates/alerts.yaml
index 09858c7..72b24d7 100644
--- a/charts/generic-service/templates/alerts.yaml
+++ b/charts/generic-service/templates/alerts.yaml
@@ -175,7 +175,7 @@ spec:
         - alert: HttpClientCancelled
-          expr: |
-            (sum(rate({{ include "generic-service.request-code-count-metric" . }}"499"[{{ .Values.alerting.http.sampleInterval }}]))) / 
-            (sum(rate({{ include "generic-service.request-code-count-metric" . }}"499"[{{ .Values.alerting.http.referenceInterval }}])))) > {{ .Values.alerting.http.max4xxRatio }}
+          expr: |  # share of requests answered with HTTP 499 (client cancelled), sample vs. reference interval
+            (sum(rate({{ include "generic-service.request-code-count-metric" . }}"499"}[{{ .Values.alerting.http.sampleInterval }}])) / sum(rate({{ include "generic-service.request-count-metric" . }}[{{ .Values.alerting.http.sampleInterval }}]))) /
+            (sum(rate({{ include "generic-service.request-code-count-metric" . }}"499"}[{{ .Values.alerting.http.referenceInterval }}])) / sum(rate({{ include "generic-service.request-count-metric" . }}[{{ .Values.alerting.http.referenceInterval }}]))) > {{ .Values.alerting.http.maxClientCancellationRatio }}
           labels: {{- include "generic-service.alert-labels" . | nindent 12 }} warning
           annotations: {{- include "generic-service.alert-annotations" . | nindent 12 }} higher HTTP client cancellation rate
             description: '{{ include "generic-service.fullname" . }} gave a {{"{{ $value }}"}}x higher percentage of HTTP request cancelled by the client in the last {{ .Values.alerting.http.sampleInterval }} than in the last {{ .Values.alerting.http.referenceInterval }}.'
diff --git a/charts/generic-service/values.schema.json b/charts/generic-service/values.schema.json
index f10b3ad..0ad6070 100644
--- a/charts/generic-service/values.schema.json
+++ b/charts/generic-service/values.schema.json
@@ -886,6 +886,11 @@
               "default": 2.5,
               "description": "The maximum HTTP 4xx ratio increase in the sample interval compared to the reference interval"
             },
+            "maxClientCancellationRatio": {
+              "type": "number",
+              "default": 2.5,
+              "description": "The maximum client cancellation (HTTP 499) ratio increase in the sample interval compared to the reference interval"
+            },
             "max5xxCount": {
               "type": "number",
               "default": 0,

From 3977f6383dfd1730ba4a74a53ce8977edc106b69 Mon Sep 17 00:00:00 2001
From: Thomas <12561498+tn819@users.noreply.github.com>
Date: Mon, 29 Jul 2024 09:45:37 +0200
Subject: [PATCH 5/8] Update charts/generic-service/README.md

Co-authored-by: Bastian Eicher <bastian@eicher.net>
---
 charts/generic-service/README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/charts/generic-service/README.md b/charts/generic-service/README.md
index a972b47..39799a4 100644
--- a/charts/generic-service/README.md
+++ b/charts/generic-service/README.md
@@ -158,7 +158,6 @@ app:
 | `alerting.http.maxSlowdown`                     | `2.5`                       | The maximum HTTP response slowdown in the sample interval compared to the reference interval             |
 | `alerting.http.max4xxRatio`                     | `2.5`                       | The maximum HTTP 4xx ratio increase (except 499) in the sample interval compared to the reference interval            |
 | `alerting.http.maxClientCancellationRatio`      | `0`                         | The maximum client cancellation (HTTP 499) ratio increase in the sample interval compared to the reference interval            |
-| `alerting.http.maxTimeoutCount`                 | `0`                         | The maximum number of HTTP gateway timeout responses (504) in the sample interval  
 | `alerting.http.max5xxCount`                     | `0`                         | The maximum number of HTTP 5xx responses (except 504) in the sample interval                             |
 | `alerting.http.maxTimeoutCount`                 | `0`                         | The maximum number of HTTP gateway timeout responses (504) in the sample interval                        |
 | `alerting.grpc.requestsMetric`                  | `grpc_server_handled_total` | The name of the Prometheus metric counting gRPC requests                                                 |

From a82373eeb162a6ed7026aa01bcbd3fbbc54541e5 Mon Sep 17 00:00:00 2001
From: Thomas <12561498+tn819@users.noreply.github.com>
Date: Mon, 29 Jul 2024 09:45:59 +0200
Subject: [PATCH 6/8] Update charts/generic-service/README.md

Co-authored-by: Bastian Eicher <bastian@eicher.net>
---
 charts/generic-service/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/charts/generic-service/README.md b/charts/generic-service/README.md
index 39799a4..c5bb2e0 100644
--- a/charts/generic-service/README.md
+++ b/charts/generic-service/README.md
@@ -156,8 +156,8 @@ app:
 | `alerting.http.sampleInterval`                  | `20m`                       | The time interval in which to measure HTTP responses for triggering alerts                               |
 | `alerting.http.referenceInterval`               | `1w`                        | The time interval to compare with the sample interval to detect changes                                  |
 | `alerting.http.maxSlowdown`                     | `2.5`                       | The maximum HTTP response slowdown in the sample interval compared to the reference interval             |
-| `alerting.http.max4xxRatio`                     | `2.5`                       | The maximum HTTP 4xx ratio increase (except 499) in the sample interval compared to the reference interval            |
-| `alerting.http.maxClientCancellationRatio`      | `0`                         | The maximum client cancellation (HTTP 499) ratio increase in the sample interval compared to the reference interval            |
+| `alerting.http.max4xxRatio`                     | `2.5`                       | The maximum HTTP 4xx ratio increase in the sample interval compared to the reference interval            |
+| `alerting.http.maxClientCancellationRatio`      | `2.5`                       | The maximum client cancellation ratio increase in the sample interval compared to the reference interval |
 | `alerting.http.max5xxCount`                     | `0`                         | The maximum number of HTTP 5xx responses (except 504) in the sample interval                             |
 | `alerting.http.maxTimeoutCount`                 | `0`                         | The maximum number of HTTP gateway timeout responses (504) in the sample interval                        |
 | `alerting.grpc.requestsMetric`                  | `grpc_server_handled_total` | The name of the Prometheus metric counting gRPC requests                                                 |

From 385bd6aaa4164f9b4d94a70daffe10eae4383763 Mon Sep 17 00:00:00 2001
From: Thomas <12561498+tn819@users.noreply.github.com>
Date: Mon, 29 Jul 2024 09:46:06 +0200
Subject: [PATCH 7/8] Update charts/generic-service/values.schema.json

Co-authored-by: Bastian Eicher <bastian@eicher.net>
---
 charts/generic-service/values.schema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charts/generic-service/values.schema.json b/charts/generic-service/values.schema.json
index 0ad6070..27390bb 100644
--- a/charts/generic-service/values.schema.json
+++ b/charts/generic-service/values.schema.json
@@ -889,7 +889,7 @@
             "maxClientCancellationRatio": {
               "type": "number",
               "default": 2.5,
-              "description": "The maximum client cancellation (HTTP 499) ratio increase in the sample interval compared to the reference interval"
+              "description": "The maximum client cancellation ratio increase in the sample interval compared to the reference interval"
             },
             "max5xxCount": {
               "type": "number",

From c68095aafa1e6e5dce2812fd4f20c2c7e490704b Mon Sep 17 00:00:00 2001
From: Thomas <12561498+tn819@users.noreply.github.com>
Date: Mon, 29 Jul 2024 11:33:06 +0000
Subject: [PATCH 8/8] add maxClientCancellationRatio default to values.yaml

---
 charts/generic-service/values.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/charts/generic-service/values.yaml b/charts/generic-service/values.yaml
index 3d52b02..3c2b733 100644
--- a/charts/generic-service/values.yaml
+++ b/charts/generic-service/values.yaml
@@ -172,6 +172,7 @@ alerting:
     referenceInterval: 1w
     maxSlowdown: 2.5
     max4xxRatio: 2.5
+    maxClientCancellationRatio: 2.5
     max5xxCount: 0
     maxTimeoutCount: 0
   grpc: