From 4d1e0527af17f60d4a794dcb2ecf410d36981669 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= <pablob@amazon.com>
Date: Mon, 15 Apr 2024 16:58:28 +0200
Subject: [PATCH] test: add a global dimension to test failure metric
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current test failure metrics have high cardinality because they
include the test name. This makes it hard to visualize them in
CloudWatch since we run into aggregation limits.

It would be nice to have one global failure rate, plus separate views
per host kernel and per CPU type.

This adds 10 new metrics (6 per-CPU + 3 per-host-kernel + 1 global),
so it is not too costly.

Signed-off-by: Pablo Barbáchano <pablob@amazon.com>
---
 tests/conftest.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 4d6764856fb8..bcd9c1f69802 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -117,17 +117,24 @@ def record_props(request, record_property):
 def pytest_runtest_logreport(report):
     """Send general test metrics to CloudWatch"""
     if report.when == "call":
-        dimensions = {
-            "test": report.nodeid,
-            "instance": global_props.instance,
-            "cpu_model": global_props.cpu_model,
-            "host_kernel": "linux-" + global_props.host_linux_version,
-        }
+        METRICS.set_dimensions(
+            {
+                "test": report.nodeid,
+                "instance": global_props.instance,
+                "cpu_model": global_props.cpu_model,
+                "host_kernel": "linux-" + global_props.host_linux_version,
+            },
+            # per host kernel
+            {"host_kernel": "linux-" + global_props.host_linux_version},
+            # per CPU
+            {"cpu_model": global_props.cpu_model},
+            # and global
+            {},
+        )
         METRICS.set_property("result", report.outcome)
         METRICS.set_property("location", report.location)
         for prop_name, prop_val in report.user_properties:
             METRICS.set_property(prop_name, prop_val)
-        METRICS.set_dimensions(dimensions)
         METRICS.put_metric(
             "duration",
             report.duration,