From 0cdc9bbc967b14a62dba26b1d80a3eeff7d8dce2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Mon, 6 May 2024 15:57:54 +0200 Subject: [PATCH] test: stop all monitors before killing microvm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a test fails, the FCmetricsMonitor keeps running in some tests. After we kill the microvm and remove the resources, the monitor complains it cannot find the metrics file. This is tricky to fix simply since we don't have a handle to the monitor either at fixture teardown or in the Microvm class. Add a hook in the Microvm class to register monitors, so we can stop all of them before killing the microvm. Also adapt existing monitors to fix this pattern. Signed-off-by: Pablo Barbáchano --- tests/framework/microvm.py | 9 ++++++--- tests/host_tools/fcmetrics.py | 16 +++++++++------- tests/host_tools/memory.py | 6 ++++++ 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 538ae1704a52..fccf032be893 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -204,9 +204,11 @@ def __init__( if int(os.environ.get("PYTEST_XDIST_WORKER_COUNT", 1)) > 1: self.time_api_requests = False + self.monitors = [] self.memory_monitor = None if monitor_memory: self.memory_monitor = MemoryMonitor(self) + self.monitors.append(self.memory_monitor) self.api = None self.log_file = None @@ -237,6 +239,10 @@ def kill(self): """All clean up associated with this microVM should go here.""" # pylint: disable=subprocess-run-check + # Stop any registered monitors + for monitor in self.monitors: + monitor.stop() + # We start with vhost-user backends, # because if we stop Firecracker first, the backend will want # to exit as well and this will cause a race condition. @@ -286,9 +292,6 @@ def kill(self): self._validate_api_response_times() if self.memory_monitor: - if self.memory_monitor.is_alive(): - self.memory_monitor.signal_stop() - self.memory_monitor.join(timeout=1) self.memory_monitor.check_samples() def _validate_api_response_times(self): diff --git a/tests/host_tools/fcmetrics.py b/tests/host_tools/fcmetrics.py index 1306c22bf59c..53caf4d4adcd 100644 --- a/tests/host_tools/fcmetrics.py +++ b/tests/host_tools/fcmetrics.py @@ -500,6 +500,7 @@ class FCMetricsMonitor(Thread): def __init__(self, vm, timer=60): Thread.__init__(self, daemon=True) self.vm = vm + vm.monitors.append(self) self.timer = timer self.metrics_index = 0 @@ -538,13 +539,14 @@ def stop(self): in sleep when stop is called and once it wakes out of sleep the "vm" might not be avaiable to provide the metrics. """ - self.running = False - # wait for the running thread to finish - # this should also avoid any race condition leading to - # uploading the same metrics twice - self.join() - self.vm.api.actions.put(action_type="FlushMetrics") - self._flush_metrics() + if self.is_alive(): + self.running = False + # wait for the running thread to finish + # this should also avoid any race condition leading to + # uploading the same metrics twice + self.join() + self.vm.api.actions.put(action_type="FlushMetrics") + self._flush_metrics() def run(self): self.running = True diff --git a/tests/host_tools/memory.py b/tests/host_tools/memory.py index fe9e7af931ea..690cda387016 100644 --- a/tests/host_tools/memory.py +++ b/tests/host_tools/memory.py @@ -46,6 +46,12 @@ def signal_stop(self): """Signal that the thread should stop.""" self._should_stop = True + def stop(self): + """Stop the thread""" + if self.is_alive(): + self.signal_stop() + self.join(timeout=1) + def run(self): """Thread for monitoring the RSS memory usage of a Firecracker process.