Skip to content

Commit

Permalink
test: stop all monitors before killing microvm
Browse files Browse the repository at this point in the history
If a test fails, the FCmetricsMonitor keeps running in some tests. After
we kill the microvm and remove the resources, the monitor complains it
cannot find the metrics file.

This is tricky to fix simply since we don't have a handle to the monitor
either at fixture teardown or in the Microvm class.

Add a hook in the Microvm class to register monitors, so we can stop all
of them before killing the microvm.

Also adapt existing monitors to fix this pattern.

Signed-off-by: Pablo Barbáchano <[email protected]>
  • Loading branch information
pb8o authored and andr3wy committed May 7, 2024
1 parent fe3547a commit 0cdc9bb
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 10 deletions.
9 changes: 6 additions & 3 deletions tests/framework/microvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,11 @@ def __init__(
if int(os.environ.get("PYTEST_XDIST_WORKER_COUNT", 1)) > 1:
self.time_api_requests = False

self.monitors = []
self.memory_monitor = None
if monitor_memory:
self.memory_monitor = MemoryMonitor(self)
self.monitors.append(self.memory_monitor)

self.api = None
self.log_file = None
Expand Down Expand Up @@ -237,6 +239,10 @@ def kill(self):
"""All clean up associated with this microVM should go here."""
# pylint: disable=subprocess-run-check

# Stop any registered monitors
for monitor in self.monitors:
monitor.stop()

# We start with vhost-user backends,
# because if we stop Firecracker first, the backend will want
# to exit as well and this will cause a race condition.
Expand Down Expand Up @@ -286,9 +292,6 @@ def kill(self):
self._validate_api_response_times()

if self.memory_monitor:
if self.memory_monitor.is_alive():
self.memory_monitor.signal_stop()
self.memory_monitor.join(timeout=1)
self.memory_monitor.check_samples()

def _validate_api_response_times(self):
Expand Down
16 changes: 9 additions & 7 deletions tests/host_tools/fcmetrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,7 @@ class FCMetricsMonitor(Thread):
def __init__(self, vm, timer=60):
Thread.__init__(self, daemon=True)
self.vm = vm
vm.monitors.append(self)
self.timer = timer

self.metrics_index = 0
Expand Down Expand Up @@ -538,13 +539,14 @@ def stop(self):
in sleep when stop is called and once it wakes out of sleep
the "vm" might not be avaiable to provide the metrics.
"""
self.running = False
# wait for the running thread to finish
# this should also avoid any race condition leading to
# uploading the same metrics twice
self.join()
self.vm.api.actions.put(action_type="FlushMetrics")
self._flush_metrics()
if self.is_alive():
self.running = False
# wait for the running thread to finish
# this should also avoid any race condition leading to
# uploading the same metrics twice
self.join()
self.vm.api.actions.put(action_type="FlushMetrics")
self._flush_metrics()

def run(self):
self.running = True
Expand Down
6 changes: 6 additions & 0 deletions tests/host_tools/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ def signal_stop(self):
"""Signal that the thread should stop."""
self._should_stop = True

def stop(self):
"""Stop the thread"""
if self.is_alive():
self.signal_stop()
self.join(timeout=1)

def run(self):
"""Thread for monitoring the RSS memory usage of a Firecracker process.
Expand Down

0 comments on commit 0cdc9bb

Please sign in to comment.