From 78a37066841455141e8a7177689dd95577f1f755 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 4 Sep 2024 10:31:07 +0200 Subject: [PATCH 1/3] Shutdown subscription after exactly 30 minutes Instead of relying on the progressively growing and random resubscribe interval simply check the time since the first re-subscription attempt. The resubscribe interval is chosen at random, and even in the terminal state, a resubscription potentially happens below 30 minutes (in the final progression the resubscription time is chosen at random between 27 and 92 minutes). This means that in theory we shutdown the subscription much later than actually intended. --- matter_server/server/device_controller.py | 43 +++++++++++++---------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/matter_server/server/device_controller.py b/matter_server/server/device_controller.py index bfe57795..b6f0b6dd 100644 --- a/matter_server/server/device_controller.py +++ b/matter_server/server/device_controller.py @@ -23,6 +23,7 @@ from chip.clusters.ClusterObjects import ALL_ATTRIBUTES, ALL_CLUSTERS, Cluster from chip.discovery import DiscoveryType from chip.exceptions import ChipStackError +from chip.native import PyChipError from zeroconf import BadTypeInNameException, IPVersion, ServiceStateChange, Zeroconf from zeroconf.asyncio import AsyncServiceBrowser, AsyncServiceInfo, AsyncZeroconf @@ -80,8 +81,8 @@ NODE_SUBSCRIPTION_CEILING_WIFI = 60 NODE_SUBSCRIPTION_CEILING_THREAD = 60 NODE_SUBSCRIPTION_CEILING_BATTERY_POWERED = 600 -NODE_RESUBSCRIBE_ATTEMPTS_UNAVAILABLE = 3 -NODE_RESUBSCRIBE_TIMEOUT_OFFLINE = 30 * 60 * 1000 +NODE_RESUBSCRIBE_ATTEMPTS_UNAVAILABLE = 2 +NODE_RESUBSCRIBE_TIMEOUT_OFFLINE = 30 * 60 NODE_RESUBSCRIBE_FORCE_TIMEOUT = 5 NODE_PING_TIMEOUT = 10 NODE_PING_TIMEOUT_BATTERY_POWERED = 60 @@ -150,6 +151,7 @@ def __init__( self._nodes: dict[int, MatterNodeData] = {} self._last_known_ip_addresses: dict[int, list[str]] = {} self._last_subscription_attempt: dict[int, int] = {} + self._first_resubscribe_attempt: dict[int, float] = {} self._known_commissioning_params: dict[int, CommissioningParameters] = {} self._known_commissioning_params_timers: dict[int, asyncio.TimerHandle] = {} self._aiobrowser: AsyncServiceBrowser | None = None @@ -1188,21 +1190,28 @@ def resubscription_attempted( nextResubscribeIntervalMsec: int, ) -> None: # pylint: disable=unused-argument, invalid-name + resubscription_attempt = self._last_subscription_attempt[node_id] node_logger.info( - "Previous subscription failed with Error: %s, re-subscribing in %s ms...", - terminationError, - nextResubscribeIntervalMsec, + "Subscription failed with %s, resubscription attempt %s", + str(PyChipError(code=terminationError)), + resubscription_attempt, ) - resubscription_attempt = self._last_subscription_attempt[node_id] + 1 - self._last_subscription_attempt[node_id] = resubscription_attempt + self._last_subscription_attempt[node_id] = resubscription_attempt + 1 + if resubscription_attempt == 0: + self._first_resubscribe_attempt[node_id] = time.time() # Mark node as unavailable and signal consumers. # We debounce it a bit so we only mark the node unavailable - # after some resubscription attempts and we shutdown the subscription - # if the resubscription interval exceeds 30 minutes (TTL of mdns). - # The node will be auto picked up by mdns if it's alive again. + # after some resubscription attempts. if resubscription_attempt >= NODE_RESUBSCRIBE_ATTEMPTS_UNAVAILABLE: self._node_unavailable(node_id) - if nextResubscribeIntervalMsec > NODE_RESUBSCRIBE_TIMEOUT_OFFLINE: + # Shutdown the subscription if we tried to resubscribe for more than 30 + # minutes (typical TTL of mDNS). We assume this device got powered off. + # When the device gets powered on again, it typically announced itself via + # mDNS again. The mDNS browsing code will setup the subscription again. + if ( + time.time() - self._first_resubscribe_attempt[node_id] + > NODE_RESUBSCRIBE_TIMEOUT_OFFLINE + ): asyncio.create_task(self._node_offline(node_id)) def resubscription_succeeded( @@ -1211,6 +1220,7 @@ def resubscription_succeeded( # pylint: disable=unused-argument, invalid-name node_logger.info("Re-Subscription succeeded") self._last_subscription_attempt[node_id] = 0 + self._first_resubscribe_attempt.pop(node_id, None) # mark node as available and signal consumers node = self._nodes[node_id] if not node.available: @@ -1605,15 +1615,12 @@ def _node_unavailable( async def _node_offline(self, node_id: int) -> None: """Mark node as offline.""" # shutdown existing subscriptions + node_logger = self.get_node_logger(LOGGER, node_id) + node_logger.info("Node considered offline, shutdown subscription") await self._chip_device_controller.shutdown_subscription(node_id) + # mark node as unavailable (if it wasn't already) - node = self._nodes[node_id] - if not node.available: - return # nothing to do to - node.available = False - self.server.signal_event(EventType.NODE_UPDATED, node) - node_logger = self.get_node_logger(LOGGER, node_id) - node_logger.info("Marked node as offline") + self._node_unavailable(node_id) async def _fallback_node_scanner(self) -> None: """Scan for operational nodes in the background that are missed by mdns.""" From b3c318c0a879a22a38fc629eccfea488f406e6b1 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 4 Sep 2024 12:14:16 +0200 Subject: [PATCH 2/3] Rename `_last_subscription_attempt` to `_resubscription_attempt` --- matter_server/server/device_controller.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/matter_server/server/device_controller.py b/matter_server/server/device_controller.py index b6f0b6dd..6be6832e 100644 --- a/matter_server/server/device_controller.py +++ b/matter_server/server/device_controller.py @@ -150,7 +150,7 @@ def __init__( self._node_last_seen_on_mdns: dict[int, float] = {} self._nodes: dict[int, MatterNodeData] = {} self._last_known_ip_addresses: dict[int, list[str]] = {} - self._last_subscription_attempt: dict[int, int] = {} + self._resubscription_attempt: dict[int, int] = {} self._first_resubscribe_attempt: dict[int, float] = {} self._known_commissioning_params: dict[int, CommissioningParameters] = {} self._known_commissioning_params_timers: dict[int, asyncio.TimerHandle] = {} @@ -1190,13 +1190,13 @@ def resubscription_attempted( nextResubscribeIntervalMsec: int, ) -> None: # pylint: disable=unused-argument, invalid-name - resubscription_attempt = self._last_subscription_attempt[node_id] + resubscription_attempt = self._resubscription_attempt[node_id] node_logger.info( "Subscription failed with %s, resubscription attempt %s", str(PyChipError(code=terminationError)), resubscription_attempt, ) - self._last_subscription_attempt[node_id] = resubscription_attempt + 1 + self._resubscription_attempt[node_id] = resubscription_attempt + 1 if resubscription_attempt == 0: self._first_resubscribe_attempt[node_id] = time.time() # Mark node as unavailable and signal consumers. @@ -1219,7 +1219,7 @@ def resubscription_succeeded( ) -> None: # pylint: disable=unused-argument, invalid-name node_logger.info("Re-Subscription succeeded") - self._last_subscription_attempt[node_id] = 0 + self._resubscription_attempt[node_id] = 0 self._first_resubscribe_attempt.pop(node_id, None) # mark node as available and signal consumers node = self._nodes[node_id] @@ -1243,7 +1243,7 @@ def resubscription_succeeded( interval_ceiling = NODE_SUBSCRIPTION_CEILING_BATTERY_POWERED else: interval_ceiling = NODE_SUBSCRIPTION_CEILING_THREAD - self._last_subscription_attempt[node_id] = 0 + self._resubscription_attempt[node_id] = 0 # set-up the actual subscription sub: Attribute.SubscriptionTransaction = ( await self._chip_device_controller.read_attribute( From 5940463e8de2970ae2a56f322f6135e9ee50835c Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 4 Sep 2024 12:55:10 +0200 Subject: [PATCH 3/3] Update matter_server/server/device_controller.py Co-authored-by: Marcel van der Veldt --- matter_server/server/device_controller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matter_server/server/device_controller.py b/matter_server/server/device_controller.py index 6be6832e..7d79c560 100644 --- a/matter_server/server/device_controller.py +++ b/matter_server/server/device_controller.py @@ -1206,7 +1206,7 @@ def resubscription_attempted( self._node_unavailable(node_id) # Shutdown the subscription if we tried to resubscribe for more than 30 # minutes (typical TTL of mDNS). We assume this device got powered off. - # When the device gets powered on again, it typically announced itself via + # When the device gets powered on again, it typically announces itself via # mDNS again. The mDNS browsing code will setup the subscription again. if ( time.time() - self._first_resubscribe_attempt[node_id]