diff --git a/matter_server/server/device_controller.py b/matter_server/server/device_controller.py index 2b8afff8..28f8ff14 100644 --- a/matter_server/server/device_controller.py +++ b/matter_server/server/device_controller.py @@ -72,6 +72,7 @@ NODE_PING_TIMEOUT = 10 NODE_PING_TIMEOUT_BATTERY_POWERED = 60 NODE_MDNS_BACKOFF = 60 +FALLBACK_NODE_SCANNER_INTERVAL = 1800 MDNS_TYPE_OPERATIONAL_NODE = "_matter._tcp.local." MDNS_TYPE_COMMISSIONABLE_NODE = "_matterc._udp.local." @@ -113,6 +114,7 @@ def __init__( self._aiobrowser: AsyncServiceBrowser | None = None self._aiozc: AsyncZeroconf | None = None self._sdk_executor = ThreadPoolExecutor(max_workers=1) + self._fallback_node_scanner_timer: asyncio.TimerHandle | None = None async def initialize(self) -> None: """Async initialize of controller.""" @@ -174,20 +176,24 @@ async def start(self) -> None: services, handlers=[self._on_mdns_service_state_change], ) + # set-up fallback node scanner + asyncio.create_task(self._fallback_node_scanner()) async def stop(self) -> None: """Handle logic on server stop.""" if self.chip_controller is None: raise RuntimeError("Device Controller not initialized.") - # unsubscribe all node subscriptions - for sub in self._subscriptions.values(): - await self._call_sdk(sub.Shutdown) - self._subscriptions = {} - # shutdown (and cleanup) mdns browser + # shutdown (and cleanup) mdns browser and fallback node scanner if self._aiobrowser: await self._aiobrowser.async_cancel() + if self._fallback_node_scanner_timer: + self._fallback_node_scanner_timer.cancel() if self._aiozc: await self._aiozc.async_close() + # unsubscribe all node subscriptions + for sub in self._subscriptions.values(): + await self._call_sdk(sub.Shutdown) + self._subscriptions = {} # shutdown the sdk device controller await self._call_sdk(self.chip_controller.Shutdown) LOGGER.debug("Stopped.") @@ -1250,3 +1256,30 @@ async def _node_offline(self, node_id: int) -> None: node.available = False 
# Tail of `_node_offline` (hunk context), carried over unchanged.
self.server.signal_event(EventType.NODE_UPDATED, node)
LOGGER.info("Marked node %s as offline", node_id)


async def _fallback_node_scanner(self) -> None:
    """Scan for operational nodes in the background that are missed by mdns.

    One pass walks all known nodes and pings each node that is marked
    unavailable and whose ``_node_last_seen`` timestamp is missing or at
    least ``FALLBACK_NODE_SCANNER_INTERVAL`` seconds old. A node that
    answers the ping is handed to ``_setup_node``. When the pass is done,
    the next pass is scheduled on the server loop after the same interval
    and the timer handle is stored in ``_fallback_node_scanner_timer``.
    """
    # This code could/should be removed in the future and is added to have a fallback
    # to discover operational nodes that got somehow missed by zeroconf.
    # The issue in zeroconf is being investigated and in the meanwhile we have this fallback.

    # Iterate over a snapshot: the loop body awaits, so other tasks can add or
    # remove nodes in the meantime, and iterating a dict that changes size
    # raises RuntimeError.
    for node_id, node in list(self._nodes.items()):
        if node.available:
            continue
        now = time.time()
        last_seen = self._node_last_seen.get(node_id, 0)
        if now - last_seen < FALLBACK_NODE_SCANNER_INTERVAL:
            continue
        try:
            if await self.ping_node(node_id, attempts=3):
                LOGGER.info("Node %s discovered using fallback ping", node_id)
                await self._setup_node(node_id)
        except Exception:
            # This coroutine runs as a fire-and-forget task, so nobody awaits
            # its result. Without this boundary a single failing node would
            # abort the pass before the reschedule below and the fallback
            # scanner would never run again. CancelledError is a
            # BaseException and still propagates.
            LOGGER.exception("Fallback scan failed for node %s", node_id)

    def reschedule_self() -> None:
        # The timer has fired, so the stored handle is stale; clear it before
        # starting the next pass.
        self._fallback_node_scanner_timer = None
        asyncio.create_task(self._fallback_node_scanner())

    # reschedule task to run at next interval
    # NOTE(review): while a pass is running the timer attribute is None, so
    # `stop()` has nothing to cancel and this call re-arms the timer after
    # shutdown - confirm whether a "stopping" guard is needed.
    if TYPE_CHECKING:
        assert self.server.loop
    self._fallback_node_scanner_timer = self.server.loop.call_later(
        FALLBACK_NODE_SCANNER_INTERVAL, reschedule_self
    )