Messenger Interface rework #357

Open: wants to merge 55 commits into base: dev
Changes from 42 commits (55 commits total)

Commits:
70306f4
poetry.lock update
ZergLev May 6, 2024
af90b04
trying graceful termination
ZergLev May 6, 2024
9d7ecf0
graceful termination is done exclusively within the interface class (…
ZergLev May 8, 2024
0c377f1
mistake fixed
ZergLev May 8, 2024
8360976
sigint handling moved to pipeline + custom stop funcs added
ZergLev May 13, 2024
8476b16
feature works now
ZergLev May 13, 2024
4d83016
formatted code
ZergLev May 13, 2024
2e0dedc
fixed typo
ZergLev May 13, 2024
93b5359
interfaces enhanced with asyncio, changes to graceful termination added
ZergLev May 16, 2024
d759fba
added a check to signal handler(unfinished)
ZergLev May 16, 2024
01b0002
formatted with poetry
ZergLev May 16, 2024
52eb5bf
PollingMessengerInterface() overhaul draft
ZergLev May 22, 2024
88753a8
formatted with poetry
ZergLev May 22, 2024
5f98eda
refactor
ZergLev May 22, 2024
591bd8f
testing, but connect() doesn't work
ZergLev May 23, 2024
fd89b12
writing tests
ZergLev May 24, 2024
98a13b4
old unit-tests work with this now, a few mistakes fixed
ZergLev May 27, 2024
9889199
telegram bot works, but graceful termination apparently does not
ZergLev May 27, 2024
e956cc3
Trying the echo test
ZergLev Jun 16, 2024
e238432
Trying the echo test
ZergLev Jun 16, 2024
1f88e47
Trying the echo test
ZergLev Jun 16, 2024
97cec27
echo test draft (doesn't launch)
ZergLev Jun 16, 2024
e601ed6
first test works, several bug fixes
ZergLev Jun 19, 2024
7d8c68c
ContextLock() test added
ZergLev Jun 19, 2024
8ebf6ec
comments changed
ZergLev Jun 19, 2024
e366a61
debug output removed
ZergLev Jun 19, 2024
09fe10f
confusing comment removed
ZergLev Jun 19, 2024
0cecb80
comment changes
ZergLev Jun 19, 2024
1c24131
new tests moved to a separate file
ZergLev Jun 20, 2024
b49dac7
more tests added
ZergLev Jun 20, 2024
876ce8d
poll_timeout added + test changed
ZergLev Jun 21, 2024
f91910b
typo corrected
ZergLev Jun 21, 2024
585a37d
add siginthandler to async loop
RLKRo Jun 21, 2024
4180e17
fix test class
RLKRo Jun 21, 2024
0059bd3
adding worker timeouts and cleanup
ZergLev Jun 26, 2024
c3f18a7
Merge branch 'feat/graceful_termination' of https://github.com/ZergLe…
ZergLev Jun 26, 2024
65329f8
new _worker() seems to be working (it's awaited)
ZergLev Jun 26, 2024
30369f6
all tests but one working, can't call shutdown()
ZergLev Jun 26, 2024
ce9ac81
all tests working
ZergLev Jun 26, 2024
c765c18
ContextLock() moved to pipeline.py
ZergLev Jul 1, 2024
83ebe7f
formatted with poetry
ZergLev Jul 1, 2024
b99c4eb
Merge branch 'dev' into feat/graceful_termination
RLKRo Aug 2, 2024
8715c2f
Update tests/messengers/common/test_messenger_interface.py
ZergLev Aug 2, 2024
a829cf5
Update chatsky/messengers/common/interface.py
ZergLev Aug 5, 2024
16b049a
review changes started, bugs appeared
ZergLev Aug 7, 2024
a8607c6
Merge branch 'feat/graceful_termination' of https://github.com/ZergLe…
ZergLev Aug 7, 2024
5656437
moving graceful termination to pipeline, windows support added back i…
ZergLev Aug 16, 2024
cd16255
in the process of fixing bugs, docs partially added
ZergLev Aug 16, 2024
3282d18
new LongpollingMessengerInterface drafted + removing run_in_foregroun…
ZergLev Aug 21, 2024
b2140a5
Merge branch 'dev' into feat/graceful_termination
ZergLev Aug 21, 2024
6fde6c3
lint
ZergLev Aug 21, 2024
2529537
Merge branch 'feat/graceful_termination' of https://github.com/ZergLe…
ZergLev Aug 21, 2024
6334b8e
lint
ZergLev Aug 21, 2024
9a2381a
fully removed run_in_foreground, some changes to graceful termination
ZergLev Aug 23, 2024
b8c99d9
in the process of switching to BaseModel + other changes
ZergLev Aug 23, 2024
209 changes: 167 additions & 42 deletions chatsky/messengers/common/interface.py
@@ -11,13 +11,19 @@
import logging
from pathlib import Path
from tempfile import gettempdir
import signal
from functools import partial
import time # Don't forget to remove this
import contextlib

from typing import Optional, Any, List, Tuple, Hashable, TYPE_CHECKING, Type

if TYPE_CHECKING:
from chatsky.script import Context, Message
from chatsky.pipeline.types import PipelineRunnerFunction
from chatsky.messengers.common.types import PollingInterfaceLoopFunction
from chatsky.script.core.message import Attachment
from chatsky.pipeline.pipeline.pipeline import Pipeline

logger = logging.getLogger(__name__)

@@ -28,8 +34,15 @@ class MessengerInterface(abc.ABC):
It is responsible for connection between user and pipeline, as well as for request-response transactions.
"""

def __init__(self):
self.task = None
self.running_in_foreground = False
self.running = True
self.stopped = False
self.shielded = False # This determines whether the interface wants to be shut down with task.cancel() or just switching a flag. Let's say PollingMessengerInterface wants task.cancel()

@abc.abstractmethod
async def connect(self, pipeline_runner: PipelineRunnerFunction):
async def connect(self, *args):
"""
Method invoked when message interface is instantiated and connection is established.
May be used for sending an introduction message or displaying general bot information.
@@ -39,6 +52,50 @@ async def connect(self, pipeline_runner: PipelineRunnerFunction):
"""
raise NotImplementedError

# This is an optional method, so no need to make it abstract, I think.
async def cleanup(self, *args):
pass

async def run_in_foreground(
self, pipeline: Pipeline, loop: PollingInterfaceLoopFunction = lambda: True, timeout: float = 0, *args
):
Member:

Do we need this method?
Maybe the pipeline should create a task from the interface and await it on its own?
We are going to add support for multiple messenger interfaces, and I think the API for using one or multiple interfaces shouldn't differ.

Collaborator Author (@ZergLev, Aug 21, 2024):

I'm for removing it, but I'm not sure how to do this properly. Say this code is moved to pipeline.py, where connect() is called from a_run(). Then tests will want to call connect() directly if they want to pass parameters, which means all the code in pipeline.a_run() would be bypassed. Basically, we aren't giving the user an option to both run the code from pipeline.run() and pass their own parameters into connect().
What if all those parameters were passed as MessengerInterface constructor parameters? I'm not sure, though; these don't really look like constructor parameters, and it seems a bit off. If we do go through with this, Pydantic's BaseModel may be useful.
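
To make the alternative concrete, here is a rough sketch (not part of this diff; all names are hypothetical) of how polling parameters could live on the interface itself, so that the pipeline creates and awaits one task per interface and one or many interfaces look the same:

import asyncio


class SketchPollingInterface:
    def __init__(self, poll_timeout: float = 10.0, number_of_workers: int = 2):
        # Parameters previously passed to connect() become constructor fields.
        self.poll_timeout = poll_timeout
        self.number_of_workers = number_of_workers

    async def connect(self, pipeline_runner):
        ...  # the polling loop would live here, reading self.poll_timeout etc.


class SketchPipeline:
    def __init__(self, interfaces):
        self.interfaces = interfaces

    async def _run_pipeline(self, request, ctx_id=None):
        ...

    async def a_run(self):
        # The pipeline owns the interface tasks, so multi-interface support
        # and shutdown can be handled in one place.
        tasks = [asyncio.create_task(iface.connect(self._run_pipeline)) for iface in self.interfaces]
        await asyncio.gather(*tasks)
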

self.running_in_foreground = True
self.pipeline = pipeline

async_loop = asyncio.get_running_loop()
async_loop.add_signal_handler(signal.SIGINT, partial(pipeline.sigint_handler, async_loop))
# TO-DO: Clean this up and/or think this through (connect() methods are different for various MessengerInterface() classes)
if isinstance(self.pipeline.messenger_interface, PollingMessengerInterface):
self.task = asyncio.create_task(self.connect(loop=loop, timeout=timeout, *args))
elif isinstance(self.pipeline.messenger_interface, CallbackMessengerInterface):
self.task = asyncio.create_task(self.connect(self.pipeline._run_pipeline, *args))
else:
self.task = asyncio.create_task(self.connect(self.pipeline._run_pipeline, *args))

try:
await self.task
except asyncio.CancelledError:
await asyncio.sleep(0)
Member:

Add a comment on why we need to sleep here.

Collaborator Author (@ZergLev):

I'm not sure why myself. I think it's there so that self.running can be set to False from the shutdown() method, ending the polling_loop. I'll try changing that line to self.running = False and see what happens.
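
As an aside (my reading, not stated in the thread): await asyncio.sleep(0) simply yields control to the event loop once, so any already-scheduled task, such as a shutdown() coroutine flipping self.running, gets a chance to run before execution continues here. A tiny self-contained illustration:

import asyncio


async def flip_flag(state: dict):
    state["running"] = False


async def main():
    state = {"running": True}
    asyncio.get_running_loop().create_task(flip_flag(state))
    await asyncio.sleep(0)  # yield once so the task scheduled above can run
    print(state["running"])  # prints False


asyncio.run(main())
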

await self.cleanup()

self.stopped = True

# Placeholder for any cleanup code.

# I can make shutdown() work for PollingMessengerInterface, but I don't know the structure of Telegram Messenger Interfaces. Right now, this ends the main task and sets a flag self.running to False, so that any async tasks in loops can see that and turn off as soon as they are done.
async def shutdown(self):
logger.info(f"messenger_interface.shutdown() called - shutting down interface")
self.running = False
self.task.cancel()
try:
await self.task
except asyncio.CancelledError:
# raise asyncio.CancelledError
# await asyncio.sleep(0)
if not self.stopped:
raise asyncio.CancelledError
logger.info(f"{type(self).__name__} has stopped working - SIGINT received")


class MessengerInterfaceWithAttachments(MessengerInterface, abc.ABC):
"""
@@ -94,74 +151,142 @@ class PollingMessengerInterface(MessengerInterface):
Polling message interface runs in a loop, constantly asking users for a new input.
"""

def __init__(self):
self.request_queue = asyncio.Queue()
self.cancel_on_shutdown = True # Would like task.cancel(). (Not done yet)
self.number_of_workers = 2
# Could make this an argument of connect(), but people can just type interface.number_of_workers = their_number before creating pipeline. Interface features like timeouts could be a tutorial, actually. But it's not really necessary or in demand.
Member:

Maybe make Messenger Interfaces BaseModel to make handling these flags easier?
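
A possible shape for that, purely as an illustration (field names and defaults are assumed, not taken from this PR):

from pydantic import BaseModel


class SketchPollingInterfaceSettings(BaseModel):
    # Flags and tuning knobs become validated, documented fields
    # instead of ad-hoc attributes assigned in __init__.
    running: bool = True
    stopped: bool = False
    shielded: bool = False
    number_of_workers: int = 2
    poll_timeout: float = 10.0
    worker_timeout: float = 60.0

The interface could then either inherit from such a model or hold one as a settings attribute.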

self._worker_tasks = []
super().__init__()

@abc.abstractmethod
def _request(self) -> List[Tuple[Message, Hashable]]:
async def _respond(self, ctx_id, last_response):
"""
Method used for sending users request for their input.
Method used for sending users responses for their last input.

:return: A list of tuples: user inputs and context ids (any user ids) associated with the inputs.
:param ctx_id: Context id, specifies the user id. Without multiple messenger interfaces it's basically a redundant parameter, because this function is just a more complex `print(last_response)`. (Change before merge)
:param last_response: Latest response from the pipeline which should be relayed to the specified user.
"""
raise NotImplementedError

@abc.abstractmethod
def _respond(self, responses: List[Context]):
async def _process_request(self, ctx_id, update: Message, pipeline: Pipeline):
"""
Method used for sending users responses for their last input.
Process a new update for ctx.
"""
context = await pipeline._run_pipeline(update, ctx_id)
await self._respond(ctx_id, context.last_response)

:param responses: A list of contexts, representing dialogs with the users;
`last_response`, `id` and some dialog info can be extracted from there.
async def _worker_job(self):
"""
raise NotImplementedError
Obtain Lock over the current context,
Process the update and send it.
"""
request = await self.request_queue.get()
if request is not None:
(ctx_id, update) = request
async with self.pipeline.context_lock[ctx_id]: # get exclusive access to this context among interfaces
# Trying to see if _process_request works at all. Looks like it does it just fine, actually
# await self._process_request(ctx_id, update, self.pipeline)
# Doesn't work in a thread for some reason - it goes into an infinite cycle.
# """
await asyncio.to_thread( # [optional] execute in a separate thread to avoid blocking
self._process_request, ctx_id, update, self.pipeline
)
Member:

Does to_thread work?
Clean up this code.

Collaborator Author (@ZergLev):

It doesn't. It just goes into an infinite cycle with no outputs. I guess I could look through the log file, but I haven't done that yet.
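
One likely contributor, offered as a guess rather than a confirmed diagnosis: _process_request is an async def, while asyncio.to_thread expects a regular blocking callable. Handing it a coroutine function only creates a coroutine object inside the worker thread and never awaits it, so the pipeline run never actually happens. A sketch of the two usual patterns (some_blocking_function below is a placeholder, not a real helper):

# Either await the coroutine directly on the event loop (as in the commented-out line above):
await self._process_request(ctx_id, update, self.pipeline)
# ...or, if a separate thread is truly needed, hand to_thread a synchronous callable:
result = await asyncio.to_thread(some_blocking_function, ctx_id, update)
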

# """
return False
else:
return True

def _on_exception(self, e: BaseException):
# This worker doesn't save the request and basically deletes it from the queue if it can't process it. An option to save the request may be fitting, perhaps with a limited number of retries.
async def _worker(self, worker_timeout: float):
while self.running or not self.request_queue.empty():
try:
no_more_jobs = await asyncio.wait_for(self._worker_job(), timeout=worker_timeout)
if no_more_jobs:
logger.info(
f"Worker finished working - stop signal received and remaining requests have been processed."
)
# This logging is incorrect right now, request queue running out isn't handled and it's mistakenly called a stop signal.
break
except TimeoutError:
# If there are simply no requests coming in, the worker will keep emitting this log message,
# which looks really bad.
logger.info("worker just timed out. A request *may* have been lost.")

@abc.abstractmethod
async def _get_updates(self) -> list[tuple[Any, Message]]:
"""
Method that is called on polling cycle exceptions, in some cases it should show users the exception.
By default, it logs all exit exceptions to `info` log and all non-exit exceptions to `error`.
Obtain updates from another server

:param e: The exception.
Example:
self.bot.request_updates()
"""
if isinstance(e, Exception):
logger.error(f"Exception in {type(self).__name__} loop!", exc_info=e)
else:
logger.info(f"{type(self).__name__} has stopped polling.")

async def _polling_job(self, poll_timeout: float):
try:
coroutine = asyncio.wait_for(self._get_updates(), timeout=poll_timeout)
received_updates = await coroutine
if received_updates is not None:
for update in received_updates:
await self.request_queue.put(update)
except TimeoutError:
# self.shutdown()
# Shutting down is probably too extreme, unless it's several times in a row maybe.
logger.info("polling_job failed - timed out")

async def _polling_loop(
self,
pipeline_runner: PipelineRunnerFunction,
loop: PollingInterfaceLoopFunction = lambda: True,
poll_timeout: float = None,
timeout: float = 0,
):
"""
Method running the request - response cycle once.
"""
user_updates = self._request()
responses = [await pipeline_runner(request, ctx_id) for request, ctx_id in user_updates]
self._respond(responses)
await asyncio.sleep(timeout)
try:
while loop() and self.running:
await asyncio.shield(self._polling_job(poll_timeout)) # shield from cancellation
await asyncio.sleep(timeout)
finally:
self.running = False
print("loop ending")
logger.info(
f"polling_loop stopped working - either the stop signal was received or the loop() condition was false."
)
# When there are no more jobs or a stop signal is received, a special 'None' request is sent to the queue
# (one for each worker, matching self.number_of_workers) to shut the workers down.
for i in range(self.number_of_workers):
self.request_queue.put_nowait(None)

async def connect(
self,
pipeline_runner: PipelineRunnerFunction,
loop: PollingInterfaceLoopFunction = lambda: True,
poll_timeout: float = None,
worker_timeout: float = None,
timeout: float = 0,
):
"""
Method, running a request - response cycle in a loop.
The looping behavior is determined by `loop` and `timeout`,
for most cases the loop itself shouldn't be overridden.
# Saving strong references to workers, so that they can be cleaned up properly.
# shield() creates a task just like create_task()
for i in range(self.number_of_workers):
task = asyncio.shield(self._worker(worker_timeout))
self._worker_tasks.append(task)
await self._polling_loop(loop=loop, poll_timeout=poll_timeout, timeout=timeout)

# Workers for PollingMessengerInterface are awaited here.
async def cleanup(self):
await super().cleanup()
await asyncio.wait(self._worker_tasks)
# await asyncio.gather(*self._worker_tasks)
# Blocks until all workers are done

:param pipeline_runner: A function that should process user request and return context;
usually it's a :py:meth:`~chatsky.pipeline.pipeline.pipeline.Pipeline._run_pipeline` function.
:param loop: a function that determines whether polling should be continued;
called in each cycle, should return `True` to continue polling or `False` to stop.
:param timeout: a time interval between polls (in seconds).
def _on_exception(self, e: BaseException):
"""
while loop():
try:
await self._polling_loop(pipeline_runner, timeout)
Method that is called on polling cycle exceptions, in some cases it should show users the exception.
By default, it logs all exit exceptions to `info` log and all non-exit exceptions to `error`.

except BaseException as e:
self._on_exception(e)
break
:param e: The exception.
"""
if isinstance(e, Exception):
logger.error(f"Exception in {type(self).__name__} loop!", exc_info=e)
else:
logger.info(f"{type(self).__name__} has stopped polling.")


class CallbackMessengerInterface(MessengerInterface):
17 changes: 6 additions & 11 deletions chatsky/messengers/console.py
@@ -1,8 +1,6 @@
from typing import Any, Hashable, List, Optional, TextIO, Tuple
from uuid import uuid4
from chatsky.messengers.common.interface import PollingMessengerInterface
from chatsky.pipeline.types import PipelineRunnerFunction
from chatsky.script.core.context import Context
from chatsky.script.core.message import Message


@@ -12,9 +10,6 @@ class CLIMessengerInterface(PollingMessengerInterface):
This message interface can maintain dialog with one user at a time only.
"""

supported_request_attachment_types = set()
supported_response_attachment_types = set()

def __init__(
self,
intro: Optional[str] = None,
@@ -29,13 +24,13 @@ def __init__(
self._prompt_response: str = prompt_response
self._descriptor: Optional[TextIO] = out_descriptor

def _request(self) -> List[Tuple[Message, Any]]:
return [(Message(input(self._prompt_request)), self._ctx_id)]
async def _get_updates(self) -> List[Tuple[Any, Message]]:
return [(self._ctx_id, Message(input(self._prompt_request)))]

def _respond(self, responses: List[Context]):
print(f"{self._prompt_response}{responses[0].last_response.text}", file=self._descriptor)
async def _respond(self, ctx_id, last_response: Message):
print(f"{self._prompt_response}{last_response.text}", file=self._descriptor)

async def connect(self, pipeline_runner: PipelineRunnerFunction, **kwargs):
async def connect(self, *args, **kwargs):
"""
The CLIProvider generates new dialog id used to user identification on each `connect` call.

@@ -46,4 +41,4 @@
self._ctx_id = uuid4()
if self._intro is not None:
print(self._intro)
await super().connect(pipeline_runner, **kwargs)
await super().connect(*args, **kwargs)
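
As a side note, the console changes above illustrate the new polling contract: subclasses implement an async _get_updates() that returns (ctx_id, Message) pairs, and an async _respond(ctx_id, last_response). A minimal hypothetical subclass for some other transport might look like this (self.client and its methods are assumed placeholders, not a real API):

from typing import Any, List, Tuple

from chatsky.messengers.common.interface import PollingMessengerInterface
from chatsky.script.core.message import Message


class SketchHTTPMessengerInterface(PollingMessengerInterface):
    def __init__(self, client):
        self.client = client  # hypothetical HTTP client
        super().__init__()

    async def _get_updates(self) -> List[Tuple[Any, Message]]:
        # Fetch pending messages and convert them into (ctx_id, Message) pairs.
        raw_updates = await self.client.fetch_new_messages()
        return [(item["user_id"], Message(item["text"])) for item in raw_updates]

    async def _respond(self, ctx_id, last_response: Message):
        # Relay the pipeline's latest response back to the user.
        await self.client.send_message(ctx_id, last_response.text)
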
36 changes: 35 additions & 1 deletion chatsky/pipeline/pipeline/pipeline.py
@@ -15,6 +15,7 @@
"""

import asyncio
import signal
import logging
from typing import Union, List, Dict, Optional, Hashable, Callable

@@ -102,6 +103,8 @@
parallelize_processing: bool = False,
):
self.actor: Actor = None
self.stopped_by_signal = False
self.context_lock = ContextLock()
self.messenger_interface = CLIMessengerInterface() if messenger_interface is None else messenger_interface
self.context_storage = {} if context_storage is None else context_storage
self.slots = GroupSlot.model_validate(slots) if slots is not None else None
@@ -347,6 +350,15 @@

return ctx

def sigint_handler(self, loop):
self.stopped_by_signal = True
print("_sigint_handler() called")
# asyncio.run(asyncio.gather(*[iface.shutdown() for iface in self.messenger_interfaces]))
if self.messenger_interface.running_in_foreground:
loop.run_until_complete(self.messenger_interface.shutdown())
# In case someone launched a pipeline with connect() instead of run_in_foreground(), all SIGINTs will be ignored, though the flag self.stopped_by_signal is still changed to True.
logger.info(f"pipeline received SIGINT - stopping pipeline and all interfaces")

def run(self):
"""
Method that starts a pipeline and connects to `messenger_interface`.
@@ -355,7 +367,18 @@ def run(self):
This method can be both blocking and non-blocking. It depends on current `messenger_interface` nature.
Message interfaces that run in a loop block current thread.
"""
asyncio.run(self.messenger_interface.connect(self._run_pipeline))

# event_loop = asyncio.get_event_loop()
# event_loop.add_signal_handler(signal.SIGINT, self._sigint_handler)

# This doesn't work for now, because _sigint_handler is just added to the end of the queue of async tasks, so it ends up waiting for the very program it is supposed to shut down.
# I'm using a different solution for now, but the original one has the benefit of utilising the event loop (not ending other asyncio tasks) and "being thread-safe" according to some sources; not sure if that's true or needed, though.
# TO-DO: Do graceful termination via the event loop. I'm thinking that if the _sigint_handler() task could be added to the start of the asyncio queue rather than the end, it would have worked, but I don't know whether that will work or how to do it.

# signal.signal(signal.SIGINT, self.sigint_handler)

asyncio.run(self.messenger_interface.run_in_foreground(self, self._run_pipeline))
logger.info(f"pipeline finished working")
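
Regarding the TO-DO above, one option (a sketch only, not verified against this code base; loop.add_signal_handler is also Unix-only): the handler passed to add_signal_handler must be a plain synchronous callback, but it can schedule a coroutine with loop.create_task, so the shutdown coroutine runs on the same event loop instead of waiting behind the rest of the program:

import asyncio
import signal


async def sketch_run(pipeline):
    loop = asyncio.get_running_loop()
    # Synchronous callback that schedules the async shutdown on the running loop.
    loop.add_signal_handler(
        signal.SIGINT,
        lambda: loop.create_task(pipeline.messenger_interface.shutdown()),
    )
    await pipeline.messenger_interface.connect(pipeline._run_pipeline)
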

def __call__(
self, request: Message, ctx_id: Optional[Hashable] = None, update_ctx_misc: Optional[dict] = None
@@ -372,3 +395,14 @@ def __call__(
@property
def script(self) -> Script:
return self.actor.script


class ContextLock:
# locks: dict[ctx_id, asyncio.Lock] = {}
def __init__(self):
self.locks = {}

def __getitem__(self, key):
if key not in self.locks:
self.locks[key] = asyncio.Lock()
return self.locks[key]
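
For illustration, assumed usage mirroring _worker_job above: ContextLock hands out one asyncio.Lock per context id, so concurrent updates for the same user are serialized while different users are processed in parallel.

import asyncio

context_lock = ContextLock()


async def handle_update(ctx_id, update):
    async with context_lock[ctx_id]:  # same ctx_id -> same Lock -> serialized
        ...  # run the pipeline for this update
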