From 3e37ae3c23264780d1e9abafc9a24b4d989b0ce5 Mon Sep 17 00:00:00 2001 From: Roman Zlobin Date: Tue, 17 Oct 2023 14:56:13 +0300 Subject: [PATCH] Release v0.6.1 (#254) # Release notes - Add docker profiles (#246) - Add Context guide (#244) - Rework basic guide (#209) - Change documentation build configuration (#250, #249) - Various documentation updates (#245, #244, #246) --- .github/workflows/build_and_publish_docs.yml | 3 +- .github/workflows/codestyle.yml | 1 - .github/workflows/test_coverage.yml | 3 +- .github/workflows/test_full.yml | 5 +- CONTRIBUTING.md | 60 ++- dff/__init__.py | 2 +- dff/script/core/context.py | 97 ++-- docker-compose.yml | 16 + docs/source/conf.py | 10 +- docs/source/user_guides.rst | 11 +- docs/source/user_guides/basic_conceptions.rst | 455 +++++++++++------- docs/source/user_guides/context_guide.rst | 247 ++++++++++ docs/source/user_guides/superset_guide.rst | 2 +- docs/source/utils/generate_tutorials.py | 11 +- makefile | 4 +- setup.py | 12 +- tests/stats/test_patch.py | 19 - .../script/core/7_pre_response_processing.py | 7 - 18 files changed, 678 insertions(+), 287 deletions(-) create mode 100644 docs/source/user_guides/context_guide.rst delete mode 100644 tests/stats/test_patch.py diff --git a/.github/workflows/build_and_publish_docs.yml b/.github/workflows/build_and_publish_docs.yml index 6d43756d2..8a2cbdd56 100644 --- a/.github/workflows/build_and_publish_docs.yml +++ b/.github/workflows/build_and_publish_docs.yml @@ -9,7 +9,6 @@ on: pull_request: branches: - dev - - master workflow_dispatch: concurrency: @@ -30,7 +29,7 @@ jobs: - name: Build images run: | - docker-compose up -d + make docker_up - uses: r-lib/actions/setup-pandoc@v2 with: diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml index a5592dd88..d86cd4dae 100644 --- a/.github/workflows/codestyle.yml +++ b/.github/workflows/codestyle.yml @@ -9,7 +9,6 @@ on: pull_request: branches: - dev - - master workflow_dispatch: concurrency: diff --git 
a/.github/workflows/test_coverage.yml b/.github/workflows/test_coverage.yml index fae5d408a..e4b0270b4 100644 --- a/.github/workflows/test_coverage.yml +++ b/.github/workflows/test_coverage.yml @@ -9,7 +9,6 @@ on: pull_request: branches: - dev - - master workflow_dispatch: concurrency: @@ -28,7 +27,7 @@ jobs: - name: Build images run: | - docker-compose up -d + make docker_up - name: set up python 3.10 uses: actions/setup-python@v4 diff --git a/.github/workflows/test_full.yml b/.github/workflows/test_full.yml index 381c223b0..60eccdf47 100644 --- a/.github/workflows/test_full.yml +++ b/.github/workflows/test_full.yml @@ -9,7 +9,6 @@ on: pull_request: branches: - dev - - master workflow_dispatch: concurrency: @@ -31,7 +30,7 @@ jobs: - name: Build images if: matrix.os == 'ubuntu-latest' run: | - docker-compose up -d + make docker_up - name: set up python ${{ matrix.python-version }} uses: actions/setup-python@v4 @@ -65,7 +64,7 @@ jobs: - name: Build images run: | - docker-compose up -d + make docker_up - name: set up python 3.8 uses: actions/setup-python@v4 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cdbe7baca..f4055262c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -67,6 +67,26 @@ WARNING! Because of the current patching solution, `make doc` modifies some of t so it is strongly advised to use it carefully and in virtual environment only. However, this behavior is likely to be changed in the future. +#### Documentation links + +In your tutorials, you can use special expanding directives in markdown cells. +They can help shorten the comments and avoid boilerplate code. +The documentation links generated by the directives are always relative +to the local documentation and verified during build. + +- `%pip install {args}` + This directive generates dependency installation cell, adds a comment and sets up "quiet" flag. + + It should be used in tutorials, like this: `# %pip install dff[...]`. 
+- `%doclink({args})` + This directive generates a documentation link. It supports two or three arguments and the generated link will look like: `ARG1/ARG2#ARG3`. + + The first argument can be either `api` for DFF codebase, `tutorial` for tutorials or `guide` for user guides. +- `%mddoclink({args})` + This directive is a shortcut for `%doclink` that generates a markdown format link instead. + + The generated link will be either `[ARG2](%doclink(ARG1,ARG2))` or `[ARG3](%doclink(ARG1,ARG2,ARG3))`. + ### Style For style supporting we propose `black`, which is a PEP 8 compliant opinionated formatter. It doesn't take previous formatting into account. See more about [black](https://github.com/psf/black). @@ -100,16 +120,36 @@ make format Tests are configured via [`.env_file`](.env_file). ### Docker -For integration tests, DFF uses Docker images of supported databases as well as docker-compose configuration. -The following images are required for complete integration testing: -1. `mysql` -2. `postgres` -3. `redis` -4. `mongo` -5. `cr.yandex/yc/yandex-docker-local-ydb` - -All of them will be downloaded, launched and awaited upon running integration test make command (`make test_all`). -However, they can be downloaded separately with `make docker_up` and awaited with `make wait_db` commands. +DFF uses docker images for two purposes: +1. Database images for integration testing. +2. Images for statistics collection. + +The first group can be launched via + +```bash +docker-compose --profile context_storage up +``` + +This will download and run all the databases (`mysql`, `postgres`, `redis`, `mongo`, `ydb`). + +The second group can be launched via + +```bash +docker-compose --profile stats up +``` + +This will download and launch Superset Dashboard, Clickhouse, OpenTelemetry Collector.
+ +To launch both groups run +```bash +docker-compose --profile context_storage --profile stats up +``` +or +```bash +make docker_up +``` + +This will be done automatically when running `make test_all`. ### Other provided features You can get more info about `make` commands by `help`: diff --git a/dff/__init__.py b/dff/__init__.py index 1e39010f4..a72f1a88f 100644 --- a/dff/__init__.py +++ b/dff/__init__.py @@ -4,7 +4,7 @@ __author__ = "Denis Kuznetsov" __email__ = "kuznetsov.den.p@gmail.com" -__version__ = "0.6.0" +__version__ = "0.6.1" import nest_asyncio diff --git a/dff/script/core/context.py b/dff/script/core/context.py index 6658c346f..78ee18072 100644 --- a/dff/script/core/context.py +++ b/dff/script/core/context.py @@ -32,7 +32,7 @@ def get_last_index(dictionary: dict) -> int: """ - Obtaining the last index from the `dictionary`. Functions returns `-1` if the `dict` is empty. + Obtain the last index from the `dictionary`. Return `-1` if the `dict` is empty. :param dictionary: Dictionary with unsorted keys. :return: Last index from the `dictionary`. @@ -44,6 +44,9 @@ def get_last_index(dictionary: dict) -> int: class Context(BaseModel): """ A structure that is used to store data about the context of a dialog. + + Avoid storing unserializable data in the fields of this class in order for + context storages to work. """ id: Union[UUID, int, str] = Field(default_factory=uuid4) @@ -77,13 +80,15 @@ class Context(BaseModel): `misc` stores any custom data. The scripting doesn't use this dictionary by default, so storage of any data won't reflect on the work on the internal Dialog Flow Scripting functions. + Avoid storing unserializable data in order for context storages to work. + - key - Arbitrary data name. - value - Arbitrary data. """ validation: bool = False """ - `validation` is a flag that signals that :py:class:`~dff.script.Pipeline`, - while being initialized, checks the :py:class:`~dff.script.Script`. 
+ `validation` is a flag that signals that :py:class:`~dff.pipeline.pipeline.pipeline.Pipeline`, + while being initialized, checks the :py:class:`~dff.script.core.script.Script`. The functions that can give not valid data while being validated must use this flag to take the validation mode into account. Otherwise the validation will not be passed. @@ -91,12 +96,12 @@ class Context(BaseModel): framework_states: Dict[ModuleName, Dict[str, Any]] = {} """ `framework_states` is used for addons states or for - :py:class:`~dff.script.Pipeline`'s states. - :py:class:`~dff.script.Pipeline` + :py:class:`~dff.pipeline.pipeline.pipeline.Pipeline`'s states. + :py:class:`~dff.pipeline.pipeline.pipeline.Pipeline` records all its intermediate conditions into the `framework_states`. - After :py:class:`~dff.script.Context` processing is finished, - :py:class:`~dff.script.Pipeline` resets `framework_states` and - returns :py:class:`~dff.script.Context`. + After :py:class:`~.Context` processing is finished, + :py:class:`~dff.pipeline.pipeline.pipeline.Pipeline` resets `framework_states` and + returns :py:class:`~.Context`. - key - Temporary variable name. - value - Temporary variable data. @@ -106,7 +111,7 @@ class Context(BaseModel): @classmethod def sort_dict_keys(cls, dictionary: dict) -> dict: """ - Sorting the keys in the `dictionary`. This needs to be done after deserialization, + Sort the keys in the `dictionary`. This needs to be done after deserialization, since the keys are deserialized in a random order. :param dictionary: Dictionary with unsorted keys. @@ -117,16 +122,15 @@ def sort_dict_keys(cls, dictionary: dict) -> dict: @classmethod def cast(cls, ctx: Optional[Union["Context", dict, str]] = None, *args, **kwargs) -> "Context": """ - Transforms different data types to the objects of - :py:class:`~dff.script.Context` class. - Returns an object of :py:class:`~dff.script.Context` + Transform different data types to the objects of the + :py:class:`~.Context` class. 
+ Return an object of the :py:class:`~.Context` type that is initialized by the input data. - :param ctx: Different data types, that are used to initialize object of - :py:class:`~dff.script.Context` type. - The empty object of :py:class:`~dff.script.Context` - type is created if no data are given. - :return: Object of :py:class:`~dff.script.Context` + :param ctx: Data that is used to initialize an object of the + :py:class:`~.Context` type. + An empty :py:class:`~.Context` object is returned if no data is given. + :return: Object of the :py:class:`~.Context` type that is initialized by the input data. """ if not ctx: @@ -137,14 +141,15 @@ def cast(cls, ctx: Optional[Union["Context", dict, str]] = None, *args, **kwargs ctx = Context.model_validate_json(ctx) elif not issubclass(type(ctx), Context): raise ValueError( - f"context expected as sub class of Context class or object of dict/str(json) type, but got {ctx}" + f"Context expected to be an instance of the Context class " + f"or an instance of the dict/str(json) type. Got: {type(ctx)}" ) return ctx def add_request(self, request: Message): """ - Adds to the context the next `request` corresponding to the next turn. - The addition takes place in the `requests` and `new_index = last_index + 1`. + Add a new `request` to the context. + The new `request` is added with the index of `last_index + 1`. :param request: `request` to be added to the context. """ @@ -154,8 +159,8 @@ def add_request(self, request: Message): def add_response(self, response: Message): """ - Adds to the context the next `response` corresponding to the next turn. - The addition takes place in the `responses`, and `new_index = last_index + 1`. + Add a new `response` to the context. + The new `response` is added with the index of `last_index + 1`. :param response: `response` to be added to the context. 
""" @@ -165,9 +170,8 @@ def add_response(self, response: Message): def add_label(self, label: NodeLabel2Type): """ - Adds to the context the next :py:const:`label `, - corresponding to the next turn. - The addition takes place in the `labels`, and `new_index = last_index + 1`. + Add a new :py:data:`~.NodeLabel2Type` to the context. + The new `label` is added with the index of `last_index + 1`. :param label: `label` that we need to add to the context. """ @@ -180,12 +184,12 @@ def clear( field_names: Union[Set[str], List[str]] = {"requests", "responses", "labels"}, ): """ - Deletes all recordings from the `requests`/`responses`/`labels` except for + Delete all records from the `requests`/`responses`/`labels` except for the last `hold_last_n_indices` turns. If `field_names` contains `misc` field, `misc` field is fully cleared. - :param hold_last_n_indices: Number of last turns that remain under clearing. - :param field_names: Properties of :py:class:`~dff.script.Context` we need to clear. + :param hold_last_n_indices: Number of last turns to keep. + :param field_names: Properties of :py:class:`~.Context` to clear. Defaults to {"requests", "responses", "labels"} """ field_names = field_names if isinstance(field_names, set) else set(field_names) @@ -206,9 +210,12 @@ def clear( @property def last_label(self) -> Optional[NodeLabel2Type]: """ - Returns the last :py:const:`~dff.script.NodeLabel2Type` of - the :py:class:`~dff.script.Context`. - Returns `None` if `labels` is empty. + Return the last :py:data:`~.NodeLabel2Type` of + the :py:class:`~.Context`. + Return `None` if `labels` is empty. + + Since `start_label` is not added to the `labels` field, + empty `labels` usually indicates that the current node is the `start_node`. 
""" last_index = get_last_index(self.labels) return self.labels.get(last_index) @@ -216,8 +223,8 @@ def last_label(self) -> Optional[NodeLabel2Type]: @property def last_response(self) -> Optional[Message]: """ - Returns the last `response` of the current :py:class:`~dff.script.Context`. - Returns `None` if `responses` is empty. + Return the last `response` of the current :py:class:`~.Context`. + Return `None` if `responses` is empty. """ last_index = get_last_index(self.responses) return self.responses.get(last_index) @@ -225,7 +232,7 @@ def last_response(self) -> Optional[Message]: @last_response.setter def last_response(self, response: Optional[Message]): """ - Sets the last `response` of the current :py:class:`~dff.core.engine.core.context.Context`. + Set the last `response` of the current :py:class:`~.Context`. Required for use with various response wrappers. """ last_index = get_last_index(self.responses) @@ -234,8 +241,8 @@ def last_response(self, response: Optional[Message]): @property def last_request(self) -> Optional[Message]: """ - Returns the last `request` of the current :py:class:`~dff.script.Context`. - Returns `None` if `requests` is empty. + Return the last `request` of the current :py:class:`~.Context`. + Return `None` if `requests` is empty. """ last_index = get_last_index(self.requests) return self.requests.get(last_index) @@ -243,7 +250,7 @@ def last_request(self) -> Optional[Message]: @last_request.setter def last_request(self, request: Optional[Message]): """ - Sets the last `request` of the current :py:class:`~dff.core.engine.core.context.Context`. + Set the last `request` of the current :py:class:`~.Context`. Required for use with various request wrappers. """ last_index = get_last_index(self.requests) @@ -252,7 +259,7 @@ def last_request(self, request: Optional[Message]): @property def current_node(self) -> Optional[Node]: """ - Returns current :py:class:`~dff.script.Node`. + Return current :py:class:`~dff.script.core.script.Node`. 
""" actor = self.framework_states.get("actor", {}) node = ( @@ -264,17 +271,21 @@ def current_node(self) -> Optional[Node]: ) if node is None: logger.warning( - "The `current_node` exists when an actor is running between `ActorStage.GET_PREVIOUS_NODE`" - " and `ActorStage.FINISH_TURN`" + "The `current_node` method should be called " + "when an actor is running between the " + "`ActorStage.GET_PREVIOUS_NODE` and `ActorStage.FINISH_TURN` stages." ) return node def overwrite_current_node_in_processing(self, processed_node: Node): """ - Overwrites the current node with a processed node. This method only works in processing functions. + Set the current node to be `processed_node`. + This method only works in processing functions (pre-response and pre-transition). + + The actual current node is not changed. - :param processed_node: `node` that we need to overwrite current node. + :param processed_node: `node` to set as the current node. """ is_processing = self.framework_states.get("actor", {}).get("processed_node") if is_processing: @@ -282,7 +293,7 @@ def overwrite_current_node_in_processing(self, processed_node: Node): else: logger.warning( f"The `{self.overwrite_current_node_in_processing.__name__}` " - "function can only be run during processing functions." + "method can only be called from processing functions (either pre-response or pre-transition)." 
) diff --git a/docker-compose.yml b/docker-compose.yml index 9c33c632a..382dc1dea 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,18 +3,24 @@ services: mysql: env_file: [.env_file] image: mysql:latest + profiles: + - context_storage restart: unless-stopped ports: - 3307:3306 psql: env_file: [.env_file] image: postgres:latest + profiles: + - context_storage restart: unless-stopped ports: - 5432:5432 redis: env_file: [.env_file] image: redis:latest + profiles: + - context_storage restart: unless-stopped command: --requirepass pass ports: @@ -22,12 +28,16 @@ services: mongo: env_file: [.env_file] image: mongo:latest + profiles: + - context_storage restart: unless-stopped ports: - 27017:27017 ydb: env_file: [.env_file] image: cr.yandex/yc/yandex-docker-local-ydb:latest + profiles: + - context_storage restart: unless-stopped ports: - 8765:8765 @@ -40,11 +50,15 @@ services: context: ./dff/utils/docker dockerfile: dockerfile_stats image: ghcr.io/deeppavlov/superset_df_dashboard:latest + profiles: + - stats ports: - "8088:8088" clickhouse: env_file: [.env_file] image: clickhouse/clickhouse-server:latest + profiles: + - stats restart: unless-stopped ports: - '8123:8123' @@ -54,6 +68,8 @@ services: - ch-data:/var/lib/clickhouse/ otelcol: image: otel/opentelemetry-collector-contrib:latest + profiles: + - stats container_name: otel-col restart: unless-stopped command: [ "--config=/etc/otelcol-config.yml", "--config=/etc/otelcol-config-extras.yml" ] diff --git a/docs/source/conf.py b/docs/source/conf.py index f2e314ceb..aec507d90 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -17,7 +17,7 @@ author = "DeepPavlov" # The full version, including alpha/beta/rc tags -release = "0.6.0" +release = "0.6.1" # -- General configuration --------------------------------------------------- @@ -146,7 +146,13 @@ ] -autodoc_default_options = {"members": True, "undoc-members": False, "private-members": True} +autodoc_default_options = { + "members": True, + 
"undoc-members": False, + "private-members": True, + "member-order": "bysource", + "exclude-members": "_abc_impl, model_fields", +} def setup(_): diff --git a/docs/source/user_guides.rst b/docs/source/user_guides.rst index 8724a1489..024c84ecc 100644 --- a/docs/source/user_guides.rst +++ b/docs/source/user_guides.rst @@ -1,14 +1,20 @@ User guides ----------- -:doc:`Basic conceptions <./user_guides/basic_conceptions>` +:doc:`Basic concepts <./user_guides/basic_conceptions>` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In the ``basic conceptions`` tutorial the basics of DFF are described, +In the ``basic concepts`` tutorial the basics of DFF are described, those include but are not limited to: dialog graph creation, specifying start and fallback nodes, setting transitions and conditions, using ``Context`` object in order to receive information about current script execution. +:doc:`Context guide <./user_guides/context_guide>` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``context guide`` walks you through the details of working with the +``Context`` object, the backbone of the DFF API, including most of the relevant fields and methods. + :doc:`Superset guide <./user_guides/superset_guide>` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -22,4 +28,5 @@ Superset dashboard shipped with DFF. :hidden: user_guides/basic_conceptions + user_guides/context_guide user_guides/superset_guide diff --git a/docs/source/user_guides/basic_conceptions.rst b/docs/source/user_guides/basic_conceptions.rst index 9f81a8610..5344143e0 100644 --- a/docs/source/user_guides/basic_conceptions.rst +++ b/docs/source/user_guides/basic_conceptions.rst @@ -4,32 +4,58 @@ Basic Concepts Introduction ~~~~~~~~~~~~ -Dialog Flow Framework helps its users create conversational services, which is done by -defining a specialized dialog graph that dictates the behaviour of the dialog service. 
-This dialog graph essentially represents the dialog script that guides the conversation -between the chat-bot and the user. +The Dialog Flow Framework (DFF) is a modern tool for designing conversational services. -DFF leverages a specialized language known as a Domain-Specific Language (DSL) -to enable developers to quickly write and comprehend dialog graphs. -This DSL greatly simplifies the process of designing complex conversations and handling -various user inputs, making it easier to build sophisticated conversational systems. +DFF introduces a specialized Domain-Specific Language (DSL) based on standard Python functions and data structures +which makes it very easy for developers with any level of expertise to design a script for user - bot interaction. +The script comes in a form of a *dialog graph* where +each node equals a specific state of the dialog, i.e. a specific conversation turn. +The graph includes the majority of the conversation logic, and covers one or several user scenarios, all in a single Python dict. -DFF installation and requirements -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In this tutorial, we describe the basics of DFF API, +and walk you through the process of creating and maintaining a conversational service with the help of DFF. -For this very basic tutorial we will need only the core dependencies of DFF. -They can be installed via the following command: + +Creating Conversational Services with DFF +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Installation +============ + +To get started with DFF, you need to install its core dependencies, which can be done using the following command: .. 
code-block:: shell pip3 install dff -Example conversational chat-bot -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Defining Dialogue Goals and User Scenarios +========================================== + +To create a conversational service using Dialog Flow Framework (DFF), you start by defining the overall dialogue goal +and breaking down the dialogue into smaller scenarios based on the user intents or actions that you want to cover. +DFF's Domain-Specific Language makes it easy to break down the dialog script into `flows`, i.e. named groups of nodes +unified by a specific purpose. + +For instance, if one of the dialog options that we provide to the user is to play a game, +the bot can have a 'game' flow that contains dialog states related to this subject, while other flows +cover other topics, e.g. 'time' flow can include questions and answers related to telling the time, +'weather' to telling the weather, etc. + +Creating Dialogue Flows for User Scenarios +========================================== -Let us go through the creation of a simple bot that would play a (virtual) ping-pong game with its users. -It would also greet them and handle exceptions. -First, we define the chat-bot in pseudo language: +Once you have DFF installed, you can define dialog flows targeting various user scenarios +and combine them in a global script object. A flow consists of one or more nodes +that represent conversation turns. + +.. note:: + + In other words, the script object has 3 levels of nestedness: + **script - flow - node** + +Let's assume that the only user scenario of the service is the chat bot playing ping pong with the user. +The practical implementation of this is that the bot is supposed to reply 'pong' to messages that say 'ping' +and handle any other messages as exceptions. The pseudo-code for the said flow would be as follows: .. code-block:: text @@ -40,52 +66,59 @@ First, we define the chat-bot in pseudo language: Respond with "Pong!" 
Repeat this behaviour - If user writes something else: - Respond with "You should've written 'Ping', not '[USER MESSAGE]'!" - Go to responding with "Hi! Let's play ping-pong!" if user writes anything - If user writes something else: - Respond with "You should've started the dialog with 'Hello!'" + Respond with "That was against the rules" Go to responding with "Hi! Let's play ping-pong!" if user writes anything -Later in this tutorial we will create this chat-bot using DFF, starting from the very basics -and then elaborating on more complicated topics. - -Example chat-bot graph -~~~~~~~~~~~~~~~~~~~~~~ +This leaves us with a single dialog flow in the dialog graph that we lay down below, with the annotations for +each part of the graph available under the code snippet. -Let's start from creating the very simple dialog agent: +Example flow & script +===================== .. code-block:: python + :linenos: from dff.pipeline import Pipeline from dff.script import TRANSITIONS, RESPONSE, Message import dff.script.conditions as cnd ping_pong_script = { - "ping_pong_flow": { + "greeting_flow": { "start_node": { - RESPONSE: Message(), + RESPONSE: Message(), # the response of the initial node is skipped TRANSITIONS: { - "greeting_node": cnd.exact_match(Message(text="Hello!")), + ("greeting_flow", "greeting_node"): + cnd.exact_match(Message(text="/start")), }, }, "greeting_node": { - RESPONSE: Message(text="Hi! 
Let's play ping-pong!"), + RESPONSE: Message(text="Hi!"), + TRANSITIONS: { + ("ping_pong_flow", "game_start_node"): + cnd.exact_match(Message(text="Hello!")) + } + }, + "fallback_node": { + RESPONSE: fallback_response, TRANSITIONS: { - "response_node": cnd.exact_match(Message(text="Ping!")), + ("greeting_flow", "greeting_node"): cnd.true(), }, }, - "response_node": { - RESPONSE: Message(text="Pong!"), + }, + "ping_pong_flow": { + "game_start_node": { + RESPONSE: Message(text="Let's play ping-pong!"), TRANSITIONS: { - "response_node": cnd.exact_match(Message(text="Ping!")), + ("ping_pong_flow", "response_node"): + cnd.exact_match(Message(text="Ping!")), }, }, - "fallback_node": { - RESPONSE: Message(text="That was against the rules!"), + "response_node": { + RESPONSE: Message(text="Pong!"), TRANSITIONS: { - "greeting_node": cnd.true(), + ("ping_pong_flow", "response_node"): + cnd.exact_match(Message(text="Ping!")), }, }, }, @@ -93,41 +126,38 @@ Let's start from creating the very simple dialog agent: pipeline = Pipeline.from_script( ping_pong_script, - start_label=("ping_pong_flow", "start_node"), - fallback_label=("ping_pong_flow", "fallback_node"), + start_label=("greeting_flow", "start_node"), + fallback_label=("greeting_flow", "fallback_node"), ) if __name__ == "__main__": pipeline.run() -.. warning:: +The code snippet defines a script with a single dialogue flow that emulates a ping-pong game. +Likewise, if additional scenarios need to be covered, additional flow objects can be embedded into the same script object. - Current dialog agent doesn't support different cases and/or marks in "Ping" - messages, it only supports exact "Ping!" message from user. - It also supports only one standard error message for any error. +* ``ping_pong_script``: The dialog **script** mentioned above is a dictionary that has one or more + dialog flows as its values. 
-That's what the agent consists of: - -* ``ping_pong_script``: in order to create a dialog agent, a dialog **script** is needed; - a script is a dictionary, where the keys are the names of the flows (that are "sub-dialogs", - used to separate the whole dialog into multiple sub-dialogs). - -* ``ping_pong_flow`` is our behaviour flow; a flow is a separated dialog, containing linked +* ``ping_pong_flow`` is the game emulation flow; it contains linked conversation nodes and possibly some extra data, transitions, etc. -* ``start_node`` is the initial node, contains no response, only transfers user to an other node - according to the first message user sends. - It transfers user to ``greeting_node`` if user writes text message exactly equal to "Hello!". +* A node object is an atomic part of the script. + The required fields of a node object are ``RESPONSE`` and ``TRANSITIONS``. -* Each node contains "RESPONSE" and "TRANSITIONS" elements. +* The ``RESPONSE`` field specifies the response that the dialog agent gives to the user in the current turn. -* ``RESPONSE`` value should be a ``Message`` object, that can contain text, images, - audios, attachments, etc. +* The ``TRANSITIONS`` field specifies the edges of the dialog graph that link the dialog states. + This is a dictionary that maps labels of other nodes to conditions, i.e. callback functions that + return `True` or `False`. These conditions determine whether respective nodes can be visited + in the next turn. + In the example script, we use standard transitions: ``exact_match`` requires the user request to + fully match the provided text, while ``true`` always allows a transition. However, passing custom + callbacks that implement arbitrary logic is also an option. -* ``TRANSITIONS`` value should be a dict, containing node names and conditions, - that should be met in order to go to the node specified. 
- Here, we can see two different types of transitions: ``exact_match`` requires user message text to - match the provided text exactly, while ``true`` allowes unconditional transition. +* ``start_node`` is the initial node, which contains an empty response and only transfers user to another node + according to the first message user sends. + It transfers user to ``greeting_node`` if user writes text message exactly equal to "Hello!". * ``greeting_node`` is the node that will greet user and propose him a ping-pong game. It transfers user to ``response_node`` if user writes text message exactly equal to "Ping!". @@ -135,169 +165,224 @@ That's what the agent consists of: * ``response_node`` is the node that will play ping-pong game with the user. It transfers user to ``response_node`` if user writes text message exactly equal to "Ping!". -* ``fallback_node`` is an "exception handling node"; user will be transferred here if in any node - no transition for the message given by user is found. +* ``fallback_node`` is an "exception handling node"; user will be transferred here if + none of the transition conditions (see ``TRANSITIONS``) is satisfied. It transfers user to ``greeting_node`` no matter what user writes. -* ``pipeline`` is a special object that processes user requests according to provided script. - In order to create a pipeline, the script should be provided and two two-string tuples: - the first specifies initial node flow and name and the second (optional) specifies fallback - node flow and name (if not provided it equals to the first one by default). +* ``pipeline`` is a special object that traverses the script graph based on the values of user input. + It is also capable of executing custom actions that you want to run on every turn of the conversation. 
+ The pipeline can be initialized with a script, and with labels of two nodes: + the entrypoint of the graph, aka the 'start node', and the 'fallback node' + (if not provided it defaults to the same node as 'start node'). .. note:: - See `tutorial on basic dialog structure`_. + See `tutorial on basic dialog structure <../tutorials/tutorials.script.core.1_basics.html>`_. + +Processing Definition +===================== + +.. note:: + + The topic of this section is explained in greater detail in the following tutorials: + + * `Pre-response processing <../tutorials/tutorials.script.core.7_pre_response_processing.html>`_ + * `Pre-transitions processing <../tutorials/tutorials.script.core.9_pre_transitions_processing.html>`_ + * `Pipeline processors <../tutorials/tutorials.pipeline.2_pre_and_post_processors.html>`_ + +Processing user requests and extracting additional parameters is a crucial part of building a conversational bot. +DFF allows you to define how user requests will be processed to extract additional parameters. +This is done by passing callbacks to special ``PROCESSING`` fields in a Node dict. -Advanced graph features -~~~~~~~~~~~~~~~~~~~~~~~ +* User input can be altered with ``PRE_RESPONSE_PROCESSING`` and will happen **before** response generation. See `tutorial on pre-response processing`_. +* Node response can be modified with ``PRE_TRANSITIONS_PROCESSING`` and will happen **after** response generation but **before** transition to the next node. See `tutorial on pre-transition processing`_. -Right now the agent we have created is a very simple one and does not behave **exactly** as we wanted -our bot to behave. Let's see how we can improve our script: +Depending on the requirements of your bot and the dialog goal, you may need to interact with external databases or APIs to retrieve data. +For instance, if a user wants to know a schedule, you may need to access a database and extract parameters such as date and location. ..
code-block:: python - from dff.pipeline import Pipeline - from dff.script import TRANSITIONS, RESPONSE, Context, Message - import dff.script.conditions as cnd - import dff.script.labels as lbl + import requests + ... + def use_api_processing(ctx: Context, _: Pipeline, *args, **kwargs) -> Context: + # save to the context field for custom info + ctx.misc["api_call_results"] = requests.get("http://schedule.api/day1").json() + return ctx + ... + node = { + RESPONSE: ... + TRANSITIONS: ... + PRE_TRANSITIONS_PROCESSING: {"use_api": use_api_processing} + } - def get_previous_node_name(ctx: Context) -> str: - """ - Get the name of the previous visited script node. - """ - last_label = sorted(list(ctx.labels))[-2] if len(ctx.labels) >= 2 else None - # labels store the list of nodes the bot transitioned to, - # so the second to last label would be the label of a previous node - return ctx.labels[last_label][1] if last_label is not None else "start_node" - # label is a two-item tuple used to identify a node, - # the first element is flow name and the second is node name +.. note:: + + This function uses ``Context`` to store the result of a request for other functions to use. + Context is a data structure that keeps all the information about a specific conversation. + + To learn more about ``Context`` see the `relevant guide <../user_guides/context_guide.html>`__. + +If you retrieve data from the database or API, it's important to validate it to ensure it meets expectations. + +Since DFF extensively leverages pydantic, you can resort to the validation tools of this feature-rich library. +For instance, given that each processing routine is a callback, you can use tools like pydantic's `validate_call` +to ensure that the returned values match the function signature. +Error handling logic can also be incorporated into these callbacks. 
+ +Generating a bot Response +========================= + +Generating a bot response involves creating a text or multimedia response that will be delivered to the user. +Response is defined in the ``RESPONSE`` section of each node and should be either a ``Message`` object, +that can contain text, images, audios, attachments, etc., or a callback that returns a ``Message``. +The latter allows you to customize the response based on the specific scenario and user input. + +.. code-block:: python + + def sample_response(ctx: Context, _: Pipeline, *args, **kwargs) -> Message: + if ctx.misc["user"] == 'vegan': + return Message(text="Here is a list of vegan cafes.") + return Message(text="Here is a list of cafes.") + +Handling Fallbacks +================== + +In DFF, you should provide handling for situations where the user makes requests +that do not trigger any of the transitions specified in the script graph. +To cover that use case, DFF requires you to define a fallback node that the agent will move to +when no adequate transition has been found. + +Like other nodes, the fallback node can either use a message or a callback to produce a response +which gives you a lot of freedom in creating situationally appropriate error messages. +Create friendly error messages and, if possible, suggest alternative options. +This ensures a smoother user experience even when the bot encounters unexpected inputs. + +.. code-block:: python def fallback_response(ctx: Context, _: Pipeline, *args, **kwargs) -> Message: """ - Generate response for fallback node, according to the previous node - we have been to. - If the previous node was `start_node`, a sample message will be returned, - otherwise the message will include user input. + Generate a special fallback response depending on the situation. 
""" - if get_previous_node_name(ctx) == "start_node": - return Message(text="You should've started the dialog with 'Hello!'") - elif ctx.last_request is not None: - last_request = ctx.last_request.text - note = f"You should've written 'Ping', not '{last_request}'!" - return Message(text=f"That was against the rules! {note}") + if ctx.last_request is not None: + if ctx.last_request.text != "/start" and ctx.last_label is None: + # an empty last_label indicates start_node + return Message(text="You should've started the dialog with '/start'") + else: + return Message( + text=f"That was against the rules!\n" + f"You should've written 'Ping', not '{ctx.last_request.text}'!" + ) else: raise RuntimeError("Error occurred: last request is None!") - - ping_pong_script = { - "ping_pong_flow": { - "start_node": { - RESPONSE: Message(), - TRANSITIONS: { - lbl.forward(): cnd.exact_match(Message(text="Hello!")), - }, - }, - "greeting_node": { - RESPONSE: Message(text="Hi! Let's play ping-pong!"), - TRANSITIONS: { - lbl.forward(): cnd.regexp(r"^[P|p]ing!?$"), - }, - }, - "ping_pong_node": { - RESPONSE: Message(text="Pong!"), - TRANSITIONS: { - lbl.repeat(): cnd.regexp(r"^[P|p]ing!?$"), - }, - }, - "fallback_node": { - RESPONSE: fallback_response, - TRANSITIONS: { - "greeting_node": cnd.true(), - }, - }, - }, - } +Testing and Debugging +~~~~~~~~~~~~~~~~~~~~~ - pipeline = Pipeline.from_script( - ping_pong_script, - start_label=("ping_pong_flow", "start_node"), - fallback_label=("ping_pong_flow", "fallback_node"), - ) - - if __name__ == "__main__": - pipeline.run() +Periodically testing the conversational service is crucial to ensure it works correctly. +You should also be prepared to debug the code and dialogue logic if problems are discovered during testing. +Thorough testing helps identify and resolve any potential problems in the conversation flow. 
-That's what we've changed: +The basic testing procedure offered by DFF is end-to-end testing of the pipeline and the script +which ensures that the pipeline yields correct responses for any given input. +It requires a sequence of user request - bot response pairs that form the happy path of your +conversational service. -* ``fallback_node`` has a callback response, it prints different messages depending on the - previous node. +.. code-block:: python -.. note:: + happy_path = ( + (Message(text="/start"), Message(text="Hi!")), + (Message(text="Hello!"), Message(text="Let's play ping-pong!")), + (Message(text="Ping!"), Message(text="Pong!")) + ) - See `tutorial on response functions`_. +A special function is then used to ascertain complete identity of the messages taken from +the happy path and the pipeline. The function will play out a dialog with the pipeline acting as a user while checking returned messages. -* A special function ``get_previous_node_name`` was written to determine the name of the previous - visited node. It utilizes ``labels`` attribute of the ``Context`` object. +.. code-block:: python -.. note:: + from dff.utils.testing.common import check_happy_path - See `documentation of Context object`_. + check_happy_path(pipeline, happy_path) -* Transitions were changed: transitions to next, previous and current node were replaced with special - standard transitions. +Monitoring and Analytics +~~~~~~~~~~~~~~~~~~~~~~~~ -.. note:: +Setting up bot performance monitoring and usage analytics is essential to monitor its operation and identify potential issues. +Monitoring helps you understand how users are interacting with the bot and whether any improvements are needed. +Analytics data can provide valuable insights for refining the bot's behavior and responses. - See `tutorial on transitions`_. - -* Conditions were changed: now regular expressions are used to check user text input value. 
+DFF provides a `statistics` module as an out-of-the-box solution for collecting arbitrary statistical metrics +from your service. Setting up the data collection is as easy as instantiating the relevant class in the same +context with the pipeline. +What's more, the data you obtain can be visualized right away using Apache Superset as a charting engine. .. note:: - See `tutorial on conditions`_. + More information is available in the respective `guide <../user_guides/superset_guide.html>`__. -Further exploration -~~~~~~~~~~~~~~~~~~~ +Iterative Improvement +~~~~~~~~~~~~~~~~~~~~~ -There are still a lot of capabilities of Dialog Flow Framework that remain uncovered by this tutorial. +To continually enhance your chat-bot's performance, monitor user feedback and analyze data on bot usage. +For instance, the statistics or the charts may reveal that some flow is visited by users more frequently or +less frequently than planned. This would mean that adjustments to the transition structure +of the graph need to be made. -For example: +Gradually improve the transition logic and response content based on the data received. +This iterative approach ensures that the bot becomes more effective over time. -* You can use ``GLOBAL`` transitions that will be available from every node in your script. - See `tutorial on global transitions`_. +Data Protection +~~~~~~~~~~~~~~~ -* You can serialize context (available on every transition and response) - to json or dictionary in order to debug it or extract some values. - See `tutorial on context serialization`_. +Data protection is a critical consideration in bot development, especially when handling sensitive information. -* You can alter user input and modify generated responses. - User input can be altered with ``PRE_RESPONSE_PROCESSING`` and will happen **before** response generation. - See `tutorial on pre-response processing`_. 
- Node response can be modified with ``PRE_TRANSITION_PROCESSING`` and will happen **after** response generation. - See `tutorial on pre-transition processing`_. +.. note:: -* Additional data ``MISC`` can be added to every node, flow and script itself. - See `tutorial on script MISC`_. + The DFF framework helps ensure the safety of your application by storing the history and other user data present + in the ``Context`` object under unique ids and abstracting the storage logic away from the user interface. + As a result, it offers the basic level of data protection making it impossible to gain unlawful access to personal information. -Conclusion -~~~~~~~~~~ +Documentation +~~~~~~~~~~~~~ -In this tutorial, we explored the basics of Dialog Flow Framework (DFF) to build dynamic conversational services. -By using DFF's intuitive Domain-Specific Language (DSL) and well-structured dialog graphs, we created a simple interaction between user and chat-bot. -We covered installation, understanding the DSL and building dialog graph. -However, this is just the beginning. DFF offers a world of possibilities in conversational chat-bot. -With practice and exploration of advanced features, you can create human-like conversations and reach a wider audience by integrating with various platforms. -Now, go forth, unleash your creativity, and create captivating conversational services with DFF. -Happy building! +Creating documentation is essential for teamwork and future bot maintenance. +Document how different parts of the script work and how the bot covers the expected interaction scenarios. +It is especially important to document the purpose and functionality of callback functions and pipeline services +that you may have in your project, using Python docstrings. +.. code-block:: python -.. _tutorial on basic dialog structure: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.1_basics.html -.. 
_tutorial on response functions: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.3_responses.html -.. _documentation of Context object: https://deeppavlov.github.io/dialog_flow_framework/apiref/dff.script.core.context.html -.. _tutorial on transitions: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.4_transitions.html -.. _tutorial on conditions: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.2_conditions.html -.. _tutorial on global transitions: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.5_global_transitions.html -.. _tutorial on context serialization: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.6_context_serialization.html -.. _tutorial on pre-response processing: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.7_pre_response_processing.html -.. _tutorial on pre-transition processing: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.9_pre_transitions_processing.html -.. _tutorial on script MISC: https://deeppavlov.github.io/dialog_flow_framework/tutorials/tutorials.script.core.8_misc.html + def fav_kitchen_response(ctx: Context, _: Pipeline, *args, **kwargs) -> Message: + """ + This function returns a user-targeted response depending on the value + of the 'kitchen preference' slot. + """ + ... + +This documentation serves as a reference for developers involved in the project. + +Scaling +~~~~~~~ + +If your bot becomes popular and requires scaling, consider scalability during development. +Scalability ensures that the bot can handle a growing user base without performance issues. +While having only one application instance will suffice in most cases, there are many ways +how you can adapt the application to a high load environment. 
+ +* With the database connection support that DFF offers out of the box, DFF projects can be easily scaled through sharing the same database between multiple application instances. However, using an external database is required due to the fact that this is the only kind of storage that can be efficiently shared between processes. +* Likewise, using multiple database instances to ensure the availability of data is also an option. +* The structure of the `Context` object makes it easy to vertically partition the data storing different subsets of data across multiple database instances. + +Further reading +~~~~~~~~~~~~~~~ + +* `Tutorial on basic dialog structure <../tutorials/tutorials.script.core.1_basics.html>`_ +* `Tutorial on transitions <../tutorials/tutorials.script.core.4_transitions.html>`_ +* `Tutorial on conditions <../tutorials/tutorials.script.core.2_conditions.html>`_ +* `Tutorial on response functions <../tutorials/tutorials.script.core.3_responses.html>`_ +* `Tutorial on pre-response processing <../tutorials/tutorials.script.core.7_pre_response_processing.html>`_ +* `Tutorial on pre-transition processing <../tutorials/tutorials.script.core.9_pre_transitions_processing.html>`_ +* `Guide on Context <../user_guides/context_guide.html>`_ +* `Tutorial on global transitions <../tutorials/tutorials.script.core.5_global_transitions.html>`_ +* `Tutorial on context serialization <../tutorials/tutorials.script.core.6_context_serialization.html>`_ +* `Tutorial on script MISC <../tutorials/tutorials.script.core.8_misc.html>`_ \ No newline at end of file diff --git a/docs/source/user_guides/context_guide.rst b/docs/source/user_guides/context_guide.rst new file mode 100644 index 000000000..1dc47712c --- /dev/null +++ b/docs/source/user_guides/context_guide.rst @@ -0,0 +1,247 @@ +Context guide +-------------- + +Introduction +~~~~~~~~~~~~ + +The ``Context`` class is a backbone component of the DFF API. 
+Like the name suggests, this data structure is used to store information +about the current state, or context, of a particular conversation. +Each individual user has their own ``Context`` instance and can be identified by it. + +``Context`` is used to keep track of the user's requests, bot's replies, +user-related and request-related annotations, and any other information +that is relevant to the conversation with the user. + +.. note:: + + Since most callback functions used in DFF script and DFF pipeline (see the `basic guide <./basic_conceptions.rst>`__) + need to either read or update the current dialog state, + the framework-level convention is that all functions of this kind + use ``Context`` as their first parameter. This dependency is being + injected by the pipeline during its run. + Thus, understanding the ``Context`` class is essential for developing custom conversation logic + which is mostly made up by the said functions. + +As a callback parameter, ``Context`` provides a convenient interface for working with data, +allowing developers to easily add, retrieve, +and manipulate data as the conversation progresses. + +Let's consider some of the built-in callback instances to see how the context can be leveraged: + +.. code-block:: python + :linenos: + + pattern = re.compile("[a-zA-Z]+") + + def regexp_condition_handler( + ctx: Context, pipeline: Pipeline, *args, **kwargs + ) -> bool: + # retrieve the current request + request = ctx.last_request + if request.text is None: + return False + return bool(pattern.search(request.text)) + +The code above is a condition function (see the `basic guide <./basic_conceptions.rst>`__) +that belongs to the ``TRANSITIONS`` section of the script and returns `True` or `False` +depending on whether the current user request matches the given pattern. +As can be seen from the code block, the current +request (``last_request``) can be easily retrieved as one of the attributes of the ``Context`` object. 
+Likewise, the ``last_response`` (bot's current reply) or the ``last_label`` +(the name of the currently visited node) attributes can be used in the same manner. + +Another common use case is leveraging the ``misc`` field (see below for a detailed description): +pipeline functions or ``PROCESSING`` callbacks can write arbitrary values to the misc field, +making those available for other context-dependent functions. + +.. code-block:: python + :linenos: + + import urllib.request + import urllib.error + + def ping_example_com( + ctx: Context, *_, **__ + ): + try: + with urllib.request.urlopen("https://example.com/") as webpage: + web_content = webpage.read().decode( + webpage.headers.get_content_charset() + ) + result = "Example Domain" in web_content + except urllib.error.URLError: + result = False + ctx.misc["can_ping_example_com"] = result + +.. + todo: link to the user defined functions tutorial + + .. note:: + For more information about user-defined functions see the `user functions guide <./user_functions.rst>`__. + +API +~~~ + +This sections describes the API of the ``Context`` class. + +For more information, such as method signatures, see +`API reference <./apiref/dff.script.core.context.html#dff.script.core.context.Context>`__. + +Attributes +========== + +* **id**: This attribute represents the unique context identifier. By default, it is randomly generated using uuid4. + In most cases, this attribute will be used to identify a user. + +* **labels**: The labels attribute stores the history of all passed labels within the conversation. + It maps turn IDs to labels. The collection is ordered, so getting the last item of the mapping + always shows the last visited node. + + Note that `labels` only stores the nodes that were transitioned to + so `start_label` will not be in this attribute. + +* **requests**: The requests attribute maintains the history of all received requests by the agent. + It also maps turn IDs to requests. 
Like labels, it stores the requests in-order. + +* **responses**: This attribute keeps a record of all agent responses, mapping turn IDs to responses. + Stores the responses in-order. + +* **misc**: The misc attribute is a dictionary for storing custom data. This field is not used by any of the + built-in DFF classes or functions, so the values that you write there are guaranteed to persist + throughout the lifetime of the ``Context`` object. + +* **framework_states**: This attribute is used for storing addon or pipeline states. + Each turn, the DFF pipeline records the intermediary states of its components into this field, + and clears it at the end of the turn. For this reason, developers are discouraged from storing + their own data in this field. + +Methods +======= + +The methods of the ``Context`` class can be divided into two categories: + +* Public methods that get called manually in custom callbacks and in functions that depend on the context. +* Methods that are not designed for manual calls and get called automatically during pipeline runs, + i.e. quasi-private methods. You may still need them when developing extensions or heavily modifying DFF. + +Public methods +^^^^^^^^^^^^^^ + +* **last_request**: Return the last request of the context, or `None` if the ``requests`` field is empty. + + Note that a request is added right after the context is created/retrieved from db, + so an empty ``requests`` field usually indicates an issue with the messenger interface. + +* **last_response**: Return the last response of the context, or `None` if the ``responses`` field is empty. + + Responses are added at the end of each turn, so an empty ``response`` field is something you should definitely consider. + +* **last_label**: Return the last label of the context, or `None` if the ``labels`` field is empty. 
+ Last label is always the name of the current node but not vice versa: + + Since ``start_label`` is not added to the ``labels`` field, + empty ``labels`` usually indicates that the current node is the `start_node`. + After a transition is made from the `start_node` + the label of that transition is added to the field. + +* **clear**: Clear all items from context fields, optionally keeping the data from `hold_last_n_indices` turns. + You can specify which fields to clear using the `field_names` parameter. This method is designed for cases + when contexts are shared over high latency networks. + +.. note:: + + See the `preprocessing tutorial <../tutorials/tutorials.script.core.7_pre_response_processing.py>`__. + +Private methods +^^^^^^^^^^^^^^^ + +* **set_last_response, set_last_request**: These methods allow you to set the last response or request for the current context. + This functionality can prove useful if you want to create a middleware component that overrides the pipeline functionality. + +* **add_request**: Add a request to the context. + It updates the `requests` dictionary. This method is called by the `Pipeline` component + before any of the `pipeline services <../tutorials/tutorials.pipeline.3_pipeline_dict_with_services_basic.py>`__ are executed, + including `Actor <../apiref/dff.pipeline.pipeline.actor.html>`__. + +* **add_response**: Add a response to the context. + It updates the `responses` dictionary. This function is run by the `Actor <../apiref/dff.pipeline.pipeline.actor.html>`__ pipeline component at the end of the turn, after it has run + the `PRE_RESPONSE_PROCESSING <../tutorials/tutorials.script.core.7_pre_response_processing.py>`__ functions. + + To be more precise, this method is called between the ``CREATE_RESPONSE`` and ``FINISH_TURN`` stages. + For more information about stages, see `ActorStages <../apiref/dff.script.core.types.html#dff.script.core.types.ActorStage>`__. + +* **add_label**: Add a label to the context. 
+ It updates the `labels` field. This method is called by the `Actor <../apiref/dff.pipeline.pipeline.actor.html>`_ component when transition conditions + have been resolved, and when `PRE_TRANSITIONS_PROCESSING <../tutorials/tutorials.script.core.9_pre_transitions_processing.py>`__ callbacks have been run. + + To be more precise, this method is called between the ``GET_NEXT_NODE`` and ``REWRITE_NEXT_NODE`` stages. + For more information about stages, see `ActorStages <../apiref/dff.script.core.types.html#dff.script.core.types.ActorStage>`__. + +* **current_node**: Return the current node of the context. This is particularly useful for tracking the node during the conversation flow. + This method only returns a node inside ``PROCESSING`` callbacks yielding ``None`` in other contexts. + +Context storages +~~~~~~~~~~~~~~~~ + +Since context instances contain all the information, relevant for a particular user, there needs to be a way +to persistently store that information and to make it accessible in different user sessions. +This functionality is implemented by the ``context storages`` module that provides +the uniform ``DBContextStorage`` interface as well as child classes thereof that integrate +various database types (see the +`api reference <../apiref/dff.context_storages.database.html#dff.context_storages.database.DBContextStorage>`_). + +The supported storage options are as follows: + +* `JSON `_ +* `pickle `_ +* `shelve `_ +* `SQLite `_ +* `PostgreSQL `_ +* `MySQL `_ +* `MongoDB `_ +* `Redis `_ +* `Yandex DataBase `_ + +``DBContextStorage`` instances can be uniformly constructed using the ``context_storage_factory`` function. +The function's only parameter is a connection string that specifies both the database type +and the connection parameters, for example, *mongodb://admin:pass@localhost:27016/admin*. 
+(`see the reference <../apiref/dff.context_storages.database.html#dff.context_storages.database.context_storage_factory>`_) + +The GitHub-based distribution of DFF includes Docker images for each of the supported database types. +Therefore, the easiest way to deploy your service together with a database is to clone the GitHub +distribution and to take advantage of the packaged +`docker-compose file `_. + +.. code-block:: shell + :linenos: + + git clone https://github.com/deeppavlov/dialog_flow_framework.git + cd dialog_flow_framework + # assuming we need to deploy mongodb + docker-compose up mongo + +The images can be configured using the docker-compose file or the +`environment file `_, +also available in the distribution. Consult these files for more options. + +.. warning:: + + The data transmission protocols require the data to be JSON-serializable. DFF tackles this problem + through utilization of ``pydantic`` as described in the next section. + +Serialization +~~~~~~~~~~~~~ + +The fact that the ``Context`` class is a Pydantic model makes it easily convertible to other data formats, +such as JSON. For instance, as a developer, you don't need to implement instructions on how datetime fields +need to be marshalled, since this functionality is provided by Pydantic out of the box. +As a result, working with web interfaces and databases that require the transmitted data to be serialized +becomes as easy as calling the `model_dump_json` method: + +.. code-block:: python + + context = Context() + serialized_context = context.model_dump_json() + +Knowing that, you can easily extend DFF to work with storages like Memcache or web APIs of your liking. 
\ No newline at end of file diff --git a/docs/source/user_guides/superset_guide.rst b/docs/source/user_guides/superset_guide.rst index 5add23bde..c19142055 100644 --- a/docs/source/user_guides/superset_guide.rst +++ b/docs/source/user_guides/superset_guide.rst @@ -31,7 +31,7 @@ Collection procedure git clone https://github.com/deeppavlov/dialog_flow_framework.git # launch the required services cd dialog_flow_framework - docker-compose up otelcol clickhouse dashboard + docker-compose --profile stats up **Collecting data** diff --git a/docs/source/utils/generate_tutorials.py b/docs/source/utils/generate_tutorials.py index 1f74ca1a4..5e8e0e333 100644 --- a/docs/source/utils/generate_tutorials.py +++ b/docs/source/utils/generate_tutorials.py @@ -68,7 +68,16 @@ def sort_tutorial_file_tree(files: Set[Path]) -> List[Path]: :param files: Files list to sort. """ tutorials = {file for file in files if file.stem.split("_")[0].isdigit()} - return sorted(tutorials, key=lambda file: int(file.stem.split("_")[0])) + sorted(files - tutorials) + + def sort_key(tutorial_file_name: Path) -> float: + tutorial_number = float(tutorial_file_name.stem.split("_")[0]) + + # full tutorials should go after tutorials with the same number + if tutorial_file_name.stem.endswith("_full"): + return tutorial_number + 0.5 + return tutorial_number + + return sorted(tutorials, key=sort_key) + sorted(files - tutorials) def iterate_tutorials_dir_generating_links(source: Path, dest: Path, base: str) -> List[Path]: diff --git a/makefile b/makefile index b2941b236..e182bfcdf 100644 --- a/makefile +++ b/makefile @@ -5,7 +5,7 @@ SHELL = /bin/bash PYTHON = python3 VENV_PATH = venv VERSIONING_FILES = setup.py makefile docs/source/conf.py dff/__init__.py -CURRENT_VERSION = 0.6.0 +CURRENT_VERSION = 0.6.1 TEST_COVERAGE_THRESHOLD=95 TEST_ALLOW_SKIP=all # for more info, see tests/conftest.py @@ -50,7 +50,7 @@ lint: venv .PHONY: lint docker_up: - docker-compose up -d + docker-compose --profile context_storage 
--profile stats up -d .PHONY: docker_up wait_db: docker_up diff --git a/setup.py b/setup.py index 010f0ba89..54c234558 100644 --- a/setup.py +++ b/setup.py @@ -140,13 +140,13 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]: ) tutorial_dependencies = [ - "flask[async]==2.3.2", + "flask[async]==3.0.0", "psutil==5.9.5", - "telethon==1.30.0", + "telethon==1.31.0", "fastapi==0.103.1", "uvicorn==0.23.1", "websockets==11.0.2", - "locust==2.16.1", + "locust==2.17.0", "streamlit==1.27.0", "streamlit-chat==0.1.1", ] @@ -160,7 +160,7 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]: doc = merge_req_lists( [ "sphinx==7.2.2", - "pydata-sphinx-theme==0.13.3", + "pydata-sphinx-theme==0.14.1", "sphinxcontrib-apidoc==0.4.0", "sphinxcontrib-httpdomain==1.8.0", "sphinxcontrib-katex==0.9.0", @@ -182,7 +182,7 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]: ] mypy_dependencies = [ - "mypy==1.5.0", + "mypy==1.6.0", ] devel_full = merge_req_lists( @@ -216,7 +216,7 @@ def merge_req_lists(*req_lists: List[str]) -> List[str]: setup( name="dff", - version="0.6.0", + version="0.6.1", description=description, long_description=long_description, long_description_content_type="text/markdown", diff --git a/tests/stats/test_patch.py b/tests/stats/test_patch.py deleted file mode 100644 index 284ea0b69..000000000 --- a/tests/stats/test_patch.py +++ /dev/null @@ -1,19 +0,0 @@ -import pytest - -try: - from dff import stats # noqa: F401 - from opentelemetry.proto.common.v1.common_pb2 import AnyValue - from opentelemetry.exporter.otlp.proto.grpc.exporter import _translate_value -except ImportError: - pytest.skip(allow_module_level=True, reason="One of the Opentelemetry packages is missing.") - - -@pytest.mark.parametrize( - ["value", "expected_field"], [(1, "int_value"), ({"a": "b"}, "kvlist_value"), (None, "string_value")] -) -def test_body_translation(value, expected_field): - assert _translate_value.__wrapped__.__name__ == "_translate_value" - translated_value = 
_translate_value(value) - assert isinstance(translated_value, AnyValue) - assert translated_value.IsInitialized() - assert getattr(translated_value, expected_field, None) is not None diff --git a/tutorials/script/core/7_pre_response_processing.py b/tutorials/script/core/7_pre_response_processing.py index d43e9c17d..233d62f75 100644 --- a/tutorials/script/core/7_pre_response_processing.py +++ b/tutorials/script/core/7_pre_response_processing.py @@ -37,13 +37,6 @@ # %% -def add_label_processing(ctx: Context, _: Pipeline, *args, **kwargs) -> Context: - processed_node = ctx.current_node - processed_node.response = Message(text=f"{ctx.last_label}: {processed_node.response.text}") - ctx.overwrite_current_node_in_processing(processed_node) - return ctx - - def add_prefix(prefix): def add_prefix_processing(ctx: Context, _: Pipeline, *args, **kwargs) -> Context: processed_node = ctx.current_node