Examples merged into main repo #286

Draft · wants to merge 18 commits into base: `dev`
1 change: 1 addition & 0 deletions .gitignore
@@ -32,3 +32,4 @@ dbs
benchmarks
benchmark_results_files.json
uploaded_benchmarks
**/.env
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -7,6 +7,7 @@ include dff/context_storages/protocols.json
exclude makefile

recursive-exclude tests *
recursive-exclude examples *
recursive-exclude tutorials *
recursive-exclude * __pycache__
recursive-exclude * *.py[co]
10 changes: 10 additions & 0 deletions examples/README.md
@@ -0,0 +1,10 @@
# DFF examples
Review comment (Member): TODO: update the 'examples' section in index.rst with new links

This repository contains examples of bots built using [DFF](https://github.com/deeppavlov/dialog_flow_framework) (Dialog Flow Framework).

The Dialog Flow Framework (DFF) allows you to write conversational services. A service is defined by a
dialog graph that describes the behavior of the dialog service and contains the dialog script.
DFF offers a specialized domain-specific language (DSL) for quickly writing dialog graphs.
You can use it for services such as Amazon Alexa skills, chatbots for social networks, website call centers, and more.
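At its core, a dialog graph maps flows to nodes, each with a response and conditional transitions. The sketch below is a hand-rolled illustration of that idea in plain Python dictionaries; it mimics the shape of a DFF script but is *not* the actual DFF API, and the flow/node names are invented for the example.

```python
# Illustration only: a dialog graph as nested dictionaries,
# flow -> node -> {response, transitions}. Not the real DFF API.
script = {
    "greeting_flow": {
        "start": {
            "response": "Hi! Ask me about our books.",
            # transition to "books" fires when the user mentions a book
            "transitions": {"books": lambda text: "book" in text.lower()},
        },
        "books": {
            "response": "We stock fiction and non-fiction.",
            # unconditionally return to the start node afterwards
            "transitions": {"start": lambda text: True},
        },
    },
}


def step(flow: str, node: str, user_text: str) -> str:
    """Return the next node reached from (flow, node) given the user's text."""
    for target, condition in script[flow][node]["transitions"].items():
        if condition(user_text):
            return target
    return node  # stay in place if no transition fires


next_node = step("greeting_flow", "start", "Do you sell books?")
print(script["greeting_flow"][next_node]["response"])  # prints "We stock fiction and non-fiction."
```

A real DFF script replaces the lambdas with the framework's condition primitives and the `step` loop with its pipeline, but the graph structure is the same.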

This repository presents two example bots: an FAQ bot and a customer service bot. Both use Telegram as an interface.
2 changes: 2 additions & 0 deletions examples/customer_service_bot/.env.example
@@ -0,0 +1,2 @@
TG_BOT_TOKEN=bot_token
OPENAI_API_TOKEN=openai_api_token
64 changes: 64 additions & 0 deletions examples/customer_service_bot/README.md
@@ -0,0 +1,64 @@
## Description

### Customer service bot

A customer service bot built on `DFF`, using Telegram as an interface.
The bot is designed to answer user questions in a limited business domain (a book shop).

* [DeepPavlov Intent Catcher](#) is used for intent retrieval.
* [ChatGPT](https://openai.com/pricing#language-models) is used for context based question answering.

### Intent Catcher

The intent catcher is a DistilBERT-based classifier for user intent classes.
We use the DeepPavlov library for seamless training and inference.
Sample code for training the model can be found in `Training_intent_catcher.ipynb`.
The model is deployed as a separate microservice running at port 4999.

The bot interacts with the container via the `/respond` endpoint.
The API expects a JSON object with the dialog history passed as an array under the 'dialog_contexts' key. Intents are extracted from the last utterance.

```json
{
"dialog_contexts": ["phrase_1", "phrase_2"]
}
```

The API responds with a nested array containing `label - score` pairs.

```json
[["no", 0.3393537402153015]]
```
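As an illustration, the `label - score` response shape can be consumed like this. The helper below is hypothetical (not part of the bot's code), and the 0.5 confidence threshold is an arbitrary assumption for the example:

```python
import json


def top_intent(response_body: str, threshold: float = 0.5):
    """Return the best label from a [["label", score], ...] response,
    or None if no score clears the threshold."""
    pairs = json.loads(response_body)
    label, score = max(pairs, key=lambda pair: pair[1])
    return label if score >= threshold else None


print(top_intent('[["no", 0.3393537402153015], ["purchase", 0.91]]'))  # prints "purchase"
```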

Run the intent catcher:
```commandline
docker compose up --build --abort-on-container-exit --exit-code-from intent_catcher
```

## Run the bot

### Run with Docker & Docker Compose
In order for the bot to work, set the bot and API tokens via [.env](.env.example). Start by creating your own `.env` file:
```
echo TG_BOT_TOKEN=*** >> .env
echo OPENAI_API_TOKEN=*** >> .env
```

Build the bot:
```commandline
docker-compose build
```
Testing the bot:
```commandline
docker-compose run assistant pytest test.py
```

Running the bot:
```commandline
docker-compose run assistant python run.py
```

Running in the background:
```commandline
docker-compose up -d
```
184 changes: 184 additions & 0 deletions examples/customer_service_bot/Training_intent_catcher.ipynb
@@ -0,0 +1,184 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"mount_file_id": "1K8dSq-mrFOR44N6CwDp8WiqVDHdtDHJQ",
"authorship_tag": "ABX9TyP5keJL46m+Vgb5Qj+tw1SA",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"gpuClass": "standard"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/ruthenian8/forhse2/blob/master/Training_intent_catcher.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"source": [
"!pip install --upgrade pip"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rY6UASGLpmt6",
"outputId": "f44435da-8658-43f2-f56d-e8987663663c"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: pip in /usr/local/lib/python3.10/dist-packages (23.1.2)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!pip install deeppavlov"
],
"metadata": {
"id": "RboxW9XRp57X"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y\n",
"# The required version of 'tokenizers' library depends on a Rust compiler."
],
"metadata": {
"id": "BfpE0tExLbN2"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!export PATH=\"$HOME/.cargo/bin:${PATH}\" && pip install 'tokenizers==0.10.3'\n",
"# Before installing 'tokenizers', we ensure system-wide Rust compiler availability."
],
"metadata": {
"id": "aDJWGvk0tU1-"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Fl7obdeKFomg"
},
"outputs": [],
"source": [
"!git clone https://github.com/deeppavlov/dream.git"
]
},
{
"cell_type": "code",
"source": [
"!pip install 'xeger==0.3.5'\n",
"!pip install 'transformers==4.6.0'"
],
"metadata": {
"id": "Gl9xIpKFqiLs"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# In order to train the model with custom classes, we need to modify the 'intent_phrases.json' file.\n",
"# Each intent in the json structure includes a 'phrases' section.\n",
"# Regular expressions from that section will be used to generate the data used during training.\n",
"import json\n",
"INTENT_PHRASES = './dream/annotators/IntentCatcherTransformers/intent_phrases.json'\n",
"\n",
"with open(INTENT_PHRASES, 'r') as file:\n",
" intents = json.load(file)\n",
"\n",
"intents['purchase'] = {\n",
" \"phrases\": [\n",
" \"i think i'll ((order)|(purchase)|(buy)) a book\",\n",
" \"i plan on ((buying)|(purchasing)|(ordering)) a book\",\n",
" \"i would ((love)|(like)) to ((order)|(purchase)|(buy)) a book\",\n",
" \"i'm interested in ((buying)|(purchasing)|(ordering)) a book\",\n",
" \"do you have this book in stock\",\n",
" \"i'm looking to ((order)|(purchase)|(buy)) a book\",\n",
" \"add this to my cart\",\n",
" \"i want to make an order\"\n",
" ],\n",
" \"reg_phrases\": [\n",
" \"i want to buy a book\",\n",
" \"order an item\",\n",
" \"order a book\"\n",
" ],\n",
" \"min_precision\": 0.94,\n",
" \"punctuation\": [\n",
" \".\",\n",
" \"?\"\n",
" ]\n",
"}\n",
"\n",
"with open(INTENT_PHRASES, 'w') as file:\n",
" json.dump(intents, file)"
],
"metadata": {
"id": "d26Ko8xFF6sH"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!cd /content/dream/annotators/IntentCatcherTransformers/ && export CUDA_VISIBLE_DEVICES=0 && python -m deeppavlov train intents_model_dp_config.json\n",
"# CUDA_VISIBLE_DEVICES variable is required for GPU-powered training with DeepPavlov."
],
"metadata": {
"id": "lOmGOt6Wllly"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!cp /root/.deeppavlov/models/classifiers/intents_model_v2/model.pth.tar /content/drive/MyDrive/\n",
"!cp /root/.deeppavlov/models/classifiers/intents_model_v2/classes.dict /content/drive/MyDrive/\n",
"# Weights and metadata produced during training can be copied to mounted Google drive."
],
"metadata": {
"id": "YUeJ67-CeuX5"
},
"execution_count": null,
"outputs": []
}
]
}
Empty file.
72 changes: 72 additions & 0 deletions examples/customer_service_bot/bot/api/chatgpt.py
@@ -0,0 +1,72 @@
"""
ChatGPT
-------
This module defines functions for OpenAI API interaction.
"""
import os
import openai

CHATGPT_MAIN_PROMPT = """
You are a helpful assistant for a book shop "Book Lovers Paradise".
Located at 123 Main Street.
Open seven days a week, from 9 AM to 9 PM.
Extensive collection of genres, including fiction, and non-fiction.
Knowledgeable staff. Online catalogue for easy browsing and ordering.
Comfortable seating areas and peaceful atmosphere.
Refund policy within 30 days of purchase.
Loyalty program for frequent customers (10% off purchases).
""" # shortened the prompt to reduce token consumption.

CHATGPT_QUESTION_PROMPT = """
What follows is a user query: answer if related to the given description or deny if unrelated.
"""

CHATGPT_COHERENCE_PROMPT = """
What follows is a question and an answer. Just write 'true' if the answer was satisfactory or 'false' otherwise.
"""

openai.api_key = os.getenv("OPENAI_API_TOKEN")


def get_output_factory():
    """
    Construct a get_output function encapsulating the execution counter.
    The function prompts ChatGPT for generated output.
    The main prompt is only included
    on the first invocation of the function.
    """

    def get_output_inner(request: str) -> str:
        messages = [
            {"role": "system", "content": CHATGPT_MAIN_PROMPT},
            {"role": "system", "content": CHATGPT_QUESTION_PROMPT},
            {"role": "user", "content": request},
        ]  # temporary fix until a better solution is found
        get_output_inner.num_calls += 1
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
        )
        return response["choices"][0]["message"]["content"]

    get_output_inner.num_calls = 0
    return get_output_inner


def get_coherence(request: str, response: str) -> str:
    """
    Prompt ChatGPT to evaluate the coherence of a request
    response pair.
    """
    # use a distinct name so the `response` parameter is not shadowed
    result = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": CHATGPT_COHERENCE_PROMPT},
            {"role": "user", "content": request},
            {"role": "assistant", "content": response},
        ],
    )
    return result["choices"][0]["message"]["content"]


get_output = get_output_factory()
27 changes: 27 additions & 0 deletions examples/customer_service_bot/bot/api/intent_catcher.py
@@ -0,0 +1,27 @@
"""
Intent Catcher
--------------
This module includes queries to a local intent catcher service.
"""
import requests
from dff.script import Message


INTENT_CATCHER_SERVICE = "http://localhost:4999/respond"


def get_intents(request: Message):
    """
    Query the local intent catcher service extracting intents from the
    last user utterance.
    """
    if not request.text:
        return []
    request_body = {"dialog_contexts": [request.text]}
    try:
        response = requests.post(INTENT_CATCHER_SERVICE, json=request_body)
    except requests.RequestException:
        response = None
    if response and response.status_code == 200:
        return [response.json()[0][0]]
    return []
Empty file.