diff --git a/Containerfile b/Containerfile
index 95684fe5..5e3fa8cf 100644
--- a/Containerfile
+++ b/Containerfile
@@ -1,40 +1,42 @@
-# vim: set filetype=dockerfile
-ARG LIGHTSPEED_RAG_CONTENT_IMAGE=quay.io/openshift-lightspeed/lightspeed-rag-content@sha256:a91aca8224b1405e7c91576374c7bbc766b2009b2ef852895c27069fffc5b06f
+# # vim: set filetype=dockerfile
+# ARG LIGHTSPEED_RAG_CONTENT_IMAGE=quay.io/ttakamiy/aap-rag-content:latest

-FROM ${LIGHTSPEED_RAG_CONTENT_IMAGE} as lightspeed-rag-content
+# FROM ${LIGHTSPEED_RAG_CONTENT_IMAGE} as lightspeed-rag-content

-FROM registry.redhat.io/ubi9/ubi-minimal:latest
+# FROM registry.access.redhat.com/ubi9/ubi-minimal

-ARG VERSION
-# todo: this is overriden by the image ubi9/python-311, we hard coded WORKDIR below to /app-root
-# makesure the default value of rag content is set according to APP_ROOT and then update the operator.
-ARG APP_ROOT=/app-root
-
-RUN microdnf install -y --nodocs --setopt=keepcache=0 --setopt=tsflags=nodocs \
-    python3.11 python3.11-devel python3.11-pip
+# ARG APP_ROOT=/app-root

-# PYTHONDONTWRITEBYTECODE 1 : disable the generation of .pyc
-# PYTHONUNBUFFERED 1 : force the stdout and stderr streams to be unbuffered
-# PYTHONCOERCECLOCALE 0, PYTHONUTF8 1 : skip legacy locales and use UTF-8 mode
-ENV PYTHONDONTWRITEBYTECODE=1 \
-    PYTHONUNBUFFERED=1 \
-    PYTHONCOERCECLOCALE=0 \
-    PYTHONUTF8=1 \
-    PYTHONIOENCODING=UTF-8 \
-    LANG=en_US.UTF-8 \
-    PIP_NO_CACHE_DIR=off
+# RUN microdnf install -y --nodocs --setopt=keepcache=0 --setopt=tsflags=nodocs \
+#     python3.11 python3.11-devel python3.11-pip shadow-utils \
+#     && microdnf clean all --enablerepo='*'

-WORKDIR /app-root
+# # PYTHONDONTWRITEBYTECODE 1 : disable the generation of .pyc
+# # PYTHONUNBUFFERED 1 : force the stdout and stderr streams to be unbuffered
+# # PYTHONCOERCECLOCALE 0, PYTHONUTF8 1 : skip legacy locales and use UTF-8 mode
+# ENV PYTHONDONTWRITEBYTECODE=1 \
+#     PYTHONUNBUFFERED=1 \
+#     PYTHONCOERCECLOCALE=0 \
+#     PYTHONUTF8=1 \
+#     PYTHONIOENCODING=UTF-8 \
+#     LANG=en_US.UTF-8 \
+#     PIP_NO_CACHE_DIR=off

-COPY --from=lightspeed-rag-content /rag/vector_db/ocp_product_docs ./vector_db/ocp_product_docs
-COPY --from=lightspeed-rag-content /rag/embeddings_model ./embeddings_model
+# WORKDIR ${APP_ROOT}

-# Add explicit files and directories
-# (avoid accidental inclusion of local directories or env files or credentials)
-COPY runner.py requirements.txt ./
+# COPY --from=lightspeed-rag-content /rag/vector_db/aap_product_docs ./vector_db/aap_product_docs
+# COPY --from=lightspeed-rag-content /rag/embeddings_model ./embeddings_model

-RUN pip3.11 install --no-cache-dir -r requirements.txt
+# # Add explicit files and directories
+# # (avoid accidental inclusion of local directories or env files or credentials)
+# COPY pyproject.toml pdm.lock runner.py ./
+# RUN pip3.11 install --no-cache-dir --upgrade pip pdm==2.18.1 \
+#     && pdm config python.use_venv false \
+#     && pdm sync --global --prod -p ${APP_ROOT}
+FROM quay.io/ansible/ansible-chatbot-service:base
+ARG APP_ROOT=/app-root
+WORKDIR ${APP_ROOT}
 COPY ols ./ols

 # this directory is checked by ecosystem-cert-preflight-checks task in Konflux
@@ -55,5 +57,5 @@ LABEL io.k8s.display-name="OpenShift LightSpeed Service" \
       vendor="Red Hat, Inc."
-# no-root user is checked in Konflux
+# no-root user is checked in Konflux
 USER 1001
diff --git a/ols/app/endpoints/ols.py b/ols/app/endpoints/ols.py
index ef1a76ca..0162ff4c 100644
--- a/ols/app/endpoints/ols.py
+++ b/ols/app/endpoints/ols.py
@@ -33,9 +33,19 @@
 from ols.src.query_helpers.question_validator import QuestionValidator
 from ols.utils import errors_parsing, suid
 from ols.utils.auth_dependency import AuthDependency
-from ols.utils.keywords import KEYWORDS
 from ols.utils.token_handler import PromptTooLongError

+import importlib
+
+if config.ols_config.customize:
+    keywords = importlib.import_module(f"{config.ols_config.customize}.keywords")
+    prompts = importlib.import_module(f"{config.ols_config.customize}.prompts")
+    print(f'customized: {prompts.INVALID_QUERY_RESP}')
+else:
+    keywords = importlib.import_module('ols.utils.keywords')
+    prompts = importlib.import_module('ols.src.prompts.prompts')
+    print(f'NOT-customized: {prompts.INVALID_QUERY_RESP}')
+
 logger = logging.getLogger(__name__)

 router = APIRouter(tags=["query"])
@@ -130,7 +140,7 @@ def conversation_request(
         if not valid:
             summarizer_response = SummarizerResponse(
-                constants.INVALID_QUERY_RESP,
+                prompts.INVALID_QUERY_RESP,
                 [],
                 False,
             )
@@ -496,7 +506,7 @@ def _validate_question_keyword(query: str) -> bool:
     # Current implementation is without any tokenizer method, lemmatization/n-grams.
     # Add valid keywords to keywords.py file.
     query_temp = query.lower()
-    for kw in KEYWORDS:
+    for kw in keywords.KEYWORDS:
         if kw in query_temp:
             return True
     # query_temp = {q_word.lower().strip(".?,") for q_word in query.split()}
diff --git a/ols/app/models/config.py b/ols/app/models/config.py
index d65d9c30..9029706e 100644
--- a/ols/app/models/config.py
+++ b/ols/app/models/config.py
@@ -892,6 +892,7 @@ class OLSConfig(BaseModel):
     extra_ca: list[FilePath] = []
     certificate_directory: Optional[str] = None
+    customize: Optional[str] = None

     def __init__(
         self, data: Optional[dict] = None, ignore_missing_certs: bool = False
@@ -902,8 +903,8 @@ def __init__(
             return

         self.conversation_cache = ConversationCacheConfig(
-            data.get("conversation_cache", None)
-        )
+            data.get("conversation_cache")
+        ) if data.get("conversation_cache") else None
         self.logging_config = LoggingConfig(**data.get("logging_config", {}))
         if data.get("reference_content") is not None:
             self.reference_content = ReferenceContent(data.get("reference_content"))
@@ -932,6 +933,7 @@ def __init__(
         self.certificate_directory = data.get(
             "certificate_directory", constants.DEFAULT_CERTIFICATE_DIRECTORY
         )
+        self.customize = data.get('customize')

     def __eq__(self, other: object) -> bool:
         """Compare two objects for equality."""
diff --git a/ols/constants.py b/ols/constants.py
index 4780826a..b475de5c 100644
--- a/ols/constants.py
+++ b/ols/constants.py
@@ -18,13 +18,6 @@ class QueryValidationMethod(StrEnum):
     SUBJECT_ALLOWED = "ALLOWED"


-# Default responses
-INVALID_QUERY_RESP = (
-    "Hi, I'm the OpenShift Lightspeed assistant, I can help you with questions about OpenShift, "
-    "please ask me a question related to OpenShift."
-)
-
-
 # providers
 PROVIDER_BAM = "bam"
 PROVIDER_OPENAI = "openai"
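For reference, the selection logic introduced above reduces to the sketch below. The helper name `load_customization` is illustrative only; the patch runs the equivalent branch at module import time, keyed off the new `OLSConfig.customize` field (e.g. `customize: ols.customize` selects the package added later in this diff).

```python
import importlib
from types import ModuleType


def load_customization(customize_package: str | None) -> tuple[ModuleType, ModuleType]:
    """Pick keyword/prompt modules the same way the patched ols.py does."""
    if customize_package:
        # e.g. "ols.customize" -> ols.customize.keywords / ols.customize.prompts
        keywords = importlib.import_module(f"{customize_package}.keywords")
        prompts = importlib.import_module(f"{customize_package}.prompts")
    else:
        # stock modules shipped with the service
        keywords = importlib.import_module("ols.utils.keywords")
        prompts = importlib.import_module("ols.src.prompts.prompts")
    return keywords, prompts
```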
diff --git a/ols/customize/__init__.py b/ols/customize/__init__.py
new file mode 100644
index 00000000..71247eb8
--- /dev/null
+++ b/ols/customize/__init__.py
@@ -0,0 +1,7 @@
+"""Customization package for the Lightspeed service."""
+
+from ols.utils.config import config
+
+# make config submodule easily importable by using
+# from ols import config
+__all__ = ["config"]
diff --git a/ols/customize/keywords.py b/ols/customize/keywords.py
new file mode 100644
index 00000000..c050b946
--- /dev/null
+++ b/ols/customize/keywords.py
@@ -0,0 +1,92 @@
+"""Constant for set of keywords."""
+
+# Add keyword strings to the set below, preferably in alphabetical order.
+# We are adding these manually for now; move them to a txt file if/when we automate this.
+# Important: please use lower case.
+
+KEYWORDS = {
+    "aap",
+    "access",
+    "account",
+    "administrator",
+    "ansible",
+    "application",
+    "associated",
+    "authentication",
+    "authenticator",
+    "automatically",
+    "automation",
+    "backup",
+    "capacity",
+    "certificate",
+    "client",
+    "cluster",
+    "collection",
+    "command",
+    "configuration",
+    "connection",
+    "container",
+    "content",
+    "controller",
+    "credential",
+    "deployment",
+    "directory",
+    "documentation",
+    "enterprise",
+    "environment",
+    "event-driven",
+    "execution",
+    "group",
+    "hosts",
+    "information",
+    "install",
+    "instance",
+    "inventory",
+    "jobs",
+    "kubernetes",
+    "ldap",
+    "license",
+    "linux",
+    "log",
+    "management",
+    "mesh",
+    "namespace",
+    "navigation",
+    "navigator",
+    "node",
+    "nodes",
+    "number",
+    "oauth2",
+    "openshift",
+    "operator",
+    "option",
+    "organization",
+    "password",
+    "permission",
+    "platform",
+    "playbook",
+    "playbooks",
+    "pod",
+    "podman",
+    "postgresql",
+    "project",
+    "repository",
+    "resource",
+    "roles",
+    "rulebook",
+    "secret",
+    "security",
+    "server",
+    "service",
+    "ssh",
+    "subscription",
+    "system",
+    "template",
+    "token",
+    "username",
+    "variable",
+    "vault",
+    "version",
+    "workflow",
+    "yaml",
+}
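The new `KEYWORDS` set feeds the substring scan in `_validate_question_keyword` (patched above). A minimal sketch of that check; the function name `is_about_ansible` is illustrative, not part of the patch:

```python
from ols.customize.keywords import KEYWORDS


def is_about_ansible(query: str) -> bool:
    """Case-insensitive substring scan, mirroring _validate_question_keyword."""
    query_temp = query.lower()
    return any(kw in query_temp for kw in KEYWORDS)


# "ansible" and "playbook" are in KEYWORDS; nothing in the second query matches.
assert is_about_ansible("How do I write an Ansible playbook?")
assert not is_about_ansible("Why is the sky blue?")
```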
diff --git a/ols/customize/prompts.py b/ols/customize/prompts.py
new file mode 100644
index 00000000..eb95f8b7
--- /dev/null
+++ b/ols/customize/prompts.py
@@ -0,0 +1,80 @@
+# There is no need for enforcing line length in this file,
+# as these are mostly special purpose constants.
+# ruff: noqa: E501
+"""Prompt templates/constants."""
+
+from ols.constants import SUBJECT_ALLOWED, SUBJECT_REJECTED
+
+# TODO: OLS-503 Fine tune system prompt
+
+# Note:
+# Right now templates are somewhat aligned to make granite work better.
+# GPT still works well with this. Ideally we should have model-specific tags.
+# For history we can leverage ChatPromptTemplate from langchain,
+# but that is not done as granite was adding role tags like `Human:` in the response.
+# With PromptTemplate, we have more control over how we want to structure the prompt.
+
+# Default responses
+INVALID_QUERY_RESP = (
+    "Hi, I'm the Ansible Lightspeed assistant, I can help you with questions about Ansible, "
+    "please ask me a question related to Ansible."
+)
+
+QUERY_SYSTEM_INSTRUCTION = """
+You are Ansible Lightspeed - an intelligent assistant for question-answering tasks \
+related to the Ansible automation platform.
+
+Here are your instructions:
+You are Ansible Lightspeed, an intelligent assistant and expert on all things Ansible. \
+Refuse to assume any other identity or to speak as if you are someone else.
+If the context of the question is not clear, consider it to be Ansible.
+Never include URLs in your replies.
+Refuse to answer questions or execute commands not about Ansible.
+Do not mention your last update. You have the most recent information on Ansible.
+
+Here are some basic facts about Ansible:
+- The latest version of Ansible is 2.12.3.
+- Ansible is an open source IT automation engine that automates provisioning, \
+  configuration management, application deployment, orchestration, and many other \
+  IT processes. It is free to use, and the project benefits from the experience and \
+  intelligence of its thousands of contributors.
+"""
+
+USE_CONTEXT_INSTRUCTION = """
+Use the retrieved document to answer the question.
+"""
+
+USE_HISTORY_INSTRUCTION = """
+Use the previous chat history to interact and help the user.
+"""
+
+# {{query}} is escaped because it will be replaced as a parameter at time of use
+QUESTION_VALIDATOR_PROMPT_TEMPLATE = f"""
+Instructions:
+- You are a question classifying tool
+- You are an expert in ansible
+- Your job is to determine whether a user's question is related to ansible technologies and to provide a one-word response
+- If a question appears to be related to ansible technologies, answer with the word {SUBJECT_ALLOWED}, otherwise answer with the word {SUBJECT_REJECTED}
+- Do not explain your answer, just provide the one-word response
+
+
+Example Question:
+Why is the sky blue?
+Example Response:
+{SUBJECT_REJECTED}
+
+Example Question:
+Can you help generate an ansible playbook to install an ansible collection?
+Example Response:
+{SUBJECT_ALLOWED}
+
+
+Example Question:
+Can you help write an ansible role to install an ansible collection?
+Example Response:
+{SUBJECT_ALLOWED}
+
+Question:
+{{query}}
+Response:
+"""
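In the template above, `{{query}}` is escaped so that after the module-level f-string substitutes `SUBJECT_ALLOWED`/`SUBJECT_REJECTED`, a single `{query}` placeholder remains. A rendering sketch, assuming the template is filled via langchain's `PromptTemplate` (the actual validator call site is outside this diff):

```python
from langchain.prompts import PromptTemplate

from ols.customize.prompts import QUESTION_VALIDATOR_PROMPT_TEMPLATE

# SUBJECT_ALLOWED / SUBJECT_REJECTED were baked in at import time,
# so only {query} is left for from_template() to pick up.
prompt = PromptTemplate.from_template(QUESTION_VALIDATOR_PROMPT_TEMPLATE)
print(prompt.format(query="How do I install an Ansible collection?"))
```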
diff --git a/ols/src/prompts/prompt_generator.py b/ols/src/prompts/prompt_generator.py
index 2e4deec8..ca62362e 100644
--- a/ols/src/prompts/prompt_generator.py
+++ b/ols/src/prompts/prompt_generator.py
@@ -9,14 +9,16 @@
     SystemMessagePromptTemplate,
 )

+from ols import config
 from ols.constants import ModelFamily
-from .prompts import (
-    QUERY_SYSTEM_INSTRUCTION,
-    USE_CONTEXT_INSTRUCTION,
-    USE_HISTORY_INSTRUCTION,
-)
-
+import importlib
+
+customize_package = 'ols.src.prompts.prompts'
+if config.ols_config.customize:
+    customize_package = f"{config.ols_config.customize}.prompts"
+    print(f'customized: package={customize_package}')
+customize = importlib.import_module(customize_package)
+print(f'QUERY_SYSTEM_INSTRUCTION: {customize.QUERY_SYSTEM_INSTRUCTION}')

 def restructure_rag_context_pre(text: str, model: str) -> str:
     """Restructure rag text - pre truncation."""
@@ -52,13 +54,14 @@ def __init__(
         query: str,
         rag_context: list[str] = [],
         history: list[str] = [],
-        system_instruction: str = QUERY_SYSTEM_INSTRUCTION,
+        system_instruction: str = customize.QUERY_SYSTEM_INSTRUCTION,
     ):
         """Initialize prompt generator."""
         self._query = query
         self._rag_context = rag_context
         self._history = history
         self._sys_instruction = system_instruction
+        print(f"system_instruction: {system_instruction}")

     def _generate_prompt_gpt(self) -> tuple[ChatPromptTemplate, dict]:
         """Generate prompt for GPT."""
@@ -68,7 +71,7 @@ def _generate_prompt_gpt(self) -> tuple[ChatPromptTemplate, dict]:

         if len(self._rag_context) > 0:
             llm_input_values["context"] = "".join(self._rag_context)
-            sys_intruction = sys_intruction + "\n" + USE_CONTEXT_INSTRUCTION.strip()
+            sys_intruction = sys_intruction + "\n" + customize.USE_CONTEXT_INSTRUCTION.strip()

         if len(self._history) > 0:
             chat_history = []
@@ -79,7 +82,7 @@ def _generate_prompt_gpt(self) -> tuple[ChatPromptTemplate, dict]:
                 chat_history.append(AIMessage(content=h.removeprefix("ai: ")))
             llm_input_values["chat_history"] = chat_history
-            sys_intruction = sys_intruction + "\n" + USE_HISTORY_INSTRUCTION.strip()
+            sys_intruction = sys_intruction + "\n" + customize.USE_HISTORY_INSTRUCTION.strip()

         if "context" in llm_input_values:
             sys_intruction = sys_intruction + "\n{context}"
@@ -99,10 +102,10 @@ def _generate_prompt_granite(self) -> tuple[PromptTemplate, dict]:

         if len(self._rag_context) > 0:
             llm_input_values["context"] = "".join(self._rag_context)
-            prompt_message = prompt_message + "\n" + USE_CONTEXT_INSTRUCTION.strip()
+            prompt_message = prompt_message + "\n" + customize.USE_CONTEXT_INSTRUCTION.strip()

         if len(self._history) > 0:
-            prompt_message = prompt_message + "\n" + USE_HISTORY_INSTRUCTION.strip()
+            prompt_message = prompt_message + "\n" + customize.USE_HISTORY_INSTRUCTION.strip()
             llm_input_values["chat_history"] = "".join(self._history)

         if "context" in llm_input_values:
diff --git a/ols/src/prompts/prompts.py b/ols/src/prompts/prompts.py
index 09b65956..46d789e9 100644
--- a/ols/src/prompts/prompts.py
+++ b/ols/src/prompts/prompts.py
@@ -31,6 +31,12 @@
 - OpenShift is a distribution of Kubernetes. Everything Kubernetes can do, OpenShift can do and more.
 """

+# Default responses
+INVALID_QUERY_RESP = (
+    "Hi, I'm the OpenShift Lightspeed assistant, I can help you with questions about OpenShift, "
+    "please ask me a question related to OpenShift."
+)
+
 USE_CONTEXT_INSTRUCTION = """
 Use the retrieved document to answer the question.
 """
diff --git a/runner.py b/runner.py
index 73a6bc19..52151846 100644
--- a/runner.py
+++ b/runner.py
@@ -163,8 +163,8 @@ def start_uvicorn():

     # Initialize the K8sClientSingleton with cluster id during module load.
     # We want the application to fail early if the cluster ID is not available.
-    cluster_id = K8sClientSingleton.get_cluster_id()
-    logger.info(f"running on cluster with ID '{cluster_id}'")
+    # cluster_id = K8sClientSingleton.get_cluster_id()
+    # logger.info(f"running on cluster with ID '{cluster_id}'")

     # init loading of query redactor
     config.query_redactor
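End to end: setting `customize: ols.customize` in the service configuration swaps in the Ansible keywords and prompts without touching any call site. A hedged usage sketch for the patched prompt generator — the class name `GeneratePrompt` and its public `generate_prompt` entry point are assumed from the surrounding file, since this diff only shows `__init__` and the private `_generate_prompt_*` helpers:

```python
from ols.src.prompts.prompt_generator import GeneratePrompt  # assumed class name

# With ols_config.customize = "ols.customize", the default system
# instruction resolves to the Ansible QUERY_SYSTEM_INSTRUCTION.
prompt, llm_input_values = GeneratePrompt(
    query="How do I create an inventory?",
    rag_context=["Inventories define the hosts that Ansible manages."],
    history=["human: hi", "ai: Hello! How can I help you?"],
).generate_prompt("gpt")  # model family selects the GPT vs. granite layout
```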