customize ols
jameswnl committed Oct 29, 2024
1 parent a30427f commit b5c0e2e
Showing 14 changed files with 55 additions and 48 deletions.
19 changes: 5 additions & 14 deletions Containerfile
@@ -1,13 +1,11 @@
# vim: set filetype=dockerfile
ARG LIGHTSPEED_RAG_CONTENT_IMAGE=quay.io/openshift-lightspeed/lightspeed-rag-content@sha256:a91aca8224b1405e7c91576374c7bbc766b2009b2ef852895c27069fffc5b06f
ARG LIGHTSPEED_RAG_CONTENT_IMAGE=quay.io/openshift-lightspeed/lightspeed-rag-content@sha256:24699b4ebe31dfb09ba706e44140db48772b37590a1839e2c9f5de2005c8c385
ARG RAG_CONTENTS_SUB_FOLDER=vector_db/ocp_product_docs

FROM ${LIGHTSPEED_RAG_CONTENT_IMAGE} as lightspeed-rag-content

FROM registry.redhat.io/ubi9/ubi-minimal:latest
FROM registry.access.redhat.com/ubi9/ubi-minimal

ARG VERSION
# TODO: this is overridden by the ubi9/python-311 image; we hard-coded WORKDIR below to /app-root.
# Make sure the default value of the RAG content path is set according to APP_ROOT, then update the operator.
ARG APP_ROOT=/app-root

RUN microdnf install -y --nodocs --setopt=keepcache=0 --setopt=tsflags=nodocs \
@@ -26,7 +24,7 @@ ENV PYTHONDONTWRITEBYTECODE=1 \

WORKDIR /app-root

COPY --from=lightspeed-rag-content /rag/vector_db/ocp_product_docs ./vector_db/ocp_product_docs
COPY --from=lightspeed-rag-content /rag/${RAG_CONTENTS_SUB_FOLDER} ${APP_ROOT}/${RAG_CONTENTS_SUB_FOLDER}
COPY --from=lightspeed-rag-content /rag/embeddings_model ./embeddings_model

# Add explicit files and directories
@@ -45,14 +43,7 @@ EXPOSE 8080
EXPOSE 8443
CMD ["python3.11", "runner.py"]

LABEL io.k8s.display-name="OpenShift LightSpeed Service" \
io.k8s.description="AI-powered OpenShift Assistant Service." \
io.openshift.tags="openshift-lightspeed,ols" \
description="Red Hat OpenShift Lightspeed Service" \
summary="Red Hat OpenShift Lightspeed Service" \
com.redhat.component=openshift-lightspeed-service \
name=openshift-lightspeed-service \
vendor="Red Hat, Inc."
LABEL vendor="Red Hat, Inc."


# no-root user is checked in Konflux
6 changes: 3 additions & 3 deletions ols/app/endpoints/ols.py
@@ -27,13 +27,13 @@
SummarizerResponse,
UnauthorizedResponse,
)
from ols.customize import keywords, prompts
from ols.src.llms.llm_loader import LLMConfigurationError, resolve_provider_config
from ols.src.query_helpers.attachment_appender import append_attachments_to_query
from ols.src.query_helpers.docs_summarizer import DocsSummarizer
from ols.src.query_helpers.question_validator import QuestionValidator
from ols.utils import errors_parsing, suid
from ols.utils.auth_dependency import AuthDependency
from ols.utils.keywords import KEYWORDS
from ols.utils.token_handler import PromptTooLongError

logger = logging.getLogger(__name__)
@@ -130,7 +130,7 @@ def conversation_request(

if not valid:
summarizer_response = SummarizerResponse(
constants.INVALID_QUERY_RESP,
prompts.INVALID_QUERY_RESP,
[],
False,
)
@@ -496,7 +496,7 @@ def _validate_question_keyword(query: str) -> bool:
# Current implementation is without any tokenizer method, lemmatization/n-grams.
# Add valid keywords to keywords.py file.
query_temp = query.lower()
for kw in KEYWORDS:
for kw in keywords.KEYWORDS:
if kw in query_temp:
return True
# query_temp = {q_word.lower().strip(".?,") for q_word in query.split()}
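The keyword pre-check now iterates over keywords.KEYWORDS from the active customization package instead of the hard-coded ols.utils.keywords module. Below is a minimal sketch of a project-specific keywords module and the matching logic; the module path and the terms are hypothetical, only the KEYWORDS name is required by the validator.

# Hypothetical ols/customize/myproject/keywords.py -- terms are illustrative.
KEYWORDS = ["openshift", "kubernetes", "pod", "deployment", "operator"]

# Matching mirrors _validate_question_keyword(): lowercase the query and
# accept it as soon as any keyword appears as a substring.
query_temp = "How do I restart a Pod?".lower()
print(any(kw in query_temp for kw in KEYWORDS))  # True -- "pod" matches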
8 changes: 6 additions & 2 deletions ols/app/models/config.py
@@ -892,6 +892,7 @@ class OLSConfig(BaseModel):

extra_ca: list[FilePath] = []
certificate_directory: Optional[str] = None
customize: Optional[str] = None

def __init__(
self, data: Optional[dict] = None, ignore_missing_certs: bool = False
@@ -901,8 +902,10 @@ def __init__(
if data is None:
return

self.conversation_cache = ConversationCacheConfig(
data.get("conversation_cache", None)
self.conversation_cache = (
ConversationCacheConfig(data.get("conversation_cache"))
if data.get("conversation_cache")
else None
)
self.logging_config = LoggingConfig(**data.get("logging_config", {}))
if data.get("reference_content") is not None:
@@ -932,6 +935,7 @@ def __init__(
self.certificate_directory = data.get(
"certificate_directory", constants.DEFAULT_CERTIFICATE_DIRECTORY
)
self.customize = data.get("customize")

def __eq__(self, other: object) -> bool:
"""Compare two objects for equality."""
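Two behavioral changes land in OLSConfig: the conversation cache config is now built only when a conversation_cache section is present, and a new free-form customize key is read from the config data. A small sketch of the guard follows, with a plain dict standing in for ConversationCacheConfig and hypothetical values.

from typing import Optional

def build_conversation_cache(data: dict) -> Optional[dict]:
    # Mirrors the diff: construct the cache config only when the section exists.
    return dict(data["conversation_cache"]) if data.get("conversation_cache") else None

print(build_conversation_cache({}))                                          # None
print(build_conversation_cache({"conversation_cache": {"type": "memory"}}))  # {'type': 'memory'} (hypothetical section)
print({"customize": "ols"}.get("customize"))                                 # "ols" -- stored verbatim on the config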
7 changes: 0 additions & 7 deletions ols/constants.py
@@ -18,13 +18,6 @@ class QueryValidationMethod(StrEnum):
SUBJECT_ALLOWED = "ALLOWED"


# Default responses
INVALID_QUERY_RESP = (
"Hi, I'm the OpenShift Lightspeed assistant, I can help you with questions about OpenShift, "
"please ask me a question related to OpenShift."
)


# providers
PROVIDER_BAM = "bam"
PROVIDER_OPENAI = "openai"
8 changes: 8 additions & 0 deletions ols/customize/__init__.py
@@ -0,0 +1,8 @@
"""Contains customization packages for individual projects (for prompts/keyvords)."""

import importlib
import os

project = os.getenv("PROJECT", "ols")
prompts = importlib.import_module(f"ols.customize.{project}.prompts")
keywords = importlib.import_module(f"ols.customize.{project}.keywords")
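This new package resolves the customization flavor once, at import time: the PROJECT environment variable (default "ols") names a sub-package whose prompts and keywords modules are re-exported as ols.customize.prompts and ols.customize.keywords. Below is a sketch of what a hypothetical downstream flavor would have to provide; the project name and all string values are assumptions, but the attribute names match what the rest of this commit imports.

# Layout for a hypothetical flavor selected with PROJECT=acme:
#   ols/customize/acme/__init__.py
#   ols/customize/acme/prompts.py    -- prompt constants used across the service
#   ols/customize/acme/keywords.py   -- KEYWORDS used by the keyword validator
#
# ols/customize/acme/prompts.py (illustrative values only):
INVALID_QUERY_RESP = "Hi, I'm the Acme assistant; please ask me a question about Acme."
QUERY_SYSTEM_INSTRUCTION = "You are the Acme assistant, answering questions about the Acme platform."
USE_CONTEXT_INSTRUCTION = "Use the retrieved document to answer the question."
USE_HISTORY_INSTRUCTION = "Use the previous chat history to interact and help the user."
QUESTION_VALIDATOR_PROMPT_TEMPLATE = "Decide whether this question is in scope: {query}"
# Run the service against this flavor (sketch): PROJECT=acme python runner.py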
1 change: 1 addition & 0 deletions ols/customize/ols/__init__.py
@@ -0,0 +1 @@
"""Customized prompts/keyvords for OpenShift Lightspeed Service (ols)."""
ols/utils/keywords.py → ols/customize/ols/keywords.py
File renamed without changes.
6 changes: 6 additions & 0 deletions ols/src/prompts/prompts.py → ols/customize/ols/prompts.py
@@ -14,6 +14,12 @@
# but that is not done as granite was adding role tags like `Human:` in the response.
# With PromptTemplate, we have more control over how we want to structure the prompt.

# Default responses
INVALID_QUERY_RESP = (
"Hi, I'm the OpenShift Lightspeed assistant, I can help you with questions about OpenShift, "
"please ask me a question related to OpenShift."
)

QUERY_SYSTEM_INSTRUCTION = """
You are OpenShift Lightspeed - an intelligent assistant for question-answering tasks \
related to the OpenShift container orchestration platform.
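With INVALID_QUERY_RESP relocated from ols/constants.py into the customizable prompts module, the canned refusal text can differ per flavor. The call-site pattern used by the endpoint change above, in sketch form:

from ols.app.models.models import SummarizerResponse
from ols.customize import prompts

# Rejection path: canned response from the active flavor, no referenced
# documents, not truncated (argument order as in the endpoint diff).
summarizer_response = SummarizerResponse(prompts.INVALID_QUERY_RESP, [], False)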
25 changes: 14 additions & 11 deletions ols/src/prompts/prompt_generator.py
@@ -10,12 +10,7 @@
)

from ols.constants import ModelFamily

from .prompts import (
QUERY_SYSTEM_INSTRUCTION,
USE_CONTEXT_INSTRUCTION,
USE_HISTORY_INSTRUCTION,
)
from ols.customize import prompts


def restructure_rag_context_pre(text: str, model: str) -> str:
@@ -52,7 +47,7 @@ def __init__(
query: str,
rag_context: list[str] = [],
history: list[str] = [],
system_instruction: str = QUERY_SYSTEM_INSTRUCTION,
system_instruction: str = prompts.QUERY_SYSTEM_INSTRUCTION,
):
"""Initialize prompt generator."""
self._query = query
@@ -68,7 +63,9 @@ def _generate_prompt_gpt(self) -> tuple[ChatPromptTemplate, dict]:

if len(self._rag_context) > 0:
llm_input_values["context"] = "".join(self._rag_context)
sys_intruction = sys_intruction + "\n" + USE_CONTEXT_INSTRUCTION.strip()
sys_intruction = (
sys_intruction + "\n" + prompts.USE_CONTEXT_INSTRUCTION.strip()
)

if len(self._history) > 0:
chat_history = []
@@ -79,7 +76,9 @@ def _generate_prompt_gpt(self) -> tuple[ChatPromptTemplate, dict]:
chat_history.append(AIMessage(content=h.removeprefix("ai: ")))
llm_input_values["chat_history"] = chat_history

sys_intruction = sys_intruction + "\n" + USE_HISTORY_INSTRUCTION.strip()
sys_intruction = (
sys_intruction + "\n" + prompts.USE_HISTORY_INSTRUCTION.strip()
)

if "context" in llm_input_values:
sys_intruction = sys_intruction + "\n{context}"
@@ -99,10 +98,14 @@ def _generate_prompt_granite(self) -> tuple[PromptTemplate, dict]:

if len(self._rag_context) > 0:
llm_input_values["context"] = "".join(self._rag_context)
prompt_message = prompt_message + "\n" + USE_CONTEXT_INSTRUCTION.strip()
prompt_message = (
prompt_message + "\n" + prompts.USE_CONTEXT_INSTRUCTION.strip()
)

if len(self._history) > 0:
prompt_message = prompt_message + "\n" + USE_HISTORY_INSTRUCTION.strip()
prompt_message = (
prompt_message + "\n" + prompts.USE_HISTORY_INSTRUCTION.strip()
)
llm_input_values["chat_history"] = "".join(self._history)

if "context" in llm_input_values:
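The prompt generator no longer imports individual prompt constants; it reads them from ols.customize.prompts, appending the context and history instructions only when those inputs are present. A condensed sketch of that assembly logic (a simplification of the GPT and Granite branches above):

from ols.customize import prompts

def build_system_instruction(has_context: bool, has_history: bool) -> str:
    # Start from the project-specific base instruction ...
    instruction = prompts.QUERY_SYSTEM_INSTRUCTION
    # ... and append the optional instructions exactly as the generator does.
    if has_context:
        instruction = instruction + "\n" + prompts.USE_CONTEXT_INSTRUCTION.strip()
    if has_history:
        instruction = instruction + "\n" + prompts.USE_HISTORY_INSTRUCTION.strip()
    return instruction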
4 changes: 2 additions & 2 deletions ols/src/query_helpers/docs_summarizer.py
@@ -11,8 +11,8 @@
from ols.app.models.config import ProviderConfig
from ols.app.models.models import SummarizerResponse
from ols.constants import RAG_CONTENT_LIMIT, GenericLLMParameters
from ols.customize import prompts
from ols.src.prompts.prompt_generator import GeneratePrompt
from ols.src.prompts.prompts import QUERY_SYSTEM_INSTRUCTION
from ols.src.query_helpers.query_helper import QueryHelper
from ols.utils.token_handler import TokenHandler

@@ -31,7 +31,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
GenericLLMParameters.MAX_TOKENS_FOR_RESPONSE: model_config.parameters.max_tokens_for_response # noqa: E501
}
# default system prompt fine-tuned for the service
self._system_prompt = QUERY_SYSTEM_INSTRUCTION
self._system_prompt = prompts.QUERY_SYSTEM_INSTRUCTION

# allow the system prompt to be customizable
if config.ols_config.system_prompt is not None:
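DocsSummarizer now takes its default system prompt from the customization package, while an explicitly configured system_prompt still overrides it. A minimal sketch of that precedence, using only names visible in the diff:

from ols import config
from ols.customize import prompts

# Default comes from the project-specific prompts module ...
system_prompt = prompts.QUERY_SYSTEM_INSTRUCTION
# ... but an operator-supplied system prompt (if configured) still takes precedence.
if config.ols_config.system_prompt is not None:
    system_prompt = config.ols_config.system_prompt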
4 changes: 2 additions & 2 deletions ols/src/query_helpers/question_validator.py
@@ -9,7 +9,7 @@
from ols import config
from ols.app.metrics import TokenMetricUpdater
from ols.constants import SUBJECT_REJECTED, GenericLLMParameters
from ols.src.prompts.prompts import QUESTION_VALIDATOR_PROMPT_TEMPLATE
from ols.customize import prompts
from ols.src.query_helpers.query_helper import QueryHelper
from ols.utils.token_handler import TokenHandler

@@ -54,7 +54,7 @@ def validate_question(
logger.info(f"{conversation_id} call settings: {settings_string}")

prompt_instructions = PromptTemplate.from_template(
QUESTION_VALIDATOR_PROMPT_TEMPLATE
prompts.QUESTION_VALIDATOR_PROMPT_TEMPLATE
)

bare_llm = self.llm_loader(self.provider, self.model, self.generic_llm_params)
5 changes: 2 additions & 3 deletions runner.py
@@ -13,7 +13,6 @@

import ols.app.models.config as config_model
from ols import constants
from ols.utils.auth_dependency import K8sClientSingleton
from ols.utils.logging import configure_logging


@@ -163,8 +162,8 @@ def start_uvicorn():

# Initialize the K8sClientSingleton with cluster id during module load.
# We want the application to fail early if the cluster ID is not available.
cluster_id = K8sClientSingleton.get_cluster_id()
logger.info(f"running on cluster with ID '{cluster_id}'")
# cluster_id = K8sClientSingleton.get_cluster_id()
# logger.info(f"running on cluster with ID '{cluster_id}'")

# init loading of query redactor
config.query_redactor
3 changes: 2 additions & 1 deletion tests/integration/test_ols.py
@@ -12,6 +12,7 @@
ProviderConfig,
QueryFilter,
)
from ols.customize import prompts
from ols.utils import suid
from ols.utils.errors_parsing import DEFAULT_ERROR_MESSAGE, DEFAULT_STATUS_CODE
from tests.mock_classes.mock_langchain_interface import mock_langchain_interface
@@ -84,7 +85,7 @@ def test_post_question_on_invalid_question(_setup):

expected_json = {
"conversation_id": conversation_id,
"response": constants.INVALID_QUERY_RESP,
"response": prompts.INVALID_QUERY_RESP,
"referenced_documents": [],
"truncated": False,
}
7 changes: 4 additions & 3 deletions tests/unit/app/endpoints/test_ols.py
@@ -20,6 +20,7 @@
ReferencedDocument,
SummarizerResponse,
)
from ols.customize import prompts
from ols.src.llms.llm_loader import LLMConfigurationError
from ols.utils import suid
from ols.utils.errors_parsing import DEFAULT_ERROR_MESSAGE
@@ -651,7 +652,7 @@ def test_conversation_request(
mock_validate_question.return_value = False
llm_request = LLMRequest(query="Generate a yaml")
response = ols.conversation_request(llm_request, auth)
assert response.response == constants.INVALID_QUERY_RESP
assert response.response == prompts.INVALID_QUERY_RESP
assert suid.check_suid(
response.conversation_id
), "Improper conversation ID returned"
@@ -738,7 +739,7 @@ def test_question_validation_in_conversation_start(auth):

response = ols.conversation_request(llm_request, auth)

assert response.response.startswith(constants.INVALID_QUERY_RESP)
assert response.response.startswith(prompts.INVALID_QUERY_RESP)


@pytest.mark.usefixtures("_load_config")
Expand Down Expand Up @@ -778,7 +779,7 @@ def test_conversation_request_invalid_subject(mock_validate, auth):

mock_validate.return_value = False
response = ols.conversation_request(llm_request, auth)
assert response.response == constants.INVALID_QUERY_RESP
assert response.response == prompts.INVALID_QUERY_RESP
assert len(response.referenced_documents) == 0
assert not response.truncated

