Skip to content

Commit

Permalink
pgvector
Browse files Browse the repository at this point in the history
  • Loading branch information
TamiTakamiya committed Dec 1, 2024
1 parent 0628df1 commit 7b27ef9
Show file tree
Hide file tree
Showing 6 changed files with 592 additions and 148 deletions.
9 changes: 9 additions & 0 deletions examples/rcsconfig.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,18 @@ llm_providers:
ols_config:
# max_workers: 1
reference_content:
vector_store_type: postgres
# product_docs_index_path: "./vector_db/ocp_product_docs/4.15"
# product_docs_index_id: ocp-product-docs-4_15
# embeddings_model_path: "./embeddings_model"
postgres:
host: localhost
port: 5432
dbname: postgres
user: postgres
password_path: /home/ttakamiy/secrets/postgres.txt
# ssl_mode:
# ca_cert_path:
conversation_cache:
type: memory
memory:
Expand Down
16 changes: 16 additions & 0 deletions ols/app/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,19 +870,35 @@ def __init__(self, **data: Optional[dict]) -> None:
class ReferenceContent(BaseModel):
"""Reference content configuration."""

vector_store_type: Optional[str] = None
product_docs_index_path: Optional[FilePath] = None
product_docs_index_id: Optional[str] = None
embeddings_model_path: Optional[FilePath] = None
postgres: Optional[PostgresConfig] = None

def __init__(self, data: Optional[dict] = None) -> None:
    """Initialize configuration and perform basic validation.

    Args:
        data: Raw configuration mapping. When it is None, every field
            keeps its None default and no validation is performed.

    Raises:
        InvalidConfigurationError: If the vector store type is not a
            member of ``constants.VectorStoreType``, or if a ``postgres``
            section is present for the postgres store type but is not a
            mapping (for example, an empty ``postgres:`` key).
    """
    super().__init__()
    if data is None:
        return

    # FAISS is used when the configuration does not name a store type.
    self.vector_store_type = data.get(
        "vector_store_type", constants.VectorStoreType.FAISS
    )
    valid_vector_store_types = list(constants.VectorStoreType)
    if self.vector_store_type not in valid_vector_store_types:
        raise InvalidConfigurationError(
            f"invalid vector store type: {self.vector_store_type}, supported types are"
            f" {valid_vector_store_types}"
        )
    self.product_docs_index_path = data.get("product_docs_index_path", None)
    self.product_docs_index_id = data.get("product_docs_index_id", None)
    self.embeddings_model_path = data.get("embeddings_model_path", None)
    if (
        self.vector_store_type == constants.VectorStoreType.POSTGRES
        and "postgres" in data
    ):
        postgres_data = data["postgres"]
        # An empty `postgres:` key is loaded as None; unpacking it with **
        # would raise a bare TypeError instead of a configuration error.
        if not isinstance(postgres_data, dict):
            raise InvalidConfigurationError(
                "invalid postgres configuration: expected a mapping, got "
                f"{type(postgres_data).__name__}"
            )
        self.postgres = PostgresConfig(**postgres_data)

def __eq__(self, other: object) -> bool:
"""Compare two objects for equality."""
Expand Down
8 changes: 8 additions & 0 deletions ols/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,11 @@ class GenericLLMParameters:

# All supported authentication modules
SUPPORTED_AUTHENTICATION_MODULES = {"k8s", "noop"}


# Vector store types
class VectorStoreType(StrEnum):
"""Supported vector store types."""

FAISS = "faiss"
POSTGRES = "postgres"
82 changes: 61 additions & 21 deletions ols/src/rag_index/index_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Optional

from ols.app.models.config import ReferenceContent
from ols.constants import VectorStoreType

logger = logging.getLogger(__name__)

Expand All @@ -14,28 +15,41 @@
# we load it only when it is required.
# As these dependencies are lazily loaded, we can't use them in type hints.
# So this module is excluded from mypy checks as a whole.
def load_llama_index_deps(vector_store_type: str = VectorStoreType.FAISS):
    """Load llama_index dependencies.

    The imported names are published as module globals so the rest of the
    module can use them after this function has run. Only the vector store
    integration that matches ``vector_store_type`` is imported, so the
    package for the other backend does not need to be installed.

    Args:
        vector_store_type: Which vector store backend to import. Defaults
            to FAISS so callers that pass no argument keep working.
    """
    global Settings
    global StorageContext
    global load_index_from_storage
    global EmbedType
    global BaseIndex
    global resolve_llm
    # Backend-specific names; each is bound only when its backend is selected.
    global FaissVectorStore
    global VectorStoreIndex
    global SupabaseVectorStore
    from llama_index.core import Settings, StorageContext, load_index_from_storage
    from llama_index.core.embeddings.utils import EmbedType
    from llama_index.core.indices.base import BaseIndex
    from llama_index.core.llms.utils import resolve_llm

    if vector_store_type == VectorStoreType.FAISS:
        from llama_index.vector_stores.faiss import FaissVectorStore
    elif vector_store_type == VectorStoreType.POSTGRES:
        from llama_index.core import VectorStoreIndex
        from llama_index.vector_stores.supabase import SupabaseVectorStore


class IndexLoader:
"""Load index from local file storage."""

def __init__(self, index_config: Optional[ReferenceContent]) -> None:
"""Initialize loader."""
load_llama_index_deps()
self._vector_store_type = (
VectorStoreType.FAISS
if index_config is None
else index_config.vector_store_type
)

load_llama_index_deps(self._vector_store_type)
self._index = None

self._index_config = index_config
Expand Down Expand Up @@ -73,26 +87,52 @@ def _set_context(self) -> None:
Settings.llm = resolve_llm(None)
logger.info("Setting up storage context for index load...")
# pylint: disable=W0201
self._storage_context = StorageContext.from_defaults(
vector_store=FaissVectorStore.from_persist_dir(self._index_path),
persist_dir=self._index_path,
)
if self._vector_store_type == VectorStoreType.FAISS:
self._vector_store = FaissVectorStore.from_persist_dir(self._index_path)
self._storage_context = StorageContext.from_defaults(
vector_store=self._vector_store,
persist_dir=self._index_path,
)
elif self._vector_store_type == VectorStoreType.POSTGRES:
postgres = self._index_config.postgres
user = postgres.user
password = postgres.password
host = postgres.host
port = postgres.port
dbname = postgres.dbname

connection = f"postgresql://{user}:{password}@{host}:{port}/{dbname}"
collection_name = self._index_id.replace("-", "_")

self._vector_store = SupabaseVectorStore(
postgres_connection_string=connection,
collection_name=collection_name,
)
self._storage_context = StorageContext.from_defaults(
vector_store=self._vector_store,
)

def _load_index(self) -> None:
    """Load vector index.

    For the FAISS store the index is read from storage and requires an
    index path; without one a warning is logged and nothing is loaded.
    For the postgres store the index is built on top of the vector store
    prepared by ``_set_context``.

    A failure while loading is logged and leaves ``self._index`` unchanged
    for both store types, so the two backends fail in the same way.
    """
    store_type = self._vector_store_type
    if store_type == VectorStoreType.FAISS:
        if self._index_path is None:
            logger.warning("Index path is not set.")
            return
    elif store_type != VectorStoreType.POSTGRES:
        # Unknown store type: there is nothing to load.
        return

    try:
        self._set_context()
        logger.info("Loading vector index...")
        if store_type == VectorStoreType.FAISS:
            self._index = load_index_from_storage(
                storage_context=self._storage_context,
                index_id=self._index_id,
            )
        else:
            self._index = VectorStoreIndex.from_vector_store(
                vector_store=self._vector_store,
            )
        logger.info("Vector index is loaded.")
    except Exception as err:
        # Best effort: a missing index must not abort the caller.
        logger.exception("Error loading vector index:\n%s", err)

@property
def vector_index(self):
Expand Down
Loading

0 comments on commit 7b27ef9

Please sign in to comment.