diff --git a/README.md b/README.md
index 6e533f2..93f71f8 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+
 # Welcome to Byaldi
 
 _Did you know? In the movie RAGatouille, the dish Remy makes is not actually a ratatouille, but a refined version of the dish called "Confit Byaldi"._
@@ -56,23 +57,36 @@ ColPali uses multi-billion parameter models to encode documents. We recommend us
 Byaldi is largely modeled after RAGatouille, meaning that everything is designed to take the fewest lines of code possible, so you can very quickly build on top of it rather than spending time figuring out how to create a retrieval pipeline.
 
 ### Loading a model
-
 Loading a model with `byaldi` is extremely straightforward:
 
+Byaldi now supports the newer, improved `vidore/colqwen2-v1.0` checkpoints:
+
 ```python3
 from byaldi import RAGMultiModalModel
-# Optionally, you can specify an `index_root`, which is where it'll save the index. It defaults to ".byaldi/".
+
+# Load the model. Newer checkpoint: "vidore/colqwen2-v1.0"; older checkpoint: "vidore/colpali-v1.2".
 RAG = RAGMultiModalModel.from_pretrained("vidore/colqwen2-v1.0")
-```
+
+# The indexes of the documents are stored in ".byaldi/" by default.
+# To store the indexes in a custom directory (one you can easily navigate to), pass `index_root`:
+RAG = RAGMultiModalModel.from_pretrained("vidore/colqwen2-v1.0", index_root="./your_directory")
+
+```
 
 If you've already got an index, and wish to load it along with the model necessary to query it, you can do so just as easily:
 
+
 ```python3
 from byaldi import RAGMultiModalModel
-# Optionally, you can specify an `index_root`, which is where it'll look for the index. It defaults to ".byaldi/".
+
+# Load an existing index from the default directory, ".byaldi/".
 RAG = RAGMultiModalModel.from_index("your_index_name")
-```
+
+# Load an existing index from your custom directory.
+RAG = RAGMultiModalModel.from_index("your_index_name", index_root="./your_directory")
+
+```
 
 ### Creating an index
 
 Creating an index with `byaldi` is simple and flexible. **You can index a single PDF file, a single image file, or a directory containing multiple of those**. Here's how to create an index:
@@ -131,3 +145,15 @@ RAG.add_to_index("path_to_new_docs",
                  ...
 )
 ```
+
+### Save the model and processor to a specified directory
+
+```python3
+RAG.save_pretrained(directory_path="./your_directory")
+```
+
+> This function saves both the model and processor components of the current instance to the specified directory, allowing the model to be reloaded later
+> from this checkpoint.
+>
+> However, for a complete local setup, follow the steps described in
+> https://github.com/illuin-tech/colpali/issues/129
diff --git a/byaldi/RAGModel.py b/byaldi/RAGModel.py
index 32b66bf..a9a6fd5 100644
--- a/byaldi/RAGModel.py
+++ b/byaldi/RAGModel.py
@@ -1,6 +1,7 @@
+import os
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
 from PIL import Image
 
 from byaldi.colpali import ColPaliModel
@@ -179,3 +180,37 @@ def get_doc_ids_to_file_names(self):
 
     def as_langchain_retriever(self, **kwargs: Any):
         return ByaldiLangChainRetriever(model=self, kwargs=kwargs)
+
+    def save_pretrained(self, directory_path: str) -> None:
+        """
+        Save the model and processor to a specified directory.
+
+        The directory (and any missing parent directory) is created first if it
+        does not exist yet. Both the model and the processor of the current
+        instance are then saved into it, allowing the model to be reloaded later
+        from this checkpoint.
+        However, for complete local setup follow this - https://github.com/illuin-tech/colpali/issues/129
+
+        Parameters:
+            directory_path (str): The path to the directory where the model and processor should be saved.
+
+        Returns:
+            None
+
+        Raises:
+            PermissionError: If there are insufficient permissions to write to the directory.
+            Exception: Any other error raised while saving is propagated unchanged,
+                with its original type and traceback.
+        """
+        try:
+            # Create the directory if it does not exist
+            os.makedirs(directory_path, exist_ok=True)
+
+            # Save the model and the processor into the same directory
+            self.model.model.save_pretrained(directory_path)
+            self.model.processor.save_pretrained(directory_path)
+        except PermissionError as perm_error:
+            # Re-raise with a message that names the target directory
+            raise PermissionError(
+                f"Insufficient permissions to write to '{directory_path}'."
+            ) from perm_error