Update README

postrational · Nov 10, 2024 · f993eb5 · f993eb5
1 parent b56bc77
commit f993eb5
Show file tree

Hide file tree

Showing 7 changed files with 52 additions and 28 deletions.
diff --git a/README.md b/README.md
@@ -1,8 +1,9 @@
-# Ragamuffin - Zotero Chat 🐈
+# Ragamuffin - Chat with your documents 🐈
 
-Ragamuffin is a [RAG][rag]-powered chat agent which can access your [Zotero][zotero] library.
+Ragamuffin is a [RAG][rag]-powered chat agent which can access your local documents, articles from your
+[Zotero][zotero] library, and code from any GitHub repository.
 
-You can ask questions, and the agent will respond using information from documents in your collection.
+You can ask questions, and the agent will respond using information from documents in your library.
 It will also display the sources used to generate each answer.
 
 ![Zotero Chat](screenshot.png)
@@ -20,36 +21,60 @@ Ragamuffin requires Python 3.10 or higher. It's recommended to use a virtual env
 
     $ python3 -m venv venv
     $ source venv/bin/activate
-    (venv) $ pip install git+https://github.com/postrational/ragamuffin.git
+    (venv) $ pip install --upgrade git+https://github.com/postrational/ragamuffin.git
 
 ## Usage
 
-In order to use Ragamuffin, you need to generate a [Zotero API key][zotero-key] and an [OpenAI API key][openai-key].
-Set these as environment variables before running the chat agent. 
+To use Ragamuffin, you need to generate an [OpenAI API key][openai-key].
+Set it as an environment variable before running any `muffin` command.
 
-    $ export ZOTERO_LIBRARY_ID=1234567
-    $ export ZOTERO_API_KEY=XXXX........
     $ export OPENAI_API_KEY=sk-proj-XXXX........
 
+### Create a Chat Agent based on a directory of documents
+
+You can generate a RAG index based on a directory of files (e.g. TXT, PDF or EPUB).
+
+Use the `muffin` command to generate an agent named `my_agent` based on the documents in `/path/to/my/documents/`:
+
+    (venv) $ muffin generate from_files my_agent /path/to/my/documents/
+
+Start the chat agent using the following command:
+
+    (venv) $ muffin chat my_agent
+
 ### Generate a RAG index based on your Zotero library
 
+In order to use Ragamuffin with Zotero, you need to generate a [Zotero API key][zotero-key] and 
+an [OpenAI API key][openai-key]. Set these as environment variables before running `muffin`. 
+
+    $ export OPENAI_API_KEY=sk-proj-XXXX........
+    $ export ZOTERO_LIBRARY_ID=1234567
+    $ export ZOTERO_API_KEY=XXXX........
     (venv) $ muffin generate from_zotero zotero_agent
 
-This will generate a RAG index based on the papers in your Zotero library.
+This will generate a RAG index based on all papers in your Zotero library.
+
+You can also create an agent for a specific collection in your Zotero library using the `--collection` option:
+
+    (venv) $ muffin generate from_zotero zotero_agent --collection "My Collection"
 
 Later, you can chat with Ragamuffin using the `muffin chat` command:
 
     (venv) $ muffin chat zotero_agent
 
-### Create a Chat Agent based on a directory of documents
+### Generate a RAG index based on a Git repository
 
-You can also generate a RAG index based on a directory of files (e.g. PDFs, EPUB, etc.).
+If you want to learn about a specific codebase, you can generate a RAG index based on its Git repository (for example, one hosted on GitHub).
 
-Use the `muffin` command to generate an agent named `my_agent` based on the documents in `/path/to/my/documents/`:
+    (venv) $ muffin generate from_git my_agent https://github.com/postrational/ragamuffin/
 
-    (venv) $ muffin generate from_files my_agent /path/to/my/documents/
+If you want to use a specific branch, tag or commit, you can use the `--ref` option:
 
-Start the chat agent using the following command:
+    (venv) $ muffin generate from_git poetry https://github.com/python-poetry/poetry --ref 1.8.4
+
+### Chat with the agent
+
+You can chat with the agent using the `muffin chat` command:
 
     (venv) $ muffin chat my_agent
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "ragamuffin"
-version = "0.4.0"
+version = "0.4.1"
 description = ""
 authors = ["Michal Karzynski <[email protected]>"]
 readme = "README.md"

diff --git a/src/ragamuffin/cli/muffin.py b/src/ragamuffin/cli/muffin.py
@@ -39,15 +39,14 @@ def create_agent_from_files(name: str, source_dir: str) -> None:
         source_dir: A directory containing the documents it will know.
     """
     logger.info(f"Creating a new chat agent '{name}' from '{source_dir}'.")
-    storage = get_storage()
 
+    storage = get_storage()
     library = LocalLibrary(library_dir=source_dir)
     reader = library.get_reader()
-
-    logger.info("Generating RAG embeddings...")
     storage.generate_index(name, reader)
 
     logger.info(f"Agent '{name}' created successfully.")
+    logger.info(f"Use this command to chat: muffin chat {name}")
 
 
 @generate.command(name="from_zotero")
@@ -63,9 +62,8 @@ def create_agent_from_zotero(collection: list[str], name: str) -> None:
     lib_id = ensure_string(settings.get("zotero_library_id"))
     api_key = ensure_string(settings.get("zotero_api_key"))
     library = ZoteroLibrary(library_id=lib_id, api_key=api_key, collections=collection)
-    reader = library.get_reader()
 
-    logger.info("Generating RAG embeddings...")
+    reader = library.get_reader()
     storage.generate_index(name, reader)
 
     logger.info(f"Agent '{name}' created successfully.")
@@ -82,10 +80,7 @@ def create_agent_from_git(name: str, repo_url: str, ref: str | None) -> None:
     logger.info("Creating a chat agent from a Git repository...")
 
     library = GitLibrary(git_repo=repo_url, ref=ref)
-
     reader = library.get_reader()
-
-    logger.info("Generating RAG embeddings...")
     storage = get_storage()
     storage.generate_index(name, reader)
 

diff --git a/src/ragamuffin/models/enhancer.py b/src/ragamuffin/models/enhancer.py
@@ -4,7 +4,6 @@
 
 
 class QueryEnhancer:
-
     def __init__(self):
         settings = get_settings()
         llm_model = ensure_string(settings.get("llm_model"))

diff --git a/src/ragamuffin/storage/cassandra.py b/src/ragamuffin/storage/cassandra.py
@@ -44,6 +44,8 @@ def generate_index(self, agent_name: str, reader: BaseReader) -> BaseIndex:
         vector_store = CassandraVectorStore(table=agent_name, embedding_dimension=embed_dim)
         storage_context = StorageContext.from_defaults(vector_store=vector_store)
         configure_llamaindex_embedding_model()
+
+        logger.info("Generating RAG embeddings...")
         index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
         index.storage_context.persist()
         return index

diff --git a/src/ragamuffin/storage/file.py b/src/ragamuffin/storage/file.py
@@ -35,6 +35,7 @@ def generate_index(self, agent_name: str, reader: BaseReader) -> BaseIndex:
         configure_llamaindex_embedding_model()
 
         # Build the index from documents and persist to disk
+        logger.info("Generating RAG embeddings...")
         index = VectorStoreIndex.from_documents(documents)
         index.storage_context.persist(persist_dir=self.get_agent_storage_dir(agent_name))
         return index

diff --git a/src/ragamuffin/webui/gradio_chat.py b/src/ragamuffin/webui/gradio_chat.py
@@ -133,8 +133,9 @@ def generate_sources_html(self, query: str, source_nodes: list[NodeWithScore]) -
             filename_html = f"<a href='{url}' target='_blank'>{name}</a>" if url else f"<b>{name}</b>"
 
             # Append text and metadata to lists
-            if text_node.text:
-                sources_text.append(text_node.text)
+            node_content = text_node.get_content()
+            if node_content:
+                sources_text.append(node_content)
                 nodes_info.append(
                     {
                         "filename_html": filename_html,
@@ -150,8 +151,9 @@ def generate_sources_html(self, query: str, source_nodes: list[NodeWithScore]) -
         # Construct the output using the highlighted texts and metadata
         for highlighted_text, info in zip(highlighted_texts, nodes_info, strict=False):
             source_footer = f"<br>Page {info['page']}" if info["page"] else "<br>"
-            similarity_class = int(min(score * 10, 9))
-            source_footer += f" <span class='badge similarity-{similarity_class}'>{info['score']:.2f}</span>"
+            if info["score"] is not None:
+                similarity_class = int(min(info["score"] * 10, 9))
+                source_footer += f" <span class='badge similarity-{similarity_class}'>{info['score']:.2f}</span>"
             output_html += f"<p><b>{info['filename_html']}</b><br>{highlighted_text}{source_footer}</p>"
 
         return output_html