-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f993eb5
commit 504f41a
Showing
13 changed files
with
255 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" | |
|
||
[tool.poetry] | ||
name = "ragamuffin" | ||
version = "0.4.1" | ||
version = "0.4.2" | ||
description = "" | ||
authors = ["Michal Karzynski <[email protected]>"] | ||
readme = "README.md" | ||
|
@@ -43,6 +43,7 @@ types-python-dateutil = "^2.9.0.20241003" | |
types-redis = "^4.6.0.20241004" | ||
types-requests = "^2.32.0.20241016" | ||
types-tabulate = "^0.9.0.20240106" | ||
pytest = "^8.3.3" | ||
|
||
[tool.mypy] | ||
ignore_missing_imports = true | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import logging | ||
from pathlib import Path | ||
|
||
from click.testing import CliRunner | ||
|
||
from ragamuffin.cli.muffin import cli | ||
from ragamuffin.storage.utils import get_storage | ||
from tests.utils import env_vars | ||
|
||
|
||
@env_vars(
    RAGAMUFFIN_STORAGE_TYPE="file",
    RAGAMUFFIN_EMBEDDING_DIMENSION="312",
    RAGAMUFFIN_EMBEDDING_MODEL="huggingface.co/huawei-noah/TinyBERT_General_4L_312D",
)
def test_muffin_cli(caplog):
    """Drive the `muffin` CLI through generate / delete / list for one agent.

    The agent is first built from the bundled ``data/udhr`` directory and
    deleted, then rebuilt from a git URL, listed via ``agents`` (whose output
    is checked through the captured ``ragamuffin`` log), and deleted again.
    """
    caplog.set_level(logging.INFO, logger="ragamuffin")
    cli_runner = CliRunner()

    name = "test_agent"
    udhr_dir = Path(__file__).parent.joinpath("data", "udhr")

    # Build from local files, then remove.
    outcome = cli_runner.invoke(cli, ["generate", "from_files", name, str(udhr_dir)])
    assert outcome.exit_code == 0
    assert name in get_storage().list_agents()

    outcome = cli_runner.invoke(cli, ["delete", name])
    assert outcome.exit_code == 0
    assert name not in get_storage().list_agents()

    # Build again, this time from a git repository URL.
    outcome = cli_runner.invoke(
        cli,
        ["generate", "from_git", name, "https://github.com/postrational/ragamuffin/"],
    )
    assert outcome.exit_code == 0
    assert name in get_storage().list_agents()

    # Drop earlier log records so the check below only sees the `agents` output.
    caplog.clear()
    outcome = cli_runner.invoke(cli, ["agents"])
    assert outcome.exit_code == 0
    assert name in caplog.text

    outcome = cli_runner.invoke(cli, ["delete", name])
    assert outcome.exit_code == 0
    assert name not in get_storage().list_agents()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
from pathlib import Path | ||
|
||
from llama_index.core import Document, SimpleDirectoryReader | ||
|
||
from ragamuffin.libraries.files import LocalLibrary | ||
from tests.utils import seed | ||
|
||
|
||
@seed(42)
def test_local_directory():
    """A LocalLibrary over the ``data/udhr`` directory yields a directory reader.

    Checks the reader type, the number of listed resources, the number of
    loaded documents, and the metadata/text of the first loaded document.
    """
    udhr_dir = Path(__file__).parent.joinpath("data", "udhr")
    reader = LocalLibrary(str(udhr_dir)).get_reader()

    assert isinstance(reader, SimpleDirectoryReader)
    resources = reader.list_resources()
    assert len(resources) == 2

    documents = reader.load_data()
    assert len(documents) == 9

    first_doc = documents[0]
    assert isinstance(first_doc, Document)
    metadata = first_doc.metadata
    assert metadata["file_name"] == "udhr-en.pdf"
    assert metadata["file_type"] == "application/pdf"
    assert "progress and better standards of life in larger freedom" in first_doc.text
|
||
|
||
def test_local_file():
    """A LocalLibrary over a single PDF path exposes one resource and 8 documents."""
    pdf_path = Path(__file__).parent.joinpath("data", "udhr", "udhr-en.pdf")
    reader = LocalLibrary(str(pdf_path)).get_reader()

    assert isinstance(reader, SimpleDirectoryReader)
    resources = reader.list_resources()
    assert len(resources) == 1

    documents = reader.load_data()
    assert len(documents) == 8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from pathlib import Path | ||
|
||
from ragamuffin.libraries.files import LocalLibrary | ||
from ragamuffin.storage.file import FileStorage | ||
from tests.utils import env_vars, seed | ||
|
||
|
||
@seed(42)
def test_file_storage_create_agent():
    """Index the ``data/udhr`` documents into FileStorage, inspect, then delete.

    The index is generated with the embedding model/dimension overridden via
    environment variables; the first ingested document's metadata is checked
    after reloading the index, and the agent is removed at the end.
    """
    udhr_dir = Path(__file__).parent.joinpath("data", "udhr")
    reader = LocalLibrary(str(udhr_dir)).get_reader()

    storage = FileStorage()
    name = "test_agent"
    embedding_env = env_vars(
        RAGAMUFFIN_EMBEDDING_DIMENSION="312",
        RAGAMUFFIN_EMBEDDING_MODEL="huggingface.co/huawei-noah/TinyBERT_General_4L_312D",
    )
    # The overrides are only active while the index is being generated.
    with embedding_env:
        storage.generate_index(name, reader=reader)

    assert name in storage.list_agents()

    index = storage.load_index(name)
    ref_docs = list(index.ref_doc_info.values())
    metadata = ref_docs[0].metadata
    assert metadata["file_name"] == "udhr-en.pdf"
    assert metadata["file_type"] == "application/pdf"
    assert metadata["page_label"] == "1"

    storage.delete_agent(name)
    assert name not in storage.list_agents()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
import os | ||
import random | ||
from collections.abc import Callable | ||
from contextlib import contextmanager | ||
from functools import wraps | ||
from typing import Any | ||
|
||
import numpy as np | ||
import torch | ||
|
||
|
||
def seed(seed_value: int):
    """Build a decorator that runs a function under fixed RNG seeds.

    Before each call the current states of Python's ``random``, NumPy's
    global RNG and torch's RNG (as returned by ``torch.get_rng_state``) are
    saved and all three are seeded with ``seed_value``. After the call,
    whether it returned or raised, the saved states are put back.

    Args:
        seed_value: Seed passed to ``random.seed``, ``np.random.seed`` and
            ``torch.manual_seed``.

    Returns:
        A decorator; the decorated function returns whatever the original
        function returns.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Snapshot the generators so seeding here does not leak to callers.
            saved_states = (
                random.getstate(),
                np.random.get_state(),
                torch.get_rng_state(),
            )

            for reseed in (random.seed, np.random.seed, torch.manual_seed):
                reseed(seed_value)

            try:
                return func(*args, **kwargs)
            finally:
                python_state, numpy_state, torch_state = saved_states
                random.setstate(python_state)
                np.random.set_state(numpy_state)
                torch.set_rng_state(torch_state)

        return wrapper

    return decorator
|
||
|
||
@contextmanager | ||
def _env_vars(vars: dict[str, str]): | ||
"""Context manager which temporarily sets environment variables.""" | ||
original_values = {key: os.getenv(key) for key in vars} | ||
|
||
try: | ||
for key, value in vars.items(): | ||
os.environ[key] = value | ||
yield | ||
finally: | ||
for key, original_value in original_values.items(): | ||
if original_value is None: | ||
del os.environ[key] | ||
else: | ||
os.environ[key] = original_value | ||
|
||
|
||
def env_vars(func: Callable | None = None, **vars) -> Any:
    """Temporarily set environment variables around a block or a function.

    Usage:
    - As a decorator: @env_vars(VAR1="value1", VAR2="value2")
    - As a context manager: with env_vars(VAR1="value1", VAR2="value2"):

    Both forms above call this function without ``func``; the object returned
    by ``_env_vars`` comes from ``contextlib.contextmanager`` and therefore
    works in a ``with`` statement and as a decorator. When ``func`` is passed
    directly, e.g. ``env_vars(some_function, VAR1="value1")``, a wrapper is
    returned that sets the variables for the duration of each call.

    Args:
        func: Optional function to wrap directly.
        **vars: Environment variable names and the values to set.

    Returns:
        The ``_env_vars`` context manager when ``func`` is None, otherwise
        the wrapped function.
    """
    if func is not None:
        # Direct wrapping: apply the variables around every invocation.
        @wraps(func)
        def wrapper(*args, **kwargs):
            with _env_vars(vars):
                return func(*args, **kwargs)

        return wrapper

    # Keyword-only call: hand back the context manager / decorator object.
    return _env_vars(vars)