
Commit

add tqdm loading
vemonet committed Dec 2, 2024
1 parent 5aaec73 commit 67990b3
Showing 2 changed files with 8 additions and 3 deletions.
4 changes: 3 additions & 1 deletion src/sparql_llm/embed.py
@@ -192,7 +192,9 @@ def init_vectordb(vectordb_host: str = settings.vectordb_host) -> None:
     ),
     # wait=False, # Waiting for indexing to finish or not
 )
-print(f"Done generating and indexing {len(docs)} documents into the vectordb in {time.time() - start_time} seconds")
+print(
+    f"Done generating and indexing {len(docs)} documents into the vectordb in {time.time() - start_time} seconds"
+)

 if not vectordb.collection_exists(settings.entities_collection_name):
     vectordb.create_collection(
7 changes: 5 additions & 2 deletions src/sparql_llm/embed_entities.py
@@ -5,14 +5,15 @@

 from langchain_core.documents import Document
 from qdrant_client import models
+from tqdm import tqdm

 from sparql_llm.config import get_embedding_model, get_vectordb, settings
 from sparql_llm.utils import query_sparql
-from tqdm import tqdm

 entities_embeddings_dir = os.path.join("data", "embeddings")
 entities_embeddings_filepath = os.path.join(entities_embeddings_dir, "entities_embeddings.csv")


 def retrieve_index_data(entity: dict, docs: list[Document], pagination: (int, int) = None):
     query = f"{entity['query']} LIMIT {pagination[0]} OFFSET {pagination[1]}" if pagination else entity["query"]
     try:
@@ -307,7 +308,9 @@ def load_entities_embeddings_to_vectordb():
     )
 )
 embeddings.append(literal_eval(row["embedding"]))
-print(f"Found embeddings for {len(docs)} entities in {time.time() - start_time} seconds. Now adding them to the vectordb")
+print(
+    f"Found embeddings for {len(docs)} entities in {time.time() - start_time} seconds. Now adding them to the vectordb"
+)
 vectordb.upsert(
     collection_name=settings.entities_collection_name,
     points=models.Batch(
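The tqdm import in the first hunk above suggests a progress bar around the embeddings-loading loop in load_entities_embeddings_to_vectordb, but the wrapped loop itself falls outside the hunks shown in this diff. Below is a minimal sketch, assuming the CSV is read with csv.DictReader; the reader variable, file handle, and desc text are illustrative and not taken from this commit.

import csv
import os
from ast import literal_eval

from tqdm import tqdm

entities_embeddings_filepath = os.path.join("data", "embeddings", "entities_embeddings.csv")

embeddings = []
with open(entities_embeddings_filepath) as csvfile:
    reader = csv.DictReader(csvfile)
    # Wrapping the reader in tqdm prints a live progress bar while rows are parsed
    for row in tqdm(reader, desc="Loading embeddings from CSV"):
        # docs.append(Document(...)) also happens here in the real script (elided above)
        embeddings.append(literal_eval(row["embedding"]))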
