Skip to content

Commit

Permalink
index documents in batch
Browse files Browse the repository at this point in the history
  • Loading branch information
AlisoSouza committed Nov 27, 2024
1 parent 4c075cf commit 4a9dc37
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions app/indexer/content_bases.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
import os
from typing import List
from uuid import UUID

from langchain.docstore.document import Document

from app.handlers.products import Product
from app.indexer import IDocumentIndexer
from app.store import IStorage
from typing import List
from uuid import UUID


class ContentBaseIndexer(IDocumentIndexer):
# Constructor: keeps the given storage backend on the instance; index_documents
# calls its delete() and save() methods.
def __init__(self, storage: IStorage):
self.storage = storage

def index_documents(self, docs: List[Document]):
# Environment variables are always strings, so convert before using the value
# as a range() step; the default of 500 applies when the variable is unset.
# range() raises ValueError for a zero step, so the setting must be non-zero.
DOCUMENTS_BATCH_SIZE: int = int(os.environ.get("DOCUMENTS_BATCH_SIZE", 500))
docs_size: int = len(docs)

file_uuid = docs[0].metadata["file_uuid"]
content_base_uuid = docs[0].metadata["content_base_uuid"]

Expand All @@ -23,8 +28,11 @@ def index_documents(self, docs: List[Document]):
if len(results) > 0:
ids = [item["_id"] for item in results]
self.storage.delete(ids=ids)

# Save the documents in consecutive windows of at most DOCUMENTS_BATCH_SIZE
# items. The slice end must move with the start index; a fixed end of
# DOCUMENTS_BATCH_SIZE + 1 yields an oversized first batch and leaves every
# document beyond that position unsaved.
for start in range(0, docs_size, DOCUMENTS_BATCH_SIZE):
    self.storage.save(docs[start:start + DOCUMENTS_BATCH_SIZE])

return self.storage.save(docs)
return

def index(self, texts: List, metadatas: dict):
results = self._search_docs_by_content_base_uuid(
Expand Down

0 comments on commit 4a9dc37

Please sign in to comment.