Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add migration of users' memories to Firestore #914

Merged
merged 4 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions backend/migration/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Script entry point for the one-off memories timestamp migration.
from memories import migration_incorrect_start_finish_time

# Guarded so that merely importing this module does not start rewriting
# Firestore documents; running it as a script behaves as before.
if __name__ == "__main__":
    migration_incorrect_start_finish_time()
77 changes: 77 additions & 0 deletions backend/migration/memories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import math
from typing import Optional
from pydantic import BaseModel
from datetime import datetime, timedelta
from database._client import db
from google.cloud import firestore
from google.cloud.firestore_v1.field_path import FieldPath
from google.cloud.firestore_v1 import FieldFilter

class MemoryTime(BaseModel):
    """Timestamp view of a memory document, as read and rewritten by the migration."""

    id: str
    created_at: datetime
    # Explicit None defaults: Pydantic v2 treats an Optional annotation without
    # a default as a required field, so a document missing either key would
    # otherwise raise a validation error and abort the migration.
    started_at: Optional[datetime] = None
    finished_at: Optional[datetime] = None


# Page size for both the users and the memories scans. One batch is built per
# memories page, so this also bounds the number of writes per batch.
_PAGE_SIZE = 400
# Memories whose started_at is within this many seconds of created_at are
# left untouched.
_MAX_DRIFT_SECONDS = 15 * 60


def _fetch_page(query, cursor):
    """Return the next page of snapshots of `query` after `cursor`.

    `cursor` is the last snapshot of the previous page, or None for the first
    page. Cursor pagination is used instead of `offset()` so that already
    processed documents are not read again on every page.
    """
    page_query = query.limit(_PAGE_SIZE)
    if cursor is not None:
        page_query = page_query.start_after(cursor)
    return list(page_query.stream())


def _corrected_times(memory):
    """Return the fields to rewrite for `memory`, or None if nothing changes.

    A memory is corrected only when it has a `started_at` that differs from
    `created_at` by at least `_MAX_DRIFT_SECONDS`.
    """
    if not memory.started_at:
        return None

    delta = memory.created_at.timestamp() - memory.started_at.timestamp()
    if math.fabs(delta) < _MAX_DRIFT_SECONDS:
        return None

    # NOTE(review): the shift is always positive, exactly as in the original
    # if/else. When started_at is *later* than created_at this moves it
    # further away rather than back onto created_at; a signed
    # timedelta(seconds=delta) would align both cases. Confirm the intent
    # before running against production data.
    shift = timedelta(seconds=math.fabs(delta))

    changes = {'started_at': memory.started_at + shift}
    if memory.finished_at:
        changes['finished_at'] = memory.finished_at + shift
    return changes


def _migrate_user_memories(user_id):
    """Correct the start/finish times of every memory belonging to `user_id`."""
    memories_query = (
        db.collection('users').document(user_id).collection("memories")
        .order_by(FieldPath.document_id(), direction=firestore.Query.ASCENDING)
    )
    cursor = None
    processed = 0
    while True:
        print(f"running...user...{user_id}...{processed}")
        docs = _fetch_page(memories_query, cursor)
        if not docs:
            print("done")
            return

        batch = db.batch()
        pending_writes = 0
        for doc in docs:
            memory = MemoryTime(**doc.to_dict())
            changes = _corrected_times(memory)
            if changes is None:
                continue
            print(f'{memory.id} -> {changes}')
            # Update through the snapshot's own reference and send only the
            # fields that changed, so id/created_at are never rewritten.
            batch.update(doc.reference, changes)
            pending_writes += 1

        # Skip the round trip when the page needed no corrections.
        if pending_writes:
            batch.commit()

        cursor = docs[-1]
        processed += len(docs)


def migration_incorrect_start_finish_time():
    """Shift started_at/finished_at of memories that drifted from created_at.

    Walks every document in the `users` collection in document-ID order and,
    for each user, every document in its `memories` subcollection. Memories
    whose `started_at` is at least 15 minutes away from `created_at` get
    `started_at` (and `finished_at`, when set) shifted by that difference.
    Corrections are written in one batch per page of memories.
    """
    users_query = (
        db.collection('users')
        .order_by(FieldPath.document_id(), direction=firestore.Query.ASCENDING)
    )
    cursor = None
    while True:
        users = _fetch_page(users_query, cursor)
        if not users:
            print("no users")
            break
        for user in users:
            _migrate_user_memories(user.id)
        cursor = users[-1]

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Image description Entelligence.AI

The function migration_incorrect_start_finish_time is quite large and does multiple things. It would be beneficial to break it down into smaller, more manageable functions. This will improve readability, maintainability, and testability of the code.

Here's a suggestion on how you could refactor this function:

def get_users(user_offset, user_limit):
    """Return one page of user snapshots ordered by document ID.

    Returns an empty list (never None) once `user_offset` is past the last
    user, so callers can use the result as a loop-termination signal.
    """
    users_ref = (
        db.collection('users')
        .order_by(FieldPath.document_id(), direction=firestore.Query.ASCENDING)
    )
    users = list(users_ref.limit(user_limit).offset(user_offset).stream())
    if not users:
        print("no users")
    return users


def get_memories(user, offset, limit):
    """Return one page of `user`'s memory snapshots ordered by document ID.

    Returns an empty list (never None) once `offset` is past the last memory.
    """
    print(f"running...user...{user.id}...{offset}")
    memories_ref = (
        db.collection('users').document(user.id).collection("memories")
        .order_by(FieldPath.document_id(), direction=firestore.Query.ASCENDING)
    )
    docs = list(memories_ref.limit(limit).offset(offset).stream())
    if not docs:
        print("done")
    return docs


def update_memory_times(user, docs):
    """Shift the start/finish times of the drifted memories in `docs`.

    Memories without `started_at`, or whose `started_at` is within 15 minutes
    of `created_at`, are skipped. All updates for the page are committed in a
    single batch.
    """
    batch = db.batch()
    for doc in docs:
        if not doc:
            continue

        memory = MemoryTime(**doc.to_dict())
        if not memory.started_at:
            continue

        delta = memory.created_at.timestamp() - memory.started_at.timestamp()
        print(delta)
        if math.fabs(delta) < 15*60:
            continue
        # The shift is always the absolute difference, whatever its sign.
        td = timedelta(seconds=math.fabs(delta))
        if memory.finished_at:
            memory.finished_at = memory.finished_at + td
        memory.started_at = memory.started_at + td
        print(f'{memory.dict()}')

        memory_ref = (
            db.collection('users').document(user.id).collection("memories").document(memory.id)
        )

        batch.update(memory_ref, memory.dict())

    batch.commit()


def migration_incorrect_start_finish_time():
    """Run the start/finish time correction over every user's memories."""
    user_offset = 0
    user_limit = 400
    while True:
        users = get_users(user_offset, user_limit)
        # An empty page means every user has been visited.
        if not users:
            break
        for user in users:
            offset = 0
            limit = 400
            while True:
                docs = get_memories(user, offset, limit)
                # An empty page means this user's memories are exhausted.
                if not docs:
                    break
                update_memory_times(user, docs)
                offset += len(docs)

        user_offset = user_offset + len(users)

This way, each function has a single responsibility and the code is much easier to understand and test.

Loading