Skip to content

Commit

Permalink
Merge pull request #351 from opencivicdata/hcg/max
Browse files Browse the repository at this point in the history
Make failsafe configurable and remove redundant option
  • Loading branch information
hancush authored Jul 23, 2024
2 parents 1c9d673 + 639124f commit 108272c
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 76 deletions.
58 changes: 25 additions & 33 deletions pupa/cli/commands/clean.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import sys
from datetime import datetime, timezone, timedelta
import sys

import django
from django.apps import apps
Expand Down Expand Up @@ -31,6 +31,12 @@ def add_args(self):
"objects not seen in this many days will be deleted from the database"
),
)
self.add_argument(
"--max",
type=int,
default=10,
help="max number of objects to delete without triggering failsafe",
)
self.add_argument(
"--report",
action="store_true",
Expand All @@ -39,11 +45,6 @@ def add_args(self):
" would delete without making any changes to the database"
),
)
self.add_argument(
"--noinput",
action="store_true",
help="delete objects without getting user confirmation",
)
self.add_argument(
"--yes",
action="store_true",
Expand Down Expand Up @@ -97,34 +98,25 @@ def handle(self, args, other):
stale_objects = list(self.get_stale_objects(args.window))
num_stale_objects = len(stale_objects)

if args.noinput and args.yes:
self.remove_stale_objects(args.window)
sys.exit()
print(
f"{num_stale_objects} objects in your database have not been seen "
f"in {args.window} days."
)

if args.noinput:
# Fail-safe to avoid deleting a large amount of objects
# without explicit confimation
if num_stale_objects > 10:
print(
f"This command would delete {num_stale_objects} objects: "
f"\n{stale_objects}"
"\nIf you're sure, re-run without --noinput to provide confirmation."
"\nOr re-run with --yes to assume a yes answer to all prompts."
)
sys.exit(1)
else:
if num_stale_objects > args.max:
print(
f"This will permanently delete"
f" {num_stale_objects} objects from your database"
f" that have not been scraped within the last {args.window}"
" days. Are you sure? (Y/N)"
f"{num_stale_objects} exceeds the failsafe limit of {args.max}. "
"Run this command with a larger --max value to proceed."
)
resp = input()
if resp != "Y":
sys.exit()
sys.exit()

print(
"Removing objects that haven't been seen in a scrape within"
f" the last {args.window} days..."
)
self.remove_stale_objects(args.window)
if args.yes:
print("Proceeding to deletion because you specified --yes.")

else:
print(f"Permanently delete {num_stale_objects} objects? [Y/n]")
response = input()

if args.yes or response == "Y":
self.remove_stale_objects(args.window)
print(f"Removed {num_stale_objects} from your database.")
105 changes: 62 additions & 43 deletions pupa/tests/clean/test_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,40 +24,58 @@ def subparsers():
return parser.add_subparsers(dest="subcommand")


def create_jurisdiction():
@pytest.fixture
def jurisdiction():
Division.objects.create(id="ocd-division/country:us", name="USA")
return Jurisdiction.objects.create(id="jid", division_id="ocd-division/country:us")


@pytest.fixture
def organization(jurisdiction):
return Organization.objects.create(name="WWE", jurisdiction_id="jid")


@pytest.fixture
def person():
class PersonFactory:
def build(self, **kwargs):
person_info = {
"name": "George Washington",
"family_name": "Washington",
}

person_info.update(kwargs)

return Person.objects.create(**person_info)

return PersonFactory()


@pytest.mark.django_db
def test_get_stale_objects(subparsers):
_ = create_jurisdiction()
o = Organization.objects.create(name="WWE", jurisdiction_id="jid")
p = Person.objects.create(name="George Washington", family_name="Washington")
m = p.memberships.create(organization=o)
def test_get_stale_objects(subparsers, organization, person):
stale_person = person.build()
membership = stale_person.memberships.create(organization=organization)

expected_stale_objects = {p, o, m}
expected_stale_objects = {stale_person, organization, membership}

a_week_from_now = datetime.now(tz=timezone.utc) + timedelta(days=7)
with freeze_time(a_week_from_now):
p = Person.objects.create(name="Thomas Jefferson", family_name="Jefferson")
p.memberships.create(organization=o)
fresh_person = person.build(name="Thomas Jefferson", family_name="Jefferson")
fresh_person.memberships.create(organization=organization)
assert set(Command(subparsers).get_stale_objects(7)) == expected_stale_objects


@pytest.mark.django_db
def test_remove_stale_objects(subparsers):
_ = create_jurisdiction()
o = Organization.objects.create(name="WWE", jurisdiction_id="jid")
p = Person.objects.create(name="George Washington", family_name="Washington")
m = p.memberships.create(organization=o)
def test_remove_stale_objects(subparsers, organization, person):
stale_person = person.build()
membership = stale_person.memberships.create(organization=organization)

expected_stale_objects = {p, o, m}
expected_stale_objects = {stale_person, organization, membership}

a_week_from_now = datetime.now(tz=timezone.utc) + timedelta(days=7)
with freeze_time(a_week_from_now):
p = Person.objects.create(name="Thomas Jefferson", family_name="Jefferson")
p.memberships.create(organization=o)
fresh_person = person.build(name="Thomas Jefferson", family_name="Jefferson")
fresh_person.memberships.create(organization=organization)

Command(subparsers).remove_stale_objects(7)
for obj in expected_stale_objects:
Expand All @@ -66,56 +84,57 @@ def test_remove_stale_objects(subparsers):


@pytest.mark.django_db
def test_clean_command(subparsers):
_ = create_jurisdiction()
o = Organization.objects.create(name="WWE", jurisdiction_id="jid")

stale_person = Person.objects.create(
name="George Washington", family_name="Washington"
)
stale_membership = stale_person.memberships.create(organization=o)
def test_clean_command(subparsers, organization, person):
stale_person = person.build()
stale_membership = stale_person.memberships.create(organization=organization)

a_week_from_now = datetime.now(tz=timezone.utc) + timedelta(days=7)
with freeze_time(a_week_from_now):
not_stale_person = Person.objects.create(
name="Thomas Jefferson", family_name="Jefferson"
fresh_person = person.build(name="Thomas Jefferson", family_name="Jefferson")
not_stale_membership = fresh_person.memberships.create(
organization=organization
)
not_stale_membership = not_stale_person.memberships.create(organization=o)
o.save() # Update org's last_seen field
organization.save() # Update org's last_seen field

# Call clean command
Command(subparsers).handle(
argparse.Namespace(noinput=True, report=False, window=7, yes=False), []
argparse.Namespace(report=False, window=7, yes=True, max=10), []
)

expected_stale_objects = {stale_person, stale_membership}
for obj in expected_stale_objects:
was_deleted = not type(obj).objects.filter(id=obj.id).exists()
assert was_deleted

expected_not_stale_objects = {o, not_stale_person, not_stale_membership}
expected_not_stale_objects = {organization, fresh_person, not_stale_membership}
for obj in expected_not_stale_objects:
was_not_deleted = type(obj).objects.filter(id=obj.id).exists()
assert was_not_deleted


@pytest.mark.django_db
def test_clean_command_failsafe(subparsers):
_ = create_jurisdiction()
o = Organization.objects.create(name="WWE", jurisdiction_id="jid")

stale_people = [
Person.objects.create(name="George Washington", family_name="Washington")
for i in range(20)
]
stale_memberships = [ # noqa
p.memberships.create(organization=o) for p in stale_people
]
def test_clean_command_failsafe(subparsers, organization, person):
stale_people = [person.build() for i in range(20)]
for p in stale_people:
p.memberships.create(organization=organization)

a_week_from_now = datetime.now(tz=timezone.utc) + timedelta(days=7)
with freeze_time(a_week_from_now):
with pytest.raises(SystemExit):
# Should trigger failsafe exist when deleting more than 10 objects
Command(subparsers).handle(
argparse.Namespace(noinput=True, report=False, window=7, yes=False), []
argparse.Namespace(report=False, window=7, yes=False, max=10), []
)

with pytest.raises(SystemExit):
# Should trigger failsafe exist when deleting more than 10 objects,
# even when yes is specified
Command(subparsers).handle(
argparse.Namespace(report=False, window=7, yes=True, max=10), []
)

# Should proceed without error, since max is increased (1 organization,
# 20 people, 20 memberships)
Command(subparsers).handle(
argparse.Namespace(report=False, window=7, max=41, yes=True), []
)

0 comments on commit 108272c

Please sign in to comment.