Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run migrations to fix incorrect source fields of contentnodes #4720

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ migrate:
# 4) Remove the management command from this `deploy-migrate` recipe
# 5) Repeat!
deploy-migrate:
echo "Nothing to do here!"
python contentcuration/manage.py rectify_incorrect_contentnode_source_fields

contentnodegc:
python contentcuration/manage.py garbage_collect
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
# DELETE THIS FILE AFTER RUNNING THE MIGRATIONSSS
import datetime
import uuid

from django.core.management import call_command
from django.utils import timezone
from le_utils.constants import content_kinds

from contentcuration.models import Channel
from contentcuration.models import ContentNode
from contentcuration.models import License
from contentcuration.tests import testdata
from contentcuration.tests.base import StudioAPITestCase
from contentcuration.utils.publish import publish_channel


class TestRectifyMigrationCommand(StudioAPITestCase):

@classmethod
def setUpClass(cls):
super(TestRectifyMigrationCommand, cls).setUpClass()

def tearDown(self):
super(TestRectifyMigrationCommand, self).tearDown()

def setUp(self):
super(TestRectifyMigrationCommand, self).setUp()
self.original_channel = testdata.channel()
self.license_original = License.objects.all()[0]
self.original_contentnode = ContentNode.objects.create(
id=uuid.uuid4().hex,
title="Original Node",
parent=self.original_channel.main_tree,
license=self.license_original,
original_channel_id=None,
source_channel_id=None,
author="old author"
)
self.user = testdata.user()
self.original_channel.editors.add(self.user)
self.client.force_authenticate(user=self.user)

def create_base_channel_and_contentnode(self, source_contentnode, source_channel):
base_channel = testdata.channel()
base_channel.public = True
base_channel.save()
license_changed = License.objects.all()[2]
base_node = ContentNode.objects.create(
id=uuid.uuid4().hex,
title="base contentnode",
parent=base_channel.main_tree,
kind_id=content_kinds.VIDEO,
original_channel_id=self.original_channel.id,
original_source_node_id=self.original_contentnode.node_id,
source_channel_id=source_channel.id,
source_node_id=source_contentnode.node_id,
author="source author",
license=license_changed,
)
return base_node, base_channel

def create_source_channel_and_contentnode(self):
source_channel = testdata.channel()
source_channel.public = True
source_channel.save()
license_changed = License.objects.all()[1]
source_node = ContentNode.objects.create(
id=uuid.uuid4().hex,
title="base contentnode",
parent=source_channel.main_tree,
kind_id=content_kinds.VIDEO,
license=license_changed,
original_channel_id=self.original_channel.id,
source_channel_id=self.original_channel.id,
source_node_id=self.original_contentnode.node_id,
original_source_node_id=self.original_contentnode.node_id,
author="source author",
)

return source_node, source_channel

def run_migrations(self):
call_command('rectify_incorrect_contentnode_source_fields', user_id=self.user.id, is_test=True)

def test_two_node_case(self):
base_node, base_channel = self.create_base_channel_and_contentnode(self.original_contentnode, self.original_channel)

publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id)

# main_tree node still has changed=true even after the publish
for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True):
print(node.id)
print(node.id == base_channel.main_tree.id)
print("checking if the node is complete or not ", node.complete)
node.changed = False
# This should probably again change the changed=true but suprisingly it doesnot
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In case this helps this feel less surprising: https://github.com/learningequality/studio/blob/unstable/contentcuration/contentcuration/models.py#L1831

We have an explicit exclude list of fields for which updates to them do not trigger change to be set as True.

# Meaning the changed boolean doesnot change for the main_tree no matter what we do
# through ContentNode model methods like save.
node.save()

ContentNode.objects.filter(pk=base_node.pk).update(
modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc)
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One suggestion would be to use the same copy/import utilities that we use elsewhere, then override the things that shouldn't have changed, but it isn't a big deal. From my perspective, I like to do my best to ensure the tests are founded upon the app's behaviors as much as possible, because too many differences could cause the tests to pass when they shouldn't (under the typical behaviors of the app)


self.run_migrations()
updated_base_node = ContentNode.objects.get(pk=base_node.pk)
self.assertEqual(updated_base_node.license, self.original_contentnode.license)
self.assertEqual(updated_base_node.author, self.original_contentnode.author)
self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False)

def test_three_node_case_implicit(self):
source_node, source_channel = self.create_source_channel_and_contentnode()
base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel)
source_node.aggregator = "Nami"
source_node.save()
# Implicit case
base_node.author = source_node.author
base_node.license = source_node.license
base_node.aggregator = source_node.aggregator
base_node.save()

publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id)

for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True):
print(node.id)
print(node.id == base_channel.main_tree.id)
print("checking if the node is complete or not ", node.complete)
node.changed = False
node.save()

ContentNode.objects.filter(pk=base_node.pk).update(
modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc)
)

ContentNode.objects.filter(pk=source_node.pk).update(
modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc)
)

self.run_migrations()
updated_base_node = ContentNode.objects.get(pk=base_node.pk)
updated_source_node = ContentNode.objects.get(pk=source_node.pk)
self.assertEqual(updated_base_node.license, self.original_contentnode.license)
self.assertEqual(updated_base_node.author, self.original_contentnode.author)
self.assertEqual(updated_source_node.license, self.original_contentnode.license)
self.assertEqual(updated_source_node.author, self.original_contentnode.author)
self.assertEqual(updated_source_node.aggregator, self.original_contentnode.aggregator)
self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False)

def test_three_node_case_explicit(self):
source_node, source_channel = self.create_source_channel_and_contentnode()
base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel)
source_node.provider = "luffy"
base_node.provider = "zoro"
base_node.save()
source_node.save()
publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id)

for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True):
print(node.id)
print(node.id == base_channel.main_tree.id)
print("checking if the node is complete or not ", node.complete)
node.changed = False
node.save()

ContentNode.objects.filter(pk=base_node.pk).update(
modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc)
)

ContentNode.objects.filter(pk=source_node.pk).update(
modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc)
)

self.run_migrations()
updated_base_node = ContentNode.objects.get(pk=base_node.pk)
updated_source_node = ContentNode.objects.get(pk=source_node.pk)
self.assertEqual(updated_base_node.license, self.original_contentnode.license)
self.assertEqual(updated_base_node.author, self.original_contentnode.author)
self.assertEqual(updated_base_node.provider, self.original_contentnode.provider)
self.assertEqual(updated_source_node.license, self.original_contentnode.license)
self.assertEqual(updated_source_node.author, self.original_contentnode.author)
self.assertEqual(updated_source_node.provider, self.original_contentnode.provider)
self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False)
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import datetime
import logging

from django.core.management.base import BaseCommand
from django.db.models import Exists
from django.db.models import F
from django.db.models import OuterRef
from django.db.models import Q
from django.db.models import Value
from django.db.models.functions import Coalesce
from django.utils import timezone
from django_cte import With

from contentcuration.models import Channel
from contentcuration.models import ContentNode
from contentcuration.models import User
from contentcuration.utils.publish import publish_channel

logger = logging.getLogger(__file__)


class Command(BaseCommand):

def add_arguments(self, parser):

parser.add_argument(
'--is_test',
action='store_true',
help="Indicate if the command is running in a test environment.",
)

parser.add_argument(
'--user_id',
type=int,
help="User ID for the operation",
required=True
)

def handle(self, *args, **options):
# Filter Date : July 9, 2023
# Link https://github.com/learningequality/studio/pull/4189
# The PR date for the frontend change is July 10, 2023
# we would set the filter day one day back just to be sure

is_test = options['is_test']
user_id = options['user_id']

if not is_test:
user_id = User.objects.get(email='[email protected]').pk

filter_date = datetime.datetime(2023, 7, 9, tzinfo=timezone.utc)
main_trees_cte = With(
(
Channel.objects.filter(
main_tree__isnull=False
)
.annotate(channel_id=F("id"))
.values("channel_id", "deleted", tree_id=F("main_tree__tree_id"))
),
name="main_trees",
)

nodes = main_trees_cte.join(
ContentNode.objects.all(),
tree_id=main_trees_cte.col.tree_id,
).annotate(channel_id=main_trees_cte.col.channel_id, deleted=main_trees_cte.col.deleted)

original_source_nodes = (
nodes.with_cte(main_trees_cte)
.filter(
node_id=OuterRef("original_source_node_id"),
)
.exclude(
tree_id=OuterRef("tree_id"),
)
.annotate(
coalesced_provider=Coalesce("provider", Value("")),
coalesced_author=Coalesce("author", Value("")),
coalesced_aggregator=Coalesce("aggregator", Value("")),
coalesced_license_id=Coalesce("license_id", -1),
)
)
# We filter out according to last_modified date before this PR
# https://github.com/learningequality/studio/pull/4189
# As we want to lossen up the public=True Filter and open the
# migration for all the nodes even if they are not published
diff = (
nodes.with_cte(main_trees_cte).filter(
deleted=False, # we dont want the channel to be deleted or else we are fixing ghost nodes
source_node_id__isnull=False,
original_source_node_id__isnull=False,
modified__lt=filter_date
)
).annotate(
coalesced_provider=Coalesce("provider", Value("")),
coalesced_author=Coalesce("author", Value("")),
coalesced_aggregator=Coalesce("aggregator", Value("")),
coalesced_license_id=Coalesce("license_id", -1),
)
diff_combined = diff.annotate(
original_source_node_f_changed=Exists(
original_source_nodes.filter(
~Q(coalesced_provider=OuterRef("coalesced_provider"))
| ~Q(coalesced_author=OuterRef("coalesced_author"))
| ~Q(coalesced_aggregator=OuterRef("coalesced_aggregator"))
| ~Q(coalesced_license_id=OuterRef("coalesced_license_id"))
)
)
).filter(original_source_node_f_changed=True)

final_nodes = diff_combined.values(
"id",
"channel_id",
"original_channel_id",
"original_source_node_id",
"coalesced_provider",
"coalesced_author",
"coalesced_aggregator",
"coalesced_license_id",
"original_source_node_f_changed",
).order_by()

channel_ids_to_republish = set()

for item in final_nodes:
base_node = ContentNode.objects.get(pk=item["id"])

original_source_channel_id = item["original_channel_id"]
original_source_node_id = item["original_source_node_id"]
tree_id = (
Channel.objects.filter(pk=original_source_channel_id)
.values_list("main_tree__tree_id", flat=True)
.get()
)
original_source_node = ContentNode.objects.filter(
tree_id=tree_id, node_id=original_source_node_id
)

base_channel = Channel.objects.get(pk=item['channel_id'])

to_be_republished = not (base_channel.main_tree.get_family().filter(changed=True).exists())

if original_source_channel_id is not None and original_source_node.exists():
# original source node exists and its source fields dont match
# update the base node
if base_node.author != original_source_node[0].author:
base_node.author = original_source_node[0].author
if base_node.provider != original_source_node[0].provider:
base_node.provider = original_source_node[0].provider
if base_node.aggregator != original_source_node[0].aggregator:
base_node.aggregator = original_source_node[0].aggregator
if base_node.license != original_source_node[0].license:
base_node.license = original_source_node[0].license
base_node.save()

if to_be_republished and base_channel.last_published is not None:
ozer550 marked this conversation as resolved.
Show resolved Hide resolved
channel_ids_to_republish.add(base_channel.id)

# we would repbulish the channel
for channel_id in channel_ids_to_republish:
publish_channel(user_id, channel_id)