Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run migrations to fix incorrect source fields of contentnodes #4720

Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ migrate:
# 4) Remove the management command from this `deploy-migrate` recipe
# 5) Repeat!
# Deploy-time hook: runs the one-off `rectify_incorrect_contentnode_source_fields`
# management command through manage.py.
# NOTE(review): this excerpt is a flattened diff hunk ("1 addition & 1 deletion"),
# so the `echo` line is presumably the removed line and the `python` line its
# replacement -- confirm against the real Makefile.
deploy-migrate:
echo "Nothing to do here!"
python contentcuration/manage.py rectify_incorrect_contentnode_source_fields

# Runs the `garbage_collect` management command through manage.py.
contentnodegc:
python contentcuration/manage.py garbage_collect
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
# DELETE THIS FILE AFTER RUNNING THE MIGRATIONS
import datetime
import uuid

from django.db.models import Exists
from django.db.models import F
from django.db.models import OuterRef
from django.db.models import Q
from django.db.models import Value
from django.db.models.functions import Coalesce
from django.utils import timezone
from django_cte import With
from le_utils.constants import content_kinds

from contentcuration.models import Channel
from contentcuration.models import ContentNode
from contentcuration.models import License
from contentcuration.tests import testdata
from contentcuration.tests.base import StudioAPITestCase
from contentcuration.utils.publish import publish_channel


class TestRectifyMigrationCommand(StudioAPITestCase):
    """Check that drifted source fields on a copied node are restored.

    ``run_migrations`` re-implements the query and update steps of the
    ``rectify_incorrect_contentnode_source_fields`` management command inside
    the test, publishing as the test user.
    """

    def setUp(self):
        """Create an original channel/node pair and an authenticated editor."""
        super(TestRectifyMigrationCommand, self).setUp()
        self.original_channel = testdata.channel()
        self.license_original = License.objects.all()[0]
        self.original_contentnode = ContentNode.objects.create(
            id=uuid.uuid4().hex,
            title="Original Node",
            parent=self.original_channel.main_tree,
            license=self.license_original,
            original_channel_id=None,
            source_channel_id=None,
        )
        self.user = testdata.user()
        self.original_channel.editors.add(self.user)
        self.client.force_authenticate(user=self.user)

    def run_migrations(self):
        """Copy source fields from original nodes onto drifted copies.

        Selects nodes in non-deleted channels' main trees that reference a
        source node, were last modified before the cutoff date, and whose
        provider/author/aggregator/license differ from a node with the same
        ``node_id`` in another main tree. Each such node gets the original's
        values and is saved. Published channels that had no ``changed`` nodes
        before their first fix are republished once, after all nodes are
        processed.
        """
        filter_date = datetime.datetime(2023, 7, 9, tzinfo=timezone.utc)
        main_trees_cte = With(
            (
                Channel.objects.filter(main_tree__isnull=False)
                .annotate(channel_id=F("id"))
                .values("channel_id", "deleted", tree_id=F("main_tree__tree_id"))
            ),
            name="main_trees",
        )

        nodes = main_trees_cte.join(
            ContentNode.objects.all(),
            tree_id=main_trees_cte.col.tree_id,
        ).annotate(
            channel_id=main_trees_cte.col.channel_id,
            deleted=main_trees_cte.col.deleted,
        )

        # Candidate originals: same node_id as the copy's
        # original_source_node_id, but living in a different tree.
        original_source_nodes = (
            nodes.with_cte(main_trees_cte)
            .filter(node_id=OuterRef("original_source_node_id"))
            .exclude(tree_id=OuterRef("tree_id"))
            .annotate(
                coalesced_provider=Coalesce("provider", Value("")),
                coalesced_author=Coalesce("author", Value("")),
                coalesced_aggregator=Coalesce("aggregator", Value("")),
                coalesced_license_id=Coalesce("license_id", -1),
            )
        )

        # Only nodes last modified before the cutoff are considered; see
        # https://github.com/learningequality/studio/pull/4189
        # There is deliberately no `published=True` restriction, so nodes
        # that are not published are migrated as well.
        diff = (
            nodes.with_cte(main_trees_cte)
            .filter(
                # Skip deleted channels, otherwise we would be fixing ghost nodes.
                deleted=False,
                source_node_id__isnull=False,
                original_source_node_id__isnull=False,
                modified__lt=filter_date,
            )
            .annotate(
                coalesced_provider=Coalesce("provider", Value("")),
                coalesced_author=Coalesce("author", Value("")),
                coalesced_aggregator=Coalesce("aggregator", Value("")),
                coalesced_license_id=Coalesce("license_id", -1),
            )
        )

        diff_combined = diff.annotate(
            original_source_node_f_changed=Exists(
                original_source_nodes.filter(
                    ~Q(coalesced_provider=OuterRef("coalesced_provider"))
                    | ~Q(coalesced_author=OuterRef("coalesced_author"))
                    | ~Q(coalesced_aggregator=OuterRef("coalesced_aggregator"))
                    | ~Q(coalesced_license_id=OuterRef("coalesced_license_id"))
                )
            )
        ).filter(original_source_node_f_changed=True)

        final_nodes = diff_combined.values(
            "id",
            "channel_id",
            "original_channel_id",
            "original_source_node_id",
            "coalesced_provider",
            "coalesced_author",
            "coalesced_aggregator",
            "coalesced_license_id",
            "original_source_node_f_changed",
        ).order_by()

        # channel id -> whether it was published with no `changed` nodes
        # before this run modified anything in it.
        republish_eligible = {}
        channels_to_republish = set()

        for item in final_nodes:
            original_source_channel_id = item["original_channel_id"]
            if original_source_channel_id is None:
                continue

            # first() instead of get(): a missing original channel means
            # "skip this node", not an exception that aborts the whole run.
            original_tree_id = (
                Channel.objects.filter(pk=original_source_channel_id)
                .values_list("main_tree__tree_id", flat=True)
                .first()
            )
            if original_tree_id is None:
                continue

            # Fetch the original once rather than re-querying per field.
            original_source_node = ContentNode.objects.filter(
                tree_id=original_tree_id,
                node_id=item["original_source_node_id"],
            ).first()
            if original_source_node is None:
                continue

            channel_id = item["channel_id"]
            if channel_id not in republish_eligible:
                # Must be evaluated before the first save() in this channel.
                base_channel = Channel.objects.get(pk=channel_id)
                republish_eligible[channel_id] = bool(
                    base_channel.published
                ) and not (
                    base_channel.main_tree.get_family().filter(changed=True).exists()
                )

            base_node = ContentNode.objects.get(pk=item["id"])
            base_node.author = original_source_node.author
            base_node.provider = original_source_node.provider
            base_node.aggregator = original_source_node.aggregator
            base_node.license_id = original_source_node.license_id
            base_node.save()

            if republish_eligible[channel_id]:
                channels_to_republish.add(channel_id)

        # Publish each affected channel once, however many nodes were fixed.
        for channel_id in sorted(channels_to_republish):
            publish_channel(self.user.id, channel_id)

    def test_two_node_case(self):
        """A copy whose license differs from its original gets it restored."""
        base_channel = testdata.channel()
        license_changed = License.objects.all()[1]
        base_node = ContentNode.objects.create(
            id=uuid.uuid4().hex,
            title="base contentnode",
            parent=base_channel.main_tree,
            kind_id=content_kinds.VIDEO,
            license=license_changed,
            original_channel_id=self.original_channel.id,
            source_channel_id=self.original_channel.id,
            source_node_id=self.original_contentnode.node_id,
            original_source_node_id=self.original_contentnode.node_id,
        )
        # NOTE(review): a reviewer suggested building this node with the same
        # copy/import utilities the app uses elsewhere and then overriding the
        # fields that should not have changed, so that the test is founded on
        # the app's real behaviour and cannot pass when it should not.

        # Write the backdated timestamp and the cleared `changed` flag with a
        # queryset update(), which stores the values directly without calling
        # Model.save().
        ContentNode.objects.filter(pk=base_node.pk).update(
            modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc),
            changed=False,
        )

        self.run_migrations()

        # Reload both objects so the assertions see the persisted state and
        # not the instances created above.
        base_node.refresh_from_db()
        base_channel.refresh_from_db()
        self.assertEqual(base_node.license, self.original_contentnode.license)
        self.assertTrue(base_channel.published)
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import datetime
import logging

from django.core.management.base import BaseCommand
from django.db.models import Exists
from django.db.models import F
from django.db.models import OuterRef
from django.db.models import Q
from django.db.models import Value
from django.db.models.functions import Coalesce
from django.utils import timezone
from django_cte import With

from contentcuration.models import Channel
from contentcuration.models import ContentNode
from contentcuration.utils.publish import publish_channel

# Name the logger after the module's import path (not its file path) so it
# takes part in the dotted logger hierarchy used by logging configuration.
logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Re-sync source metadata on copied content nodes with their originals.

    Candidate nodes live in the main tree of a non-deleted channel, have both
    ``source_node_id`` and ``original_source_node_id`` set, and were last
    modified before the cutoff date. When a node with the matching ``node_id``
    in another main tree has a different provider, author, aggregator or
    license, the copy is overwritten with the values of the original node
    found in the original channel's main tree.

    Published channels that had no ``changed`` nodes before this command
    touched them are republished once at the end, provided ``--user_id`` is
    given.
    """

    help = (
        "Copy author, provider, aggregator and license from the original "
        "source node onto copied nodes whose values differ."
    )

    # Fields overwritten on the copy. `license_id` is used rather than
    # `license` so no License row has to be loaded just to copy the key.
    SOURCE_FIELDS = ("author", "provider", "aggregator", "license_id")

    def add_arguments(self, parser):
        """Register the optional ``--user_id`` argument."""
        parser.add_argument(
            "--user_id",
            type=int,
            default=None,
            help=(
                "ID of the user passed to publish_channel when republishing "
                "affected channels. If omitted, nodes are still fixed but "
                "channels are only reported, not republished."
            ),
        )

    def handle(self, *args, **options):
        """Fix every mismatching node, then republish affected channels.

        Nodes whose original channel or original node cannot be found are
        skipped. Republishing happens once per channel after all nodes have
        been processed.
        """
        user_id = options.get("user_id")

        # channel id -> whether it was published with no `changed` nodes
        # before this run modified anything in it.
        republish_eligible = {}
        channels_to_republish = set()

        for item in self._nodes_with_mismatched_source_fields():
            original_source_node = self._find_original_source_node(
                item["original_channel_id"], item["original_source_node_id"]
            )
            if original_source_node is None:
                continue

            channel_id = item["channel_id"]
            if channel_id not in republish_eligible:
                # Must be evaluated before the first save() in this channel.
                republish_eligible[channel_id] = self._is_published_and_unchanged(
                    channel_id
                )

            base_node = ContentNode.objects.get(pk=item["id"])
            for field in self.SOURCE_FIELDS:
                setattr(base_node, field, getattr(original_source_node, field))
            base_node.save()

            if republish_eligible[channel_id]:
                channels_to_republish.add(channel_id)

        if not channels_to_republish:
            return

        if user_id is None:
            # NOTE(review): publish_channel's first argument is presumably the
            # id of a real user -- confirm. Without one we report instead of
            # publishing. A rerun will not rediscover these channels, because
            # their nodes no longer differ from the originals.
            logger.warning(
                "Source fields were fixed but these channels were not "
                "republished because --user_id was not given: %s",
                ", ".join(str(pk) for pk in sorted(channels_to_republish)),
            )
            return

        # Publish each affected channel once, however many nodes were fixed.
        for channel_id in sorted(channels_to_republish):
            publish_channel(user_id, channel_id)

    def _nodes_with_mismatched_source_fields(self):
        """Return a values() queryset of copied nodes that differ from an original."""
        # Filter date: July 9, 2023.
        # The frontend change (https://github.com/learningequality/studio/pull/4189)
        # is dated July 10, 2023; the cutoff is set one day earlier to be sure.
        filter_date = datetime.datetime(2023, 7, 9, tzinfo=timezone.utc)
        main_trees_cte = With(
            (
                Channel.objects.filter(main_tree__isnull=False)
                .annotate(channel_id=F("id"))
                .values("channel_id", "deleted", tree_id=F("main_tree__tree_id"))
            ),
            name="main_trees",
        )

        nodes = main_trees_cte.join(
            ContentNode.objects.all(),
            tree_id=main_trees_cte.col.tree_id,
        ).annotate(
            channel_id=main_trees_cte.col.channel_id,
            deleted=main_trees_cte.col.deleted,
        )

        # Candidate originals: same node_id as the copy's
        # original_source_node_id, but living in a different tree.
        original_source_nodes = (
            nodes.with_cte(main_trees_cte)
            .filter(node_id=OuterRef("original_source_node_id"))
            .exclude(tree_id=OuterRef("tree_id"))
            .annotate(**self._coalesced_source_fields())
        )

        # Only nodes last modified before the cutoff are considered. There is
        # deliberately no `published=True` restriction, so nodes that are not
        # published are migrated as well.
        diff = (
            nodes.with_cte(main_trees_cte)
            .filter(
                # Skip deleted channels, otherwise we would be fixing ghost nodes.
                deleted=False,
                source_node_id__isnull=False,
                original_source_node_id__isnull=False,
                modified__lt=filter_date,
            )
            .annotate(**self._coalesced_source_fields())
        )

        diff_combined = diff.annotate(
            original_source_node_f_changed=Exists(
                original_source_nodes.filter(
                    ~Q(coalesced_provider=OuterRef("coalesced_provider"))
                    | ~Q(coalesced_author=OuterRef("coalesced_author"))
                    | ~Q(coalesced_aggregator=OuterRef("coalesced_aggregator"))
                    | ~Q(coalesced_license_id=OuterRef("coalesced_license_id"))
                )
            )
        ).filter(original_source_node_f_changed=True)

        return diff_combined.values(
            "id",
            "channel_id",
            "original_channel_id",
            "original_source_node_id",
            "coalesced_provider",
            "coalesced_author",
            "coalesced_aggregator",
            "coalesced_license_id",
            "original_source_node_f_changed",
        ).order_by()

    @staticmethod
    def _coalesced_source_fields():
        """Return fresh NULL-safe annotations for the compared source fields."""
        return {
            "coalesced_provider": Coalesce("provider", Value("")),
            "coalesced_author": Coalesce("author", Value("")),
            "coalesced_aggregator": Coalesce("aggregator", Value("")),
            "coalesced_license_id": Coalesce("license_id", -1),
        }

    @staticmethod
    def _find_original_source_node(original_channel_id, original_source_node_id):
        """Return the original node from the original channel's main tree, or None."""
        if original_channel_id is None:
            return None
        # first() instead of get(): a missing original channel means "skip
        # this node", not an exception that aborts the whole run.
        tree_id = (
            Channel.objects.filter(pk=original_channel_id)
            .values_list("main_tree__tree_id", flat=True)
            .first()
        )
        if tree_id is None:
            return None
        # Fetch the row once; indexing the queryset repeatedly would issue a
        # new query on every access.
        return ContentNode.objects.filter(
            tree_id=tree_id, node_id=original_source_node_id
        ).first()

    @staticmethod
    def _is_published_and_unchanged(channel_id):
        """Return True if the channel is published and its main tree has no changed nodes."""
        base_channel = Channel.objects.get(pk=channel_id)
        if not base_channel.published:
            return False
        return not base_channel.main_tree.get_family().filter(changed=True).exists()
Loading