From c9231fecb84c86e8dced10766674ec5cf5fe9117 Mon Sep 17 00:00:00 2001
From: ozer550
Date: Mon, 5 Aug 2024 17:34:56 +0530
Subject: [PATCH] Add rectifying migration command

---
 Makefile                                      |   2 +-
 ...ify_incorrect_contentnode_source_fields.py | 155 ++++++++++++++++++
 2 files changed, 156 insertions(+), 1 deletion(-)
 create mode 100644 contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py

diff --git a/Makefile b/Makefile
index 051053bab3..619fcee41e 100644
--- a/Makefile
+++ b/Makefile
@@ -38,7 +38,7 @@ migrate:
 # 4) Remove the management command from this `deploy-migrate` recipe
 # 5) Repeat!
 deploy-migrate:
-	echo "Nothing to do here!"
+	python contentcuration/manage.py rectify_incorrect_contentnode_source_fields
 
 contentnodegc:
 	python contentcuration/manage.py garbage_collect
diff --git a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py
new file mode 100644
index 0000000000..9c454dc798
--- /dev/null
+++ b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py
@@ -0,0 +1,155 @@
+import logging
+
+from django.core.management.base import BaseCommand
+from django.db.models import Exists
+from django.db.models import F
+from django.db.models import OuterRef
+from django.db.models import Q
+from django.db.models import Value
+from django.db.models.functions import Coalesce
+from django_cte import With
+
+from contentcuration.models import Channel
+from contentcuration.models import ContentNode
+
+logger = logging.getLogger(__file__)
+
+# Fields that an imported node must share with its original source node.
+SOURCE_FIELDS = ("author", "provider", "aggregator", "license_id")
+
+
+class Command(BaseCommand):
+    """
+    Copy author, provider, aggregator and license from the original source node
+    onto published imported nodes whose values have diverged from it.
+    """
+
+    def handle(self, *args, **options):
+        # Main trees of every live channel that has been published at least once;
+        # only nodes in these trees are candidates for rectification.
+        main_trees_cte = With(
+            (
+                Channel.objects.filter(
+                    deleted=False, last_published__isnull=False, main_tree__isnull=False
+                )
+                .annotate(channel_id=F("id"))
+                .values("channel_id", tree_id=F("main_tree__tree_id"))
+            ),
+            name="main_trees",
+        )
+
+        # Main trees of all channels (including deleted and unpublished ones),
+        # in which original source nodes are looked up.
+        source_original_node_cte = With(
+            (
+                Channel.objects.filter(main_tree__isnull=False)
+                .annotate(channel_id=F("id"))
+                .values("channel_id", tree_id=F("main_tree__tree_id"))
+            ),
+            name="source_original_nodes",
+        )
+
+        nodes = main_trees_cte.join(
+            ContentNode.objects.filter(published=True),
+            tree_id=main_trees_cte.col.tree_id,
+        ).annotate(channel_id=main_trees_cte.col.channel_id)
+
+        parent_nodes = source_original_node_cte.join(
+            ContentNode.objects.all(), tree_id=source_original_node_cte.col.tree_id
+        ).annotate(channel_id=source_original_node_cte.col.channel_id)
+
+        # Correlated subquery: nodes in another tree whose node_id is the outer
+        # node's original_source_node_id. NULLs are coalesced so that they can be
+        # compared with plain (in)equality.
+        original_source_nodes = (
+            parent_nodes.with_cte(source_original_node_cte)
+            .filter(
+                node_id=OuterRef("original_source_node_id"),
+            )
+            .exclude(
+                tree_id=OuterRef("tree_id"),
+            )
+            .annotate(
+                coalesced_provider=Coalesce("provider", Value("")),
+                coalesced_author=Coalesce("author", Value("")),
+                coalesced_aggregator=Coalesce("aggregator", Value("")),
+                coalesced_license_id=Coalesce("license_id", -1),
+            )
+        )
+
+        # Published nodes that were imported from somewhere else.
+        diff = (
+            nodes.with_cte(main_trees_cte).filter(
+                source_node_id__isnull=False,
+                original_source_node_id__isnull=False,
+                published=True,
+            )
+        ).annotate(
+            coalesced_provider=Coalesce("provider", Value("")),
+            coalesced_author=Coalesce("author", Value("")),
+            coalesced_aggregator=Coalesce("aggregator", Value("")),
+            coalesced_license_id=Coalesce("license_id", -1),
+        )
+
+        # Keep only nodes for which a matching source node differs in at least
+        # one of the source fields.
+        diff_combined = diff.annotate(
+            original_source_node_f_changed=Exists(
+                original_source_nodes.filter(
+                    ~Q(coalesced_provider=OuterRef("coalesced_provider"))
+                    | ~Q(coalesced_author=OuterRef("coalesced_author"))
+                    | ~Q(coalesced_aggregator=OuterRef("coalesced_aggregator"))
+                    | ~Q(coalesced_license_id=OuterRef("coalesced_license_id"))
+                )
+            )
+        ).filter(original_source_node_f_changed=True)
+
+        final_nodes = diff_combined.values(
+            "id",
+            "original_channel_id",
+            "original_source_node_id",
+            "coalesced_provider",
+            "coalesced_author",
+            "coalesced_aggregator",
+            "coalesced_license_id",
+            "original_source_node_f_changed",
+        ).order_by()
+
+        updated_count = 0
+        for item in final_nodes:
+            original_source_channel_id = item["original_channel_id"]
+            if original_source_channel_id is None:
+                continue
+
+            # first() rather than get(): the original channel may no longer
+            # exist, and that must not abort the whole run.
+            tree_id = (
+                Channel.objects.filter(pk=original_source_channel_id)
+                .values_list("main_tree__tree_id", flat=True)
+                .first()
+            )
+            if tree_id is None:
+                continue
+
+            original_source_node = ContentNode.objects.filter(
+                tree_id=tree_id, node_id=item["original_source_node_id"]
+            ).first()
+            # Fetch one instance and keep it: indexing a queryset (qs[0]) runs a
+            # new query and returns a fresh object each time, so attribute
+            # changes made through qs[0] would never reach save().
+            base_node = ContentNode.objects.filter(pk=item["id"]).first()
+            if original_source_node is None or base_node is None:
+                continue
+
+            changed = False
+            for field in SOURCE_FIELDS:
+                source_value = getattr(original_source_node, field)
+                if getattr(base_node, field) != source_value:
+                    setattr(base_node, field, source_value)
+                    changed = True
+
+            if changed:
+                base_node.save()
+                updated_count += 1
+
+        logger.info("Rectified source fields on %d content nodes", updated_count)