From c9231fecb84c86e8dced10766674ec5cf5fe9117 Mon Sep 17 00:00:00 2001 From: ozer550 Date: Mon, 5 Aug 2024 17:34:56 +0530 Subject: [PATCH 01/10] Add rectifying migration command --- Makefile | 2 +- ...ify_incorrect_contentnode_source_fields.py | 163 ++++++++++++++++++ 2 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py diff --git a/Makefile b/Makefile index 051053bab3..619fcee41e 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ migrate: # 4) Remove the management command from this `deploy-migrate` recipe # 5) Repeat! deploy-migrate: - echo "Nothing to do here!" + python contentcuration/manage.py rectify_incorrect_contentnode_source_fields contentnodegc: python contentcuration/manage.py garbage_collect diff --git a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py new file mode 100644 index 0000000000..9c454dc798 --- /dev/null +++ b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py @@ -0,0 +1,163 @@ +import logging + +from django.core.management.base import BaseCommand +from django.db.models import Exists +from django.db.models import F +from django.db.models import OuterRef +from django.db.models import Q +from django.db.models import Value +from django.db.models.functions import Coalesce +from django_cte import With + +from contentcuration.models import Channel +from contentcuration.models import ContentNode + +logger = logging.getLogger(__file__) + + +class Command(BaseCommand): + def handle(self, *args, **options): + main_trees_cte = With( + ( + Channel.objects.filter( + deleted=False, last_published__isnull=False, main_tree__isnull=False + ) + .annotate(channel_id=F("id")) + .values("channel_id", tree_id=F("main_tree__tree_id")) + ), + name="main_trees", + ) + + source_original_node_cte = With( + ( + Channel.objects.filter(main_tree__isnull=False) + .annotate(channel_id=F("id")) + .values("channel_id", tree_id=F("main_tree__tree_id")) + ), + name="source_original_nodes", + ) + + nodes = main_trees_cte.join( + ContentNode.objects.filter(published=True), + tree_id=main_trees_cte.col.tree_id, + ).annotate(channel_id=main_trees_cte.col.channel_id) + + parent_nodes = source_original_node_cte.join( + ContentNode.objects.all(), tree_id=source_original_node_cte.col.tree_id + ).annotate(channel_id=source_original_node_cte.col.channel_id) + + original_source_nodes = ( + parent_nodes.with_cte(source_original_node_cte) + .filter( + node_id=OuterRef("original_source_node_id"), + ) + .exclude( + tree_id=OuterRef("tree_id"), + ) + .annotate( + coalesced_provider=Coalesce("provider", Value("")), + coalesced_author=Coalesce("author", Value("")), + coalesced_aggregator=Coalesce("aggregator", Value("")), + coalesced_license_id=Coalesce("license_id", -1), + ) + ) + + diff = ( + nodes.with_cte(main_trees_cte).filter( + source_node_id__isnull=False, + original_source_node_id__isnull=False, + published=True, + ) + ).annotate( + coalesced_provider=Coalesce("provider", Value("")), + coalesced_author=Coalesce("author", Value("")), + coalesced_aggregator=Coalesce("aggregator", Value("")), + coalesced_license_id=Coalesce("license_id", -1), + ) + + diff_combined = diff.annotate( + original_source_node_f_changed=Exists( + original_source_nodes.filter( + ~Q(coalesced_provider=OuterRef("coalesced_provider")) + | ~Q(coalesced_author=OuterRef("coalesced_author")) + | ~Q(coalesced_aggregator=OuterRef("coalesced_aggregator")) + | ~Q(coalesced_license_id=OuterRef("coalesced_license_id")) + ) + ) + ).filter(original_source_node_f_changed=True) + + final_nodes = diff_combined.values( + "id", + "original_channel_id", + "original_source_node_id", + "coalesced_provider", + "coalesced_author", + "coalesced_aggregator", + "coalesced_license_id", + "original_source_node_f_changed", + ).order_by() + + # diff_provider = diff.annotate(coalesced_provider=Coalesce('provider', Value(''))).annotate( + # original_source_node_f_changed=Exists(original_source_nodes.annotate(coalesced_provider=Coalesce('provider', Value(''))).exclude(coalesced_provider=OuterRef('coalesced_provider'))) + # ).filter( + # original_source_node_f_changed=True + # ) + + # diff_author = diff.annotate(coalesced_author=Coalesce('author', Value(''))).annotate( + # original_source_node_f_changed=Exists(original_source_nodes.annotate(coalesced_author=Coalesce('author', Value(''))).exclude(coalesced_author=OuterRef('coalesced_author'))) + # ).filter( + # original_source_node_f_changed=True + # ) + + # diff_aggregator = diff.annotate(coalesced_aggregator=Coalesce('aggregator', Value(''))).annotate( + # original_source_node_f_changed=Exists(original_source_nodes.annotate(coalesced_aggregator=Coalesce('aggregator', Value(''))).exclude(coalesced_aggregator=OuterRef('coalesced_aggregator'))) + # ).filter( + # original_source_node_f_changed=True + # ) + + # diff_lic = diff.annotate(coalesced_license_id=Coalesce('license_id', -1)).annotate( + # original_source_node_f_changed=Exists(original_source_nodes.annotate(coalesced_license_id=Coalesce('license_id', -1)).exclude(coalesced_license_id=OuterRef('coalesced_license_id'))) + # ).filter( + # original_source_node_f_changed=True + # ) + + # final_nodes_author = diff_author.values('id', 'original_channel_id', 'original_source_node_id', 'source_node_f_changed', 'original_source_node_f_changed').order_by() + + # final_nodes_provider = diff_provider.values('id', 'original_channel_id', 'original_source_node_id', 'source_node_f_changed', 'original_source_node_f_changed').order_by() + + # final_nodes_aggregator = diff_aggregator.values('id', 'original_channel_id', 'original_source_node_id', 'coalesced_aggregator','source_node_f_changed', 'original_source_node_f_changed').order_by() + + # final_nodes_license = diff_lic.values('id', 'original_channel_id', 'original_source_node_id', 'coalesced_license_id','source_node_f_changed', 'original_source_node_f_changed').order_by() + + # final_nodes = final_nodes_provider + final_nodes_aggregator + final_nodes_license + final_nodes_author + + for item in final_nodes: + base_node = ContentNode.objects.filter(pk=item["id"]) + + original_source_channel_id = item["original_channel_id"] + original_source_node_id = item["original_source_node_id"] + tree_id = ( + Channel.objects.filter(pk=original_source_channel_id) + .values_list("main_tree__tree_id", flat=True) + .get() + ) + original_source_node = ContentNode.objects.filter( + tree_id=tree_id, node_id=original_source_node_id + ) + + if original_source_channel_id is not None and original_source_node.exists(): + ## original source node exists and its source fields dont match + ## update the base node + if base_node[0].author != original_source_node[0].author: + base_node[0].author = original_source_node[0].author + if base_node[0].provider != original_source_node[0].provider: + base_node[0].provider = original_source_node[0].provider + if base_node[0].aggregator != original_source_node[0].aggregator: + base_node[0].aggregator = original_source_node[0].aggregator + if base_node[0].license != original_source_node[0].license: + base_node[0].license = original_source_node[0].license + + base_node[0].save() + + else: + continue From a8adee4c9508925001574eda8df2a6c455402127 Mon Sep 17 00:00:00 2001 From: ozer550 Date: Thu, 22 Aug 2024 11:46:35 +0530 Subject: [PATCH 02/10] change filter method to get --- ...ify_incorrect_contentnode_source_fields.py | 40 ++----------------- 1 file changed, 3 insertions(+), 37 deletions(-) diff --git a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py index 9c454dc798..fc22aebe03 100644 --- a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py +++ b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py @@ -97,42 +97,8 @@ def handle(self, *args, **options): "original_source_node_f_changed", ).order_by() - # diff_provider = diff.annotate(coalesced_provider=Coalesce('provider', Value(''))).annotate( - # original_source_node_f_changed=Exists(original_source_nodes.annotate(coalesced_provider=Coalesce('provider', Value(''))).exclude(coalesced_provider=OuterRef('coalesced_provider'))) - # ).filter( - # original_source_node_f_changed=True - # ) - - # diff_author = diff.annotate(coalesced_author=Coalesce('author', Value(''))).annotate( - # original_source_node_f_changed=Exists(original_source_nodes.annotate(coalesced_author=Coalesce('author', Value(''))).exclude(coalesced_author=OuterRef('coalesced_author'))) - # ).filter( - # original_source_node_f_changed=True - # ) - - # diff_aggregator = diff.annotate(coalesced_aggregator=Coalesce('aggregator', Value(''))).annotate( - # original_source_node_f_changed=Exists(original_source_nodes.annotate(coalesced_aggregator=Coalesce('aggregator', Value(''))).exclude(coalesced_aggregator=OuterRef('coalesced_aggregator'))) - # ).filter( - # original_source_node_f_changed=True - # ) - - # diff_lic = diff.annotate(coalesced_license_id=Coalesce('license_id', -1)).annotate( - # original_source_node_f_changed=Exists(original_source_nodes.annotate(coalesced_license_id=Coalesce('license_id', -1)).exclude(coalesced_license_id=OuterRef('coalesced_license_id'))) - # ).filter( - # original_source_node_f_changed=True - # ) - - # final_nodes_author = diff_author.values('id', 'original_channel_id', 'original_source_node_id', 'source_node_f_changed', 'original_source_node_f_changed').order_by() - - # final_nodes_provider = diff_provider.values('id', 'original_channel_id', 'original_source_node_id', 'source_node_f_changed', 'original_source_node_f_changed').order_by() - - # final_nodes_aggregator = diff_aggregator.values('id', 'original_channel_id', 'original_source_node_id', 'coalesced_aggregator','source_node_f_changed', 'original_source_node_f_changed').order_by() - - # final_nodes_license = diff_lic.values('id', 'original_channel_id', 'original_source_node_id', 'coalesced_license_id','source_node_f_changed', 'original_source_node_f_changed').order_by() - - # final_nodes = final_nodes_provider + final_nodes_aggregator + final_nodes_license + final_nodes_author - for item in final_nodes: - base_node = ContentNode.objects.filter(pk=item["id"]) + base_node = ContentNode.objects.get(pk=item["id"]) original_source_channel_id = item["original_channel_id"] original_source_node_id = item["original_source_node_id"] @@ -146,8 +112,8 @@ def handle(self, *args, **options): ) if original_source_channel_id is not None and original_source_node.exists(): - ## original source node exists and its source fields dont match - ## update the base node + # original source node exists and its source fields dont match + # update the base node if base_node[0].author != original_source_node[0].author: base_node[0].author = original_source_node[0].author if base_node[0].provider != original_source_node[0].provider: From 1d867c58cf7e797cb70cca6c5372a749c3c733b6 Mon Sep 17 00:00:00 2001 From: ozer550 Date: Tue, 10 Sep 2024 15:51:06 +0530 Subject: [PATCH 03/10] add filter based on last modified --- ...ctify_incorrect_contentnode_source_fields.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py index fc22aebe03..486a343c8c 100644 --- a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py +++ b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py @@ -1,3 +1,4 @@ +import datetime import logging from django.core.management.base import BaseCommand @@ -7,6 +8,7 @@ from django.db.models import Q from django.db.models import Value from django.db.models.functions import Coalesce +from django.utils import timezone from django_cte import With from contentcuration.models import Channel @@ -17,6 +19,11 @@ class Command(BaseCommand): def handle(self, *args, **options): + # Filter Date : July 9, 2023 + # Link https://github.com/learningequality/studio/pull/4189 + # The PR date for the frontend change is July 10, 2023 + # we would set the filter day one day back just to be sure + filter_date = datetime.datetime(2023, 7, 9, tzinfo=timezone.utc) main_trees_cte = With( ( Channel.objects.filter( @@ -61,12 +68,16 @@ def handle(self, *args, **options): coalesced_license_id=Coalesce("license_id", -1), ) ) - + # We filter out according to last_modified date before this PR + # https://github.com/learningequality/studio/pull/4189 + # As we want to lossen up the public=True Filter and open the + # migration for all the nodes even if they are not published diff = ( nodes.with_cte(main_trees_cte).filter( source_node_id__isnull=False, original_source_node_id__isnull=False, - published=True, + modified__lt=filter_date + # published=True, ) ).annotate( coalesced_provider=Coalesce("provider", Value("")), @@ -122,8 +133,6 @@ def handle(self, *args, **options): base_node[0].aggregator = original_source_node[0].aggregator if base_node[0].license != original_source_node[0].license: base_node[0].license = original_source_node[0].license - base_node[0].save() - else: continue From 5641e146c9c5e5e6781571566d96d3832ee9ec53 Mon Sep 17 00:00:00 2001 From: ozer550 Date: Fri, 13 Sep 2024 15:39:56 +0530 Subject: [PATCH 04/10] refine the migrations --- ...ify_incorrect_contentnode_source_fields.py | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py index 486a343c8c..769c4f4618 100644 --- a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py +++ b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py @@ -27,34 +27,21 @@ def handle(self, *args, **options): main_trees_cte = With( ( Channel.objects.filter( - deleted=False, last_published__isnull=False, main_tree__isnull=False + main_tree__isnull=False ) .annotate(channel_id=F("id")) - .values("channel_id", tree_id=F("main_tree__tree_id")) + .values("channel_id", "deleted", tree_id=F("main_tree__tree_id")) ), name="main_trees", ) - source_original_node_cte = With( - ( - Channel.objects.filter(main_tree__isnull=False) - .annotate(channel_id=F("id")) - .values("channel_id", tree_id=F("main_tree__tree_id")) - ), - name="source_original_nodes", - ) - nodes = main_trees_cte.join( - ContentNode.objects.filter(published=True), + ContentNode.objects.all(), tree_id=main_trees_cte.col.tree_id, - ).annotate(channel_id=main_trees_cte.col.channel_id) - - parent_nodes = source_original_node_cte.join( - ContentNode.objects.all(), tree_id=source_original_node_cte.col.tree_id - ).annotate(channel_id=source_original_node_cte.col.channel_id) + ).annotate(channel_id=main_trees_cte.col.channel_id, deletd=main_trees_cte.col.deleted) original_source_nodes = ( - parent_nodes.with_cte(source_original_node_cte) + nodes.with_cte(main_trees_cte) .filter( node_id=OuterRef("original_source_node_id"), ) @@ -74,6 +61,7 @@ def handle(self, *args, **options): # migration for all the nodes even if they are not published diff = ( nodes.with_cte(main_trees_cte).filter( + deleted=False, # we dont want the channel to be deleted or else we are fixing ghost nodes source_node_id__isnull=False, original_source_node_id__isnull=False, modified__lt=filter_date @@ -122,6 +110,10 @@ def handle(self, *args, **options): tree_id=tree_id, node_id=original_source_node_id ) + base_channel = Channel.objects.get(pk=item['channel_id']) + + to_be_republished = not (base_channel.main_tree.get_family().filter(changed=True).exists()) + if original_source_channel_id is not None and original_source_node.exists(): # original source node exists and its source fields dont match # update the base node @@ -134,5 +126,9 @@ def handle(self, *args, **options): if base_node[0].license != original_source_node[0].license: base_node[0].license = original_source_node[0].license base_node[0].save() + + if to_be_republished and base_channel.public: + # we would repbulsih the channel + pass else: continue From c02201be8436034aa47526c0f6b68405b6c212f5 Mon Sep 17 00:00:00 2001 From: ozer550 Date: Fri, 20 Sep 2024 16:37:35 +0530 Subject: [PATCH 05/10] add tests --- ..._rectify_source_field_migraiton_command.py | 186 ++++++++++++++++++ ...ify_incorrect_contentnode_source_fields.py | 28 +-- 2 files changed, 201 insertions(+), 13 deletions(-) create mode 100644 contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py diff --git a/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py b/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py new file mode 100644 index 0000000000..9791801941 --- /dev/null +++ b/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py @@ -0,0 +1,186 @@ +# DELETE THIS FILE AFTER RUNNING THE MIGRATIONSSS +import datetime +import uuid + +from django.db.models import Exists +from django.db.models import F +from django.db.models import OuterRef +from django.db.models import Q +from django.db.models import Value +from django.db.models.functions import Coalesce +from django.utils import timezone +from django_cte import With +from le_utils.constants import content_kinds + +from contentcuration.models import Channel +from contentcuration.models import ContentNode +from contentcuration.models import License +from contentcuration.tests import testdata +from contentcuration.tests.base import StudioAPITestCase +from contentcuration.utils.publish import publish_channel + + +class TestRectifyMigrationCommand(StudioAPITestCase): + + @classmethod + def setUpClass(cls): + super(TestRectifyMigrationCommand, cls).setUpClass() + + def tearDown(self): + super(TestRectifyMigrationCommand, self).tearDown() + + def setUp(self): + super(TestRectifyMigrationCommand, self).setUp() + self.original_channel = testdata.channel() + self.license_original = License.objects.all()[0] + self.original_contentnode = ContentNode.objects.create( + id=uuid.uuid4().hex, + title="Original Node", + parent=self.original_channel.main_tree, + license=self.license_original, + original_channel_id=None, + source_channel_id=None, + ) + self.user = testdata.user() + self.original_channel.editors.add(self.user) + self.client.force_authenticate(user=self.user) + + def run_migrations(self): + filter_date = datetime.datetime(2023, 7, 9, tzinfo=timezone.utc) + main_trees_cte = With( + ( + Channel.objects.filter( + main_tree__isnull=False + ) + .annotate(channel_id=F("id")) + .values("channel_id", "deleted", tree_id=F("main_tree__tree_id")) + ), + name="main_trees", + ) + + nodes = main_trees_cte.join( + ContentNode.objects.all(), + tree_id=main_trees_cte.col.tree_id, + ).annotate(channel_id=main_trees_cte.col.channel_id, deleted=main_trees_cte.col.deleted) + + original_source_nodes = ( + nodes.with_cte(main_trees_cte) + .filter( + node_id=OuterRef("original_source_node_id"), + ) + .exclude( + tree_id=OuterRef("tree_id"), + ) + .annotate( + coalesced_provider=Coalesce("provider", Value("")), + coalesced_author=Coalesce("author", Value("")), + coalesced_aggregator=Coalesce("aggregator", Value("")), + coalesced_license_id=Coalesce("license_id", -1), + ) + ) + # We filter out according to last_modified date before this PR + # https://github.com/learningequality/studio/pull/4189 + # As we want to lossen up the public=True Filter and open the + # migration for all the nodes even if they are not published + diff = ( + nodes.with_cte(main_trees_cte).filter( + deleted=False, # we dont want the channel to be deleted or else we are fixing ghost nodes + source_node_id__isnull=False, + original_source_node_id__isnull=False, + modified__lt=filter_date + # published=True, + ) + ).annotate( + coalesced_provider=Coalesce("provider", Value("")), + coalesced_author=Coalesce("author", Value("")), + coalesced_aggregator=Coalesce("aggregator", Value("")), + coalesced_license_id=Coalesce("license_id", -1), + ) + + diff_combined = diff.annotate( + original_source_node_f_changed=Exists( + original_source_nodes.filter( + ~Q(coalesced_provider=OuterRef("coalesced_provider")) + | ~Q(coalesced_author=OuterRef("coalesced_author")) + | ~Q(coalesced_aggregator=OuterRef("coalesced_aggregator")) + | ~Q(coalesced_license_id=OuterRef("coalesced_license_id")) + ) + ) + ).filter(original_source_node_f_changed=True) + final_nodes = diff_combined.values( + "id", + "channel_id", + "original_channel_id", + "original_source_node_id", + "coalesced_provider", + "coalesced_author", + "coalesced_aggregator", + "coalesced_license_id", + "original_source_node_f_changed", + ).order_by() + + for item in final_nodes: + base_node = ContentNode.objects.get(pk=item["id"]) + original_source_channel_id = item["original_channel_id"] + original_source_node_id = item["original_source_node_id"] + tree_id = ( + Channel.objects.filter(pk=original_source_channel_id) + .values_list("main_tree__tree_id", flat=True) + .get() + ) + original_source_node = ContentNode.objects.filter( + tree_id=tree_id, node_id=original_source_node_id + ) + + base_channel = Channel.objects.get(pk=item['channel_id']) + + to_be_republished = not (base_channel.main_tree.get_family().filter(changed=True).exists()) + print("onga bonga 2") + print(base_channel.main_tree.get_family().filter(changed=True)) + print(to_be_republished) + if original_source_channel_id is not None and original_source_node.exists(): + # original source node exists and its source fields dont match + # update the base node + if base_node.author != original_source_node[0].author: + base_node.author = original_source_node[0].author + if base_node.provider != original_source_node[0].provider: + base_node.provider = original_source_node[0].provider + if base_node.aggregator != original_source_node[0].aggregator: + base_node.aggregator = original_source_node[0].aggregator + if base_node.license != original_source_node[0].license: + base_node.license = original_source_node[0].license + + base_node.save() + + if to_be_republished and base_channel.published: + # we would repbulish the channel + print("publishingg the channel!!") + publish_channel(self.user.id, base_channel.id) + else: + continue + + def test_two_node_case(self): + base_channel = testdata.channel() + license_changed = License.objects.all()[1] + base_node = ContentNode.objects.create( + id=uuid.uuid4().hex, + title="base contentnode", + parent=base_channel.main_tree, + kind_id=content_kinds.VIDEO, + license=license_changed, + original_channel_id=self.original_channel.id, + source_channel_id=self.original_channel.id, + source_node_id=self.original_contentnode.node_id, + original_source_node_id=self.original_contentnode.node_id, + ) + + ContentNode.objects.filter(pk=base_node.pk).update( + modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) + ) + # print("onga bongaa") + # print(base_node.changed) + base_node.changed = False + self.run_migrations() + base_node.refresh_from_db() + self.assertEqual(base_node.license, self.original_contentnode.license) + self.assertEqual(base_channel.published, True) diff --git a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py index 769c4f4618..d4c1298b7b 100644 --- a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py +++ b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py @@ -13,6 +13,7 @@ from contentcuration.models import Channel from contentcuration.models import ContentNode +from contentcuration.utils.publish import publish_channel logger = logging.getLogger(__file__) @@ -38,7 +39,7 @@ def handle(self, *args, **options): nodes = main_trees_cte.join( ContentNode.objects.all(), tree_id=main_trees_cte.col.tree_id, - ).annotate(channel_id=main_trees_cte.col.channel_id, deletd=main_trees_cte.col.deleted) + ).annotate(channel_id=main_trees_cte.col.channel_id, deleted=main_trees_cte.col.deleted) original_source_nodes = ( nodes.with_cte(main_trees_cte) @@ -87,6 +88,7 @@ def handle(self, *args, **options): final_nodes = diff_combined.values( "id", + "channel_id", "original_channel_id", "original_source_node_id", "coalesced_provider", @@ -117,18 +119,18 @@ def handle(self, *args, **options): if original_source_channel_id is not None and original_source_node.exists(): # original source node exists and its source fields dont match # update the base node - if base_node[0].author != original_source_node[0].author: - base_node[0].author = original_source_node[0].author - if base_node[0].provider != original_source_node[0].provider: - base_node[0].provider = original_source_node[0].provider - if base_node[0].aggregator != original_source_node[0].aggregator: - base_node[0].aggregator = original_source_node[0].aggregator - if base_node[0].license != original_source_node[0].license: - base_node[0].license = original_source_node[0].license - base_node[0].save() + if base_node.author != original_source_node[0].author: + base_node.author = original_source_node[0].author + if base_node.provider != original_source_node[0].provider: + base_node.provider = original_source_node[0].provider + if base_node.aggregator != original_source_node[0].aggregator: + base_node.aggregator = original_source_node[0].aggregator + if base_node.license != original_source_node[0].license: + base_node.license = original_source_node[0].license + base_node.save() - if to_be_republished and base_channel.public: - # we would repbulsih the channel - pass + if to_be_republished and base_channel.published: + # we would repbulish the channel + publish_channel("some_id", base_channel.id) else: continue From 71ce33b1260cdc0f09273df626d12a8ae36107ed Mon Sep 17 00:00:00 2001 From: ozer550 Date: Mon, 23 Sep 2024 16:54:06 +0530 Subject: [PATCH 06/10] update tests --- ..._rectify_source_field_migraiton_command.py | 33 ++++++++++++------- ...ify_incorrect_contentnode_source_fields.py | 3 +- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py b/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py index 9791801941..53207a34a3 100644 --- a/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py +++ b/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py @@ -40,6 +40,7 @@ def setUp(self): license=self.license_original, original_channel_id=None, source_channel_id=None, + author="old author hehehe" ) self.user = testdata.user() self.original_channel.editors.add(self.user) @@ -97,6 +98,8 @@ def run_migrations(self): coalesced_license_id=Coalesce("license_id", -1), ) + print(diff) + diff_combined = diff.annotate( original_source_node_f_changed=Exists( original_source_nodes.filter( @@ -133,11 +136,8 @@ def run_migrations(self): ) base_channel = Channel.objects.get(pk=item['channel_id']) - to_be_republished = not (base_channel.main_tree.get_family().filter(changed=True).exists()) - print("onga bonga 2") - print(base_channel.main_tree.get_family().filter(changed=True)) - print(to_be_republished) + print("value of to be repblushied", to_be_republished) if original_source_channel_id is not None and original_source_node.exists(): # original source node exists and its source fields dont match # update the base node @@ -151,8 +151,7 @@ def run_migrations(self): base_node.license = original_source_node[0].license base_node.save() - - if to_be_republished and base_channel.published: + if to_be_republished and base_channel.public: # we would repbulish the channel print("publishingg the channel!!") publish_channel(self.user.id, base_channel.id) @@ -161,6 +160,8 @@ def run_migrations(self): def test_two_node_case(self): base_channel = testdata.channel() + base_channel.public = True + base_channel.save() license_changed = License.objects.all()[1] base_node = ContentNode.objects.create( id=uuid.uuid4().hex, @@ -172,15 +173,23 @@ def test_two_node_case(self): source_channel_id=self.original_channel.id, source_node_id=self.original_contentnode.node_id, original_source_node_id=self.original_contentnode.node_id, + author="new herhe" ) + # publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) + for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): + node.changed = False + node.save() ContentNode.objects.filter(pk=base_node.pk).update( modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) ) - # print("onga bongaa") - # print(base_node.changed) - base_node.changed = False + + # base_channel.refresh_from_db() + # print(base_channel.main_tree.get_family().filter(changed=True)) + self.run_migrations() - base_node.refresh_from_db() - self.assertEqual(base_node.license, self.original_contentnode.license) - self.assertEqual(base_channel.published, True) + updated_base_node = ContentNode.objects.get(pk=base_node.pk) + self.assertEqual(updated_base_node.license, self.original_contentnode.license) + self.assertEqual(updated_base_node.author, self.original_contentnode.author) + self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) + # assert(3==2) diff --git a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py index d4c1298b7b..9df74f9f8b 100644 --- a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py +++ b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py @@ -128,8 +128,7 @@ def handle(self, *args, **options): if base_node.license != original_source_node[0].license: base_node.license = original_source_node[0].license base_node.save() - - if to_be_republished and base_channel.published: + if to_be_republished and base_channel.public: # we would repbulish the channel publish_channel("some_id", base_channel.id) else: From 37b3181c85b49642f60efbbff72bfd677b422bfc Mon Sep 17 00:00:00 2001 From: ozer550 Date: Tue, 24 Sep 2024 16:31:30 +0530 Subject: [PATCH 07/10] add implicit and explicit tests --- ..._rectify_source_field_migraiton_command.py | 241 +++++++++--------- ...ify_incorrect_contentnode_source_fields.py | 30 ++- 2 files changed, 140 insertions(+), 131 deletions(-) diff --git a/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py b/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py index 53207a34a3..a563fb712a 100644 --- a/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py +++ b/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py @@ -2,14 +2,8 @@ import datetime import uuid -from django.db.models import Exists -from django.db.models import F -from django.db.models import OuterRef -from django.db.models import Q -from django.db.models import Value -from django.db.models.functions import Coalesce +from django.core.management import call_command from django.utils import timezone -from django_cte import With from le_utils.constants import content_kinds from contentcuration.models import Channel @@ -40,156 +34,149 @@ def setUp(self): license=self.license_original, original_channel_id=None, source_channel_id=None, - author="old author hehehe" + author="old author" ) self.user = testdata.user() self.original_channel.editors.add(self.user) self.client.force_authenticate(user=self.user) - def run_migrations(self): - filter_date = datetime.datetime(2023, 7, 9, tzinfo=timezone.utc) - main_trees_cte = With( - ( - Channel.objects.filter( - main_tree__isnull=False - ) - .annotate(channel_id=F("id")) - .values("channel_id", "deleted", tree_id=F("main_tree__tree_id")) - ), - name="main_trees", - ) - - nodes = main_trees_cte.join( - ContentNode.objects.all(), - tree_id=main_trees_cte.col.tree_id, - ).annotate(channel_id=main_trees_cte.col.channel_id, deleted=main_trees_cte.col.deleted) - - original_source_nodes = ( - nodes.with_cte(main_trees_cte) - .filter( - node_id=OuterRef("original_source_node_id"), - ) - .exclude( - tree_id=OuterRef("tree_id"), - ) - .annotate( - coalesced_provider=Coalesce("provider", Value("")), - coalesced_author=Coalesce("author", Value("")), - coalesced_aggregator=Coalesce("aggregator", Value("")), - coalesced_license_id=Coalesce("license_id", -1), - ) - ) - # We filter out according to last_modified date before this PR - # https://github.com/learningequality/studio/pull/4189 - # As we want to lossen up the public=True Filter and open the - # migration for all the nodes even if they are not published - diff = ( - nodes.with_cte(main_trees_cte).filter( - deleted=False, # we dont want the channel to be deleted or else we are fixing ghost nodes - source_node_id__isnull=False, - original_source_node_id__isnull=False, - modified__lt=filter_date - # published=True, - ) - ).annotate( - coalesced_provider=Coalesce("provider", Value("")), - coalesced_author=Coalesce("author", Value("")), - coalesced_aggregator=Coalesce("aggregator", Value("")), - coalesced_license_id=Coalesce("license_id", -1), - ) - - print(diff) - - diff_combined = diff.annotate( - original_source_node_f_changed=Exists( - original_source_nodes.filter( - ~Q(coalesced_provider=OuterRef("coalesced_provider")) - | ~Q(coalesced_author=OuterRef("coalesced_author")) - | ~Q(coalesced_aggregator=OuterRef("coalesced_aggregator")) - | ~Q(coalesced_license_id=OuterRef("coalesced_license_id")) - ) - ) - ).filter(original_source_node_f_changed=True) - final_nodes = diff_combined.values( - "id", - "channel_id", - "original_channel_id", - "original_source_node_id", - "coalesced_provider", - "coalesced_author", - "coalesced_aggregator", - "coalesced_license_id", - "original_source_node_f_changed", - ).order_by() - - for item in final_nodes: - base_node = ContentNode.objects.get(pk=item["id"]) - original_source_channel_id = item["original_channel_id"] - original_source_node_id = item["original_source_node_id"] - tree_id = ( - Channel.objects.filter(pk=original_source_channel_id) - .values_list("main_tree__tree_id", flat=True) - .get() - ) - original_source_node = ContentNode.objects.filter( - tree_id=tree_id, node_id=original_source_node_id - ) - - base_channel = Channel.objects.get(pk=item['channel_id']) - to_be_republished = not (base_channel.main_tree.get_family().filter(changed=True).exists()) - print("value of to be repblushied", to_be_republished) - if original_source_channel_id is not None and original_source_node.exists(): - # original source node exists and its source fields dont match - # update the base node - if base_node.author != original_source_node[0].author: - base_node.author = original_source_node[0].author - if base_node.provider != original_source_node[0].provider: - base_node.provider = original_source_node[0].provider - if base_node.aggregator != original_source_node[0].aggregator: - base_node.aggregator = original_source_node[0].aggregator - if base_node.license != original_source_node[0].license: - base_node.license = original_source_node[0].license - - base_node.save() - if to_be_republished and base_channel.public: - # we would repbulish the channel - print("publishingg the channel!!") - publish_channel(self.user.id, base_channel.id) - else: - continue - - def test_two_node_case(self): + def create_base_channel_and_contentnode(self, source_contentnode, source_channel): base_channel = testdata.channel() base_channel.public = True base_channel.save() - license_changed = License.objects.all()[1] + license_changed = License.objects.all()[2] base_node = ContentNode.objects.create( id=uuid.uuid4().hex, title="base contentnode", parent=base_channel.main_tree, kind_id=content_kinds.VIDEO, + original_channel_id=self.original_channel.id, + original_source_node_id=self.original_contentnode.node_id, + source_channel_id=source_channel.id, + source_node_id=source_contentnode.node_id, + author="source author", + license=license_changed, + ) + return base_node, base_channel + + def create_source_channel_and_contentnode(self): + source_channel = testdata.channel() + source_channel.public = True + source_channel.save() + license_changed = License.objects.all()[1] + source_node = ContentNode.objects.create( + id=uuid.uuid4().hex, + title="base contentnode", + parent=source_channel.main_tree, + kind_id=content_kinds.VIDEO, license=license_changed, original_channel_id=self.original_channel.id, source_channel_id=self.original_channel.id, source_node_id=self.original_contentnode.node_id, original_source_node_id=self.original_contentnode.node_id, - author="new herhe" + author="source author", ) - # publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) + + return source_node, source_channel + + def run_migrations(self): + call_command('rectify_incorrect_contentnode_source_fields', user_id=self.user.id, is_test=True) + + def test_two_node_case(self): + base_node, base_channel = self.create_base_channel_and_contentnode(self.original_contentnode, self.original_channel) + + publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) + + # main_tree node still has changed=true even after the publish for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): + print(node.id) + print(node.id == base_channel.main_tree.id) + print("checking if the node is complete or not ", node.complete) node.changed = False + # This should probably again change the changed=true but suprisingly it doesnot + # Meaning the changed boolean doesnot change for the main_tree no matter what we do + # through ContentNode model methods like save. node.save() ContentNode.objects.filter(pk=base_node.pk).update( modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) ) - # base_channel.refresh_from_db() - # print(base_channel.main_tree.get_family().filter(changed=True)) + self.run_migrations() + updated_base_node = ContentNode.objects.get(pk=base_node.pk) + self.assertEqual(updated_base_node.license, self.original_contentnode.license) + self.assertEqual(updated_base_node.author, self.original_contentnode.author) + self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) + + def test_three_node_case_implicit(self): + source_node, source_channel = self.create_source_channel_and_contentnode() + base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel) + source_node.aggregator = "Nami" + source_node.save() + # Implicit case + base_node.author = source_node.author + base_node.license = source_node.license + base_node.aggregator = source_node.aggregator + base_node.save() + + publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) + + for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): + print(node.id) + print(node.id == base_channel.main_tree.id) + print("checking if the node is complete or not ", node.complete) + node.changed = False + node.save() + + ContentNode.objects.filter(pk=base_node.pk).update( + modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) + ) + + ContentNode.objects.filter(pk=source_node.pk).update( + modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc) + ) + + self.run_migrations() + updated_base_node = ContentNode.objects.get(pk=base_node.pk) + updated_source_node = ContentNode.objects.get(pk=source_node.pk) + self.assertEqual(updated_base_node.license, self.original_contentnode.license) + self.assertEqual(updated_base_node.author, self.original_contentnode.author) + self.assertEqual(updated_source_node.license, self.original_contentnode.license) + self.assertEqual(updated_source_node.author, self.original_contentnode.author) + self.assertEqual(updated_source_node.aggregator, self.original_contentnode.aggregator) + self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) + + def test_three_node_case_explicit(self): + source_node, source_channel = self.create_source_channel_and_contentnode() + base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel) + source_node.provider = "luffy" + base_node.provider = "zoro" + base_node.save() + source_node.save() + publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) + + for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): + print(node.id) + print(node.id == base_channel.main_tree.id) + print("checking if the node is complete or not ", node.complete) + node.changed = False + node.save() + + ContentNode.objects.filter(pk=base_node.pk).update( + modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) + ) + + ContentNode.objects.filter(pk=source_node.pk).update( + modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc) + ) self.run_migrations() updated_base_node = ContentNode.objects.get(pk=base_node.pk) + updated_source_node = ContentNode.objects.get(pk=source_node.pk) self.assertEqual(updated_base_node.license, self.original_contentnode.license) self.assertEqual(updated_base_node.author, self.original_contentnode.author) + self.assertEqual(updated_base_node.provider, self.original_contentnode.provider) + self.assertEqual(updated_source_node.license, self.original_contentnode.license) + self.assertEqual(updated_source_node.author, self.original_contentnode.author) + self.assertEqual(updated_source_node.provider, self.original_contentnode.provider) self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) - # assert(3==2) diff --git a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py index 9df74f9f8b..f8af43fdeb 100644 --- a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py +++ b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py @@ -19,11 +19,31 @@ class Command(BaseCommand): + + def add_arguments(self, parser): + + parser.add_argument( + '--is_test', + action='store_true', + help="Indicate if the command is running in a test environment.", + ) + + parser.add_argument( + '--user_id', + type=int, + help="User ID for the operation", + required=True + ) + def handle(self, *args, **options): # Filter Date : July 9, 2023 # Link https://github.com/learningequality/studio/pull/4189 # The PR date for the frontend change is July 10, 2023 # we would set the filter day one day back just to be sure + + is_test = options['is_test'] + user_id = options['user_id'] + filter_date = datetime.datetime(2023, 7, 9, tzinfo=timezone.utc) main_trees_cte = With( ( @@ -66,7 +86,6 @@ def handle(self, *args, **options): source_node_id__isnull=False, original_source_node_id__isnull=False, modified__lt=filter_date - # published=True, ) ).annotate( coalesced_provider=Coalesce("provider", Value("")), @@ -74,7 +93,6 @@ def handle(self, *args, **options): coalesced_aggregator=Coalesce("aggregator", Value("")), coalesced_license_id=Coalesce("license_id", -1), ) - diff_combined = diff.annotate( original_source_node_f_changed=Exists( original_source_nodes.filter( @@ -128,8 +146,12 @@ def handle(self, *args, **options): if base_node.license != original_source_node[0].license: base_node.license = original_source_node[0].license base_node.save() - if to_be_republished and base_channel.public: + if to_be_republished and base_channel.last_published is not None: # we would repbulish the channel - publish_channel("some_id", base_channel.id) + # Adding for testing + if is_test: + publish_channel(user_id, base_channel.id) + else: + publish_channel("SOME ID", base_channel.id, base_channel.id) else: continue From 9ad8ce79ccfd68efcf2655e2ab6400cbbb61ab9b Mon Sep 17 00:00:00 2001 From: ozer550 Date: Wed, 25 Sep 2024 12:33:00 +0530 Subject: [PATCH 08/10] cache channel_ids --- ...tify_incorrect_contentnode_source_fields.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py index f8af43fdeb..2b950fec9e 100644 --- a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py +++ b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py @@ -116,6 +116,8 @@ def handle(self, *args, **options): "original_source_node_f_changed", ).order_by() + channel_ids_to_republish = set() + for item in final_nodes: base_node = ContentNode.objects.get(pk=item["id"]) @@ -146,12 +148,14 @@ def handle(self, *args, **options): if base_node.license != original_source_node[0].license: base_node.license = original_source_node[0].license base_node.save() + if to_be_republished and base_channel.last_published is not None: - # we would repbulish the channel - # Adding for testing - if is_test: - publish_channel(user_id, base_channel.id) - else: - publish_channel("SOME ID", base_channel.id, base_channel.id) + channel_ids_to_republish.add(base_channel.id) + + # we would repbulish the channel + # Adding for testing + for channel_id in channel_ids_to_republish: + if is_test: + publish_channel(user_id, channel_id) else: - continue + publish_channel("SOME ID", channel_id, channel_id) From a1aaedd3c18655bc6adbf6839d971a8df38c3b6a Mon Sep 17 00:00:00 2001 From: ozer550 Date: Wed, 25 Sep 2024 12:58:37 +0530 Subject: [PATCH 09/10] add admin user_id --- .../rectify_incorrect_contentnode_source_fields.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py index 2b950fec9e..bf56de9dc1 100644 --- a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py +++ b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py @@ -13,6 +13,7 @@ from contentcuration.models import Channel from contentcuration.models import ContentNode +from contentcuration.models import User from contentcuration.utils.publish import publish_channel logger = logging.getLogger(__file__) @@ -44,6 +45,9 @@ def handle(self, *args, **options): is_test = options['is_test'] user_id = options['user_id'] + if not is_test: + user_id = User.objects.filter(email='channeladmin@learningequality.org').values('pk') + filter_date = datetime.datetime(2023, 7, 9, tzinfo=timezone.utc) main_trees_cte = With( ( @@ -155,7 +159,4 @@ def handle(self, *args, **options): # we would repbulish the channel # Adding for testing for channel_id in channel_ids_to_republish: - if is_test: - publish_channel(user_id, channel_id) - else: - publish_channel("SOME ID", channel_id, channel_id) + publish_channel(user_id, channel_id) From 0f1518fef8a5016e299a46f6bb021925d089852c Mon Sep 17 00:00:00 2001 From: ozer550 Date: Thu, 26 Sep 2024 21:49:25 +0530 Subject: [PATCH 10/10] return proper id instead of dict --- .../commands/rectify_incorrect_contentnode_source_fields.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py index bf56de9dc1..2df2aad4bb 100644 --- a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py +++ b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py @@ -46,7 +46,7 @@ def handle(self, *args, **options): user_id = options['user_id'] if not is_test: - user_id = User.objects.filter(email='channeladmin@learningequality.org').values('pk') + user_id = User.objects.get(email='channeladmin@learningequality.org').pk filter_date = datetime.datetime(2023, 7, 9, tzinfo=timezone.utc) main_trees_cte = With( @@ -157,6 +157,5 @@ def handle(self, *args, **options): channel_ids_to_republish.add(base_channel.id) # we would repbulish the channel - # Adding for testing for channel_id in channel_ids_to_republish: publish_channel(user_id, channel_id)