-
Notifications
You must be signed in to change notification settings - Fork 185
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4720 from ozer550/recorrect-incorrectly-changed-s…
…ource-fields Run migrations to fix incorrect source fields of contentnodes
- Loading branch information
Showing
3 changed files
with
344 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
182 changes: 182 additions & 0 deletions
182
contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
# DELETE THIS FILE AFTER RUNNING THE MIGRATIONSSS | ||
import datetime | ||
import uuid | ||
|
||
from django.core.management import call_command | ||
from django.utils import timezone | ||
from le_utils.constants import content_kinds | ||
|
||
from contentcuration.models import Channel | ||
from contentcuration.models import ContentNode | ||
from contentcuration.models import License | ||
from contentcuration.tests import testdata | ||
from contentcuration.tests.base import StudioAPITestCase | ||
from contentcuration.utils.publish import publish_channel | ||
|
||
|
||
class TestRectifyMigrationCommand(StudioAPITestCase): | ||
|
||
@classmethod | ||
def setUpClass(cls): | ||
super(TestRectifyMigrationCommand, cls).setUpClass() | ||
|
||
def tearDown(self): | ||
super(TestRectifyMigrationCommand, self).tearDown() | ||
|
||
def setUp(self): | ||
super(TestRectifyMigrationCommand, self).setUp() | ||
self.original_channel = testdata.channel() | ||
self.license_original = License.objects.all()[0] | ||
self.original_contentnode = ContentNode.objects.create( | ||
id=uuid.uuid4().hex, | ||
title="Original Node", | ||
parent=self.original_channel.main_tree, | ||
license=self.license_original, | ||
original_channel_id=None, | ||
source_channel_id=None, | ||
author="old author" | ||
) | ||
self.user = testdata.user() | ||
self.original_channel.editors.add(self.user) | ||
self.client.force_authenticate(user=self.user) | ||
|
||
def create_base_channel_and_contentnode(self, source_contentnode, source_channel): | ||
base_channel = testdata.channel() | ||
base_channel.public = True | ||
base_channel.save() | ||
license_changed = License.objects.all()[2] | ||
base_node = ContentNode.objects.create( | ||
id=uuid.uuid4().hex, | ||
title="base contentnode", | ||
parent=base_channel.main_tree, | ||
kind_id=content_kinds.VIDEO, | ||
original_channel_id=self.original_channel.id, | ||
original_source_node_id=self.original_contentnode.node_id, | ||
source_channel_id=source_channel.id, | ||
source_node_id=source_contentnode.node_id, | ||
author="source author", | ||
license=license_changed, | ||
) | ||
return base_node, base_channel | ||
|
||
def create_source_channel_and_contentnode(self): | ||
source_channel = testdata.channel() | ||
source_channel.public = True | ||
source_channel.save() | ||
license_changed = License.objects.all()[1] | ||
source_node = ContentNode.objects.create( | ||
id=uuid.uuid4().hex, | ||
title="base contentnode", | ||
parent=source_channel.main_tree, | ||
kind_id=content_kinds.VIDEO, | ||
license=license_changed, | ||
original_channel_id=self.original_channel.id, | ||
source_channel_id=self.original_channel.id, | ||
source_node_id=self.original_contentnode.node_id, | ||
original_source_node_id=self.original_contentnode.node_id, | ||
author="source author", | ||
) | ||
|
||
return source_node, source_channel | ||
|
||
def run_migrations(self): | ||
call_command('rectify_incorrect_contentnode_source_fields', user_id=self.user.id, is_test=True) | ||
|
||
def test_two_node_case(self): | ||
base_node, base_channel = self.create_base_channel_and_contentnode(self.original_contentnode, self.original_channel) | ||
|
||
publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) | ||
|
||
# main_tree node still has changed=true even after the publish | ||
for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): | ||
print(node.id) | ||
print(node.id == base_channel.main_tree.id) | ||
print("checking if the node is complete or not ", node.complete) | ||
node.changed = False | ||
# This should probably again change the changed=true but suprisingly it doesnot | ||
# Meaning the changed boolean doesnot change for the main_tree no matter what we do | ||
# through ContentNode model methods like save. | ||
node.save() | ||
|
||
ContentNode.objects.filter(pk=base_node.pk).update( | ||
modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) | ||
) | ||
|
||
self.run_migrations() | ||
updated_base_node = ContentNode.objects.get(pk=base_node.pk) | ||
self.assertEqual(updated_base_node.license, self.original_contentnode.license) | ||
self.assertEqual(updated_base_node.author, self.original_contentnode.author) | ||
self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) | ||
|
||
def test_three_node_case_implicit(self): | ||
source_node, source_channel = self.create_source_channel_and_contentnode() | ||
base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel) | ||
source_node.aggregator = "Nami" | ||
source_node.save() | ||
# Implicit case | ||
base_node.author = source_node.author | ||
base_node.license = source_node.license | ||
base_node.aggregator = source_node.aggregator | ||
base_node.save() | ||
|
||
publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) | ||
|
||
for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): | ||
print(node.id) | ||
print(node.id == base_channel.main_tree.id) | ||
print("checking if the node is complete or not ", node.complete) | ||
node.changed = False | ||
node.save() | ||
|
||
ContentNode.objects.filter(pk=base_node.pk).update( | ||
modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) | ||
) | ||
|
||
ContentNode.objects.filter(pk=source_node.pk).update( | ||
modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc) | ||
) | ||
|
||
self.run_migrations() | ||
updated_base_node = ContentNode.objects.get(pk=base_node.pk) | ||
updated_source_node = ContentNode.objects.get(pk=source_node.pk) | ||
self.assertEqual(updated_base_node.license, self.original_contentnode.license) | ||
self.assertEqual(updated_base_node.author, self.original_contentnode.author) | ||
self.assertEqual(updated_source_node.license, self.original_contentnode.license) | ||
self.assertEqual(updated_source_node.author, self.original_contentnode.author) | ||
self.assertEqual(updated_source_node.aggregator, self.original_contentnode.aggregator) | ||
self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) | ||
|
||
def test_three_node_case_explicit(self): | ||
source_node, source_channel = self.create_source_channel_and_contentnode() | ||
base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel) | ||
source_node.provider = "luffy" | ||
base_node.provider = "zoro" | ||
base_node.save() | ||
source_node.save() | ||
publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) | ||
|
||
for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): | ||
print(node.id) | ||
print(node.id == base_channel.main_tree.id) | ||
print("checking if the node is complete or not ", node.complete) | ||
node.changed = False | ||
node.save() | ||
|
||
ContentNode.objects.filter(pk=base_node.pk).update( | ||
modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) | ||
) | ||
|
||
ContentNode.objects.filter(pk=source_node.pk).update( | ||
modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc) | ||
) | ||
|
||
self.run_migrations() | ||
updated_base_node = ContentNode.objects.get(pk=base_node.pk) | ||
updated_source_node = ContentNode.objects.get(pk=source_node.pk) | ||
self.assertEqual(updated_base_node.license, self.original_contentnode.license) | ||
self.assertEqual(updated_base_node.author, self.original_contentnode.author) | ||
self.assertEqual(updated_base_node.provider, self.original_contentnode.provider) | ||
self.assertEqual(updated_source_node.license, self.original_contentnode.license) | ||
self.assertEqual(updated_source_node.author, self.original_contentnode.author) | ||
self.assertEqual(updated_source_node.provider, self.original_contentnode.provider) | ||
self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) |
161 changes: 161 additions & 0 deletions
161
...uration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
import datetime | ||
import logging | ||
|
||
from django.core.management.base import BaseCommand | ||
from django.db.models import Exists | ||
from django.db.models import F | ||
from django.db.models import OuterRef | ||
from django.db.models import Q | ||
from django.db.models import Value | ||
from django.db.models.functions import Coalesce | ||
from django.utils import timezone | ||
from django_cte import With | ||
|
||
from contentcuration.models import Channel | ||
from contentcuration.models import ContentNode | ||
from contentcuration.models import User | ||
from contentcuration.utils.publish import publish_channel | ||
|
||
logger = logging.getLogger(__file__) | ||
|
||
|
||
class Command(BaseCommand): | ||
|
||
def add_arguments(self, parser): | ||
|
||
parser.add_argument( | ||
'--is_test', | ||
action='store_true', | ||
help="Indicate if the command is running in a test environment.", | ||
) | ||
|
||
parser.add_argument( | ||
'--user_id', | ||
type=int, | ||
help="User ID for the operation", | ||
required=True | ||
) | ||
|
||
def handle(self, *args, **options): | ||
# Filter Date : July 9, 2023 | ||
# Link https://github.com/learningequality/studio/pull/4189 | ||
# The PR date for the frontend change is July 10, 2023 | ||
# we would set the filter day one day back just to be sure | ||
|
||
is_test = options['is_test'] | ||
user_id = options['user_id'] | ||
|
||
if not is_test: | ||
user_id = User.objects.get(email='[email protected]').pk | ||
|
||
filter_date = datetime.datetime(2023, 7, 9, tzinfo=timezone.utc) | ||
main_trees_cte = With( | ||
( | ||
Channel.objects.filter( | ||
main_tree__isnull=False | ||
) | ||
.annotate(channel_id=F("id")) | ||
.values("channel_id", "deleted", tree_id=F("main_tree__tree_id")) | ||
), | ||
name="main_trees", | ||
) | ||
|
||
nodes = main_trees_cte.join( | ||
ContentNode.objects.all(), | ||
tree_id=main_trees_cte.col.tree_id, | ||
).annotate(channel_id=main_trees_cte.col.channel_id, deleted=main_trees_cte.col.deleted) | ||
|
||
original_source_nodes = ( | ||
nodes.with_cte(main_trees_cte) | ||
.filter( | ||
node_id=OuterRef("original_source_node_id"), | ||
) | ||
.exclude( | ||
tree_id=OuterRef("tree_id"), | ||
) | ||
.annotate( | ||
coalesced_provider=Coalesce("provider", Value("")), | ||
coalesced_author=Coalesce("author", Value("")), | ||
coalesced_aggregator=Coalesce("aggregator", Value("")), | ||
coalesced_license_id=Coalesce("license_id", -1), | ||
) | ||
) | ||
# We filter out according to last_modified date before this PR | ||
# https://github.com/learningequality/studio/pull/4189 | ||
# As we want to lossen up the public=True Filter and open the | ||
# migration for all the nodes even if they are not published | ||
diff = ( | ||
nodes.with_cte(main_trees_cte).filter( | ||
deleted=False, # we dont want the channel to be deleted or else we are fixing ghost nodes | ||
source_node_id__isnull=False, | ||
original_source_node_id__isnull=False, | ||
modified__lt=filter_date | ||
) | ||
).annotate( | ||
coalesced_provider=Coalesce("provider", Value("")), | ||
coalesced_author=Coalesce("author", Value("")), | ||
coalesced_aggregator=Coalesce("aggregator", Value("")), | ||
coalesced_license_id=Coalesce("license_id", -1), | ||
) | ||
diff_combined = diff.annotate( | ||
original_source_node_f_changed=Exists( | ||
original_source_nodes.filter( | ||
~Q(coalesced_provider=OuterRef("coalesced_provider")) | ||
| ~Q(coalesced_author=OuterRef("coalesced_author")) | ||
| ~Q(coalesced_aggregator=OuterRef("coalesced_aggregator")) | ||
| ~Q(coalesced_license_id=OuterRef("coalesced_license_id")) | ||
) | ||
) | ||
).filter(original_source_node_f_changed=True) | ||
|
||
final_nodes = diff_combined.values( | ||
"id", | ||
"channel_id", | ||
"original_channel_id", | ||
"original_source_node_id", | ||
"coalesced_provider", | ||
"coalesced_author", | ||
"coalesced_aggregator", | ||
"coalesced_license_id", | ||
"original_source_node_f_changed", | ||
).order_by() | ||
|
||
channel_ids_to_republish = set() | ||
|
||
for item in final_nodes: | ||
base_node = ContentNode.objects.get(pk=item["id"]) | ||
|
||
original_source_channel_id = item["original_channel_id"] | ||
original_source_node_id = item["original_source_node_id"] | ||
tree_id = ( | ||
Channel.objects.filter(pk=original_source_channel_id) | ||
.values_list("main_tree__tree_id", flat=True) | ||
.get() | ||
) | ||
original_source_node = ContentNode.objects.filter( | ||
tree_id=tree_id, node_id=original_source_node_id | ||
) | ||
|
||
base_channel = Channel.objects.get(pk=item['channel_id']) | ||
|
||
to_be_republished = not (base_channel.main_tree.get_family().filter(changed=True).exists()) | ||
|
||
if original_source_channel_id is not None and original_source_node.exists(): | ||
# original source node exists and its source fields dont match | ||
# update the base node | ||
if base_node.author != original_source_node[0].author: | ||
base_node.author = original_source_node[0].author | ||
if base_node.provider != original_source_node[0].provider: | ||
base_node.provider = original_source_node[0].provider | ||
if base_node.aggregator != original_source_node[0].aggregator: | ||
base_node.aggregator = original_source_node[0].aggregator | ||
if base_node.license != original_source_node[0].license: | ||
base_node.license = original_source_node[0].license | ||
base_node.save() | ||
|
||
if to_be_republished and base_channel.last_published is not None: | ||
channel_ids_to_republish.add(base_channel.id) | ||
|
||
# we would repbulish the channel | ||
for channel_id in channel_ids_to_republish: | ||
publish_channel(user_id, channel_id) |