Skip to content

Commit

Permalink
Merge pull request #4720 from ozer550/recorrect-incorrectly-changed-s…
Browse files Browse the repository at this point in the history
…ource-fields

Run migrations to fix incorrect source fields of contentnodes
  • Loading branch information
akolson authored Sep 27, 2024
2 parents 7bfe2ae + 0f1518f commit 6f6daf2
Show file tree
Hide file tree
Showing 3 changed files with 344 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ migrate:
# 4) Remove the management command from this `deploy-migrate` recipe
# 5) Repeat!
deploy-migrate:
echo "Nothing to do here!"
python contentcuration/manage.py rectify_incorrect_contentnode_source_fields

contentnodegc:
python contentcuration/manage.py garbage_collect
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
# DELETE THIS FILE AFTER RUNNING THE MIGRATIONSSS
import datetime
import uuid

from django.core.management import call_command
from django.utils import timezone
from le_utils.constants import content_kinds

from contentcuration.models import Channel
from contentcuration.models import ContentNode
from contentcuration.models import License
from contentcuration.tests import testdata
from contentcuration.tests.base import StudioAPITestCase
from contentcuration.utils.publish import publish_channel


class TestRectifyMigrationCommand(StudioAPITestCase):

@classmethod
def setUpClass(cls):
super(TestRectifyMigrationCommand, cls).setUpClass()

def tearDown(self):
super(TestRectifyMigrationCommand, self).tearDown()

def setUp(self):
super(TestRectifyMigrationCommand, self).setUp()
self.original_channel = testdata.channel()
self.license_original = License.objects.all()[0]
self.original_contentnode = ContentNode.objects.create(
id=uuid.uuid4().hex,
title="Original Node",
parent=self.original_channel.main_tree,
license=self.license_original,
original_channel_id=None,
source_channel_id=None,
author="old author"
)
self.user = testdata.user()
self.original_channel.editors.add(self.user)
self.client.force_authenticate(user=self.user)

def create_base_channel_and_contentnode(self, source_contentnode, source_channel):
base_channel = testdata.channel()
base_channel.public = True
base_channel.save()
license_changed = License.objects.all()[2]
base_node = ContentNode.objects.create(
id=uuid.uuid4().hex,
title="base contentnode",
parent=base_channel.main_tree,
kind_id=content_kinds.VIDEO,
original_channel_id=self.original_channel.id,
original_source_node_id=self.original_contentnode.node_id,
source_channel_id=source_channel.id,
source_node_id=source_contentnode.node_id,
author="source author",
license=license_changed,
)
return base_node, base_channel

def create_source_channel_and_contentnode(self):
source_channel = testdata.channel()
source_channel.public = True
source_channel.save()
license_changed = License.objects.all()[1]
source_node = ContentNode.objects.create(
id=uuid.uuid4().hex,
title="base contentnode",
parent=source_channel.main_tree,
kind_id=content_kinds.VIDEO,
license=license_changed,
original_channel_id=self.original_channel.id,
source_channel_id=self.original_channel.id,
source_node_id=self.original_contentnode.node_id,
original_source_node_id=self.original_contentnode.node_id,
author="source author",
)

return source_node, source_channel

def run_migrations(self):
call_command('rectify_incorrect_contentnode_source_fields', user_id=self.user.id, is_test=True)

def test_two_node_case(self):
base_node, base_channel = self.create_base_channel_and_contentnode(self.original_contentnode, self.original_channel)

publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id)

# main_tree node still has changed=true even after the publish
for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True):
print(node.id)
print(node.id == base_channel.main_tree.id)
print("checking if the node is complete or not ", node.complete)
node.changed = False
# This should probably again change the changed=true but suprisingly it doesnot
# Meaning the changed boolean doesnot change for the main_tree no matter what we do
# through ContentNode model methods like save.
node.save()

ContentNode.objects.filter(pk=base_node.pk).update(
modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc)
)

self.run_migrations()
updated_base_node = ContentNode.objects.get(pk=base_node.pk)
self.assertEqual(updated_base_node.license, self.original_contentnode.license)
self.assertEqual(updated_base_node.author, self.original_contentnode.author)
self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False)

def test_three_node_case_implicit(self):
source_node, source_channel = self.create_source_channel_and_contentnode()
base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel)
source_node.aggregator = "Nami"
source_node.save()
# Implicit case
base_node.author = source_node.author
base_node.license = source_node.license
base_node.aggregator = source_node.aggregator
base_node.save()

publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id)

for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True):
print(node.id)
print(node.id == base_channel.main_tree.id)
print("checking if the node is complete or not ", node.complete)
node.changed = False
node.save()

ContentNode.objects.filter(pk=base_node.pk).update(
modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc)
)

ContentNode.objects.filter(pk=source_node.pk).update(
modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc)
)

self.run_migrations()
updated_base_node = ContentNode.objects.get(pk=base_node.pk)
updated_source_node = ContentNode.objects.get(pk=source_node.pk)
self.assertEqual(updated_base_node.license, self.original_contentnode.license)
self.assertEqual(updated_base_node.author, self.original_contentnode.author)
self.assertEqual(updated_source_node.license, self.original_contentnode.license)
self.assertEqual(updated_source_node.author, self.original_contentnode.author)
self.assertEqual(updated_source_node.aggregator, self.original_contentnode.aggregator)
self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False)

def test_three_node_case_explicit(self):
source_node, source_channel = self.create_source_channel_and_contentnode()
base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel)
source_node.provider = "luffy"
base_node.provider = "zoro"
base_node.save()
source_node.save()
publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id)

for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True):
print(node.id)
print(node.id == base_channel.main_tree.id)
print("checking if the node is complete or not ", node.complete)
node.changed = False
node.save()

ContentNode.objects.filter(pk=base_node.pk).update(
modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc)
)

ContentNode.objects.filter(pk=source_node.pk).update(
modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc)
)

self.run_migrations()
updated_base_node = ContentNode.objects.get(pk=base_node.pk)
updated_source_node = ContentNode.objects.get(pk=source_node.pk)
self.assertEqual(updated_base_node.license, self.original_contentnode.license)
self.assertEqual(updated_base_node.author, self.original_contentnode.author)
self.assertEqual(updated_base_node.provider, self.original_contentnode.provider)
self.assertEqual(updated_source_node.license, self.original_contentnode.license)
self.assertEqual(updated_source_node.author, self.original_contentnode.author)
self.assertEqual(updated_source_node.provider, self.original_contentnode.provider)
self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False)
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import datetime
import logging

from django.core.management.base import BaseCommand
from django.db.models import Exists
from django.db.models import F
from django.db.models import OuterRef
from django.db.models import Q
from django.db.models import Value
from django.db.models.functions import Coalesce
from django.utils import timezone
from django_cte import With

from contentcuration.models import Channel
from contentcuration.models import ContentNode
from contentcuration.models import User
from contentcuration.utils.publish import publish_channel

logger = logging.getLogger(__file__)


class Command(BaseCommand):

def add_arguments(self, parser):

parser.add_argument(
'--is_test',
action='store_true',
help="Indicate if the command is running in a test environment.",
)

parser.add_argument(
'--user_id',
type=int,
help="User ID for the operation",
required=True
)

def handle(self, *args, **options):
# Filter Date : July 9, 2023
# Link https://github.com/learningequality/studio/pull/4189
# The PR date for the frontend change is July 10, 2023
# we would set the filter day one day back just to be sure

is_test = options['is_test']
user_id = options['user_id']

if not is_test:
user_id = User.objects.get(email='[email protected]').pk

filter_date = datetime.datetime(2023, 7, 9, tzinfo=timezone.utc)
main_trees_cte = With(
(
Channel.objects.filter(
main_tree__isnull=False
)
.annotate(channel_id=F("id"))
.values("channel_id", "deleted", tree_id=F("main_tree__tree_id"))
),
name="main_trees",
)

nodes = main_trees_cte.join(
ContentNode.objects.all(),
tree_id=main_trees_cte.col.tree_id,
).annotate(channel_id=main_trees_cte.col.channel_id, deleted=main_trees_cte.col.deleted)

original_source_nodes = (
nodes.with_cte(main_trees_cte)
.filter(
node_id=OuterRef("original_source_node_id"),
)
.exclude(
tree_id=OuterRef("tree_id"),
)
.annotate(
coalesced_provider=Coalesce("provider", Value("")),
coalesced_author=Coalesce("author", Value("")),
coalesced_aggregator=Coalesce("aggregator", Value("")),
coalesced_license_id=Coalesce("license_id", -1),
)
)
# We filter out according to last_modified date before this PR
# https://github.com/learningequality/studio/pull/4189
# As we want to lossen up the public=True Filter and open the
# migration for all the nodes even if they are not published
diff = (
nodes.with_cte(main_trees_cte).filter(
deleted=False, # we dont want the channel to be deleted or else we are fixing ghost nodes
source_node_id__isnull=False,
original_source_node_id__isnull=False,
modified__lt=filter_date
)
).annotate(
coalesced_provider=Coalesce("provider", Value("")),
coalesced_author=Coalesce("author", Value("")),
coalesced_aggregator=Coalesce("aggregator", Value("")),
coalesced_license_id=Coalesce("license_id", -1),
)
diff_combined = diff.annotate(
original_source_node_f_changed=Exists(
original_source_nodes.filter(
~Q(coalesced_provider=OuterRef("coalesced_provider"))
| ~Q(coalesced_author=OuterRef("coalesced_author"))
| ~Q(coalesced_aggregator=OuterRef("coalesced_aggregator"))
| ~Q(coalesced_license_id=OuterRef("coalesced_license_id"))
)
)
).filter(original_source_node_f_changed=True)

final_nodes = diff_combined.values(
"id",
"channel_id",
"original_channel_id",
"original_source_node_id",
"coalesced_provider",
"coalesced_author",
"coalesced_aggregator",
"coalesced_license_id",
"original_source_node_f_changed",
).order_by()

channel_ids_to_republish = set()

for item in final_nodes:
base_node = ContentNode.objects.get(pk=item["id"])

original_source_channel_id = item["original_channel_id"]
original_source_node_id = item["original_source_node_id"]
tree_id = (
Channel.objects.filter(pk=original_source_channel_id)
.values_list("main_tree__tree_id", flat=True)
.get()
)
original_source_node = ContentNode.objects.filter(
tree_id=tree_id, node_id=original_source_node_id
)

base_channel = Channel.objects.get(pk=item['channel_id'])

to_be_republished = not (base_channel.main_tree.get_family().filter(changed=True).exists())

if original_source_channel_id is not None and original_source_node.exists():
# original source node exists and its source fields dont match
# update the base node
if base_node.author != original_source_node[0].author:
base_node.author = original_source_node[0].author
if base_node.provider != original_source_node[0].provider:
base_node.provider = original_source_node[0].provider
if base_node.aggregator != original_source_node[0].aggregator:
base_node.aggregator = original_source_node[0].aggregator
if base_node.license != original_source_node[0].license:
base_node.license = original_source_node[0].license
base_node.save()

if to_be_republished and base_channel.last_published is not None:
channel_ids_to_republish.add(base_channel.id)

# we would repbulish the channel
for channel_id in channel_ids_to_republish:
publish_channel(user_id, channel_id)

0 comments on commit 6f6daf2

Please sign in to comment.