From 8c4f6692da5ce3f2e86f2dee9b1e4595f67713f9 Mon Sep 17 00:00:00 2001 From: feanil Date: Wed, 17 Apr 2024 15:29:43 +0000 Subject: [PATCH 01/37] feat: Upgrade Python dependency edx-bulk-grades Update to a Python 3.11 compatible version Commit generated by workflow `openedx/edx-platform/.github/workflows/upgrade-one-python-dependency.yml@refs/heads/master` --- requirements/edx/base.txt | 2 +- requirements/edx/development.txt | 2 +- requirements/edx/doc.txt | 2 +- requirements/edx/testing.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index c6607f0dfc0f..24376a074c3c 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -428,7 +428,7 @@ edx-braze-client==0.2.2 # via # -r requirements/edx/bundled.in # edx-enterprise -edx-bulk-grades==1.0.2 +edx-bulk-grades==1.1.0 # via # -r requirements/edx/kernel.in # staff-graded-xblock diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index 4461a06efc61..d02bc5fa6da8 100644 --- a/requirements/edx/development.txt +++ b/requirements/edx/development.txt @@ -692,7 +692,7 @@ edx-braze-client==0.2.2 # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt # edx-enterprise -edx-bulk-grades==1.0.2 +edx-bulk-grades==1.1.0 # via # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt diff --git a/requirements/edx/doc.txt b/requirements/edx/doc.txt index 49acea996069..9f55649c011a 100644 --- a/requirements/edx/doc.txt +++ b/requirements/edx/doc.txt @@ -506,7 +506,7 @@ edx-braze-client==0.2.2 # via # -r requirements/edx/base.txt # edx-enterprise -edx-bulk-grades==1.0.2 +edx-bulk-grades==1.1.0 # via # -r requirements/edx/base.txt # staff-graded-xblock diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt index 6d21bb61a4a7..31fc58675695 100644 --- a/requirements/edx/testing.txt +++ b/requirements/edx/testing.txt @@ -530,7 +530,7 @@ edx-braze-client==0.2.2 # via # -r requirements/edx/base.txt # edx-enterprise -edx-bulk-grades==1.0.2 +edx-bulk-grades==1.1.0 # via # -r requirements/edx/base.txt # staff-graded-xblock From f18629e1329a508487081436d5dc899b089cedb3 Mon Sep 17 00:00:00 2001 From: Jeremy Ristau Date: Wed, 17 Apr 2024 16:42:05 -0400 Subject: [PATCH 02/37] chore: update 2U teams in CODEOWNERS (#34352) --- .github/CODEOWNERS | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 3a60d0041771..3f9abcc671fb 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,12 +2,12 @@ # Ensure that the team responsible for upgrades sees any PRs that would # add GitHub-hosted dependencies to that platform. -requirements/edx/github.in @openedx/arbi-bom +requirements/edx/github.in @openedx/2u-arbi-bom # Core common/djangoapps/student/ -common/djangoapps/student/models/__init__.py @openedx/teaching-and-learning -common/djangoapps/student/models/course_enrollment.py @openedx/teaching-and-learning +common/djangoapps/student/models/__init__.py @openedx/2u-tnl +common/djangoapps/student/models/course_enrollment.py @openedx/2u-tnl common/djangoapps/third_party_auth/ lms/djangoapps/course_api/blocks lms/djangoapps/courseware/ @@ -47,8 +47,8 @@ openedx/features/discounts/ # Ping Axim On-call if someone uses the QuickStart # https://docs.openedx.org/en/latest/developers/quickstarts/first_openedx_pr.html -lms/templates/dashboard.html @openedx/axim-oncall +lms/templates/dashboard.html @openedx/axim-oncall # Ensure minimal.yml stays minimal, this could be a team in the future # but it's just me for now, others can sign up if they care as well. -lms/envs/minimal.yml @feanil +lms/envs/minimal.yml @feanil From 1ae052aad313a9c091d301971c3e6a1bb0ea4f8a Mon Sep 17 00:00:00 2001 From: feanil Date: Thu, 18 Apr 2024 15:35:07 +0000 Subject: [PATCH 03/37] feat: Upgrade Python dependency edx-django-sites-extensions Update to a Python 3.11 compatible version Commit generated by workflow `openedx/edx-platform/.github/workflows/upgrade-one-python-dependency.yml@refs/heads/master` --- requirements/edx/base.txt | 2 +- requirements/edx/development.txt | 2 +- requirements/edx/doc.txt | 2 +- requirements/edx/testing.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index c6607f0dfc0f..27ed6f7e602f 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -450,7 +450,7 @@ edx-django-release-util==1.4.0 # -r requirements/edx/kernel.in # edxval # openedx-blockstore -edx-django-sites-extensions==4.0.2 +edx-django-sites-extensions==4.2.0 # via -r requirements/edx/kernel.in edx-django-utils==5.12.0 # via diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index 4461a06efc61..b68ac14ba4ec 100644 --- a/requirements/edx/development.txt +++ b/requirements/edx/development.txt @@ -722,7 +722,7 @@ edx-django-release-util==1.4.0 # -r requirements/edx/testing.txt # edxval # openedx-blockstore -edx-django-sites-extensions==4.0.2 +edx-django-sites-extensions==4.2.0 # via # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt diff --git a/requirements/edx/doc.txt b/requirements/edx/doc.txt index 49acea996069..5543a2a97222 100644 --- a/requirements/edx/doc.txt +++ b/requirements/edx/doc.txt @@ -528,7 +528,7 @@ edx-django-release-util==1.4.0 # -r requirements/edx/base.txt # edxval # openedx-blockstore -edx-django-sites-extensions==4.0.2 +edx-django-sites-extensions==4.2.0 # via -r requirements/edx/base.txt edx-django-utils==5.12.0 # via diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt index 6d21bb61a4a7..af9f55a1d4c5 100644 --- a/requirements/edx/testing.txt +++ b/requirements/edx/testing.txt @@ -552,7 +552,7 @@ edx-django-release-util==1.4.0 # -r requirements/edx/base.txt # edxval # openedx-blockstore -edx-django-sites-extensions==4.0.2 +edx-django-sites-extensions==4.2.0 # via -r requirements/edx/base.txt edx-django-utils==5.12.0 # via From cec7969ce81db37769901665cf999ace2157e1b6 Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Wed, 17 Apr 2024 08:48:11 -0400 Subject: [PATCH 04/37] feat!: Remove the django-splash app. DEPR: https://github.com/openedx/public-engineering/issues/224 The django-splash repo was created 11 years ago to let the LMS redirect users to a splash screen when a user comes to the site for the first time. It works by looking for a configurable cookie value and redirecting from the middleware. This feature was never documented, has some edx.org hardcoded defaults, and is not compatible with MFEs. BREAKING CHANGE: The django splash feature will no longer be available. --- lms/envs/common.py | 5 ----- requirements/edx/kernel.in | 1 - 2 files changed, 6 deletions(-) diff --git a/lms/envs/common.py b/lms/envs/common.py index 30ba71f62aad..4fd9ba08ada5 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -2286,8 +2286,6 @@ def _make_locale_paths(settings): # pylint: disable=missing-function-docstring 'openedx.core.djangoapps.cors_csrf.middleware.CorsCSRFMiddleware', 'openedx.core.djangoapps.cors_csrf.middleware.CsrfCrossDomainCookieMiddleware', - 'splash.middleware.SplashMiddleware', - 'openedx.core.djangoapps.geoinfo.middleware.CountryMiddleware', 'openedx.core.djangoapps.embargo.middleware.EmbargoMiddleware', @@ -3162,9 +3160,6 @@ def _make_locale_paths(settings): # pylint: disable=missing-function-docstring # Notes 'lms.djangoapps.edxnotes', - # Splash screen - 'splash', - # User API 'rest_framework', diff --git a/requirements/edx/kernel.in b/requirements/edx/kernel.in index 0c2f110c84b9..253926e2b385 100644 --- a/requirements/edx/kernel.in +++ b/requirements/edx/kernel.in @@ -51,7 +51,6 @@ django-pipeline django-ratelimit django-sekizai django-simple-history -django-splash django-statici18n django-storages django-user-tasks From 83ace4d925b0857a89a87760365ae017b3662cd4 Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Wed, 17 Apr 2024 09:02:32 -0400 Subject: [PATCH 05/37] chore: Run `make compile-requirements` to drop django-splash. --- requirements/edx/base.txt | 3 --- requirements/edx/development.txt | 5 ----- requirements/edx/doc.txt | 3 --- requirements/edx/testing.txt | 3 --- 4 files changed, 14 deletions(-) diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index c6607f0dfc0f..20272c4d6e5b 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -197,7 +197,6 @@ django==4.2.10 # django-oauth-toolkit # django-sekizai # django-ses - # django-splash # django-statici18n # django-storages # django-user-tasks @@ -349,8 +348,6 @@ django-simple-history==3.4.0 # edx-organizations # edx-proctoring # ora2 -django-splash==1.3.0 - # via -r requirements/edx/kernel.in django-statici18n==2.4.0 # via # -r requirements/edx/kernel.in diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index 4461a06efc61..c227b95b219b 100644 --- a/requirements/edx/development.txt +++ b/requirements/edx/development.txt @@ -363,7 +363,6 @@ django==4.2.10 # django-oauth-toolkit # django-sekizai # django-ses - # django-splash # django-statici18n # django-storages # django-stubs @@ -567,10 +566,6 @@ django-simple-history==3.4.0 # edx-organizations # edx-proctoring # ora2 -django-splash==1.3.0 - # via - # -r requirements/edx/doc.txt - # -r requirements/edx/testing.txt django-statici18n==2.4.0 # via # -r requirements/edx/doc.txt diff --git a/requirements/edx/doc.txt b/requirements/edx/doc.txt index 49acea996069..93555de9ed8c 100644 --- a/requirements/edx/doc.txt +++ b/requirements/edx/doc.txt @@ -247,7 +247,6 @@ django==4.2.10 # django-oauth-toolkit # django-sekizai # django-ses - # django-splash # django-statici18n # django-storages # django-user-tasks @@ -415,8 +414,6 @@ django-simple-history==3.4.0 # edx-organizations # edx-proctoring # ora2 -django-splash==1.3.0 - # via -r requirements/edx/base.txt django-statici18n==2.4.0 # via # -r requirements/edx/base.txt diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt index 6d21bb61a4a7..0ba71290b123 100644 --- a/requirements/edx/testing.txt +++ b/requirements/edx/testing.txt @@ -276,7 +276,6 @@ django==4.2.10 # django-oauth-toolkit # django-sekizai # django-ses - # django-splash # django-statici18n # django-storages # django-user-tasks @@ -444,8 +443,6 @@ django-simple-history==3.4.0 # edx-organizations # edx-proctoring # ora2 -django-splash==1.3.0 - # via -r requirements/edx/base.txt django-statici18n==2.4.0 # via # -r requirements/edx/base.txt From 9bd4474f7dee7590a95a351cc755a8ff65e15c45 Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Wed, 17 Apr 2024 10:02:16 -0400 Subject: [PATCH 06/37] test: Reduce query counts now that we dropped django-splash. --- .../certificates/apis/v0/tests/test_views.py | 2 +- lms/djangoapps/course_api/tests/test_views.py | 2 +- lms/djangoapps/courseware/tests/test_views.py | 8 ++++---- .../discussion/django_comment_client/base/tests.py | 4 ++-- .../djangoapps/user_api/accounts/tests/test_views.py | 12 ++++++------ .../tests/views/test_course_updates.py | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/lms/djangoapps/certificates/apis/v0/tests/test_views.py b/lms/djangoapps/certificates/apis/v0/tests/test_views.py index fd31e3456961..efff97f54d5a 100644 --- a/lms/djangoapps/certificates/apis/v0/tests/test_views.py +++ b/lms/djangoapps/certificates/apis/v0/tests/test_views.py @@ -309,7 +309,7 @@ def test_no_certificate(self): def test_query_counts(self): # Test student with no certificates student_no_cert = UserFactory.create(password=self.user_password) - with self.assertNumQueries(21, table_ignorelist=WAFFLE_TABLES): + with self.assertNumQueries(20, table_ignorelist=WAFFLE_TABLES): resp = self.get_response( AuthType.jwt, requesting_user=self.global_staff, diff --git a/lms/djangoapps/course_api/tests/test_views.py b/lms/djangoapps/course_api/tests/test_views.py index 4065c54a983b..aab9769661a2 100644 --- a/lms/djangoapps/course_api/tests/test_views.py +++ b/lms/djangoapps/course_api/tests/test_views.py @@ -434,7 +434,7 @@ def test_too_many_courses(self): self.setup_user(self.audit_user) # These query counts were found empirically - query_counts = [53, 46, 46, 46, 46, 46, 46, 46, 46, 46, 16] + query_counts = [52, 46, 46, 46, 46, 46, 46, 46, 46, 46, 16] ordered_course_ids = sorted([str(cid) for cid in (course_ids + [c.id for c in self.courses])]) self.clear_caches() diff --git a/lms/djangoapps/courseware/tests/test_views.py b/lms/djangoapps/courseware/tests/test_views.py index 1d6c358ffd19..ef3d4741769e 100644 --- a/lms/djangoapps/courseware/tests/test_views.py +++ b/lms/djangoapps/courseware/tests/test_views.py @@ -365,7 +365,7 @@ def test_index_query_counts(self): self.client.login(username=self.user.username, password=self.user_password) CourseEnrollment.enroll(self.user, course.id) - with self.assertNumQueries(178, table_ignorelist=QUERY_COUNT_TABLE_IGNORELIST): + with self.assertNumQueries(177, table_ignorelist=QUERY_COUNT_TABLE_IGNORELIST): with check_mongo_calls(3): url = reverse( 'courseware_section', @@ -1496,8 +1496,8 @@ def test_view_certificate_link(self): self.assertContains(resp, "earned a certificate for this course.") @ddt.data( - (True, 54), - (False, 54), + (True, 53), + (False, 53), ) @ddt.unpack def test_progress_queries_paced_courses(self, self_paced, query_count): @@ -1512,7 +1512,7 @@ def test_progress_queries(self): ContentTypeGatingConfig.objects.create(enabled=True, enabled_as_of=datetime(2018, 1, 1)) self.setup_course() with self.assertNumQueries( - 54, table_ignorelist=QUERY_COUNT_TABLE_IGNORELIST + 53, table_ignorelist=QUERY_COUNT_TABLE_IGNORELIST ), check_mongo_calls(2): self._get_progress_page() diff --git a/lms/djangoapps/discussion/django_comment_client/base/tests.py b/lms/djangoapps/discussion/django_comment_client/base/tests.py index 7eb99020d608..6aa9c3e8e9fd 100644 --- a/lms/djangoapps/discussion/django_comment_client/base/tests.py +++ b/lms/djangoapps/discussion/django_comment_client/base/tests.py @@ -410,7 +410,7 @@ def inner(self, default_store, block_count, mongo_calls, sql_queries, *args, **k return inner @ddt.data( - (ModuleStoreEnum.Type.split, 3, 8, 43), + (ModuleStoreEnum.Type.split, 3, 8, 42), ) @ddt.unpack @count_queries @@ -418,7 +418,7 @@ def test_create_thread(self, mock_request): self.create_thread_helper(mock_request) @ddt.data( - (ModuleStoreEnum.Type.split, 3, 6, 42), + (ModuleStoreEnum.Type.split, 3, 6, 41), ) @ddt.unpack @count_queries diff --git a/openedx/core/djangoapps/user_api/accounts/tests/test_views.py b/openedx/core/djangoapps/user_api/accounts/tests/test_views.py index 0496f45ae2fc..ff0fb7abe4eb 100644 --- a/openedx/core/djangoapps/user_api/accounts/tests/test_views.py +++ b/openedx/core/djangoapps/user_api/accounts/tests/test_views.py @@ -232,7 +232,7 @@ def test_get_username(self): Test that a client (logged in) can get her own username. """ self.client.login(username=self.user.username, password=TEST_PASSWORD) - self._verify_get_own_username(19) + self._verify_get_own_username(18) def test_get_username_inactive(self): """ @@ -242,7 +242,7 @@ def test_get_username_inactive(self): self.client.login(username=self.user.username, password=TEST_PASSWORD) self.user.is_active = False self.user.save() - self._verify_get_own_username(19) + self._verify_get_own_username(18) def test_get_username_not_logged_in(self): """ @@ -251,7 +251,7 @@ def test_get_username_not_logged_in(self): """ # verify that the endpoint is inaccessible when not logged in - self._verify_get_own_username(12, expected_status=401) + self._verify_get_own_username(11, expected_status=401) @skip_unless_lms @@ -358,7 +358,7 @@ class TestAccountsAPI(FilteredQueryCountMixin, CacheIsolationTestCase, UserAPITe """ ENABLED_CACHES = ['default'] - TOTAL_QUERY_COUNT = 27 + TOTAL_QUERY_COUNT = 26 FULL_RESPONSE_FIELD_COUNT = 29 def setUp(self): @@ -811,7 +811,7 @@ def verify_get_own_information(queries): assert data['time_zone'] is None self.client.login(username=self.user.username, password=TEST_PASSWORD) - verify_get_own_information(self._get_num_queries(25)) + verify_get_own_information(self._get_num_queries(24)) # Now make sure that the user can get the same information, even if not active self.user.is_active = False @@ -831,7 +831,7 @@ def test_get_account_empty_string(self): legacy_profile.save() self.client.login(username=self.user.username, password=TEST_PASSWORD) - with self.assertNumQueries(self._get_num_queries(25), table_ignorelist=WAFFLE_TABLES): + with self.assertNumQueries(self._get_num_queries(24), table_ignorelist=WAFFLE_TABLES): response = self.send_get(self.client) for empty_field in ("level_of_education", "gender", "country", "state", "bio",): assert response.data[empty_field] is None diff --git a/openedx/features/course_experience/tests/views/test_course_updates.py b/openedx/features/course_experience/tests/views/test_course_updates.py index f26e1cd35ba2..379be52ed40f 100644 --- a/openedx/features/course_experience/tests/views/test_course_updates.py +++ b/openedx/features/course_experience/tests/views/test_course_updates.py @@ -49,7 +49,7 @@ def test_queries(self): # Fetch the view and verify that the query counts haven't changed # TODO: decrease query count as part of REVO-28 - with self.assertNumQueries(53, table_ignorelist=QUERY_COUNT_TABLE_IGNORELIST): + with self.assertNumQueries(52, table_ignorelist=QUERY_COUNT_TABLE_IGNORELIST): with check_mongo_calls(3): url = course_updates_url(self.course) self.client.get(url) From 90b253a38a6add326c68593cf1023b8ef6cafec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=B4mulo=20Penido?= Date: Thu, 18 Apr 2024 13:53:21 -0300 Subject: [PATCH 07/37] feat: update Studio search index when course content is updated (#34391) --- openedx/core/djangoapps/content/search/api.py | 511 ++++++++++++++++++ .../docs/decisions/0001-meilisearch.rst | 2 + .../djangoapps/content/search/documents.py | 54 +- .../djangoapps/content/search/handlers.py | 116 +++- .../search/management/commands/meili_mixin.py | 105 ---- .../management/commands/reindex_studio.py | 135 +---- .../core/djangoapps/content/search/tasks.py | 83 +++ .../content/search/tests/test_api.py | 195 +++++++ .../content/search/tests/test_documents.py | 12 +- .../content/search/tests/test_handlers.py | 156 ++++++ .../content/search/tests/test_views.py | 70 ++- .../core/djangoapps/content/search/views.py | 76 +-- 12 files changed, 1157 insertions(+), 358 deletions(-) create mode 100644 openedx/core/djangoapps/content/search/api.py delete mode 100644 openedx/core/djangoapps/content/search/management/commands/meili_mixin.py create mode 100644 openedx/core/djangoapps/content/search/tasks.py create mode 100644 openedx/core/djangoapps/content/search/tests/test_api.py create mode 100644 openedx/core/djangoapps/content/search/tests/test_handlers.py diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py new file mode 100644 index 000000000000..382e8f8d759a --- /dev/null +++ b/openedx/core/djangoapps/content/search/api.py @@ -0,0 +1,511 @@ +""" +Content index and search API using Meilisearch +""" +from __future__ import annotations + +import logging +import time +from contextlib import contextmanager +from datetime import datetime, timedelta, timezone +from functools import wraps +from typing import Callable, Generator + +from django.conf import settings +from django.contrib.auth import get_user_model +from django.core.cache import cache +from meilisearch import Client as MeilisearchClient +from meilisearch.errors import MeilisearchError +from meilisearch.models.task import TaskInfo +from opaque_keys.edx.keys import UsageKey +from opaque_keys.edx.locator import LibraryLocatorV2 +from common.djangoapps.student.roles import GlobalStaff +from rest_framework.request import Request +from common.djangoapps.student.role_helpers import get_course_roles +from openedx.core.djangoapps.content.search.models import get_access_ids_for_request + +from openedx.core.djangoapps.content_libraries import api as lib_api +from xmodule.modulestore import ModuleStoreEnum +from xmodule.modulestore.django import modulestore + +from .documents import ( + Fields, + meili_id_from_opaque_key, + searchable_doc_for_course_block, + searchable_doc_for_library_block, + searchable_doc_tags +) + +log = logging.getLogger(__name__) + +User = get_user_model() + +STUDIO_INDEX_SUFFIX = "studio_content" + +if hasattr(settings, "MEILISEARCH_INDEX_PREFIX"): + STUDIO_INDEX_NAME = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_SUFFIX +else: + STUDIO_INDEX_NAME = STUDIO_INDEX_SUFFIX + + +_MEILI_CLIENT = None +_MEILI_API_KEY_UID = None + +LOCK_EXPIRE = 24 * 60 * 60 # Lock expires in 24 hours + +MAX_ACCESS_IDS_IN_FILTER = 1_000 +MAX_ORGS_IN_FILTER = 1_000 + + +@contextmanager +def _index_rebuild_lock() -> Generator[str, None, None]: + """ + Lock to prevent that more than one rebuild is running at the same time + """ + lock_id = f"lock-meilisearch-index-{STUDIO_INDEX_NAME}" + new_index_name = STUDIO_INDEX_NAME + "_new" + + status = cache.add(lock_id, new_index_name, LOCK_EXPIRE) + + if not status: + # Lock already acquired + raise RuntimeError("Rebuild already in progress") + + # Lock acquired + try: + yield new_index_name + finally: + # Release the lock + cache.delete(lock_id) + + +def _get_running_rebuild_index_name() -> str | None: + lock_id = f"lock-meilisearch-index-{STUDIO_INDEX_NAME}" + + return cache.get(lock_id) + + +def _get_meilisearch_client(): + """ + Get the Meiliesearch client + """ + global _MEILI_CLIENT # pylint: disable=global-statement + + # Connect to Meilisearch + if not is_meilisearch_enabled(): + raise RuntimeError("MEILISEARCH_ENABLED is not set - search functionality disabled.") + + if _MEILI_CLIENT is not None: + return _MEILI_CLIENT + + _MEILI_CLIENT = MeilisearchClient(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) + try: + _MEILI_CLIENT.health() + except MeilisearchError as err: + _MEILI_CLIENT = None + raise ConnectionError("Unable to connect to Meilisearch") from err + return _MEILI_CLIENT + + +def clear_meilisearch_client(): + global _MEILI_CLIENT # pylint: disable=global-statement + + _MEILI_CLIENT = None + + +def _get_meili_api_key_uid(): + """ + Helper method to get the UID of the API key we're using for Meilisearch + """ + global _MEILI_API_KEY_UID # pylint: disable=global-statement + + if _MEILI_API_KEY_UID is not None: + return _MEILI_API_KEY_UID + + _MEILI_API_KEY_UID = _get_meilisearch_client().get_key(settings.MEILISEARCH_API_KEY).uid + + +def _wait_for_meili_task(info: TaskInfo) -> None: + """ + Simple helper method to wait for a Meilisearch task to complete + This method will block until the task is completed, so it should only be used in celery tasks + or management commands. + """ + client = _get_meilisearch_client() + current_status = client.get_task(info.task_uid) + while current_status.status in ("enqueued", "processing"): + time.sleep(0.5) + current_status = client.get_task(info.task_uid) + if current_status.status != "succeeded": + try: + err_reason = current_status.error['message'] + except (TypeError, KeyError): + err_reason = "Unknown error" + raise MeilisearchError(err_reason) + + +def _wait_for_meili_tasks(info_list: list[TaskInfo]) -> None: + """ + Simple helper method to wait for multiple Meilisearch tasks to complete + """ + while info_list: + info = info_list.pop() + _wait_for_meili_task(info) + + +def _index_exists(index_name: str) -> bool: + """ + Check if an index exists + """ + client = _get_meilisearch_client() + try: + client.get_index(index_name) + except MeilisearchError as err: + if err.code == "index_not_found": + return False + else: + raise err + return True + + +@contextmanager +def _using_temp_index(status_cb: Callable[[str], None] | None = None) -> Generator[str, None, None]: + """ + Create a new temporary Meilisearch index, populate it, then swap it to + become the active index. + + Args: + status_cb (Callable): A callback function to report status messages + """ + if status_cb is None: + status_cb = log.info + + client = _get_meilisearch_client() + status_cb("Checking index...") + with _index_rebuild_lock() as temp_index_name: + if _index_exists(temp_index_name): + status_cb("Temporary index already exists. Deleting it...") + _wait_for_meili_task(client.delete_index(temp_index_name)) + + status_cb("Creating new index...") + _wait_for_meili_task( + client.create_index(temp_index_name, {'primaryKey': 'id'}) + ) + new_index_created = client.get_index(temp_index_name).created_at + + yield temp_index_name + + if not _index_exists(STUDIO_INDEX_NAME): + # We have to create the "target" index before we can successfully swap the new one into it: + status_cb("Preparing to swap into index (first time)...") + _wait_for_meili_task(client.create_index(STUDIO_INDEX_NAME)) + status_cb("Swapping index...") + client.swap_indexes([{'indexes': [temp_index_name, STUDIO_INDEX_NAME]}]) + # If we're using an API key that's restricted to certain index prefix(es), we won't be able to get the status + # of this request unfortunately. https://github.com/meilisearch/meilisearch/issues/4103 + while True: + time.sleep(1) + if client.get_index(STUDIO_INDEX_NAME).created_at != new_index_created: + status_cb("Waiting for swap completion...") + else: + break + status_cb("Deleting old index...") + _wait_for_meili_task(client.delete_index(temp_index_name)) + + +def _recurse_children(block, fn, status_cb: Callable[[str], None] | None = None) -> None: + """ + Recurse the children of an XBlock and call the given function for each + + The main purpose of this is just to wrap the loading of each child in + try...except. Otherwise block.get_children() would do what we need. + """ + if block.has_children: + for child_id in block.children: + try: + child = block.get_child(child_id) + except Exception as err: # pylint: disable=broad-except + log.exception(err) + if status_cb is not None: + status_cb(f"Unable to load block {child_id}") + else: + fn(child) + + +def _update_index_docs(docs) -> None: + """ + Helper function that updates the documents in the search index + + If there is a rebuild in progress, the document will also be added to the new index. + """ + if not docs: + return + + client = _get_meilisearch_client() + current_rebuild_index_name = _get_running_rebuild_index_name() + + tasks = [] + if current_rebuild_index_name: + # If there is a rebuild in progress, the document will also be added to the new index. + tasks.append(client.index(current_rebuild_index_name).update_documents(docs)) + tasks.append(client.index(STUDIO_INDEX_NAME).update_documents(docs)) + + _wait_for_meili_tasks(tasks) + + +def only_if_meilisearch_enabled(f): + """ + Only call `f` if meilisearch is enabled + """ + @wraps(f) + def wrapper(*args, **kwargs): + """Wraps the decorated function.""" + if is_meilisearch_enabled(): + return f(*args, **kwargs) + return wrapper + + +def is_meilisearch_enabled() -> bool: + """ + Returns whether Meilisearch is enabled + """ + if hasattr(settings, "MEILISEARCH_ENABLED"): + return settings.MEILISEARCH_ENABLED + + return False + + +def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None: + """ + Rebuild the Meilisearch index from scratch + """ + if status_cb is None: + status_cb = log.info + + client = _get_meilisearch_client() + store = modulestore() + + # Get the lists of libraries + status_cb("Counting libraries...") + lib_keys = [lib.library_key for lib in lib_api.ContentLibrary.objects.select_related('org').only('org', 'slug')] + num_libraries = len(lib_keys) + + # Get the list of courses + status_cb("Counting courses...") + with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): + all_courses = store.get_courses() + num_courses = len(all_courses) + + # Some counters so we can track our progress as indexing progresses: + num_contexts = num_courses + num_libraries + num_contexts_done = 0 # How many courses/libraries we've indexed + num_blocks_done = 0 # How many individual components/XBlocks we've indexed + + status_cb(f"Found {num_courses} courses and {num_libraries} libraries.") + with _using_temp_index(status_cb) as temp_index_name: + ############## Configure the index ############## + + # The following index settings are best changed on an empty index. + # Changing them on a populated index will "re-index all documents in the index, which can take some time" + # and use more RAM. Instead, we configure an empty index then populate it one course/library at a time. + + # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique): + client.index(temp_index_name).update_distinct_attribute(Fields.usage_key) + # Mark which attributes can be used for filtering/faceted search: + client.index(temp_index_name).update_filterable_attributes([ + Fields.block_type, + Fields.context_key, + Fields.org, + Fields.tags, + Fields.tags + "." + Fields.tags_taxonomy, + Fields.tags + "." + Fields.tags_level0, + Fields.tags + "." + Fields.tags_level1, + Fields.tags + "." + Fields.tags_level2, + Fields.tags + "." + Fields.tags_level3, + Fields.type, + Fields.access_id, + ]) + # Mark which attributes are used for keyword search, in order of importance: + client.index(temp_index_name).update_searchable_attributes([ + Fields.display_name, + Fields.block_id, + Fields.content, + Fields.tags, + # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields. + ]) + + ############## Libraries ############## + status_cb("Indexing libraries...") + for lib_key in lib_keys: + status_cb(f"{num_contexts_done + 1}/{num_contexts}. Now indexing library {lib_key}") + try: + docs = [] + for component in lib_api.get_library_components(lib_key): + metadata = lib_api.LibraryXBlockMetadata.from_component(lib_key, component) + doc = {} + doc.update(searchable_doc_for_library_block(metadata)) + doc.update(searchable_doc_tags(metadata.usage_key)) + docs.append(doc) + + num_blocks_done += 1 + if docs: + # Add all the docs in this library at once (usually faster than adding one at a time): + _wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) + except Exception as err: # pylint: disable=broad-except + status_cb(f"Error indexing library {lib_key}: {err}") + finally: + num_contexts_done += 1 + + ############## Courses ############## + status_cb("Indexing courses...") + for course in all_courses: + status_cb( + f"{num_contexts_done + 1}/{num_contexts}. Now indexing course {course.display_name} ({course.id})" + ) + docs = [] + + # Pre-fetch the course with all of its children: + course = store.get_course(course.id, depth=None) + + def add_with_children(block): + """ Recursively index the given XBlock/component """ + doc = searchable_doc_for_course_block(block) + docs.append(doc) # pylint: disable=cell-var-from-loop + _recurse_children(block, add_with_children) # pylint: disable=cell-var-from-loop + + _recurse_children(course, add_with_children) + + if docs: + # Add all the docs in this course at once (usually faster than adding one at a time): + _wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) + num_contexts_done += 1 + num_blocks_done += len(docs) + + status_cb(f"Done! {num_blocks_done} blocks indexed across {num_contexts_done} courses and libraries.") + + +def upsert_xblock_index_doc(usage_key: UsageKey, recursive: bool = True) -> None: + """ + Creates or updates the document for the given XBlock in the search index + + + Args: + usage_key (UsageKey): The usage key of the XBlock to index + recursive (bool): If True, also index all children of the XBlock + """ + xblock = modulestore().get_item(usage_key) + + docs = [] + + def add_with_children(block): + """ Recursively index the given XBlock/component """ + doc = searchable_doc_for_course_block(block) + docs.append(doc) + if recursive: + _recurse_children(block, add_with_children) + + add_with_children(xblock) + + _update_index_docs(docs) + + +def delete_index_doc(usage_key: UsageKey) -> None: + """ + Deletes the document for the given XBlock from the search index + + Args: + usage_key (UsageKey): The usage key of the XBlock to be removed from the index + """ + current_rebuild_index_name = _get_running_rebuild_index_name() + + client = _get_meilisearch_client() + + tasks = [] + if current_rebuild_index_name: + # If there is a rebuild in progress, the document will also be deleted from the new index. + tasks.append(client.index(current_rebuild_index_name).delete_document(meili_id_from_opaque_key(usage_key))) + tasks.append(client.index(STUDIO_INDEX_NAME).delete_document(meili_id_from_opaque_key(usage_key))) + + _wait_for_meili_tasks(tasks) + + +def upsert_library_block_index_doc(usage_key: UsageKey) -> None: + """ + Creates or updates the document for the given Library Block in the search index + """ + + library_block = lib_api.get_component_from_usage_key(usage_key) + library_block_metadata = lib_api.LibraryXBlockMetadata.from_component(usage_key.context_key, library_block) + + docs = [ + searchable_doc_for_library_block(library_block_metadata) + ] + + _update_index_docs(docs) + + +def upsert_content_library_index_docs(library_key: LibraryLocatorV2) -> None: + """ + Creates or updates the documents for the given Content Library in the search index + """ + docs = [] + for component in lib_api.get_library_components(library_key): + metadata = lib_api.LibraryXBlockMetadata.from_component(library_key, component) + doc = searchable_doc_for_library_block(metadata) + docs.append(doc) + + _update_index_docs(docs) + + +def _get_user_orgs(request: Request) -> list[str]: + """ + Get the org.short_names for the organizations that the requesting user has OrgStaffRole or OrgInstructorRole. + + Note: org-level roles have course_id=None to distinguish them from course-level roles. + """ + course_roles = get_course_roles(request.user) + return list(set( + role.org + for role in course_roles + if role.course_id is None and role.role in ['staff', 'instructor'] + )) + + +def _get_meili_access_filter(request: Request) -> dict: + """ + Return meilisearch filter based on the requesting user's permissions. + """ + # Global staff can see anything, so no filters required. + if GlobalStaff().has_user(request.user): + return {} + + # Everyone else is limited to their org staff roles... + user_orgs = _get_user_orgs(request)[:MAX_ORGS_IN_FILTER] + + # ...or the N most recent courses and libraries they can access. + access_ids = get_access_ids_for_request(request, omit_orgs=user_orgs)[:MAX_ACCESS_IDS_IN_FILTER] + return { + "filter": f"org IN {user_orgs} OR access_id IN {access_ids}", + } + + +def generate_user_token_for_studio_search(request): + """ + Returns a Meilisearch API key that only allows the user to search content that they have permission to view + """ + expires_at = datetime.now(tz=timezone.utc) + timedelta(days=7) + + search_rules = { + STUDIO_INDEX_NAME: _get_meili_access_filter(request), + } + # Note: the following is just generating a JWT. It doesn't actually make an API call to Meilisearch. + restricted_api_key = _get_meilisearch_client().generate_tenant_token( + api_key_uid=_get_meili_api_key_uid(), + search_rules=search_rules, + expires_at=expires_at, + ) + + return { + "url": settings.MEILISEARCH_PUBLIC_URL, + "index_name": STUDIO_INDEX_NAME, + "api_key": restricted_api_key, + } diff --git a/openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst b/openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst index 1f355409da0e..f7430121605d 100644 --- a/openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst +++ b/openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst @@ -104,6 +104,8 @@ Decision new ``content/search`` Django app, so it's relatively easy to swap out later if this experiment doesn't pan out. 4. We will not use ``edx-search`` for the new search functionality. +5. For the experiment, we won't use Meilisearch during tests, but we expect to + add that in the future if we move forward with replacing Elasticsearch completely. Consequences diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index d32956c688d0..e74bfaac0381 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -2,11 +2,12 @@ Utilities related to indexing content for search """ from __future__ import annotations -from hashlib import blake2b + import logging +from hashlib import blake2b from django.utils.text import slugify -from opaque_keys.edx.keys import UsageKey, LearningContextKey +from opaque_keys.edx.keys import LearningContextKey, UsageKey from openedx.core.djangoapps.content.search.models import SearchAccess from openedx.core.djangoapps.content_libraries import api as lib_api @@ -14,7 +15,6 @@ from openedx.core.djangoapps.xblock import api as xblock_api log = logging.getLogger(__name__) -STUDIO_INDEX_NAME = "studio_content" class Fields: @@ -64,7 +64,7 @@ class DocType: library_block = "library_block" -def _meili_id_from_opaque_key(usage_key: UsageKey) -> str: +def meili_id_from_opaque_key(usage_key: UsageKey) -> str: """ Meilisearch requires each document to have a primary key that's either an integer or a string composed of alphanumeric characters (a-z A-Z 0-9), @@ -98,7 +98,6 @@ class implementation returns only: {"content": {"display_name": "..."}, "content_type": "..."} """ block_data = { - Fields.id: _meili_id_from_opaque_key(block.usage_key), Fields.usage_key: str(block.usage_key), Fields.block_id: str(block.usage_key.block_id), Fields.display_name: xblock_api.get_block_display_name(block), @@ -171,7 +170,7 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict: # Note that we could improve performance for indexing many components from the same library/course, # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the # tags for each component separately. - all_tags = tagging_api.get_object_tags(object_id).all() + all_tags = tagging_api.get_object_tags(str(object_id)).all() if not all_tags: return {} result = { @@ -207,23 +206,38 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict: return {Fields.tags: result} -def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> dict: +def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetadata) -> dict: """ Generate a dictionary document suitable for ingestion into a search engine like Meilisearch or Elasticsearch, so that the given library block can be found using faceted search. """ - library_name = lib_api.get_library(metadata.usage_key.context_key).title - doc = {} - try: - block = xblock_api.load_block(metadata.usage_key, user=None) - except Exception as err: # pylint: disable=broad-except - log.exception(f"Failed to load XBlock {metadata.usage_key}: {err}") + library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title + block = xblock_api.load_block(xblock_metadata.usage_key, user=None) + + doc = { + Fields.id: meili_id_from_opaque_key(xblock_metadata.usage_key), + Fields.type: DocType.library_block, + } + doc.update(_fields_from_block(block)) - doc.update(_tags_for_content_object(metadata.usage_key)) - doc[Fields.type] = DocType.library_block + # Add the breadcrumbs. In v2 libraries, the library itself is not a "parent" of the XBlocks so we add it here: doc[Fields.breadcrumbs] = [{"display_name": library_name}] + + return doc + + +def searchable_doc_tags(usage_key: UsageKey) -> dict: + """ + Generate a dictionary document suitable for ingestion into a search engine + like Meilisearch or Elasticsearch, with the tags data for the given content object. + """ + doc = { + Fields.id: meili_id_from_opaque_key(usage_key), + } + doc.update(_tags_for_content_object(usage_key)) + return doc @@ -233,7 +247,11 @@ def searchable_doc_for_course_block(block) -> dict: like Meilisearch or Elasticsearch, so that the given course block can be found using faceted search. """ - doc = _fields_from_block(block) - doc.update(_tags_for_content_object(block.usage_key)) - doc[Fields.type] = DocType.course_block + doc = { + Fields.id: meili_id_from_opaque_key(block.usage_key), + Fields.type: DocType.course_block, + } + + doc.update(_fields_from_block(block)) + return doc diff --git a/openedx/core/djangoapps/content/search/handlers.py b/openedx/core/djangoapps/content/search/handlers.py index 9184b2b2e273..d782d4a07040 100644 --- a/openedx/core/djangoapps/content/search/handlers.py +++ b/openedx/core/djangoapps/content/search/handlers.py @@ -1,14 +1,36 @@ """ Signal/event handlers for content search """ + +import logging + from django.db.models.signals import post_delete from django.dispatch import receiver -from openedx_events.content_authoring.data import ContentLibraryData -from openedx_events.content_authoring.signals import CONTENT_LIBRARY_DELETED +from openedx_events.content_authoring.data import ContentLibraryData, LibraryBlockData, XBlockData +from openedx_events.content_authoring.signals import ( + CONTENT_LIBRARY_DELETED, + CONTENT_LIBRARY_UPDATED, + LIBRARY_BLOCK_CREATED, + LIBRARY_BLOCK_DELETED, + XBLOCK_CREATED, + XBLOCK_DELETED, + XBLOCK_UPDATED +) from openedx.core.djangoapps.content.course_overviews.models import CourseOverview from openedx.core.djangoapps.content.search.models import SearchAccess +from .api import only_if_meilisearch_enabled +from .tasks import ( + delete_library_block_index_doc, + delete_xblock_index_doc, + update_content_library_index_docs, + upsert_library_block_index_doc, + upsert_xblock_index_doc +) + +log = logging.getLogger(__name__) + # Using post_delete here because there is no COURSE_DELETED event defined. @receiver(post_delete, sender=CourseOverview) @@ -21,3 +43,93 @@ def delete_course_search_access(sender, instance, **kwargs): # pylint: disable= def delete_library_search_access(content_library: ContentLibraryData, **kwargs): """Deletes the SearchAccess instance for deleted content libraries""" SearchAccess.objects.filter(context_key=content_library.library_key).delete() + + +@receiver(XBLOCK_CREATED) +@only_if_meilisearch_enabled +def xblock_created_handler(**kwargs) -> None: + """ + Create the index for the XBlock + """ + xblock_info = kwargs.get("xblock_info", None) + if not xblock_info or not isinstance(xblock_info, XBlockData): # pragma: no cover + log.error("Received null or incorrect data for event") + return + + upsert_xblock_index_doc.delay( + str(xblock_info.usage_key), + recursive=False, + ) + + +@receiver(XBLOCK_UPDATED) +@only_if_meilisearch_enabled +def xblock_updated_handler(**kwargs) -> None: + """ + Update the index for the XBlock and its children + """ + xblock_info = kwargs.get("xblock_info", None) + if not xblock_info or not isinstance(xblock_info, XBlockData): # pragma: no cover + log.error("Received null or incorrect data for event") + return + + upsert_xblock_index_doc.delay( + str(xblock_info.usage_key), + recursive=True, # Update all children because the breadcrumb may have changed + ) + + +@receiver(XBLOCK_DELETED) +@only_if_meilisearch_enabled +def xblock_deleted_handler(**kwargs) -> None: + """ + Delete the index for the XBlock + """ + xblock_info = kwargs.get("xblock_info", None) + if not xblock_info or not isinstance(xblock_info, XBlockData): # pragma: no cover + log.error("Received null or incorrect data for event") + return + + delete_xblock_index_doc.delay(str(xblock_info.usage_key)) + + +@receiver(LIBRARY_BLOCK_CREATED) +@only_if_meilisearch_enabled +def library_block_updated_handler(**kwargs) -> None: + """ + Create or update the index for the content library block + """ + library_block_data = kwargs.get("library_block", None) + if not library_block_data or not isinstance(library_block_data, LibraryBlockData): # pragma: no cover + log.error("Received null or incorrect data for event") + return + + upsert_library_block_index_doc.delay(str(library_block_data.usage_key)) + + +@receiver(LIBRARY_BLOCK_DELETED) +@only_if_meilisearch_enabled +def library_block_deleted(**kwargs) -> None: + """ + Delete the index for the content library block + """ + library_block_data = kwargs.get("library_block", None) + if not library_block_data or not isinstance(library_block_data, LibraryBlockData): # pragma: no cover + log.error("Received null or incorrect data for event") + return + + delete_library_block_index_doc.delay(str(library_block_data.usage_key)) + + +@receiver(CONTENT_LIBRARY_UPDATED) +@only_if_meilisearch_enabled +def content_library_updated_handler(**kwargs) -> None: + """ + Update the index for the content library + """ + content_library_data = kwargs.get("content_library", None) + if not content_library_data or not isinstance(content_library_data, ContentLibraryData): # pragma: no cover + log.error("Received null or incorrect data for event") + return + + update_content_library_index_docs.delay(str(content_library_data.library_key)) diff --git a/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py b/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py deleted file mode 100644 index 18c7681879cb..000000000000 --- a/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py +++ /dev/null @@ -1,105 +0,0 @@ -""" -Mixin for Django management commands that interact with Meilisearch -""" -from contextlib import contextmanager -import time - -from django.conf import settings -from django.core.management import CommandError -import meilisearch -from meilisearch.errors import MeilisearchError -from meilisearch.models.task import TaskInfo - - -class MeiliCommandMixin: - """ - Mixin for Django management commands that interact with Meilisearch - """ - def get_meilisearch_client(self): - """ - Get the Meiliesearch client - """ - if hasattr(self, "_meili_client"): - return self._meili_client - # Connect to Meilisearch - if not settings.MEILISEARCH_ENABLED: - raise CommandError("MEILISEARCH_ENABLED is not set - search functionality disabled.") - - self._meili_client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) - try: - self._meili_client.health() - except MeilisearchError as err: - self.stderr.write(err.message) # print this because 'raise...from...' doesn't print the details - raise CommandError("Unable to connect to Meilisearch") from err - return self._meili_client - - def wait_for_meili_task(self, info: TaskInfo): - """ - Simple helper method to wait for a Meilisearch task to complete - """ - client = self.get_meilisearch_client() - current_status = client.get_task(info.task_uid) - while current_status.status in ("enqueued", "processing"): - self.stdout.write("...") - time.sleep(1) - current_status = client.get_task(info.task_uid) - if current_status.status != "succeeded": - self.stderr.write(f"Task has status: {current_status.status}") - self.stderr.write(str(current_status.error)) - try: - err_reason = current_status.error['message'] - except (TypeError, KeyError): - err_reason = "Unknown error" - raise MeilisearchError(err_reason) - - def index_exists(self, index_name: str) -> bool: - """ - Check if an index exists - """ - client = self.get_meilisearch_client() - try: - client.get_index(index_name) - except MeilisearchError as err: - if err.code == "index_not_found": - return False - else: - raise err - return True - - @contextmanager - def using_temp_index(self, target_index): - """ - Create a new temporary Meilisearch index, populate it, then swap it to - become the active index. - """ - client = self.get_meilisearch_client() - self.stdout.write("Checking index...") - temp_index_name = target_index + "_new" - if self.index_exists(temp_index_name): - self.stdout.write("Temporary index already exists. Deleting it...") - self.wait_for_meili_task(client.delete_index(temp_index_name)) - - self.stdout.write("Creating new index...") - self.wait_for_meili_task( - client.create_index(temp_index_name, {'primaryKey': 'id'}) - ) - new_index_created = client.get_index(temp_index_name).created_at - - yield temp_index_name - - if not self.index_exists(target_index): - # We have to create the "target" index before we can successfully swap the new one into it: - self.stdout.write("Preparing to swap into index (first time)...") - self.wait_for_meili_task(client.create_index(target_index)) - self.stdout.write("Swapping index...") - client.swap_indexes([{'indexes': [temp_index_name, target_index]}]) - # If we're using an API key that's restricted to certain index prefix(es), we won't be able to get the status - # of this request unfortunately. https://github.com/meilisearch/meilisearch/issues/4103 - while True: - time.sleep(1) - if client.get_index(target_index).created_at != new_index_created: - self.stdout.write("Waiting for swap completion...") - else: - break - self.stdout.write("Deleting old index...") - self.wait_for_meili_task(client.delete_index(temp_index_name)) diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index 8d39ef3746eb..3767ebcba6c9 100644 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -5,28 +5,12 @@ See also cms/djangoapps/contentstore/management/commands/reindex_course.py which indexes LMS (published) courses in ElasticSearch. """ -import logging -import time - -from django.conf import settings from django.core.management import BaseCommand, CommandError -from openedx.core.djangoapps.content_libraries import api as lib_api -from openedx.core.djangoapps.content.search.documents import ( - Fields, - searchable_doc_for_course_block, - searchable_doc_for_library_block, - STUDIO_INDEX_NAME, -) -from xmodule.modulestore import ModuleStoreEnum -from xmodule.modulestore.django import modulestore -from .meili_mixin import MeiliCommandMixin - - -log = logging.getLogger(__name__) +from ... import api -class Command(MeiliCommandMixin, BaseCommand): +class Command(BaseCommand): """ Build or re-build the search index for courses and libraries (in Studio, i.e. Draft mode) @@ -41,120 +25,13 @@ def handle(self, *args, **options): """ Build a new search index for Studio, containing content from courses and libraries """ + if not api.is_meilisearch_enabled(): + raise CommandError("Meilisearch is not enabled. Please set MEILISEARCH_ENABLED to True in your settings.") + if not options["experimental"]: raise CommandError( "This command is experimental and not recommended for production. " "Use the --experimental argument to acknowledge and run it." ) - start_time = time.perf_counter() - client = self.get_meilisearch_client() - store = modulestore() - - # Get the lists of libraries - self.stdout.write("Counting libraries...") - lib_keys = [lib.library_key for lib in lib_api.ContentLibrary.objects.select_related('org').only('org', 'slug')] - num_libraries = len(lib_keys) - - # Get the list of courses - self.stdout.write("Counting courses...") - with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): - all_courses = store.get_courses() - num_courses = len(all_courses) - - # Some counters so we can track our progress as indexing progresses: - num_contexts = num_courses + num_libraries - num_contexts_done = 0 # How many courses/libraries we've indexed - num_blocks_done = 0 # How many individual components/XBlocks we've indexed - - self.stdout.write(f"Found {num_courses} courses and {num_libraries} libraries.") - index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME - with self.using_temp_index(index_name) as temp_index_name: - ############## Configure the index ############## - - # The following index settings are best changed on an empty index. - # Changing them on a populated index will "re-index all documents in the index, which can take some time" - # and use more RAM. Instead, we configure an empty index then populate it one course/library at a time. - - # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique): - client.index(temp_index_name).update_distinct_attribute(Fields.usage_key) - # Mark which attributes can be used for filtering/faceted search: - client.index(temp_index_name).update_filterable_attributes([ - Fields.block_type, - Fields.context_key, - Fields.org, - Fields.tags, - Fields.type, - Fields.access_id, - ]) - # Mark which attributes are used for keyword search, in order of importance: - client.index(temp_index_name).update_searchable_attributes([ - Fields.display_name, - Fields.block_id, - Fields.content, - Fields.tags, - # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields. - ]) - ############## Libraries ############## - self.stdout.write("Indexing libraries...") - for lib_key in lib_keys: - self.stdout.write(f"{num_contexts_done + 1}/{num_contexts}. Now indexing library {lib_key}") - docs = [] - for component in lib_api.get_library_components(lib_key): - metadata = lib_api.LibraryXBlockMetadata.from_component(lib_key, component) - doc = searchable_doc_for_library_block(metadata) - docs.append(doc) - num_blocks_done += 1 - if docs: - # Add all the docs in this library at once (usually faster than adding one at a time): - self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) - self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) - num_contexts_done += 1 - - ############## Courses ############## - self.stdout.write("Indexing courses...") - for course in all_courses: - self.stdout.write( - f"{num_contexts_done + 1}/{num_contexts}. Now indexing course {course.display_name} ({course.id})" - ) - docs = [] - - # Pre-fetch the course with all of its children: - course = store.get_course(course.id, depth=None) - - def add_with_children(block): - """ Recursively index the given XBlock/component """ - doc = searchable_doc_for_course_block(block) - docs.append(doc) # pylint: disable=cell-var-from-loop - self.recurse_children(block, add_with_children) # pylint: disable=cell-var-from-loop - - self.recurse_children(course, add_with_children) - - if docs: - # Add all the docs in this course at once (usually faster than adding one at a time): - self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) - num_contexts_done += 1 - num_blocks_done += len(docs) - - elapsed_time = time.perf_counter() - start_time - self.stdout.write( - f"Done! {num_blocks_done} blocks indexed across {num_contexts_done} courses " - f"and libraries in {elapsed_time:.0f}s." - ) - - def recurse_children(self, block, fn): - """ - Recurse the children of an XBlock and call the given function for each - - The main purpose of this is just to wrap the loading of each child in - try...except. Otherwise block.get_children() would do what we need. - """ - if block.has_children: - for child_id in block.children: - try: - child = block.get_child(child_id) - except Exception as err: # pylint: disable=broad-except - log.exception(err) - self.stderr.write(f"Unable to load block {child_id}") - else: - fn(child) + api.rebuild_index(self.stdout.write) diff --git a/openedx/core/djangoapps/content/search/tasks.py b/openedx/core/djangoapps/content/search/tasks.py new file mode 100644 index 000000000000..06ea3e777c61 --- /dev/null +++ b/openedx/core/djangoapps/content/search/tasks.py @@ -0,0 +1,83 @@ +""" +Defines asynchronous celery task for content indexing +""" + +from __future__ import annotations + +import logging + +from celery import shared_task +from celery_utils.logged_task import LoggedTask +from edx_django_utils.monitoring import set_code_owner_attribute +from opaque_keys.edx.keys import UsageKey +from opaque_keys.edx.locator import LibraryLocatorV2, LibraryUsageLocatorV2 +from meilisearch.errors import MeilisearchError + +from . import api + +log = logging.getLogger(__name__) + + +@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError)) +@set_code_owner_attribute +def upsert_xblock_index_doc(usage_key_str: str, recursive: bool) -> None: + """ + Celery task to update the content index document for an XBlock + """ + usage_key = UsageKey.from_string(usage_key_str) + + log.info("Updating content index document for XBlock with id: %s", usage_key) + + api.upsert_xblock_index_doc(usage_key, recursive) + + +@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError)) +@set_code_owner_attribute +def delete_xblock_index_doc(usage_key_str: str) -> None: + """ + Celery task to delete the content index document for an XBlock + """ + usage_key = UsageKey.from_string(usage_key_str) + + log.info("Updating content index document for XBlock with id: %s", usage_key) + + api.delete_index_doc(usage_key) + + +@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError)) +@set_code_owner_attribute +def upsert_library_block_index_doc(usage_key_str: str) -> None: + """ + Celery task to update the content index document for a library block + """ + usage_key = LibraryUsageLocatorV2.from_string(usage_key_str) + + log.info("Updating content index document for library block with id: %s", usage_key) + + api.upsert_library_block_index_doc(usage_key) + + +@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError)) +@set_code_owner_attribute +def delete_library_block_index_doc(usage_key_str: str) -> None: + """ + Celery task to delete the content index document for a library block + """ + usage_key = LibraryUsageLocatorV2.from_string(usage_key_str) + + log.info("Deleting content index document for library block with id: %s", usage_key) + + api.delete_index_doc(usage_key) + + +@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError)) +@set_code_owner_attribute +def update_content_library_index_docs(library_key_str: str) -> None: + """ + Celery task to update the content index documents for all library blocks in a library + """ + library_key = LibraryLocatorV2.from_string(library_key_str) + + log.info("Updating content index documents for library with id: %s", library_key) + + api.upsert_content_library_index_docs(library_key) diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py new file mode 100644 index 000000000000..43e1eb20e68e --- /dev/null +++ b/openedx/core/djangoapps/content/search/tests/test_api.py @@ -0,0 +1,195 @@ +""" +Tests for the Studio content search API. +""" +from __future__ import annotations + +from unittest.mock import MagicMock, call, patch + +import ddt +from django.test import override_settings +from organizations.tests.factories import OrganizationFactory + +from common.djangoapps.student.tests.factories import UserFactory +from openedx.core.djangoapps.content_libraries import api as library_api +from openedx.core.djangolib.testing.utils import skip_unless_cms +from xmodule.modulestore.tests.django_utils import TEST_DATA_SPLIT_MODULESTORE, ModuleStoreTestCase + + +try: + # This import errors in the lms because content.search is not an installed app there. + from .. import api + from ..models import SearchAccess +except RuntimeError: + SearchAccess = {} + +STUDIO_SEARCH_ENDPOINT_URL = "/api/content_search/v2/studio/" + + +@ddt.ddt +@skip_unless_cms +@patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", new=MagicMock(return_value=None)) +@patch("openedx.core.djangoapps.content.search.api.MeilisearchClient") +class TestSearchApi(ModuleStoreTestCase): + """ + Tests for the Studio content search and index API. + """ + + MODULESTORE = TEST_DATA_SPLIT_MODULESTORE + + def setUp(self): + super().setUp() + self.user = UserFactory.create() + self.user_id = self.user.id + + self.modulestore_patcher = patch( + "openedx.core.djangoapps.content.search.api.modulestore", return_value=self.store + ) + self.addCleanup(self.modulestore_patcher.stop) + self.modulestore_patcher.start() + + # Clear the Meilisearch client to avoid side effects from other tests + api.clear_meilisearch_client() + + # Create course + self.course = self.store.create_course( + "org1", + "test_course", + "test_run", + self.user_id, + fields={"display_name": "Test Course"}, + ) + course_access, _ = SearchAccess.objects.get_or_create(context_key=self.course.id) + + # Create XBlocks + self.sequential = self.store.create_child(self.user_id, self.course.location, "sequential", "test_sequential") + self.doc_sequential = { + 'id': 'block-v1org1test_coursetest_runtypesequentialblocktest_sequential-f702c144', + 'type': 'course_block', + 'usage_key': 'block-v1:org1+test_course+test_run+type@sequential+block@test_sequential', + 'block_id': 'test_sequential', + 'display_name': 'sequential', + 'block_type': 'sequential', + 'context_key': 'course-v1:org1+test_course+test_run', + 'org': 'org1', + 'breadcrumbs': [{'display_name': 'Test Course'}], + 'content': {}, + 'access_id': course_access.id, + } + self.store.create_child(self.user_id, self.sequential.location, "vertical", "test_vertical") + self.doc_vertical = { + 'id': 'block-v1org1test_coursetest_runtypeverticalblocktest_vertical-e76a10a4', + 'type': 'course_block', + 'usage_key': 'block-v1:org1+test_course+test_run+type@vertical+block@test_vertical', + 'block_id': 'test_vertical', + 'display_name': 'vertical', + 'block_type': 'vertical', + 'context_key': 'course-v1:org1+test_course+test_run', + 'org': 'org1', + 'breadcrumbs': [ + {'display_name': 'Test Course'}, + {'display_name': 'sequential'} + ], + 'content': {}, + 'access_id': course_access.id, + } + + # Create a content library: + self.library = library_api.create_library( + library_type=library_api.COMPLEX, + org=OrganizationFactory.create(short_name="org1"), + slug="lib", + title="Library", + ) + lib_access, _ = SearchAccess.objects.get_or_create(context_key=self.library.key) + # Populate it with a problem: + self.problem = library_api.create_library_block(self.library.key, "problem", "p1") + self.doc_problem = { + "id": "lborg1libproblemp1-a698218e", + "usage_key": "lb:org1:lib:problem:p1", + "block_id": "p1", + "display_name": "Blank Problem", + "block_type": "problem", + "context_key": "lib:org1:lib", + "org": "org1", + "breadcrumbs": [{"display_name": "Library"}], + "content": {"problem_types": [], "capa_content": " "}, + "type": "library_block", + "access_id": lib_access.id, + } + + @override_settings(MEILISEARCH_ENABLED=False) + def test_reindex_meilisearch_disabled(self, mock_meilisearch): + with self.assertRaises(RuntimeError): + api.rebuild_index() + + mock_meilisearch.return_value.swap_indexes.assert_not_called() + + @override_settings(MEILISEARCH_ENABLED=True) + def test_reindex_meilisearch(self, mock_meilisearch): + + api.rebuild_index() + mock_meilisearch.return_value.index.return_value.add_documents.assert_has_calls( + [ + call([self.doc_sequential, self.doc_vertical]), + call([self.doc_problem]), + ], + any_order=True, + ) + + @ddt.data( + True, + False + ) + @override_settings(MEILISEARCH_ENABLED=True) + def test_index_xblock_metadata(self, recursive, mock_meilisearch): + """ + Test indexing an XBlock. + """ + api.upsert_xblock_index_doc(self.sequential.usage_key, recursive=recursive) + + if recursive: + expected_docs = [self.doc_sequential, self.doc_vertical] + else: + expected_docs = [self.doc_sequential] + + mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with(expected_docs) + + @override_settings(MEILISEARCH_ENABLED=True) + def test_delete_index_xblock(self, mock_meilisearch): + """ + Test deleting an XBlock doc from the index. + """ + api.delete_index_doc(self.sequential.usage_key) + + mock_meilisearch.return_value.index.return_value.delete_document.assert_called_once_with( + self.doc_sequential['id'] + ) + + @override_settings(MEILISEARCH_ENABLED=True) + def test_index_library_block_metadata(self, mock_meilisearch): + """ + Test indexing a Library Block. + """ + api.upsert_library_block_index_doc(self.problem.usage_key) + + mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with([self.doc_problem]) + + @override_settings(MEILISEARCH_ENABLED=True) + def test_delete_index_library_block(self, mock_meilisearch): + """ + Test deleting a Library Block doc from the index. + """ + api.delete_index_doc(self.problem.usage_key) + + mock_meilisearch.return_value.index.return_value.delete_document.assert_called_once_with( + self.doc_problem['id'] + ) + + @override_settings(MEILISEARCH_ENABLED=True) + def test_index_content_library_metadata(self, mock_meilisearch): + """ + Test indexing a whole content library. + """ + api.upsert_content_library_index_docs(self.library.key) + + mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with([self.doc_problem]) diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py index 914cae880a41..79efda11177b 100644 --- a/openedx/core/djangoapps/content/search/tests/test_documents.py +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -2,6 +2,7 @@ Tests for the Studio content search documents (what gets stored in the index) """ from organizations.models import Organization + from openedx.core.djangoapps.content_tagging import api as tagging_api from openedx.core.djangolib.testing.utils import skip_unless_cms from xmodule.modulestore.django import modulestore @@ -10,7 +11,7 @@ try: # This import errors in the lms because content.search is not an installed app there. - from ..documents import searchable_doc_for_course_block + from ..documents import searchable_doc_for_course_block, searchable_doc_tags from ..models import SearchAccess except RuntimeError: searchable_doc_for_course_block = lambda x: x @@ -78,7 +79,10 @@ def test_problem_block(self): Test how a problem block gets represented in the search index """ block = self.store.get_item(self.problem_block.usage_key) - doc = searchable_doc_for_course_block(block) + doc = {} + doc.update(searchable_doc_for_course_block(block)) + doc.update(searchable_doc_tags(block.usage_key)) + assert doc == { # Note the 'id' has been stripped of special characters to meet Meilisearch requirements. # The '-8516ed8' suffix is deterministic based on the original usage key. @@ -115,7 +119,9 @@ def test_html_block(self): Test how an HTML block gets represented in the search index """ block = self.store.get_item(self.html_block_key) - doc = searchable_doc_for_course_block(block) + doc = {} + doc.update(searchable_doc_for_course_block(block)) + doc.update(searchable_doc_tags(block.usage_key)) assert doc == { "id": "block-v1edxtoy2012_falltypehtmlblocktoyjumpto-efb9c601", "type": "course_block", diff --git a/openedx/core/djangoapps/content/search/tests/test_handlers.py b/openedx/core/djangoapps/content/search/tests/test_handlers.py new file mode 100644 index 000000000000..1a1134547666 --- /dev/null +++ b/openedx/core/djangoapps/content/search/tests/test_handlers.py @@ -0,0 +1,156 @@ +""" +Tests for the search index update handlers +""" +from unittest.mock import MagicMock, patch + +from django.test import LiveServerTestCase, override_settings +from organizations.tests.factories import OrganizationFactory + +from common.djangoapps.student.tests.factories import UserFactory +from openedx.core.djangoapps.content_libraries import api as library_api +from openedx.core.djangolib.testing.utils import skip_unless_cms +from openedx.core.lib.blockstore_api.tests.base import BlockstoreAppTestMixin +from xmodule.modulestore.tests.django_utils import TEST_DATA_SPLIT_MODULESTORE, ModuleStoreTestCase + + +try: + # This import errors in the lms because content.search is not an installed app there. + from .. import api + from ..models import SearchAccess +except RuntimeError: + SearchAccess = {} + + +@patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", new=MagicMock(return_value=None)) +@patch("openedx.core.djangoapps.content.search.api.MeilisearchClient") +@override_settings(MEILISEARCH_ENABLED=True) +@skip_unless_cms +class TestUpdateIndexHandlers( + ModuleStoreTestCase, + BlockstoreAppTestMixin, + LiveServerTestCase, +): + """ + Test that the search index is updated when XBlocks and Library Blocks are modified + """ + + MODULESTORE = TEST_DATA_SPLIT_MODULESTORE + + def setUp(self): + super().setUp() + # Create user + self.user = UserFactory.create() + self.user_id = self.user.id + + self.orgA = OrganizationFactory.create(short_name="orgA") + + self.patcher = patch("openedx.core.djangoapps.content_tagging.tasks.modulestore", return_value=self.store) + self.addCleanup(self.patcher.stop) + self.patcher.start() + + api.clear_meilisearch_client() # Clear the Meilisearch client to avoid leaking state from other tests + + def test_create_delete_xblock(self, meilisearch_client): + # Create course + course = self.store.create_course( + self.orgA.short_name, + "test_course", + "test_run", + self.user_id, + fields={"display_name": "Test Course"}, + ) + course_access, _ = SearchAccess.objects.get_or_create(context_key=course.id) + + # Create XBlocks + sequential = self.store.create_child(self.user_id, course.location, "sequential", "test_sequential") + doc_sequential = { + "id": "block-v1orgatest_coursetest_runtypesequentialblocktest_sequential-0cdb9395", + "type": "course_block", + "usage_key": "block-v1:orgA+test_course+test_run+type@sequential+block@test_sequential", + "block_id": "test_sequential", + "display_name": "sequential", + "block_type": "sequential", + "context_key": "course-v1:orgA+test_course+test_run", + "org": "orgA", + "breadcrumbs": [{"display_name": "Test Course"}], "content": {}, + "access_id": course_access.id, + + } + meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_sequential]) + vertical = self.store.create_child(self.user_id, sequential.location, "vertical", "test_vertical") + doc_vertical = { + "id": "block-v1orgatest_coursetest_runtypeverticalblocktest_vertical-011f143b", + "type": "course_block", + "usage_key": "block-v1:orgA+test_course+test_run+type@vertical+block@test_vertical", + "block_id": "test_vertical", + "display_name": "vertical", + "block_type": "vertical", + "context_key": "course-v1:orgA+test_course+test_run", + "org": "orgA", + "breadcrumbs": [{"display_name": "Test Course"}, {"display_name": "sequential"}], + "content": {}, + "access_id": course_access.id, + } + + meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_vertical]) + + # Update the XBlock + sequential = self.store.get_item(sequential.location, self.user_id) # Refresh the XBlock + sequential.display_name = "Updated Sequential" + self.store.update_item(sequential, self.user_id) + + # The display name and the child's breadcrumbs should be updated + doc_sequential["display_name"] = "Updated Sequential" + doc_vertical["breadcrumbs"][1]["display_name"] = "Updated Sequential" + meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([ + doc_sequential, + doc_vertical, + ]) + + # Delete the XBlock + self.store.delete_item(vertical.location, self.user_id) + + meilisearch_client.return_value.index.return_value.delete_document.assert_called_with( + "block-v1orgatest_coursetest_runtypeverticalblocktest_vertical-011f143b" + ) + + def test_create_delete_library_block(self, meilisearch_client): + # Create library + library = library_api.create_library( + org=self.orgA, + slug="lib_a", + title="Library Org A", + description="This is a library from Org A", + ) + lib_access, _ = SearchAccess.objects.get_or_create(context_key=library.key) + + problem = library_api.create_library_block(library.key, "problem", "Problem1") + doc_problem = { + "id": "lborgalib_aproblemproblem1-ca3186e9", + "type": "library_block", + "usage_key": "lb:orgA:lib_a:problem:Problem1", + "block_id": "Problem1", + "display_name": "Blank Problem", + "block_type": "problem", + "context_key": "lib:orgA:lib_a", + "org": "orgA", + "breadcrumbs": [{"display_name": "Library Org A"}], + "content": {"problem_types": [], "capa_content": " "}, + "access_id": lib_access.id, + } + + meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem]) + + # Rename the content library + library_api.update_library(library.key, title="Updated Library Org A") + + # The breadcrumbs should be updated + doc_problem["breadcrumbs"][0]["display_name"] = "Updated Library Org A" + meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem]) + + # Delete the Library Block + library_api.delete_library_block(problem.usage_key) + + meilisearch_client.return_value.index.return_value.delete_document.assert_called_with( + "lborgalib_aproblemproblem1-ca3186e9" + ) diff --git a/openedx/core/djangoapps/content/search/tests/test_views.py b/openedx/core/djangoapps/content/search/tests/test_views.py index 853f5b5149a6..7c1b8e99850a 100644 --- a/openedx/core/djangoapps/content/search/tests/test_views.py +++ b/openedx/core/djangoapps/content/search/tests/test_views.py @@ -1,8 +1,10 @@ """ Tests for the Studio content search REST API. """ +from __future__ import annotations + import functools -from unittest import mock +from unittest.mock import ANY, MagicMock, Mock, patch import ddt from django.test import override_settings @@ -17,7 +19,8 @@ try: # This import errors in the lms because content.search is not an installed app there. - from openedx.core.djangoapps.content.search.models import SearchAccess + from .. import api + from ..models import SearchAccess except RuntimeError: SearchAccess = {} @@ -28,7 +31,7 @@ def mock_meilisearch(enabled=True): """ - Decorator that mocks the required parts of content.search.views to simulate a running Meilisearch instance. + Decorator that mocks the required parts of content.search.api to simulate a running Meilisearch instance. """ def decorator(func): """ @@ -40,17 +43,19 @@ def wrapper(*args, **kwargs): MEILISEARCH_ENABLED=enabled, MEILISEARCH_PUBLIC_URL="http://meilisearch.url", ): - with mock.patch( - 'openedx.core.djangoapps.content.search.views._get_meili_api_key_uid', + with patch( + 'openedx.core.djangoapps.content.search.api._get_meili_api_key_uid', return_value=MOCK_API_KEY_UID, ): return func(*args, **kwargs) + return wrapper return decorator @ddt.ddt @skip_unless_cms +@patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", new=MagicMock(return_value=None)) class StudioSearchViewTest(StudioSearchTestMixin, SharedModuleStoreTestCase): """ General tests for the Studio search REST API. @@ -59,6 +64,9 @@ def setUp(self): super().setUp() self.client = APIClient() + # Clear the Meilisearch client to avoid side effects from other tests + api.clear_meilisearch_client() + @mock_meilisearch(enabled=False) def test_studio_search_unathenticated_disabled(self): """ @@ -84,31 +92,33 @@ def test_studio_search_disabled(self): result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) assert result.status_code == 404 + def _mock_generate_tenant_token(self, mock_search_client): + """ + Return a mocked meilisearch.Client.generate_tenant_token method. + """ + mock_generate_tenant_token = Mock(return_value='restricted_api_key') + mock_search_client.return_value = Mock( + generate_tenant_token=mock_generate_tenant_token, + ) + return mock_generate_tenant_token + @mock_meilisearch(enabled=True) - def test_studio_search_enabled(self): + @patch('openedx.core.djangoapps.content.search.api.MeilisearchClient') + def test_studio_search_enabled(self, mock_search_client): """ We've implement fine-grained permissions on the meilisearch content, so any logged-in user can get a restricted API key for Meilisearch using the REST API. """ self.client.login(username='student', password='student_pass') + mock_generate_tenant_token = self._mock_generate_tenant_token(mock_search_client) result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) assert result.status_code == 200 assert result.data["index_name"] == "studio_content" assert result.data["url"] == "http://meilisearch.url" assert result.data["api_key"] and isinstance(result.data["api_key"], str) - def _mock_generate_tenant_token(self, mock_search_client): - """ - Return a mocked meilisearch.Client.generate_tenant_token method. - """ - mock_generate_tenant_token = mock.Mock(return_value='restricted_api_key') - mock_search_client.return_value = mock.Mock( - generate_tenant_token=mock_generate_tenant_token, - ) - return mock_generate_tenant_token - @mock_meilisearch(enabled=True) - @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + @patch('openedx.core.djangoapps.content.search.api.MeilisearchClient') def test_studio_search_student_no_access(self, mock_search_client): """ Users without access to any courses or libraries will have all documents filtered out. @@ -124,11 +134,11 @@ def test_studio_search_student_no_access(self, mock_search_client): "filter": "org IN [] OR access_id IN []", } }, - expires_at=mock.ANY, + expires_at=ANY, ) @mock_meilisearch(enabled=True) - @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + @patch('openedx.core.djangoapps.content.search.api.MeilisearchClient') def test_studio_search_staff(self, mock_search_client): """ Users with global staff access can search any document. @@ -142,11 +152,11 @@ def test_studio_search_staff(self, mock_search_client): search_rules={ "studio_content": {} }, - expires_at=mock.ANY, + expires_at=ANY, ) @mock_meilisearch(enabled=True) - @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + @patch('openedx.core.djangoapps.content.search.api.MeilisearchClient') def test_studio_search_course_staff_access(self, mock_search_client): """ Users with staff or instructor access to a course or library will be limited to these courses/libraries. @@ -168,7 +178,7 @@ def test_studio_search_course_staff_access(self, mock_search_client): "filter": f"org IN [] OR access_id IN {expected_access_ids}", } }, - expires_at=mock.ANY, + expires_at=ANY, ) @ddt.data( @@ -176,7 +186,7 @@ def test_studio_search_course_staff_access(self, mock_search_client): 'org_instr', ) @mock_meilisearch(enabled=True) - @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + @patch('openedx.core.djangoapps.content.search.api.MeilisearchClient') def test_studio_search_org_access(self, username, mock_search_client): """ Users with org access to any courses or libraries will use the org filter. @@ -192,11 +202,11 @@ def test_studio_search_org_access(self, username, mock_search_client): "filter": "org IN ['org1'] OR access_id IN []", } }, - expires_at=mock.ANY, + expires_at=ANY, ) @mock_meilisearch(enabled=True) - @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + @patch('openedx.core.djangoapps.content.search.api.MeilisearchClient') def test_studio_search_omit_orgs(self, mock_search_client): """ Grant org access to our staff user to ensure that org's access_ids are omitted from the search filter. @@ -219,13 +229,13 @@ def test_studio_search_omit_orgs(self, mock_search_client): "filter": f"org IN ['org1'] OR access_id IN {expected_access_ids}", } }, - expires_at=mock.ANY, + expires_at=ANY, ) @mock_meilisearch(enabled=True) - @mock.patch('openedx.core.djangoapps.content.search.views._get_user_orgs') - @mock.patch('openedx.core.djangoapps.content.search.views.get_access_ids_for_request') - @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + @patch('openedx.core.djangoapps.content.search.api._get_user_orgs') + @patch('openedx.core.djangoapps.content.search.api.get_access_ids_for_request') + @patch('openedx.core.djangoapps.content.search.api.MeilisearchClient') def test_studio_search_limits(self, mock_search_client, mock_get_access_ids, mock_get_user_orgs): """ Users with access to many courses/libraries or orgs will only be able to search content @@ -254,5 +264,5 @@ def test_studio_search_limits(self, mock_search_client, mock_get_access_ids, moc "filter": f"org IN {expected_user_orgs} OR access_id IN {expected_access_ids}", } }, - expires_at=mock.ANY, + expires_at=ANY, ) diff --git a/openedx/core/djangoapps/content/search/views.py b/openedx/core/djangoapps/content/search/views.py index 5b29a8db1b94..f747fe77e8b5 100644 --- a/openedx/core/djangoapps/content/search/views.py +++ b/openedx/core/djangoapps/content/search/views.py @@ -3,69 +3,19 @@ """ from __future__ import annotations -from datetime import datetime, timedelta, timezone import logging -from django.conf import settings from django.contrib.auth import get_user_model -import meilisearch from rest_framework.exceptions import NotFound -from rest_framework.request import Request from rest_framework.response import Response from rest_framework.views import APIView -from common.djangoapps.student.role_helpers import get_course_roles -from common.djangoapps.student.roles import GlobalStaff from openedx.core.lib.api.view_utils import view_auth_classes -from openedx.core.djangoapps.content.search.documents import STUDIO_INDEX_NAME -from openedx.core.djangoapps.content.search.models import get_access_ids_for_request + +from . import api User = get_user_model() log = logging.getLogger(__name__) -MAX_ACCESS_IDS_IN_FILTER = 1_000 -MAX_ORGS_IN_FILTER = 1_000 - - -def _get_meili_api_key_uid(): - """ - Helper method to get the UID of the API key we're using for Meilisearch - """ - if not hasattr(_get_meili_api_key_uid, "uid"): - client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) - _get_meili_api_key_uid.uid = client.get_key(settings.MEILISEARCH_API_KEY).uid - return _get_meili_api_key_uid.uid - - -def _get_meili_access_filter(request: Request) -> dict: - """ - Return meilisearch filter based on the requesting user's permissions. - """ - # Global staff can see anything, so no filters required. - if GlobalStaff().has_user(request.user): - return {} - - # Everyone else is limited to their org staff roles... - user_orgs = _get_user_orgs(request)[:MAX_ORGS_IN_FILTER] - - # ...or the N most recent courses and libraries they can access. - access_ids = get_access_ids_for_request(request, omit_orgs=user_orgs)[:MAX_ACCESS_IDS_IN_FILTER] - return { - "filter": f"org IN {user_orgs} OR access_id IN {access_ids}", - } - - -def _get_user_orgs(request: Request) -> list[str]: - """ - Get the org.short_names for the organizations that the requesting user has OrgStaffRole or OrgInstructorRole. - - Note: org-level roles have course_id=None to distinguish them from course-level roles. - """ - course_roles = get_course_roles(request.user) - return list(set( - role.org - for role in course_roles - if role.course_id is None and role.role in ['staff', 'instructor'] - )) @view_auth_classes(is_authenticated=True) @@ -78,25 +28,9 @@ def get(self, request): """ Give user details on how they can search studio content """ - if not settings.MEILISEARCH_ENABLED: + if not api.is_meilisearch_enabled(): raise NotFound("Meilisearch features are not enabled.") - client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) - index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME - # Create an API key that only allows the user to search content that they have permission to view: - expires_at = datetime.now(tz=timezone.utc) + timedelta(days=7) - search_rules = { - index_name: _get_meili_access_filter(request), - } - # Note: the following is just generating a JWT. It doesn't actually make an API call to Meilisearch. - restricted_api_key = client.generate_tenant_token( - api_key_uid=_get_meili_api_key_uid(), - search_rules=search_rules, - expires_at=expires_at, - ) + response_data = api.generate_user_token_for_studio_search(request) - return Response({ - "url": settings.MEILISEARCH_PUBLIC_URL, - "index_name": index_name, - "api_key": restricted_api_key, - }) + return Response(response_data) From f866545bb9a48ffe4e5ef0402fdc9819c4ed6f0c Mon Sep 17 00:00:00 2001 From: connorhaugh <49422820+connorhaugh@users.noreply.github.com> Date: Thu, 18 Apr 2024 13:08:25 -0400 Subject: [PATCH 08/37] temp: trace celery tasks in dd (#34537) --- openedx/core/lib/celery/__init__.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/openedx/core/lib/celery/__init__.py b/openedx/core/lib/celery/__init__.py index 855970c1df3e..5536b4952316 100644 --- a/openedx/core/lib/celery/__init__.py +++ b/openedx/core/lib/celery/__init__.py @@ -19,6 +19,15 @@ from celery import Celery +# TEMP: This code will be removed by ARCH-BOM on 4/22/24 +# ddtrace allows celery task logs to be traced by the dd agent. +# TODO: remove this code. +try: + from ddtrace import patch + patch(celery=True) +except ImportError: + pass + # WARNING: Do not refer to this unless you are cms.celery or # lms.celery. See module docstring! APP = Celery('proj') From a24aa83f19d45688fe240fdd931da72dfa4f3b96 Mon Sep 17 00:00:00 2001 From: feanil Date: Thu, 18 Apr 2024 17:10:39 +0000 Subject: [PATCH 09/37] feat: Upgrade Python dependency edx-milestones Update to a Python 3.11 compatible version Commit generated by workflow `openedx/edx-platform/.github/workflows/upgrade-one-python-dependency.yml@refs/heads/master` --- requirements/edx/base.txt | 3 ++- requirements/edx/development.txt | 3 ++- requirements/edx/doc.txt | 3 ++- requirements/edx/testing.txt | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index 24376a074c3c..be31d5de3f26 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -59,6 +59,7 @@ backports-zoneinfo[tzdata]==0.2.1 # via # celery # django + # edx-milestones # icalendar # kombu beautifulsoup4==4.12.3 @@ -493,7 +494,7 @@ edx-i18n-tools==1.5.0 # via # -r requirements/edx/bundled.in # ora2 -edx-milestones==0.5.0 +edx-milestones==0.6.0 # via -r requirements/edx/kernel.in edx-name-affirmation==2.3.7 # via -r requirements/edx/kernel.in diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index d02bc5fa6da8..33de62b8826f 100644 --- a/requirements/edx/development.txt +++ b/requirements/edx/development.txt @@ -117,6 +117,7 @@ backports-zoneinfo[tzdata]==0.2.1 # -r requirements/edx/testing.txt # celery # django + # edx-milestones # icalendar # kombu beautifulsoup4==4.12.3 @@ -777,7 +778,7 @@ edx-i18n-tools==1.5.0 # ora2 edx-lint==5.3.6 # via -r requirements/edx/testing.txt -edx-milestones==0.5.0 +edx-milestones==0.6.0 # via # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt diff --git a/requirements/edx/doc.txt b/requirements/edx/doc.txt index 9f55649c011a..7c20363ce50e 100644 --- a/requirements/edx/doc.txt +++ b/requirements/edx/doc.txt @@ -83,6 +83,7 @@ backports-zoneinfo[tzdata]==0.2.1 # -r requirements/edx/base.txt # celery # django + # edx-milestones # icalendar # kombu beautifulsoup4==4.12.3 @@ -571,7 +572,7 @@ edx-i18n-tools==1.5.0 # via # -r requirements/edx/base.txt # ora2 -edx-milestones==0.5.0 +edx-milestones==0.6.0 # via -r requirements/edx/base.txt edx-name-affirmation==2.3.7 # via -r requirements/edx/base.txt diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt index 31fc58675695..60b8c52584be 100644 --- a/requirements/edx/testing.txt +++ b/requirements/edx/testing.txt @@ -83,6 +83,7 @@ backports-zoneinfo[tzdata]==0.2.1 # -r requirements/edx/base.txt # celery # django + # edx-milestones # icalendar # kombu beautifulsoup4==4.12.3 @@ -597,7 +598,7 @@ edx-i18n-tools==1.5.0 # ora2 edx-lint==5.3.6 # via -r requirements/edx/testing.in -edx-milestones==0.5.0 +edx-milestones==0.6.0 # via -r requirements/edx/base.txt edx-name-affirmation==2.3.7 # via -r requirements/edx/base.txt From ad00e35266e852db51502a1a4980c59286be902f Mon Sep 17 00:00:00 2001 From: feanil Date: Thu, 18 Apr 2024 17:00:56 +0000 Subject: [PATCH 10/37] feat: Upgrade Python dependency edx-drf-extensions Update to a Python 3.11 compatible version Commit generated by workflow `openedx/edx-platform/.github/workflows/upgrade-one-python-dependency.yml@refs/heads/master` --- requirements/edx/base.txt | 2 +- requirements/edx/development.txt | 2 +- requirements/edx/doc.txt | 2 +- requirements/edx/testing.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index 24376a074c3c..1e5cd933dcf9 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -469,7 +469,7 @@ edx-django-utils==5.12.0 # openedx-events # ora2 # super-csv -edx-drf-extensions==10.2.0 +edx-drf-extensions==10.3.0 # via # -r requirements/edx/kernel.in # edx-completion diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index d02bc5fa6da8..dca6688af201 100644 --- a/requirements/edx/development.txt +++ b/requirements/edx/development.txt @@ -744,7 +744,7 @@ edx-django-utils==5.12.0 # openedx-events # ora2 # super-csv -edx-drf-extensions==10.2.0 +edx-drf-extensions==10.3.0 # via # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt diff --git a/requirements/edx/doc.txt b/requirements/edx/doc.txt index 9f55649c011a..c2f898310ee6 100644 --- a/requirements/edx/doc.txt +++ b/requirements/edx/doc.txt @@ -547,7 +547,7 @@ edx-django-utils==5.12.0 # openedx-events # ora2 # super-csv -edx-drf-extensions==10.2.0 +edx-drf-extensions==10.3.0 # via # -r requirements/edx/base.txt # edx-completion diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt index 31fc58675695..883be396d46b 100644 --- a/requirements/edx/testing.txt +++ b/requirements/edx/testing.txt @@ -571,7 +571,7 @@ edx-django-utils==5.12.0 # openedx-events # ora2 # super-csv -edx-drf-extensions==10.2.0 +edx-drf-extensions==10.3.0 # via # -r requirements/edx/base.txt # edx-completion From b20ac9c5154fdf591c0088c34d61b8f52347910d Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Tue, 2 Apr 2024 14:43:52 -0400 Subject: [PATCH 11/37] fix: Be able to clear the process_cache manually in Python 3.11 Given code like the following ``` class Foo: @process_cached def bar(self): pass ``` In Python 3.8 referencing `bar` would not call its `__get__` method. ``` x = Foo().bar ``` However in Python 3.11, making the same call would call the `__get__` method, permanently replacing the underlying `process_cached` object with the partial function that references it. This meant that code to clear the cache would work in Python 3.8 but would break in 3.11 ``` Foo().bar.cache.clear() # Works in 3.8 but not in 3.11 ``` In 3.11 this results in the following error: ``` E AttributeError: 'functools.partial' object has no attribute 'cache' ``` To make this compatible in both version, we just add the cache as an accessible attribute on the partial we generate for our wrapped function. --- openedx/core/lib/cache_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openedx/core/lib/cache_utils.py b/openedx/core/lib/cache_utils.py index 1b3b65f378a4..b53aeedf884f 100644 --- a/openedx/core/lib/cache_utils.py +++ b/openedx/core/lib/cache_utils.py @@ -147,7 +147,10 @@ def __get__(self, obj, objtype): """ Support instance methods. """ - return functools.partial(self.__call__, obj) + partial = functools.partial(self.__call__, obj) + # Make the cache accessible on the wrapped object so it can be cleared if needed. + partial.cache = self.cache + return partial class CacheInvalidationManager: From 87b9c759f0c52f27e8db968ffe952a4bf1f5f670 Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Fri, 15 Mar 2024 15:30:01 -0400 Subject: [PATCH 12/37] fix: Provide a sequence to random.sample The sample function used to automatically convert sets to sequences but that is no longer supported starting in 3.11 so we have to do it manually. Reference: https://docs.python.org/3/library/random.html#random.sample --- xmodule/library_content_block.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xmodule/library_content_block.py b/xmodule/library_content_block.py index 4edcca5f00b6..6c8965186d51 100644 --- a/xmodule/library_content_block.py +++ b/xmodule/library_content_block.py @@ -221,7 +221,7 @@ def make_selection(cls, selected, children, max_count, mode): overlimit_block_keys = set() if len(selected_keys) > max_count: num_to_remove = len(selected_keys) - max_count - overlimit_block_keys = set(rand.sample(selected_keys, num_to_remove)) + overlimit_block_keys = set(rand.sample(list(selected_keys), num_to_remove)) selected_keys -= overlimit_block_keys # Do we have enough blocks now? @@ -233,7 +233,7 @@ def make_selection(cls, selected, children, max_count, mode): pool = valid_block_keys - selected_keys if mode == "random": num_to_add = min(len(pool), num_to_add) - added_block_keys = set(rand.sample(pool, num_to_add)) + added_block_keys = set(rand.sample(list(pool), num_to_add)) # We now have the correct n random children to show for this user. else: raise NotImplementedError("Unsupported mode.") From 08b3f0bf32c9a001c548b7612edbe90e10e4e21c Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Fri, 15 Mar 2024 15:16:18 -0400 Subject: [PATCH 13/37] fix: Create a bad unicode file differently. In Python 3.11 CSV files are allowed to have null characters so the test data is no longer a valid. We update it to not have a valid unicode character to still test this code path correctly. I'm not concerned about the fact that the files with null will get past this test beacause there are other checks on the content of the file that catch if it doesn't have enough or the right fields so this should be a safe change to make to the tests. Relevant Change in Python: https://github.com/python/cpython/issues/71767 --- lms/djangoapps/instructor/tests/test_api.py | 2 +- lms/djangoapps/instructor/tests/test_certificates.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lms/djangoapps/instructor/tests/test_api.py b/lms/djangoapps/instructor/tests/test_api.py index 00b0fc56b05f..870494900f89 100644 --- a/lms/djangoapps/instructor/tests/test_api.py +++ b/lms/djangoapps/instructor/tests/test_api.py @@ -713,7 +713,7 @@ def test_bad_file_upload_type(self): """ Try uploading some non-CSV file and verify that it is rejected """ - uploaded_file = SimpleUploadedFile("temp.csv", io.BytesIO(b"some initial binary data: \x00\x01").read()) + uploaded_file = SimpleUploadedFile("temp.csv", io.BytesIO(b"some initial binary data: \xC3\x01").read()) response = self.client.post(self.url, {'students_list': uploaded_file}) assert response.status_code == 200 data = json.loads(response.content.decode('utf-8')) diff --git a/lms/djangoapps/instructor/tests/test_certificates.py b/lms/djangoapps/instructor/tests/test_certificates.py index 9921ae6d6831..ce3433f312d8 100644 --- a/lms/djangoapps/instructor/tests/test_certificates.py +++ b/lms/djangoapps/instructor/tests/test_certificates.py @@ -888,7 +888,7 @@ def test_bad_file_upload_type(self): """ Try uploading CSV file with invalid binary data and verify that it is rejected """ - uploaded_file = SimpleUploadedFile("temp.csv", io.BytesIO(b"some initial binary data: \x00\x01").read()) + uploaded_file = SimpleUploadedFile("temp.csv", io.BytesIO(b"some initial binary data: \xC3\x01").read()) response = self.client.post(self.url, {'students_list': uploaded_file}) assert response.status_code == 200 data = json.loads(response.content.decode('utf-8')) From 884fe8ace9f3808cddbf6f262a63fa41c2d02acf Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Fri, 15 Mar 2024 13:01:57 -0400 Subject: [PATCH 14/37] fix: Fix function mocking. The way the patch decorator was being used is not supported in python 3.11. Use the patch decorator to auto generate the correct mock and make the test a bit more readabale. The new change is both 3.8 and 3.11 compatible. --- lms/djangoapps/instructor/tests/test_api.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lms/djangoapps/instructor/tests/test_api.py b/lms/djangoapps/instructor/tests/test_api.py index 870494900f89..0cb7e009454e 100644 --- a/lms/djangoapps/instructor/tests/test_api.py +++ b/lms/djangoapps/instructor/tests/test_api.py @@ -920,15 +920,16 @@ def test_users_created_and_enrolled_successfully_if_others_fail(self): manual_enrollments = ManualEnrollmentAudit.objects.all() assert manual_enrollments.count() == 2 - @patch('lms.djangoapps.instructor.views.api', 'generate_random_string', - Mock(side_effect=['first', 'first', 'second'])) def test_generate_unique_password_no_reuse(self): """ generate_unique_password should generate a unique password string that hasn't been generated before. """ - generated_password = ['first'] - password = generate_unique_password(generated_password, 12) - assert password != 'first' + with patch('lms.djangoapps.instructor.views.api.generate_random_string') as mock: + mock.side_effect = ['first', 'first', 'second'] + + generated_password = ['first'] + password = generate_unique_password(generated_password, 12) + assert password != 'first' @patch.dict(settings.FEATURES, {'ALLOW_AUTOMATED_SIGNUPS': False}) def test_allow_automated_signups_flag_not_set(self): From 6ea63da96916a58e5d27f290e106c5effac80a84 Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Fri, 15 Mar 2024 13:00:22 -0400 Subject: [PATCH 15/37] fix: Don't use the deprecated location for Hashable The Hashable object was moved in python 3.3 and support for the old location is dropped in python 3.10 the new location is available in python 3.8 so we can just update this and it should work with both python 3.8 and 3.11 https://docs.python.org/3.8/library/collections.html --- openedx/core/lib/cache_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openedx/core/lib/cache_utils.py b/openedx/core/lib/cache_utils.py index b53aeedf884f..c379ab2d961a 100644 --- a/openedx/core/lib/cache_utils.py +++ b/openedx/core/lib/cache_utils.py @@ -126,7 +126,7 @@ def __init__(self, func): self.cache = {} def __call__(self, *args): - if not isinstance(args, collections.Hashable): + if not isinstance(args, collections.abc.Hashable): # uncacheable. a list, for instance. # better to not cache than blow up. return self.func(*args) From 6fb59639af395156ec362ba00dcfcb24293b6120 Mon Sep 17 00:00:00 2001 From: Feanil Patel Date: Fri, 15 Mar 2024 12:29:26 -0400 Subject: [PATCH 16/37] fix: Remove deprecated getargspec call. This function was removed by python 3.11 so update to the alternate call that is the current recommended replacement. https://docs.python.org/3.11/library/inspect.html#inspect.getfullargspec --- xmodule/tests/xml/factories.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xmodule/tests/xml/factories.py b/xmodule/tests/xml/factories.py index 23a57898b6dc..a62f7b3f49bb 100644 --- a/xmodule/tests/xml/factories.py +++ b/xmodule/tests/xml/factories.py @@ -57,7 +57,7 @@ def __repr__(self): # Extract all argument names used to construct XmlImportData objects, # so that the factory doesn't treat them as XML attributes -XML_IMPORT_ARGS = inspect.getargspec(XmlImportData.__init__).args # lint-amnesty, pylint: disable=deprecated-method +XML_IMPORT_ARGS = inspect.getfullargspec(XmlImportData.__init__).args class XmlImportFactory(Factory): From e3d83eaccb42d734a51bf2ab2a05df7fb1c423ce Mon Sep 17 00:00:00 2001 From: katrinan029 Date: Thu, 18 Apr 2024 18:28:23 +0000 Subject: [PATCH 17/37] chore: version bump --- requirements/constraints.txt | 2 +- requirements/edx/base.txt | 2 +- requirements/edx/development.txt | 2 +- requirements/edx/doc.txt | 2 +- requirements/edx/testing.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/requirements/constraints.txt b/requirements/constraints.txt index 9c0484d69209..bf9b0f349665 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -23,7 +23,7 @@ click>=8.0,<9.0 # The team that owns this package will manually bump this package rather than having it pulled in automatically. # This is to allow them to better control its deployment and to do it in a process that works better # for them. -edx-enterprise==4.15.3 +edx-enterprise==4.15.7 # Stay on LTS version, remove once this is added to common constraint Django<5.0 diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index 9993993862a3..868298662ce3 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -481,7 +481,7 @@ edx-drf-extensions==10.3.0 # edx-when # edxval # openedx-learning -edx-enterprise==4.15.3 +edx-enterprise==4.15.7 # via # -c requirements/edx/../constraints.txt # -r requirements/edx/kernel.in diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index 68219982bfd0..ab395fab63c9 100644 --- a/requirements/edx/development.txt +++ b/requirements/edx/development.txt @@ -757,7 +757,7 @@ edx-drf-extensions==10.3.0 # edx-when # edxval # openedx-learning -edx-enterprise==4.15.3 +edx-enterprise==4.15.7 # via # -c requirements/edx/../constraints.txt # -r requirements/edx/doc.txt diff --git a/requirements/edx/doc.txt b/requirements/edx/doc.txt index b669e32c2d14..efb2803d58fe 100644 --- a/requirements/edx/doc.txt +++ b/requirements/edx/doc.txt @@ -559,7 +559,7 @@ edx-drf-extensions==10.3.0 # edx-when # edxval # openedx-learning -edx-enterprise==4.15.3 +edx-enterprise==4.15.7 # via # -c requirements/edx/../constraints.txt # -r requirements/edx/base.txt diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt index 8579560a53d0..0b099338553a 100644 --- a/requirements/edx/testing.txt +++ b/requirements/edx/testing.txt @@ -583,7 +583,7 @@ edx-drf-extensions==10.3.0 # edx-when # edxval # openedx-learning -edx-enterprise==4.15.3 +edx-enterprise==4.15.7 # via # -c requirements/edx/../constraints.txt # -r requirements/edx/base.txt From de50f97d902f51467aba7987d3e35a864507e3e0 Mon Sep 17 00:00:00 2001 From: "Kyle D. McCormick" Date: Tue, 16 Apr 2024 16:30:10 -0400 Subject: [PATCH 18/37] build: replace wget->curl, so make upgrade works in tutor tutor's containers don't have wget installed, and curl -L works just as well and is installed into basically everything --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0bb53d1119f4..b459e9424daa 100644 --- a/Makefile +++ b/Makefile @@ -130,7 +130,7 @@ endef COMMON_CONSTRAINTS_TXT=requirements/common_constraints.txt .PHONY: $(COMMON_CONSTRAINTS_TXT) $(COMMON_CONSTRAINTS_TXT): - wget -O "$(@)" https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt + curl -L https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt > "$(@)" printf "$(COMMON_CONSTRAINTS_TEMP_COMMENT)" | cat - $(@) > temp && mv temp $(@) compile-requirements: export CUSTOM_COMPILE_COMMAND=make upgrade From a17e2c06fa52c4e96fb981fce0cba0948c26cb83 Mon Sep 17 00:00:00 2001 From: "Kyle D. McCormick" Date: Tue, 16 Apr 2024 16:05:12 -0400 Subject: [PATCH 19/37] refactor: remove requirements/edx-sandbox/shared.[in,txt] These files were used to assist the Python 3.5 -> 3.8 upgrade, but they are no longer needed nor referened anywhere. They haven't been updated for years. --- requirements/edx-sandbox/shared.in | 14 ------------ requirements/edx-sandbox/shared.txt | 34 ----------------------------- 2 files changed, 48 deletions(-) delete mode 100644 requirements/edx-sandbox/shared.in delete mode 100644 requirements/edx-sandbox/shared.txt diff --git a/requirements/edx-sandbox/shared.in b/requirements/edx-sandbox/shared.in deleted file mode 100644 index 5bcfd9ce712b..000000000000 --- a/requirements/edx-sandbox/shared.in +++ /dev/null @@ -1,14 +0,0 @@ -# Core dependencies shared between Python sandboxes for secured execution and edx-platform. -# -# DON'T JUST ADD NEW DEPENDENCIES!!! -# -# If you open a pull request that adds a new dependency, you should: -# * verify that the dependency has a license compatible with AGPLv3 -# * confirm that it has no system requirements beyond what we already install -# * run "make upgrade" to update the detailed requirements files - --c ../constraints.txt - -cryptography # Implementations of assorted cryptography algorithms -lxml # XML parser -nltk # Natural language processing; used by the chem package diff --git a/requirements/edx-sandbox/shared.txt b/requirements/edx-sandbox/shared.txt deleted file mode 100644 index 70794cbe57bf..000000000000 --- a/requirements/edx-sandbox/shared.txt +++ /dev/null @@ -1,34 +0,0 @@ -# -# This file is autogenerated by pip-compile -# To update, run: -# -# make upgrade -# -cffi==1.14.5 - # via cryptography -click==7.1.2 - # via - # -c requirements/edx-sandbox/../constraints.txt - # nltk -cryptography==3.2.1 - # via - # -c requirements/edx-sandbox/../constraints.txt - # -r requirements/edx-sandbox/shared.in -joblib==0.14.1 - # via - # -c requirements/edx-sandbox/../constraints.txt - # nltk -lxml==4.5.0 - # via - # -c requirements/edx-sandbox/../constraints.txt - # -r requirements/edx-sandbox/shared.in -nltk==3.6.2 - # via -r requirements/edx-sandbox/shared.in -pycparser==2.20 - # via cffi -regex==2021.4.4 - # via nltk -six==1.16.0 - # via cryptography -tqdm==4.61.0 - # via nltk From 7e96b32f6ab6856db3fb542aad0df8b50dd78bb9 Mon Sep 17 00:00:00 2001 From: "Kyle D. McCormick" Date: Tue, 16 Apr 2024 16:06:59 -0400 Subject: [PATCH 20/37] feat!: expose per-release edx-sandbox dependency pins See requirements/edx-sandbox/README.rst for more info BREAKING CHANGE: edx-sandbox/py38.txt will not longer be updated. Please install from either edx-sandbox/base.txt or edx-sandbox/releases/*.txt instead. --- Makefile | 2 +- requirements/edx-sandbox/README.rst | 59 +++++++++++++++++++ requirements/edx-sandbox/{py38.in => base.in} | 0 .../edx-sandbox/{py38.txt => base.txt} | 24 ++++---- requirements/edx-sandbox/releases/.gitignore | 0 5 files changed, 72 insertions(+), 13 deletions(-) create mode 100644 requirements/edx-sandbox/README.rst rename requirements/edx-sandbox/{py38.in => base.in} (100%) rename requirements/edx-sandbox/{py38.txt => base.txt} (74%) create mode 100644 requirements/edx-sandbox/releases/.gitignore diff --git a/Makefile b/Makefile index b459e9424daa..6fc019290088 100644 --- a/Makefile +++ b/Makefile @@ -110,7 +110,7 @@ shell: ## launch a bash shell in a Docker container with all edx-platform depend REQ_FILES = \ requirements/edx/coverage \ requirements/edx/paver \ - requirements/edx-sandbox/py38 \ + requirements/edx-sandbox/base \ requirements/edx/base \ requirements/edx/doc \ requirements/edx/testing \ diff --git a/requirements/edx-sandbox/README.rst b/requirements/edx-sandbox/README.rst new file mode 100644 index 000000000000..6129aa865b31 --- /dev/null +++ b/requirements/edx-sandbox/README.rst @@ -0,0 +1,59 @@ +edx-sandbox: a Python environment for sandboxed execution with CodeJail +####################################################################### + +The requirements in this directory describe a Python environment separate from +the general edx-platform environment. When correctly configured with +`CodeJail `_, edx-platform can use +it to execute untrusted code, particularly instructor-authored Python code +within ``