refactor: avoid fetching more data than we have to.

* get_library_collection_usage_key and searchable_doc_tags_for_collection do not need a Collection object; the usage key can be created from the library_key and collection_key. * updated searchable_doc_for_collection to require the parts of the collection usage key + an optional collection. This allows us to identify the collection's search document from its usage key without requiring an existing Collection object (in case it's been deleted). Also removes the edge case for indexing Collections not associated with a ContentLibrary -- this won't ever really happen.
open-craft · Sep 20, 2024 · e830adc · e830adc
1 parent 3f8acb7
commit e830adc
Show file tree

Hide file tree

Showing 4 changed files with 31 additions and 63 deletions.
diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py
@@ -400,8 +400,8 @@ def index_collection_batch(batch, num_done, library_key) -> int:
             docs = []
             for collection in batch:
                 try:
-                    doc = searchable_doc_for_collection(collection)
-                    doc.update(searchable_doc_tags_for_collection(library_key, collection))
+                    doc = searchable_doc_for_collection(library_key, collection.key, collection=collection)
+                    doc.update(searchable_doc_tags_for_collection(library_key, collection.key))
                     docs.append(doc)
                 except Exception as err:  # pylint: disable=broad-except
                     status_cb(f"Error indexing collection {collection}: {err}")
@@ -565,13 +565,8 @@ def upsert_library_collection_index_doc(library_key: LibraryLocatorV2, collectio
     """
     Creates or updates the document for the given Library Collection in the search index
     """
-    content_library = lib_api.ContentLibrary.objects.get_by_key(library_key)
-    collection = authoring_api.get_collection(
-        learning_package_id=content_library.learning_package_id,
-        collection_key=collection_key,
-    )
     docs = [
-        searchable_doc_for_collection(collection)
+        searchable_doc_for_collection(library_key, collection_key)
     ]
 
     _update_index_docs(docs)
@@ -612,10 +607,8 @@ def upsert_collection_tags_index_docs(collection_usage_key: LibraryCollectionLoc
     """
     Updates the tags data in documents for the given library collection
     """
-    collection = lib_api.get_library_collection_from_usage_key(collection_usage_key)
 
-    doc = {Fields.id: collection.id}
-    doc.update(searchable_doc_tags_for_collection(collection_usage_key.library_key, collection))
+    doc = searchable_doc_tags_for_collection(collection_usage_key.library_key, collection_usage_key.collection_id)
     _update_index_docs([doc])
 
 

diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py
@@ -16,7 +16,7 @@
 from openedx.core.djangoapps.content_libraries import api as lib_api
 from openedx.core.djangoapps.content_tagging import api as tagging_api
 from openedx.core.djangoapps.xblock import api as xblock_api
-from openedx_learning.api.authoring_models import LearningPackage
+from openedx_learning.api.authoring_models import Collection
 
 log = logging.getLogger(__name__)
 
@@ -347,15 +347,15 @@ def searchable_doc_collections(usage_key: UsageKey) -> dict:
 
 def searchable_doc_tags_for_collection(
     library_key: LibraryLocatorV2,
-    collection,
+    collection_key: str,
 ) -> dict:
     """
     Generate a dictionary document suitable for ingestion into a search engine
     like Meilisearch or Elasticsearch, with the tags data for the given library collection.
     """
     collection_usage_key = lib_api.get_library_collection_usage_key(
         library_key,
-        collection.key,
+        collection_key,
     )
     doc = _searchable_doc_for_usage_key(collection_usage_key)
     doc.update(_tags_for_content_object(collection_usage_key))
@@ -390,6 +390,9 @@ def searchable_doc_for_collection(
     Generate a dictionary document suitable for ingestion into a search engine
     like Meilisearch or Elasticsearch, so that the given collection can be
     found using faceted search.
+
+    If no collection is found for the given library_key + collection_key, the returned document will contain only basic
+    information derived from the collection usage key, and no Fields.type value will be included in the returned dict.
     """
     collection_usage_key = lib_api.get_library_collection_usage_key(
         library_key,
@@ -399,17 +402,27 @@ def searchable_doc_for_collection(
     doc = _searchable_doc_for_usage_key(collection_usage_key)
 
     try:
-        context_key = collection.learning_package.contentlibrary.library_key
-        org = str(context_key.org)
+        collection = collection or lib_api.get_library_collection_from_usage_key(collection_usage_key)
+    except lib_api.ContentLibraryCollectionNotFound:
+        # Collection not found, so we can only return the base doc
+        pass
+
+    if collection:
+        assert collection.key == collection_key
+
         doc.update({
-            Fields.context_key: str(context_key),
-            Fields.org: org,
+            Fields.context_key: str(library_key),
+            Fields.org: str(library_key.org),
             Fields.usage_key: str(collection_usage_key),
+            Fields.block_id: collection.key,
+            Fields.type: DocType.collection,
+            Fields.display_name: collection.title,
+            Fields.description: collection.description,
+            Fields.created: collection.created.timestamp(),
+            Fields.modified: collection.modified.timestamp(),
+            Fields.num_children: collection.entities.count(),
+            Fields.access_id: _meili_access_id_from_context_key(library_key),
+            Fields.breadcrumbs: [{"display_name": collection.learning_package.title}],
         })
-    except LearningPackage.contentlibrary.RelatedObjectDoesNotExist:
-        log.warning(f"Related library not found for {collection}")
-    doc[Fields.access_id] = _meili_access_id_from_context_key(doc[Fields.context_key])
-    # Add the breadcrumbs.
-    doc[Fields.breadcrumbs] = [{"display_name": collection.learning_package.title}]
 
     return doc
diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py
@@ -5,7 +5,6 @@
 from organizations.models import Organization
 
 from freezegun import freeze_time
-from openedx_learning.api import authoring as authoring_api
 
 from openedx.core.djangoapps.content_tagging import api as tagging_api
 from openedx.core.djangoapps.content_libraries import api as library_api
@@ -299,8 +298,8 @@ def test_html_library_block(self):
         }
 
     def test_collection_with_library(self):
-        doc = searchable_doc_for_collection(self.collection)
-        doc.update(searchable_doc_tags_for_collection(self.library.key, self.collection))
+        doc = searchable_doc_for_collection(self.library.key, self.collection.key)
+        doc.update(searchable_doc_tags_for_collection(self.library.key, self.collection.key))
 
         assert doc == {
             "id": "lib-collectionedx2012_falltoy_collection-d1d907a4",
@@ -321,33 +320,3 @@ def test_collection_with_library(self):
                 'level0': ['Difficulty > Normal']
             }
         }
-
-    def test_collection_with_no_library(self):
-        created_date = datetime(2023, 4, 5, 6, 7, 8, tzinfo=timezone.utc)
-        with freeze_time(created_date):
-            learning_package = authoring_api.create_learning_package(
-                key="course-v1:edX+toy+2012_Fall",
-                title="some learning_package",
-                description="some description",
-            )
-            collection = authoring_api.create_collection(
-                learning_package_id=learning_package.id,
-                key="MYCOL",
-                title="my_collection",
-                created_by=None,
-                description="my collection description"
-            )
-        doc = searchable_doc_for_collection(collection)
-        assert doc == {
-            "id": collection.id,
-            "block_id": collection.key,
-            "type": "collection",
-            "display_name": "my_collection",
-            "description": "my collection description",
-            "num_children": 0,
-            "context_key": learning_package.key,
-            "access_id": self.toy_course_access_id,
-            "breadcrumbs": [{"display_name": "some learning_package"}],
-            "created": created_date.timestamp(),
-            "modified": created_date.timestamp(),
-        }
diff --git a/openedx/core/djangoapps/content_libraries/api.py b/openedx/core/djangoapps/content_libraries/api.py
@@ -1237,17 +1237,10 @@ def update_library_collection_components(
 def get_library_collection_usage_key(
     library_key: LibraryLocatorV2,
     collection_key: str,
-    # As an optimization, callers may pass in a pre-fetched ContentLibrary instance
-    content_library: ContentLibrary | None = None,
 ) -> LibraryCollectionLocator:
     """
     Returns the LibraryCollectionLocator associated to a collection
     """
-    if not content_library:
-        content_library = ContentLibrary.objects.get_by_key(library_key)  # type: ignore[attr-defined]
-    assert content_library
-    assert content_library.learning_package_id
-    assert content_library.library_key == library_key
 
     return LibraryCollectionLocator(library_key, collection_key)