Skip to content

Commit

Permalink
refactor: avoid fetching more data than we have to.
Browse files Browse the repository at this point in the history
* get_library_collection_usage_key and
  searchable_doc_tags_for_collection do not need a Collection object;
  the usage key can be created from the library_key and collection_key.

* updated searchable_doc_for_collection to require the parts of the
  collection usage key + an optional collection. This allows us to
  identify the collection's search document from its usage key without
  requiring an existing Collection object (in case it's been deleted).
  Also removes the edge case for indexing Collections not associated
  with a ContentLibrary -- this won't ever really happen.
  • Loading branch information
pomegranited committed Sep 20, 2024
1 parent 3f8acb7 commit e830adc
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 63 deletions.
15 changes: 4 additions & 11 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,8 @@ def index_collection_batch(batch, num_done, library_key) -> int:
docs = []
for collection in batch:
try:
doc = searchable_doc_for_collection(collection)
doc.update(searchable_doc_tags_for_collection(library_key, collection))
doc = searchable_doc_for_collection(library_key, collection.key, collection=collection)
doc.update(searchable_doc_tags_for_collection(library_key, collection.key))
docs.append(doc)
except Exception as err: # pylint: disable=broad-except
status_cb(f"Error indexing collection {collection}: {err}")
Expand Down Expand Up @@ -565,13 +565,8 @@ def upsert_library_collection_index_doc(library_key: LibraryLocatorV2, collectio
"""
Creates or updates the document for the given Library Collection in the search index
"""
content_library = lib_api.ContentLibrary.objects.get_by_key(library_key)
collection = authoring_api.get_collection(
learning_package_id=content_library.learning_package_id,
collection_key=collection_key,
)
docs = [
searchable_doc_for_collection(collection)
searchable_doc_for_collection(library_key, collection_key)
]

_update_index_docs(docs)
Expand Down Expand Up @@ -612,10 +607,8 @@ def upsert_collection_tags_index_docs(collection_usage_key: LibraryCollectionLoc
"""
Updates the tags data in documents for the given library collection
"""
collection = lib_api.get_library_collection_from_usage_key(collection_usage_key)

doc = {Fields.id: collection.id}
doc.update(searchable_doc_tags_for_collection(collection_usage_key.library_key, collection))
doc = searchable_doc_tags_for_collection(collection_usage_key.library_key, collection_usage_key.collection_id)
_update_index_docs([doc])


Expand Down
37 changes: 25 additions & 12 deletions openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from openedx.core.djangoapps.content_libraries import api as lib_api
from openedx.core.djangoapps.content_tagging import api as tagging_api
from openedx.core.djangoapps.xblock import api as xblock_api
from openedx_learning.api.authoring_models import LearningPackage
from openedx_learning.api.authoring_models import Collection

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -347,15 +347,15 @@ def searchable_doc_collections(usage_key: UsageKey) -> dict:

def searchable_doc_tags_for_collection(
library_key: LibraryLocatorV2,
collection,
collection_key: str,
) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, with the tags data for the given library collection.
"""
collection_usage_key = lib_api.get_library_collection_usage_key(
library_key,
collection.key,
collection_key,
)
doc = _searchable_doc_for_usage_key(collection_usage_key)
doc.update(_tags_for_content_object(collection_usage_key))
Expand Down Expand Up @@ -390,6 +390,9 @@ def searchable_doc_for_collection(
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, so that the given collection can be
found using faceted search.
If no collection is found for the given library_key + collection_key, the returned document will contain only basic
information derived from the collection usage key, and no Fields.type value will be included in the returned dict.
"""
collection_usage_key = lib_api.get_library_collection_usage_key(
library_key,
Expand All @@ -399,17 +402,27 @@ def searchable_doc_for_collection(
doc = _searchable_doc_for_usage_key(collection_usage_key)

try:
context_key = collection.learning_package.contentlibrary.library_key
org = str(context_key.org)
collection = collection or lib_api.get_library_collection_from_usage_key(collection_usage_key)
except lib_api.ContentLibraryCollectionNotFound:
# Collection not found, so we can only return the base doc
pass

if collection:
assert collection.key == collection_key

doc.update({
Fields.context_key: str(context_key),
Fields.org: org,
Fields.context_key: str(library_key),
Fields.org: str(library_key.org),
Fields.usage_key: str(collection_usage_key),
Fields.block_id: collection.key,
Fields.type: DocType.collection,
Fields.display_name: collection.title,
Fields.description: collection.description,
Fields.created: collection.created.timestamp(),
Fields.modified: collection.modified.timestamp(),
Fields.num_children: collection.entities.count(),
Fields.access_id: _meili_access_id_from_context_key(library_key),
Fields.breadcrumbs: [{"display_name": collection.learning_package.title}],
})
except LearningPackage.contentlibrary.RelatedObjectDoesNotExist:
log.warning(f"Related library not found for {collection}")
doc[Fields.access_id] = _meili_access_id_from_context_key(doc[Fields.context_key])
# Add the breadcrumbs.
doc[Fields.breadcrumbs] = [{"display_name": collection.learning_package.title}]

return doc
35 changes: 2 additions & 33 deletions openedx/core/djangoapps/content/search/tests/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from organizations.models import Organization

from freezegun import freeze_time
from openedx_learning.api import authoring as authoring_api

from openedx.core.djangoapps.content_tagging import api as tagging_api
from openedx.core.djangoapps.content_libraries import api as library_api
Expand Down Expand Up @@ -299,8 +298,8 @@ def test_html_library_block(self):
}

def test_collection_with_library(self):
doc = searchable_doc_for_collection(self.collection)
doc.update(searchable_doc_tags_for_collection(self.library.key, self.collection))
doc = searchable_doc_for_collection(self.library.key, self.collection.key)
doc.update(searchable_doc_tags_for_collection(self.library.key, self.collection.key))

assert doc == {
"id": "lib-collectionedx2012_falltoy_collection-d1d907a4",
Expand All @@ -321,33 +320,3 @@ def test_collection_with_library(self):
'level0': ['Difficulty > Normal']
}
}

def test_collection_with_no_library(self):
created_date = datetime(2023, 4, 5, 6, 7, 8, tzinfo=timezone.utc)
with freeze_time(created_date):
learning_package = authoring_api.create_learning_package(
key="course-v1:edX+toy+2012_Fall",
title="some learning_package",
description="some description",
)
collection = authoring_api.create_collection(
learning_package_id=learning_package.id,
key="MYCOL",
title="my_collection",
created_by=None,
description="my collection description"
)
doc = searchable_doc_for_collection(collection)
assert doc == {
"id": collection.id,
"block_id": collection.key,
"type": "collection",
"display_name": "my_collection",
"description": "my collection description",
"num_children": 0,
"context_key": learning_package.key,
"access_id": self.toy_course_access_id,
"breadcrumbs": [{"display_name": "some learning_package"}],
"created": created_date.timestamp(),
"modified": created_date.timestamp(),
}
7 changes: 0 additions & 7 deletions openedx/core/djangoapps/content_libraries/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1237,17 +1237,10 @@ def update_library_collection_components(
def get_library_collection_usage_key(
library_key: LibraryLocatorV2,
collection_key: str,
# As an optimization, callers may pass in a pre-fetched ContentLibrary instance
content_library: ContentLibrary | None = None,
) -> LibraryCollectionLocator:
"""
Returns the LibraryCollectionLocator associated to a collection
"""
if not content_library:
content_library = ContentLibrary.objects.get_by_key(library_key) # type: ignore[attr-defined]
assert content_library
assert content_library.learning_package_id
assert content_library.library_key == library_key

return LibraryCollectionLocator(library_key, collection_key)

Expand Down

0 comments on commit e830adc

Please sign in to comment.