Skip to content

Commit

Permalink
feat: update Studio search index when course content is updated (#34391)
Browse files Browse the repository at this point in the history
  • Loading branch information
rpenido authored Apr 18, 2024
1 parent f18629e commit 90b253a
Show file tree
Hide file tree
Showing 12 changed files with 1,157 additions and 358 deletions.
511 changes: 511 additions & 0 deletions openedx/core/djangoapps/content/search/api.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ Decision
new ``content/search`` Django app, so it's relatively easy to swap out later
if this experiment doesn't pan out.
4. We will not use ``edx-search`` for the new search functionality.
5. For the experiment, we won't use Meilisearch during tests, but we expect to
add that in the future if we move forward with replacing Elasticsearch completely.


Consequences
Expand Down
54 changes: 36 additions & 18 deletions openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@
Utilities related to indexing content for search
"""
from __future__ import annotations
from hashlib import blake2b

import logging
from hashlib import blake2b

from django.utils.text import slugify
from opaque_keys.edx.keys import UsageKey, LearningContextKey
from opaque_keys.edx.keys import LearningContextKey, UsageKey

from openedx.core.djangoapps.content.search.models import SearchAccess
from openedx.core.djangoapps.content_libraries import api as lib_api
from openedx.core.djangoapps.content_tagging import api as tagging_api
from openedx.core.djangoapps.xblock import api as xblock_api

log = logging.getLogger(__name__)
STUDIO_INDEX_NAME = "studio_content"


class Fields:
Expand Down Expand Up @@ -64,7 +64,7 @@ class DocType:
library_block = "library_block"


def _meili_id_from_opaque_key(usage_key: UsageKey) -> str:
def meili_id_from_opaque_key(usage_key: UsageKey) -> str:
"""
Meilisearch requires each document to have a primary key that's either an
integer or a string composed of alphanumeric characters (a-z A-Z 0-9),
Expand Down Expand Up @@ -98,7 +98,6 @@ class implementation returns only:
{"content": {"display_name": "..."}, "content_type": "..."}
"""
block_data = {
Fields.id: _meili_id_from_opaque_key(block.usage_key),
Fields.usage_key: str(block.usage_key),
Fields.block_id: str(block.usage_key.block_id),
Fields.display_name: xblock_api.get_block_display_name(block),
Expand Down Expand Up @@ -171,7 +170,7 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
# Note that we could improve performance for indexing many components from the same library/course,
# if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the
# tags for each component separately.
all_tags = tagging_api.get_object_tags(object_id).all()
all_tags = tagging_api.get_object_tags(str(object_id)).all()
if not all_tags:
return {}
result = {
Expand Down Expand Up @@ -207,23 +206,38 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
return {Fields.tags: result}


def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> dict:
def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetadata) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, so that the given library block can be
found using faceted search.
"""
library_name = lib_api.get_library(metadata.usage_key.context_key).title
doc = {}
try:
block = xblock_api.load_block(metadata.usage_key, user=None)
except Exception as err: # pylint: disable=broad-except
log.exception(f"Failed to load XBlock {metadata.usage_key}: {err}")
library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title
block = xblock_api.load_block(xblock_metadata.usage_key, user=None)

doc = {
Fields.id: meili_id_from_opaque_key(xblock_metadata.usage_key),
Fields.type: DocType.library_block,
}

doc.update(_fields_from_block(block))
doc.update(_tags_for_content_object(metadata.usage_key))
doc[Fields.type] = DocType.library_block

# Add the breadcrumbs. In v2 libraries, the library itself is not a "parent" of the XBlocks so we add it here:
doc[Fields.breadcrumbs] = [{"display_name": library_name}]

return doc


def searchable_doc_tags(usage_key: UsageKey) -> dict:
    """
    Build a search-engine document that carries only the tags of a content object.

    The result always contains the document ID derived from ``usage_key``; any
    tag fields returned by ``_tags_for_content_object`` are merged on top of it.
    """
    return {
        Fields.id: meili_id_from_opaque_key(usage_key),
        # Merged after the ID, mirroring a dict.update() on the base document.
        **_tags_for_content_object(usage_key),
    }


Expand All @@ -233,7 +247,11 @@ def searchable_doc_for_course_block(block) -> dict:
like Meilisearch or Elasticsearch, so that the given course block can be
found using faceted search.
"""
doc = _fields_from_block(block)
doc.update(_tags_for_content_object(block.usage_key))
doc[Fields.type] = DocType.course_block
doc = {
Fields.id: meili_id_from_opaque_key(block.usage_key),
Fields.type: DocType.course_block,
}

doc.update(_fields_from_block(block))

return doc
116 changes: 114 additions & 2 deletions openedx/core/djangoapps/content/search/handlers.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,36 @@
"""
Signal/event handlers for content search
"""

import logging

from django.db.models.signals import post_delete
from django.dispatch import receiver
from openedx_events.content_authoring.data import ContentLibraryData
from openedx_events.content_authoring.signals import CONTENT_LIBRARY_DELETED
from openedx_events.content_authoring.data import ContentLibraryData, LibraryBlockData, XBlockData
from openedx_events.content_authoring.signals import (
CONTENT_LIBRARY_DELETED,
CONTENT_LIBRARY_UPDATED,
LIBRARY_BLOCK_CREATED,
LIBRARY_BLOCK_DELETED,
XBLOCK_CREATED,
XBLOCK_DELETED,
XBLOCK_UPDATED
)

from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from openedx.core.djangoapps.content.search.models import SearchAccess

from .api import only_if_meilisearch_enabled
from .tasks import (
delete_library_block_index_doc,
delete_xblock_index_doc,
update_content_library_index_docs,
upsert_library_block_index_doc,
upsert_xblock_index_doc
)

log = logging.getLogger(__name__)


# Using post_delete here because there is no COURSE_DELETED event defined.
@receiver(post_delete, sender=CourseOverview)
Expand All @@ -21,3 +43,93 @@ def delete_course_search_access(sender, instance, **kwargs): # pylint: disable=
def delete_library_search_access(content_library: ContentLibraryData, **kwargs):
    """Delete the SearchAccess record(s) whose context key is the deleted library's key."""
    matching_access = SearchAccess.objects.filter(context_key=content_library.library_key)
    matching_access.delete()


@receiver(XBLOCK_CREATED)
@only_if_meilisearch_enabled
def xblock_created_handler(**kwargs) -> None:
    """
    React to XBLOCK_CREATED by handing the new block to the index upsert task.

    Expects the event payload under the ``xblock_info`` keyword as an
    ``XBlockData`` instance; anything else is logged as an error and ignored.
    """
    created_block = kwargs.get("xblock_info")
    if not (created_block and isinstance(created_block, XBlockData)):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    usage_key_str = str(created_block.usage_key)
    # Only the block itself is upserted here (recursive=False).
    upsert_xblock_index_doc.delay(usage_key_str, recursive=False)


@receiver(XBLOCK_UPDATED)
@only_if_meilisearch_enabled
def xblock_updated_handler(**kwargs) -> None:
    """
    React to XBLOCK_UPDATED by handing the block to the index upsert task, recursively.

    Expects the event payload under the ``xblock_info`` keyword as an
    ``XBlockData`` instance; anything else is logged as an error and ignored.
    """
    updated_block = kwargs.get("xblock_info")
    if not (updated_block and isinstance(updated_block, XBlockData)):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    usage_key_str = str(updated_block.usage_key)
    # recursive=True: children are refreshed too, because their breadcrumb may have changed.
    upsert_xblock_index_doc.delay(usage_key_str, recursive=True)


@receiver(XBLOCK_DELETED)
@only_if_meilisearch_enabled
def xblock_deleted_handler(**kwargs) -> None:
    """
    React to XBLOCK_DELETED by handing the block's usage key to the index delete task.

    Expects the event payload under the ``xblock_info`` keyword as an
    ``XBlockData`` instance; anything else is logged as an error and ignored.
    """
    deleted_block = kwargs.get("xblock_info")
    if not (deleted_block and isinstance(deleted_block, XBlockData)):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    usage_key_str = str(deleted_block.usage_key)
    delete_xblock_index_doc.delay(usage_key_str)


@receiver(LIBRARY_BLOCK_CREATED)
@only_if_meilisearch_enabled
def library_block_updated_handler(**kwargs) -> None:
    """
    React to LIBRARY_BLOCK_CREATED by handing the library block to the index upsert task.

    Expects the event payload under the ``library_block`` keyword as a
    ``LibraryBlockData`` instance; anything else is logged as an error and ignored.
    """
    block_payload = kwargs.get("library_block")
    if not (block_payload and isinstance(block_payload, LibraryBlockData)):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    usage_key_str = str(block_payload.usage_key)
    upsert_library_block_index_doc.delay(usage_key_str)


@receiver(LIBRARY_BLOCK_DELETED)
@only_if_meilisearch_enabled
def library_block_deleted(**kwargs) -> None:
    """
    React to LIBRARY_BLOCK_DELETED by handing the block's usage key to the index delete task.

    Expects the event payload under the ``library_block`` keyword as a
    ``LibraryBlockData`` instance; anything else is logged as an error and ignored.
    """
    block_payload = kwargs.get("library_block")
    if not (block_payload and isinstance(block_payload, LibraryBlockData)):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    usage_key_str = str(block_payload.usage_key)
    delete_library_block_index_doc.delay(usage_key_str)


@receiver(CONTENT_LIBRARY_UPDATED)
@only_if_meilisearch_enabled
def content_library_updated_handler(**kwargs) -> None:
    """
    React to CONTENT_LIBRARY_UPDATED by handing the library key to the library index update task.

    Expects the event payload under the ``content_library`` keyword as a
    ``ContentLibraryData`` instance; anything else is logged as an error and ignored.
    """
    library_payload = kwargs.get("content_library")
    if not (library_payload and isinstance(library_payload, ContentLibraryData)):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    library_key_str = str(library_payload.library_key)
    update_content_library_index_docs.delay(library_key_str)

This file was deleted.

Loading

0 comments on commit 90b253a

Please sign in to comment.