Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: update search index when course content is updated [FC-0040] #34391

Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
f514257
feat: update search index when course content is updated
rpenido Mar 29, 2024
b834d39
feat: add celery retry to search tasks
rpenido Apr 3, 2024
814d0a6
fix: use log.info if status callback not defined
rpenido Apr 5, 2024
bb7ac37
fix: update docstring and reduce sleep time
rpenido Apr 5, 2024
d2c3c53
refactor: remove update_metadata and update_tags parameters
rpenido Apr 5, 2024
1c498b8
refactor: rename generate_user_token to generate_user_token_for_studi…
rpenido Apr 5, 2024
8cd8f65
docs: fix docstring
rpenido Apr 5, 2024
e3bc29d
refactor: rename index name constants
rpenido Apr 5, 2024
0c20792
refactor: create _update_index_docs helper
rpenido Apr 5, 2024
c8d2651
fix: increase rebuild lock time and throw if rebuild already in prog…
rpenido Apr 5, 2024
8e64b13
docs: add docstring for status_cb
rpenido Apr 5, 2024
3515a2b
style: fix pylint
rpenido Apr 5, 2024
7983bfc
Merge branch 'master' into rpenido/fal-3690-update-search-index-when-…
rpenido Apr 8, 2024
2f165bc
Merge branch 'master' into rpenido/fal-3690-update-search-index-when-…
rpenido Apr 9, 2024
3b6e475
Merge branch 'master' into rpenido/fal-3690-update-search-index-when-…
bradenmacdonald Apr 9, 2024
612f32f
fix: error in library crash reindex
rpenido Apr 16, 2024
cbf4a46
Merge branch 'master' into rpenido/fal-3690-update-search-index-when-…
rpenido Apr 17, 2024
670c04a
fix: fix code and tests after merge
rpenido Apr 17, 2024
58f8ff4
style: fix pylint
rpenido Apr 17, 2024
66b1b71
feat: update meilisearch configuration needed for tags filter
rpenido Apr 17, 2024
83e1d27
fix: conditional import
rpenido Apr 17, 2024
17e1e53
style: fix pylint
rpenido Apr 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
457 changes: 457 additions & 0 deletions openedx/core/djangoapps/content/search/api.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ Decision
new ``content/search`` Django app, so it's relatively easy to swap out later
if this experiment doesn't pan out.
4. We will not use ``edx-search`` for the new search functionality.
5. For the experiment, we won't use Meilisearch during tests, but we expect to
add that in the future if we move forward with replacing Elasticsearch completely.


Consequences
Expand Down
60 changes: 39 additions & 21 deletions openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@
Utilities related to indexing content for search
"""
from __future__ import annotations
from hashlib import blake2b

import logging
from hashlib import blake2b

from django.utils.text import slugify
from opaque_keys.edx.keys import UsageKey, LearningContextKey
from opaque_keys.edx.keys import LearningContextKey, UsageKey

from openedx.core.djangoapps.content_libraries import api as lib_api
from openedx.core.djangoapps.content_tagging import api as tagging_api
from openedx.core.djangoapps.xblock import api as xblock_api

log = logging.getLogger(__name__)
STUDIO_INDEX_NAME = "studio_content"


class Fields:
Expand Down Expand Up @@ -62,7 +62,7 @@ class DocType:
library_block = "library_block"


def _meili_id_from_opaque_key(usage_key: UsageKey) -> str:
def meili_id_from_opaque_key(usage_key: UsageKey) -> str:
"""
Meilisearch requires each document to have a primary key that's either an
integer or a string composed of alphanumeric characters (a-z A-Z 0-9),
Expand All @@ -88,7 +88,6 @@ class implementation returns only:
{"content": {"display_name": "..."}, "content_type": "..."}
"""
block_data = {
Fields.id: _meili_id_from_opaque_key(block.usage_key),
Fields.usage_key: str(block.usage_key),
Fields.block_id: str(block.usage_key.block_id),
Fields.display_name: xblock_api.get_block_display_name(block),
Expand Down Expand Up @@ -160,7 +159,7 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
# Note that we could improve performance for indexing many components from the same library/course,
# if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the
# tags for each component separately.
all_tags = tagging_api.get_object_tags(object_id).all()
all_tags = tagging_api.get_object_tags(str(object_id)).all()
if not all_tags:
return {}
result = {
Expand All @@ -170,10 +169,10 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
}
for obj_tag in all_tags:
# Add the taxonomy name:
if obj_tag.name not in result[Fields.tags_taxonomy]:
result[Fields.tags_taxonomy].append(obj_tag.name)
if obj_tag.taxonomy.name not in result[Fields.tags_taxonomy]:
result[Fields.tags_taxonomy].append(obj_tag.taxonomy.name)
# Taxonomy name plus each level of tags, in a list:
parts = [obj_tag.name] + obj_tag.get_lineage() # e.g. ["Location", "North America", "Canada", "Vancouver"]
parts = [obj_tag.taxonomy.name] + obj_tag.get_lineage() # e.g. ["Location", "North America", "Canada"]
parts = [part.replace(" > ", " _ ") for part in parts] # Escape our separator.
# Now we build each level (tags.level0, tags.level1, etc.) as applicable.
# We have a hard-coded limit of 4 levels of tags for now (see Fields.tags above).
Expand All @@ -196,23 +195,38 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
return {Fields.tags: result}


def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> dict:
def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetadata) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, so that the given library block can be
found using faceted search.
"""
library_name = lib_api.get_library(metadata.usage_key.context_key).title
doc = {}
try:
block = xblock_api.load_block(metadata.usage_key, user=None)
except Exception as err: # pylint: disable=broad-except
log.exception(f"Failed to load XBlock {metadata.usage_key}: {err}")
library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title
block = xblock_api.load_block(xblock_metadata.usage_key, user=None)

doc = {
Fields.id: meili_id_from_opaque_key(xblock_metadata.usage_key),
Fields.type: DocType.library_block,
}

doc.update(_fields_from_block(block))
doc.update(_tags_for_content_object(metadata.usage_key))
doc[Fields.type] = DocType.library_block

# Add the breadcrumbs. In v2 libraries, the library itself is not a "parent" of the XBlocks so we add it here:
doc[Fields.breadcrumbs] = [{"display_name": library_name}]

return doc


def searchable_doc_tags(usage_key: UsageKey) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, with the tags data for the given content object.
"""
doc = {
Fields.id: meili_id_from_opaque_key(usage_key),
}
doc.update(_tags_for_content_object(usage_key))

return doc


Expand All @@ -222,7 +236,11 @@ def searchable_doc_for_course_block(block) -> dict:
like Meilisearch or Elasticsearch, so that the given course block can be
found using faceted search.
"""
doc = _fields_from_block(block)
doc.update(_tags_for_content_object(block.usage_key))
doc[Fields.type] = DocType.course_block
doc = {
Fields.id: meili_id_from_opaque_key(block.usage_key),
Fields.type: DocType.course_block,
}

doc.update(_fields_from_block(block))

return doc
117 changes: 117 additions & 0 deletions openedx/core/djangoapps/content/search/handlers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""
Handlers for content indexing
"""

import logging

from django.dispatch import receiver
from openedx_events.content_authoring.data import ContentLibraryData, LibraryBlockData, XBlockData
from openedx_events.content_authoring.signals import (
CONTENT_LIBRARY_UPDATED,
LIBRARY_BLOCK_CREATED,
LIBRARY_BLOCK_DELETED,
XBLOCK_CREATED,
XBLOCK_DELETED,
XBLOCK_UPDATED
)

from .api import only_if_meilisearch_enabled
from .tasks import (
delete_library_block_index_doc,
delete_xblock_index_doc,
update_content_library_index_docs,
upsert_library_block_index_doc,
upsert_xblock_index_doc
)

log = logging.getLogger(__name__)


@receiver(XBLOCK_CREATED)
@only_if_meilisearch_enabled
def xblock_created_handler(**kwargs) -> None:
    """
    Add a freshly created XBlock to the search index (asynchronously, via celery).
    """
    xblock_info = kwargs.get("xblock_info", None)
    if not xblock_info or not isinstance(xblock_info, XBlockData):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    # A brand-new block has no children yet, so a non-recursive upsert is enough.
    upsert_xblock_index_doc.delay(str(xblock_info.usage_key), recursive=False)


@receiver(XBLOCK_UPDATED)
@only_if_meilisearch_enabled
def xblock_updated_handler(**kwargs) -> None:
    """
    Re-index an updated XBlock together with all of its descendants.
    """
    xblock_info = kwargs.get("xblock_info", None)
    if not xblock_info or not isinstance(xblock_info, XBlockData):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    # Recursive: a change here may have altered the breadcrumb shown on every child.
    upsert_xblock_index_doc.delay(str(xblock_info.usage_key), recursive=True)


@receiver(XBLOCK_DELETED)
@only_if_meilisearch_enabled
def xblock_deleted_handler(**kwargs) -> None:
    """
    Remove a deleted XBlock's document from the search index.
    """
    xblock_info = kwargs.get("xblock_info", None)
    if not xblock_info or not isinstance(xblock_info, XBlockData):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    usage_key_str = str(xblock_info.usage_key)
    delete_xblock_index_doc.delay(usage_key_str)


@receiver(LIBRARY_BLOCK_CREATED)
@only_if_meilisearch_enabled
def library_block_updated_handler(**kwargs) -> None:
    """
    Index (create or update) a content-library block's search document.
    """
    library_block_data = kwargs.get("library_block", None)
    if not library_block_data or not isinstance(library_block_data, LibraryBlockData):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    usage_key_str = str(library_block_data.usage_key)
    upsert_library_block_index_doc.delay(usage_key_str)


@receiver(LIBRARY_BLOCK_DELETED)
@only_if_meilisearch_enabled
def library_block_deleted(**kwargs) -> None:
    """
    Remove a deleted content-library block's document from the search index.

    NOTE(review): the name lacks the ``_handler`` suffix used by the sibling
    receivers; kept as-is to avoid breaking any external references.
    """
    library_block_data = kwargs.get("library_block", None)
    if not library_block_data or not isinstance(library_block_data, LibraryBlockData):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    usage_key_str = str(library_block_data.usage_key)
    delete_library_block_index_doc.delay(usage_key_str)


@receiver(CONTENT_LIBRARY_UPDATED)
@only_if_meilisearch_enabled
def content_library_updated_handler(**kwargs) -> None:
    """
    Refresh the search documents for every block in an updated content library.
    """
    content_library_data = kwargs.get("content_library", None)
    if not content_library_data or not isinstance(content_library_data, ContentLibraryData):  # pragma: no cover
        log.error("Received null or incorrect data for event")
        return

    library_key_str = str(content_library_data.library_key)
    update_content_library_index_docs.delay(library_key_str)

This file was deleted.

Loading
Loading