diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py index 9d0a2379a0bb..443bae2b1a65 100644 --- a/openedx/core/djangoapps/content/search/api.py +++ b/openedx/core/djangoapps/content/search/api.py @@ -230,68 +230,76 @@ def _configure_index(index_name): # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique): client.index(index_name).update_distinct_attribute(Fields.usage_key) # Mark which attributes can be used for filtering/faceted search: - client.index(index_name).update_filterable_attributes([ - # Get specific block/collection using combination of block_id and context_key - Fields.block_id, - Fields.block_type, - Fields.context_key, - Fields.usage_key, - Fields.org, - Fields.tags, - Fields.tags + "." + Fields.tags_taxonomy, - Fields.tags + "." + Fields.tags_level0, - Fields.tags + "." + Fields.tags_level1, - Fields.tags + "." + Fields.tags_level2, - Fields.tags + "." + Fields.tags_level3, - Fields.collections, - Fields.collections + "." + Fields.collections_display_name, - Fields.collections + "." + Fields.collections_key, - Fields.type, - Fields.access_id, - Fields.last_published, - Fields.content + "." + Fields.problem_types, - ]) + client.index(index_name).update_filterable_attributes( + [ + # Get specific block/collection using combination of block_id and context_key + Fields.block_id, + Fields.block_type, + Fields.context_key, + Fields.usage_key, + Fields.org, + Fields.tags, + Fields.tags + "." + Fields.tags_taxonomy, + Fields.tags + "." + Fields.tags_level0, + Fields.tags + "." + Fields.tags_level1, + Fields.tags + "." + Fields.tags_level2, + Fields.tags + "." + Fields.tags_level3, + Fields.collections, + Fields.collections + "." + Fields.collections_display_name, + Fields.collections + "." + Fields.collections_key, + Fields.type, + Fields.access_id, + Fields.last_published, + Fields.content + "." + Fields.problem_types, + ] + ) # Mark which attributes are used for keyword search, in order of importance: - client.index(index_name).update_searchable_attributes([ - # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields. - Fields.display_name, - Fields.block_id, - Fields.content, - Fields.description, - Fields.tags, - Fields.collections, - # If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they - # are searchable only if at least one document in the index has a value. If we didn't list them here and, - # say, there were no tags.level3 tags in the index, the client would get an error if trying to search for - # these sub-fields: "Attribute `tags.level3` is not searchable." - Fields.tags + "." + Fields.tags_taxonomy, - Fields.tags + "." + Fields.tags_level0, - Fields.tags + "." + Fields.tags_level1, - Fields.tags + "." + Fields.tags_level2, - Fields.tags + "." + Fields.tags_level3, - Fields.collections + "." + Fields.collections_display_name, - Fields.collections + "." + Fields.collections_key, - Fields.published + "." + Fields.display_name, - Fields.published + "." + Fields.published_description, - ]) + client.index(index_name).update_searchable_attributes( + [ + # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields. + Fields.display_name, + Fields.block_id, + Fields.content, + Fields.description, + Fields.tags, + Fields.collections, + # If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they + # are searchable only if at least one document in the index has a value. If we didn't list them here and, + # say, there were no tags.level3 tags in the index, the client would get an error if trying to search for + # these sub-fields: "Attribute `tags.level3` is not searchable." + Fields.tags + "." + Fields.tags_taxonomy, + Fields.tags + "." + Fields.tags_level0, + Fields.tags + "." + Fields.tags_level1, + Fields.tags + "." + Fields.tags_level2, + Fields.tags + "." + Fields.tags_level3, + Fields.collections + "." + Fields.collections_display_name, + Fields.collections + "." + Fields.collections_key, + Fields.published + "." + Fields.display_name, + Fields.published + "." + Fields.published_description, + ] + ) # Mark which attributes can be used for sorting search results: - client.index(index_name).update_sortable_attributes([ - Fields.display_name, - Fields.created, - Fields.modified, - Fields.last_published, - ]) + client.index(index_name).update_sortable_attributes( + [ + Fields.display_name, + Fields.created, + Fields.modified, + Fields.last_published, + ] + ) # Update the search ranking rules to let the (optional) "sort" parameter take precedence over keyword relevance. # cf https://www.meilisearch.com/docs/learn/core_concepts/relevancy - client.index(index_name).update_ranking_rules([ - "sort", - "words", - "typo", - "proximity", - "attribute", - "exactness", - ]) + client.index(index_name).update_ranking_rules( + [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness", + ] + ) def _recurse_children(block, fn, status_cb: Callable[[str], None] | None = None) -> None: @@ -357,6 +365,9 @@ def is_meilisearch_enabled() -> bool: def reset_index(status_cb: Callable[[str], None] | None = None) -> None: + """ + Reset the Meilisearch index, deleting all documents and reconfiguring it + """ if status_cb is None: status_cb = log.info @@ -368,19 +379,25 @@ def reset_index(status_cb: Callable[[str], None] | None = None) -> None: def init_index(status_cb: Callable[[str], None] | None = None, warn_cb: Callable[[str], None] | None = None) -> None: + """ + Initialize the Meilisearch index, creating it and configuring it if it doesn't exist + """ if status_cb is None: status_cb = log.info if warn_cb is None: warn_cb = log.warning if _index_exists(STUDIO_INDEX_NAME): - warn_cb("A rebuild of the index is required. Please run ./manage.py cms reindex_studio --experimental [--incremental]") + warn_cb( + "A rebuild of the index is required. Please run ./manage.py cms reindex_studio" + " --experimental [--incremental]" + ) return reset_index(status_cb) -def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=False) -> None: +def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=False) -> None: # lint-amnesty, pylint: disable=too-many-statements """ Rebuild the Meilisearch index from scratch """ @@ -394,10 +411,10 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=Fa status_cb("Counting libraries...") keys_indexed = [] if incremental: - keys_indexed = list(IncrementalIndexCompleted.objects.values_list('context_key', flat=True)) + keys_indexed = list(IncrementalIndexCompleted.objects.values_list("context_key", flat=True)) lib_keys = [ lib.library_key - for lib in lib_api.ContentLibrary.objects.select_related('org').only('org', 'slug').order_by('-id') + for lib in lib_api.ContentLibrary.objects.select_related("org").only("org", "slug").order_by("-id") if lib.library_key not in keys_indexed ] num_libraries = len(lib_keys) diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index 9fa2b30ea87a..e06070927b7d 100644 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -18,10 +18,10 @@ class Command(BaseCommand): """ def add_arguments(self, parser): - parser.add_argument('--experimental', action='store_true') - parser.add_argument('--reset', action='store_true') - parser.add_argument('--init', action='store_true') - parser.add_argument('--incremental', action='store_true') + parser.add_argument("--experimental", action="store_true") + parser.add_argument("--reset", action="store_true") + parser.add_argument("--init", action="store_true") + parser.add_argument("--incremental", action="store_true") parser.set_defaults(experimental=False, reset=False, init=False, incremental=False) def handle(self, *args, **options): diff --git a/openedx/core/djangoapps/content/search/models.py b/openedx/core/djangoapps/content/search/models.py index 91e12affc326..6fa53ef17b34 100644 --- a/openedx/core/djangoapps/content/search/models.py +++ b/openedx/core/djangoapps/content/search/models.py @@ -71,6 +71,9 @@ class IncrementalIndexCompleted(models.Model): """ Stores the contex keys of aleady indexed courses and libraries for incremental indexing. """ + context_key = LearningContextKeyField( - max_length=255, unique=True, null=False, + max_length=255, + unique=True, + null=False, ) diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py index 3949021953a4..6c46afb22b44 100644 --- a/openedx/core/djangoapps/content/search/tests/test_api.py +++ b/openedx/core/djangoapps/content/search/tests/test_api.py @@ -18,7 +18,6 @@ from organizations.tests.factories import OrganizationFactory from common.djangoapps.student.tests.factories import UserFactory -from openedx.core.djangoapps.content.search.models import IncrementalIndexCompleted from openedx.core.djangoapps.content_libraries import api as library_api from openedx.core.djangoapps.content_tagging import api as tagging_api from openedx.core.djangoapps.content.course_overviews.api import CourseOverview @@ -29,7 +28,7 @@ try: # This import errors in the lms because content.search is not an installed app there. from .. import api - from ..models import SearchAccess + from ..models import SearchAccess, IncrementalIndexCompleted except RuntimeError: SearchAccess = {} @@ -249,10 +248,10 @@ def test_reindex_meilisearch_incremental(self, mock_meilisearch): doc_vertical["tags"] = {} doc_problem1 = copy.deepcopy(self.doc_problem1) doc_problem1["tags"] = {} - doc_problem1["collections"] = {'display_name': [], 'key': []} + doc_problem1["collections"] = {"display_name": [], "key": []} doc_problem2 = copy.deepcopy(self.doc_problem2) doc_problem2["tags"] = {} - doc_problem2["collections"] = {'display_name': [], 'key': []} + doc_problem2["collections"] = {"display_name": [], "key": []} doc_collection = copy.deepcopy(self.collection_dict) doc_collection["tags"] = {} @@ -267,18 +266,22 @@ def test_reindex_meilisearch_incremental(self, mock_meilisearch): any_order=True, ) - # Now we simulate interruption by patching _wait_for_meili_task to raise an exception - def simulated_interruption(): - yield - yield - raise Exception("Simulated interruption") - with patch("openedx.core.djangoapps.content.search.api._wait_for_meili_task", side_effect=simulated_interruption()): - with pytest.raises(Exception, match="Simulated interruption"): - api.rebuild_index(incremental=True) + # Now we simulate interruption by passing this function to the status_cb argument + def simulated_interruption(message): + # this exception prevents courses from being indexed + if "Indexing courses" in message: + raise Exception("Simulated interruption") + + with pytest.raises(Exception, match="Simulated interruption"): + api.rebuild_index(simulated_interruption, incremental=True) + + # two more calls due to collections + assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 5 assert IncrementalIndexCompleted.objects.all().count() == 1 api.rebuild_index(incremental=True) assert IncrementalIndexCompleted.objects.all().count() == 0 - assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 7 + # one missing course indexed + assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 6 @override_settings(MEILISEARCH_ENABLED=True) def test_reset_meilisearch_index(self, mock_meilisearch): @@ -297,9 +300,11 @@ def test_init_meilisearch_index(self, mock_meilisearch): mock_meilisearch.return_value.delete_index.assert_not_called() mock_meilisearch.return_value.get_index.side_effect = [ - MeilisearchApiError("Testing reindex", Mock(code="index_not_found", text=None)), - MeilisearchApiError("Testing reindex", Mock(code="index_not_found", text=None)), - Mock(), + MeilisearchApiError("Testing reindex", Mock(text='{"code":"index_not_found"}')), + MeilisearchApiError("Testing reindex", Mock(text='{"code":"index_not_found"}')), + Mock(created_at=1), + Mock(created_at=1), + Mock(created_at=1), ] api.init_index() mock_meilisearch.return_value.swap_indexes.assert_called_once()