From f663739510305bd01ab3b8da34fb5e494921073e Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Fri, 22 Mar 2024 10:08:31 -0700 Subject: [PATCH] feat: Index Studio content using Meilisearch [experimental] (#34310) --- cms/envs/common.py | 18 ++ cms/urls.py | 4 + .../djangoapps/content/search/__init__.py | 0 .../core/djangoapps/content/search/apps.py | 16 ++ .../docs/decisions/0001-meilisearch.rst | 151 ++++++++++++ .../djangoapps/content/search/documents.py | 228 ++++++++++++++++++ .../djangoapps/content/search/handlers.py | 0 .../content/search/management/__init__.py | 0 .../search/management/commands/__init__.py | 0 .../search/management/commands/meili_mixin.py | 105 ++++++++ .../management/commands/reindex_studio.py | 159 ++++++++++++ .../content/search/tests/__init__.py | 0 .../content/search/tests/test_documents.py | 149 ++++++++++++ .../content/search/tests/test_views.py | 82 +++++++ .../core/djangoapps/content/search/urls.py | 11 + .../core/djangoapps/content/search/views.py | 71 ++++++ .../core/djangoapps/content_tagging/api.py | 3 + openedx/core/djangoapps/xblock/api.py | 2 +- requirements/edx/base.txt | 14 ++ requirements/edx/development.txt | 18 +- requirements/edx/doc.txt | 22 ++ requirements/edx/kernel.in | 1 + requirements/edx/testing.txt | 24 +- 23 files changed, 1070 insertions(+), 8 deletions(-) create mode 100644 openedx/core/djangoapps/content/search/__init__.py create mode 100644 openedx/core/djangoapps/content/search/apps.py create mode 100644 openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst create mode 100644 openedx/core/djangoapps/content/search/documents.py create mode 100644 openedx/core/djangoapps/content/search/handlers.py create mode 100644 openedx/core/djangoapps/content/search/management/__init__.py create mode 100644 openedx/core/djangoapps/content/search/management/commands/__init__.py create mode 100644 openedx/core/djangoapps/content/search/management/commands/meili_mixin.py create mode 100644 
openedx/core/djangoapps/content/search/management/commands/reindex_studio.py create mode 100644 openedx/core/djangoapps/content/search/tests/__init__.py create mode 100644 openedx/core/djangoapps/content/search/tests/test_documents.py create mode 100644 openedx/core/djangoapps/content/search/tests/test_views.py create mode 100644 openedx/core/djangoapps/content/search/urls.py create mode 100644 openedx/core/djangoapps/content/search/views.py diff --git a/cms/envs/common.py b/cms/envs/common.py index 7f39b129d828..a11b7c6cd31a 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -1796,6 +1796,9 @@ 'openedx_tagging.core.tagging.apps.TaggingConfig', 'openedx.core.djangoapps.content_tagging', + # Search + 'openedx.core.djangoapps.content.search', + 'openedx.features.course_duration_limits', 'openedx.features.content_type_gating', 'openedx.features.discounts', @@ -2894,3 +2897,18 @@ def _should_send_xblock_events(settings): REST_FRAMEWORK['DEFAULT_SCHEMA_CLASS'] = 'drf_spectacular.openapi.AutoSchema' BEAMER_PRODUCT_ID = "" + +################### Studio Search (alpha, using Meilisearch) ################### + +# Enable Studio search features (powered by Meilisearch) (beta, off by default) +MEILISEARCH_ENABLED = False +# Meilisearch URL that the python backend can use. Often points to another docker container or k8s service. +MEILISEARCH_URL = "http://meilisearch" +# URL that browsers (end users) can use to reach Meilisearch. Should be HTTPS in production. +MEILISEARCH_PUBLIC_URL = "http://meilisearch.example.com" +# To support multi-tenancy, you can prefix all indexes with a common key like "sandbox7-" +# and use a restricted tenant token in place of an API key, so that this Open edX instance +# can only use the index(es) that start with this prefix. 
+# See https://www.meilisearch.com/docs/learn/security/tenant_tokens +MEILISEARCH_INDEX_PREFIX = "" +MEILISEARCH_API_KEY = "devkey" diff --git a/cms/urls.py b/cms/urls.py index 8c3588b04960..9828e9d0fbf0 100644 --- a/cms/urls.py +++ b/cms/urls.py @@ -307,6 +307,10 @@ ), ) +urlpatterns.append( + path('', include(('openedx.core.djangoapps.content.search.urls', 'content_search'), namespace='content_search')), +) + # display error page templates, for testing purposes urlpatterns += [ path('404', handler404), diff --git a/openedx/core/djangoapps/content/search/__init__.py b/openedx/core/djangoapps/content/search/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/openedx/core/djangoapps/content/search/apps.py b/openedx/core/djangoapps/content/search/apps.py new file mode 100644 index 000000000000..13a83a11a6a6 --- /dev/null +++ b/openedx/core/djangoapps/content/search/apps.py @@ -0,0 +1,16 @@ +""" +Define the content search Django App. +""" + +from django.apps import AppConfig + + +class ContentSearchConfig(AppConfig): + """App config for the content search feature""" + + default_auto_field = "django.db.models.BigAutoField" + name = "openedx.core.djangoapps.content.search" + + def ready(self): + # Connect signal handlers + from . import handlers # pylint: disable=unused-import diff --git a/openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst b/openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst new file mode 100644 index 000000000000..1f355409da0e --- /dev/null +++ b/openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst @@ -0,0 +1,151 @@ +Studio Content Search Powered by Meilisearch +############################################ + +Status +****** + +Draft + + +Context +******* + +Existing search functionality +============================= + +The Open edX platform currently implements many different forms of search. 
For +example, users can search for course content, library content, forum posts, and +more. Most of the search functionality in the core platform is powered by the +Elasticsearch search engine (though other functionality developed by 2U, such as +in edx-enterprise, is powered by Algolia). + +Most uses of Elasticsearch in Open edX use +`edx-search `_ which provides a partial +abstraction over Elasticsearch. The edx-search library formerly used +`django-haystack `_ as an abstraction +layer across search engines, but "that was ripped out after the package was +abandoned upstream and it became an obstacle to upgrades and efficiently +utilizing Elasticsearch (the abstraction layer imposed significant limits)" +(thanks to Jeremy Bowman for this context). Due to these changes, the current +edx-search API is a mix of abstractions and direct usage of the Elasticsearch +API, which makes it confusing and difficult to work with. In addition, each +usage of edx-search has been implemented fairly differently. See +`State of edx-search `_ +for details (thanks to Andy Shultz). + +Other platform components use Elasticsearch more directly: + +* ``course-discovery`` and ``edx-notes-api`` do not use ``edx-search``, but are + very tied to Elasticsearch via the use of ``django-elasticsearch-dsl`` and + ``django-elasticsearch-drf``. +* ``cs_comments_service`` uses Elasticsearch via the official ruby gems. + +Problems with Elasticsearch +=========================== + +At the same time, there are many problems with the current reliance on +Elasticsearch: + +1. In 2021, the license of Elasticsearch changed from Apache 2.0 to a more + restrictive license that prohibits providing "the products to others as a + managed service". Consequently, AWS forked the search engine to create + OpenSearch and no longer offers Elasticsearch as a service. This is + problematic for many Open edX operators that use AWS and prefer to avoid + any third-party services. +2. 
Elasticsearch is very resource-intensive and often uses more than a gigabyte + of memory just for small search use cases. +3. Elasticsearch has poor support for multi-tenancy, which multiplies the + problem of resource usage for organizations with many small Open edX sites. +4. The existing usage of edx-search/Elasticsearch routes all search requests and + result processing through edxapp (the LMS) or other IDAs, increasing the + load on those applications. + +Need for Studio Search +====================== + +At the time of this ADR, we have a goal to implement new search functionality in +Studio, to support various course authoring workflows. + +Meilisearch +=========== + +Meilisearch ("MAY-lee search") is a new, promising search engine that offers a +compelling alternative to Elasticsearch. It is open source, feature rich, and +very fast and memory efficient (written in Rust, uses orders of magnitude less +memory than Elasticsearch for small datasets). It has a simple API with an +official python driver, and has official integrations with the popular +Instantsearch frontend library from Algolia. It has strong support for +multi-tenancy, and allows creating restricted API keys that incorporate a user's +permissions, so that search requests can be made directly from the user to +Meilisearch, rather than routing them through Django. Initial testing has shown +it to be much more developer friendly than Elasticsearch/OpenSearch. + +At the time of writing, there are only two known concerns with Meilisearch: + +1. It doesn't (yet) support High Availability via replication, although this is + planned and under development. It does have other features to support high + availability, such as very low restart time (in ms). +2. It doesn't support boolean operators in keyword search ("red AND panda"), + though it does of course support boolean operators in filters. This is a + product decision aimed at keeping the user experience simple, and is unlikely + to change. 
+ + +Decision +******** + +1. We will implement the new Studio search functionality using Meilisearch, + as an experiment and to evaluate it more thoroughly. +2. The Studio search functionality will be disabled by default in the next + Open edX release (Redwood), so that Meilisearch will not be a requirement + for any default nor existing features. This will also allow us to evaluate it + before deciding to embrace it or replace it. +3. We will keep the Meilisearch-specific code isolated to the + new ``content/search`` Django app, so it's relatively easy to swap out later + if this experiment doesn't pan out. +4. We will not use ``edx-search`` for the new search functionality. + + +Consequences +************ + +1. Organizations that wish to try out the new Studio Search functionality in + the Redwood release will have to install and configure Meilisearch. +2. Building both the backend and frontend components of the Studio search + project will be much faster and simpler than if we used ElasticSearch, + edx-search, OpenSearch, django-haystack, etc. +3. Keyword search with boolean operators will not be supported in any of the new + search features. + + +Alternatives Considered +*********************** + +OpenSearch Only +=============== + +Moving existing search functionality to OpenSearch is a possibility, but though +it mostly addresses the licensing issue, it doesn't solve the problems of +resource usage, API complexity, edx-search API complexity, lack of Instantsearch +integration, and poor multi-tenancy. + +OpenSearch and Elasticsearch +============================ + +When OpenSearch was originally forked from Elasticsearch, it was completely API +compatible, but over time they have developed along divergent paths. 
Regardless +of whether ElasticSearch and OpenSearch are actually wire-compatible, recent +versions of all the official ElasticSearch clients have been made to actively +reject connections to OpenSearch, which is why you generally won't find client +libraries that work with both engines, and why there are OpenSearch forks of +everything on the client side as well as the server side. + +As there is no ready-to-use abstraction layer that would allow us to comfortably +support both, and no interest in maintaining one ourselves, this is not an +appealing option. + +Algolia +======= + +Algolia is a great search engine service, but as it is a proprietary product, it +is not suitable as a requirement for an open source platform like Open edX. diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py new file mode 100644 index 000000000000..fc1388f82455 --- /dev/null +++ b/openedx/core/djangoapps/content/search/documents.py @@ -0,0 +1,228 @@ +""" +Utilities related to indexing content for search +""" +from __future__ import annotations +from hashlib import blake2b +import logging + +from django.utils.text import slugify +from opaque_keys.edx.keys import UsageKey, LearningContextKey + +from openedx.core.djangoapps.content_libraries import api as lib_api +from openedx.core.djangoapps.content_tagging import api as tagging_api +from openedx.core.djangoapps.xblock import api as xblock_api + +log = logging.getLogger(__name__) +STUDIO_INDEX_NAME = "studio_content" + + +class Fields: + """ + Fields that exist on the documents in our search index + """ + # Meilisearch primary key. String. + id = "id" + usage_key = "usage_key" + type = "type" # DocType.course_block or DocType.library_block (see below) + block_id = "block_id" # The block_id part of the usage key. 
Sometimes human-readable, sometimes a random hex ID + display_name = "display_name" + block_type = "block_type" + context_key = "context_key" + org = "org" + # breadcrumbs: an array of {"display_name": "..."} entries. First one is the name of the course/library itself. + # After that is the name of any parent Section/Subsection/Unit/etc. + # It's a list of dictionaries because for now we just include the name of each but in future we may add their IDs. + breadcrumbs = "breadcrumbs" + # tags (dictionary) + # See https://blog.meilisearch.com/nested-hierarchical-facets-guide/ + # and https://www.algolia.com/doc/api-reference/widgets/hierarchical-menu/js/ + # For details on the format of the hierarchical tag data. + # We currently have a hard-coded limit of 4 levels of tags in the search index (level0..level3). + tags = "tags" + tags_taxonomy = "taxonomy" # subfield of tags, i.e. tags.taxonomy + tags_level0 = "level0" # subfield of tags, i.e. tags.level0 + tags_level1 = "level1" + tags_level2 = "level2" + tags_level3 = "level3" + # The "content" field is a dictionary of arbitrary data, depending on the block_type. + # It comes from each XBlock's index_dictionary() method (if present) plus some processing. + # Text (html) blocks have an "html_content" key in here, capa has "capa_content" and "problem_types", and so on. + content = "content" + + # Note: new fields or values can be added at any time, but if they need to be indexed for filtering or keyword + # search, the index configuration will need to be changed, which is only done as part of the 'reindex_studio' + # command (changing those settings on a large active index is not recommended). 
+ + +class DocType: + """ + Values for the 'type' field on each doc in the search index + """ + course_block = "course_block" + library_block = "library_block" + + +def _meili_id_from_opaque_key(usage_key: UsageKey) -> str: + """ + Meilisearch requires each document to have a primary key that's either an + integer or a string composed of alphanumeric characters (a-z A-Z 0-9), + hyphens (-) and underscores (_). Since our opaque keys don't meet this + requirement, we transform them to a similar slug ID string that does. + + In the future, with Learning Core's data models in place for courseware, + we could use PublishableEntity's primary key / UUID instead. + """ + # The slugified key _may_ not be unique so we append a hashed string to make it unique: + key_bin = str(usage_key).encode() + suffix = blake2b(key_bin, digest_size=4).hexdigest() # When we use Python 3.9+, should add usedforsecurity=False + return slugify(str(usage_key)) + "-" + suffix + + +def _fields_from_block(block) -> dict: + """ + Given an XBlock instance, call its index_dictionary() method to load any + data that it wants included in the search index. Format into a flat dict. + + Note: the format of index_dictionary() depends on the block type. 
The base + class implementation returns only: + {"content": {"display_name": "..."}, "content_type": "..."} + """ + block_data = { + Fields.id: _meili_id_from_opaque_key(block.usage_key), + Fields.usage_key: str(block.usage_key), + Fields.block_id: str(block.usage_key.block_id), + Fields.display_name: xblock_api.get_block_display_name(block), + Fields.block_type: block.scope_ids.block_type, + # This is called context_key so it's the same for courses and libraries + Fields.context_key: str(block.usage_key.context_key), # same as lib_key + Fields.org: str(block.usage_key.context_key.org), + Fields.breadcrumbs: [] + } + # Get the breadcrumbs (course, section, subsection, etc.): + if block.usage_key.context_key.is_course: # Getting parent is not yet implemented in Learning Core (for libraries). + cur_block = block + while cur_block.parent: + if not cur_block.has_cached_parent: + # This is not a big deal, but if you're updating many blocks in the same course at once, + # this would be very inefficient. Better to recurse the tree top-down with the parent blocks loaded. + log.warning(f"Updating Studio search index for XBlock {block.usage_key} but ancestors weren't cached.") + cur_block = cur_block.get_parent() + block_data[Fields.breadcrumbs].insert(0, {"display_name": xblock_api.get_block_display_name(cur_block)}) + try: + content_data = block.index_dictionary() + # Will be something like: + # { + # 'content': {'display_name': '...', 'capa_content': '...'}, + # 'content_type': 'CAPA', + # 'problem_types': ['multiplechoiceresponse'] + # } + # Which we need to flatten: + if "content_type" in content_data: + del content_data["content_type"] # Redundant with our standard Fields.block_type field. 
+ if "content" in content_data and isinstance(content_data["content"], dict): + content = content_data["content"] + if "display_name" in content: + del content["display_name"] + del content_data["content"] + content_data.update(content) + # Now we have something like: + # { 'capa_content': '...', 'problem_types': ['multiplechoiceresponse'] } + block_data[Fields.content] = content_data + except Exception as err: # pylint: disable=broad-except + log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}") + return block_data + + +def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict: + """ + Given an XBlock, course, library, etc., get the tag data for its index doc. + + See the comments above on "Field.tags" for an explanation of the format. + + e.g. for something tagged "Difficulty: Hard" and "Location: Vancouver" this + would return: + { + "tags": { + "taxonomy": ["Location", "Difficulty"], + "level0": ["Location > North America", "Difficulty > Hard"], + "level1": ["Location > North America > Canada"], + "level2": ["Location > North America > Canada > Vancouver"], + } + } + + Note: despite what you might expect, because this is only used for the + filtering/refinement UI, it's fine if this is a one-way transformation. + It's not necessary to be able to re-construct the exact tag IDs nor taxonomy + IDs from this data that's stored in the search index. It's just a bunch of + strings in a particular format that the frontend knows how to render to + support hierarchical refinement by tag. + """ + # Note that we could improve performance for indexing many components from the same library/course, + # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the + # tags for each component separately. + all_tags = tagging_api.get_object_tags(object_id).all() + if not all_tags: + return {} + result = { + Fields.tags_taxonomy: [], + Fields.tags_level0: [], + # ... 
other levels added as needed + } + for obj_tag in all_tags: + # Add the taxonomy name: + if obj_tag.name not in result[Fields.tags_taxonomy]: + result[Fields.tags_taxonomy].append(obj_tag.name) + # Taxonomy name plus each level of tags, in a list: + parts = [obj_tag.name] + obj_tag.get_lineage() # e.g. ["Location", "North America", "Canada", "Vancouver"] + parts = [part.replace(" > ", " _ ") for part in parts] # Escape our separator. + # Now we build each level (tags.level0, tags.level1, etc.) as applicable. + # We have a hard-coded limit of 4 levels of tags for now (see Fields.tags above). + # A tag like "Difficulty: Hard" will only result in one level (tags.level0) + # But a tag like "Location: North America > Canada > Vancouver" would result in three levels (tags.level0: + # "North America", tags.level1: "North America > Canada", tags.level2: "North America > Canada > Vancouver") + # See the comments above on "Field.tags" for an explanation of why we use this format (basically it's the format + # required by the Instantsearch frontend). + for level in range(4): + # We use '>' as a separator because it's the default for the Instantsearch frontend library, and our + # preferred separator (\t) used in the database is ignored by Meilisearch since it's whitespace. + new_value = " > ".join(parts[0:level + 2]) + if f"level{level}" not in result: + result[f"level{level}"] = [new_value] + elif new_value not in result[f"level{level}"]: + result[f"level{level}"].append(new_value) + if len(parts) == level + 2: + break # We have all the levels for this tag now (e.g. parts=["Difficulty", "Hard"] -> need level0 only) + + return {Fields.tags: result} + + +def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> dict: + """ + Generate a dictionary document suitable for ingestion into a search engine + like Meilisearch or Elasticsearch, so that the given library block can be + found using faceted search. 
+ """ + library_name = lib_api.get_library(metadata.usage_key.context_key).title + doc = {} + try: + block = xblock_api.load_block(metadata.usage_key, user=None) + except Exception as err: # pylint: disable=broad-except + log.exception(f"Failed to load XBlock {metadata.usage_key}: {err}") + doc.update(_fields_from_block(block)) + doc.update(_tags_for_content_object(metadata.usage_key)) + doc[Fields.type] = DocType.library_block + # Add the breadcrumbs. In v2 libraries, the library itself is not a "parent" of the XBlocks so we add it here: + doc[Fields.breadcrumbs] = [{"display_name": library_name}] + return doc + + +def searchable_doc_for_course_block(block) -> dict: + """ + Generate a dictionary document suitable for ingestion into a search engine + like Meilisearch or Elasticsearch, so that the given course block can be + found using faceted search. + """ + doc = _fields_from_block(block) + doc.update(_tags_for_content_object(block.usage_key)) + doc[Fields.type] = DocType.course_block + return doc diff --git a/openedx/core/djangoapps/content/search/handlers.py b/openedx/core/djangoapps/content/search/handlers.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/openedx/core/djangoapps/content/search/management/__init__.py b/openedx/core/djangoapps/content/search/management/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/openedx/core/djangoapps/content/search/management/commands/__init__.py b/openedx/core/djangoapps/content/search/management/commands/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py b/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py new file mode 100644 index 000000000000..18c7681879cb --- /dev/null +++ b/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py @@ -0,0 +1,105 @@ +""" +Mixin for Django management commands that interact with Meilisearch +""" 
+from contextlib import contextmanager +import time + +from django.conf import settings +from django.core.management import CommandError +import meilisearch +from meilisearch.errors import MeilisearchError +from meilisearch.models.task import TaskInfo + + +class MeiliCommandMixin: + """ + Mixin for Django management commands that interact with Meilisearch + """ + def get_meilisearch_client(self): + """ + Get the Meilisearch client + """ + if hasattr(self, "_meili_client"): + return self._meili_client + # Connect to Meilisearch + if not settings.MEILISEARCH_ENABLED: + raise CommandError("MEILISEARCH_ENABLED is not set - search functionality disabled.") + + self._meili_client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) + try: + self._meili_client.health() + except MeilisearchError as err: + self.stderr.write(err.message) # print this because 'raise...from...' doesn't print the details + raise CommandError("Unable to connect to Meilisearch") from err + return self._meili_client + + def wait_for_meili_task(self, info: TaskInfo): + """ + Simple helper method to wait for a Meilisearch task to complete + """ + client = self.get_meilisearch_client() + current_status = client.get_task(info.task_uid) + while current_status.status in ("enqueued", "processing"): + self.stdout.write("...") + time.sleep(1) + current_status = client.get_task(info.task_uid) + if current_status.status != "succeeded": + self.stderr.write(f"Task has status: {current_status.status}") + self.stderr.write(str(current_status.error)) + try: + err_reason = current_status.error['message'] + except (TypeError, KeyError): + err_reason = "Unknown error" + raise MeilisearchError(err_reason) + + def index_exists(self, index_name: str) -> bool: + """ + Check if an index exists + """ + client = self.get_meilisearch_client() + try: + client.get_index(index_name) + except MeilisearchError as err: + if err.code == "index_not_found": + return False + else: + raise err + return True + 
+ @contextmanager + def using_temp_index(self, target_index): + """ + Create a new temporary Meilisearch index, populate it, then swap it to + become the active index. + """ + client = self.get_meilisearch_client() + self.stdout.write("Checking index...") + temp_index_name = target_index + "_new" + if self.index_exists(temp_index_name): + self.stdout.write("Temporary index already exists. Deleting it...") + self.wait_for_meili_task(client.delete_index(temp_index_name)) + + self.stdout.write("Creating new index...") + self.wait_for_meili_task( + client.create_index(temp_index_name, {'primaryKey': 'id'}) + ) + new_index_created = client.get_index(temp_index_name).created_at + + yield temp_index_name + + if not self.index_exists(target_index): + # We have to create the "target" index before we can successfully swap the new one into it: + self.stdout.write("Preparing to swap into index (first time)...") + self.wait_for_meili_task(client.create_index(target_index)) + self.stdout.write("Swapping index...") + client.swap_indexes([{'indexes': [temp_index_name, target_index]}]) + # If we're using an API key that's restricted to certain index prefix(es), we won't be able to get the status + # of this request unfortunately. https://github.com/meilisearch/meilisearch/issues/4103 + while True: + time.sleep(1) + if client.get_index(target_index).created_at != new_index_created: + self.stdout.write("Waiting for swap completion...") + else: + break + self.stdout.write("Deleting old index...") + self.wait_for_meili_task(client.delete_index(temp_index_name)) diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py new file mode 100644 index 000000000000..0f52eea51d4d --- /dev/null +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -0,0 +1,159 @@ +""" +Command to build or re-build the search index for courses (in Studio, i.e. 
Draft +mode), in Meilisearch. + +See also cms/djangoapps/contentstore/management/commands/reindex_course.py which +indexes LMS (published) courses in ElasticSearch. +""" +import logging +import time + +from django.conf import settings +from django.core.management import BaseCommand, CommandError + +from openedx.core.djangoapps.content_libraries import api as lib_api +from openedx.core.djangoapps.content.search.documents import ( + Fields, + searchable_doc_for_course_block, + searchable_doc_for_library_block, + STUDIO_INDEX_NAME, +) +from xmodule.modulestore import ModuleStoreEnum +from xmodule.modulestore.django import modulestore +from .meili_mixin import MeiliCommandMixin + + +log = logging.getLogger(__name__) + + +class Command(MeiliCommandMixin, BaseCommand): + """ + Build or re-build the search index for courses and libraries (in Studio, i.e. Draft mode) + + This is experimental and not recommended for production use. + """ + + def add_arguments(self, parser): + parser.add_argument('--experimental', action='store_true') + parser.set_defaults(experimental=False) + + def handle(self, *args, **options): + """ + Build a new search index for Studio, containing content from courses and libraries + """ + if not options["experimental"]: + raise CommandError( + "This command is experimental and not recommended for production. " + "Use the --experimental argument to acknowledge and run it." 
+ ) + start_time = time.perf_counter() + client = self.get_meilisearch_client() + store = modulestore() + + # Get the lists of libraries + self.stdout.write("Counting libraries...") + lib_keys = [lib.library_key for lib in lib_api.ContentLibrary.objects.select_related('org').only('org', 'slug')] + num_libraries = len(lib_keys) + + # Get the list of courses + self.stdout.write("Counting courses...") + with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): + all_courses = store.get_courses() + num_courses = len(all_courses) + + # Some counters so we can track our progress as indexing progresses: + num_contexts = num_courses + num_libraries + num_contexts_done = 0 # How many courses/libraries we've indexed + num_blocks_done = 0 # How many individual components/XBlocks we've indexed + + self.stdout.write(f"Found {num_courses} courses and {num_libraries} libraries.") + index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME + with self.using_temp_index(index_name) as temp_index_name: + ############## Configure the index ############## + + # The following index settings are best changed on an empty index. + # Changing them on a populated index will "re-index all documents in the index, which can take some time" + # and use more RAM. Instead, we configure an empty index then populate it one course/library at a time. 
+ + # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique): + client.index(temp_index_name).update_distinct_attribute(Fields.usage_key) + # Mark which attributes can be used for filtering/faceted search: + client.index(temp_index_name).update_filterable_attributes([ + Fields.block_type, + Fields.context_key, + Fields.org, + Fields.tags, + Fields.type, + ]) + # Mark which attributes are used for keyword search, in order of importance: + client.index(temp_index_name).update_searchable_attributes([ + Fields.display_name, + Fields.block_id, + Fields.content, + Fields.tags, + # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields. + ]) + + ############## Libraries ############## + self.stdout.write("Indexing libraries...") + for lib_key in lib_keys: + self.stdout.write(f"{num_contexts_done + 1}/{num_contexts}. Now indexing library {lib_key}") + docs = [] + for component in lib_api.get_library_components(lib_key): + metadata = lib_api.LibraryXBlockMetadata.from_component(lib_key, component) + doc = searchable_doc_for_library_block(metadata) + docs.append(doc) + num_blocks_done += 1 + if docs: + # Add all the docs in this library at once (usually faster than adding one at a time): + self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) + self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) + num_contexts_done += 1 + + ############## Courses ############## + self.stdout.write("Indexing courses...") + for course in all_courses: + self.stdout.write( + f"{num_contexts_done + 1}/{num_contexts}. 
Now indexing course {course.display_name} ({course.id})" + ) + docs = [] + + # Pre-fetch the course with all of its children: + course = store.get_course(course.id, depth=None) + + def add_with_children(block): + """ Recursively index the given XBlock/component """ + doc = searchable_doc_for_course_block(block) + docs.append(doc) # pylint: disable=cell-var-from-loop + self.recurse_children(block, add_with_children) # pylint: disable=cell-var-from-loop + + self.recurse_children(course, add_with_children) + + if docs: + # Add all the docs in this course at once (usually faster than adding one at a time): + self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) + num_contexts_done += 1 + num_blocks_done += len(docs) + + elapsed_time = time.perf_counter() - start_time + self.stdout.write( + f"Done! {num_blocks_done} blocks indexed across {num_contexts_done} courses " + f"and libraries in {elapsed_time:.0f}s." + ) + + def recurse_children(self, block, fn): + """ + Recurse the children of an XBlock and call the given function for each + + The main purpose of this is just to wrap the loading of each child in + try...except. Otherwise block.get_children() would do what we need. 
+ """ + if block.has_children: + for child_id in block.children: + try: + child = block.get_child(child_id) + except Exception as err: # pylint: disable=broad-except + log.exception(err) + self.stderr.write(f"Unable to load block {child_id}") + else: + fn(child) diff --git a/openedx/core/djangoapps/content/search/tests/__init__.py b/openedx/core/djangoapps/content/search/tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py new file mode 100644 index 000000000000..97b2fb5d033f --- /dev/null +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -0,0 +1,149 @@ +""" +Tests for the Studio content search documents (what gets stored in the index) +""" +from organizations.models import Organization +from openedx.core.djangoapps.content_tagging import api as tagging_api +from openedx.core.djangolib.testing.utils import skip_unless_cms +from xmodule.modulestore.django import modulestore +from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase +from xmodule.modulestore.tests.factories import BlockFactory, ToyCourseFactory + +from ..documents import searchable_doc_for_course_block + +STUDIO_SEARCH_ENDPOINT_URL = "/api/content_search/v2/studio/" + + +@skip_unless_cms +class StudioDocumentsTest(SharedModuleStoreTestCase): + """ + Tests for the Studio content search documents (what gets stored in the + search index) + """ + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.store = modulestore() + cls.toy_course = ToyCourseFactory.create() # See xmodule/modulestore/tests/sample_courses.py + cls.toy_course_key = cls.toy_course.id + # Get references to some blocks in the toy course + cls.html_block_key = cls.toy_course_key.make_usage_key("html", "toyjumpto") + # Create a problem in library + cls.problem_block = BlockFactory.create( + category="problem", + 
parent_location=cls.toy_course_key.make_usage_key("vertical", "vertical_test"), + display_name='Test Problem', + data="What is a test?", + ) + + # Create a couple taxonomies and some tags + cls.org = Organization.objects.create(name="edX", short_name="edX") + cls.difficulty_tags = tagging_api.create_taxonomy(name="Difficulty", orgs=[cls.org], allow_multiple=False) + tagging_api.add_tag_to_taxonomy(cls.difficulty_tags, tag="Easy") + tagging_api.add_tag_to_taxonomy(cls.difficulty_tags, tag="Normal") + tagging_api.add_tag_to_taxonomy(cls.difficulty_tags, tag="Difficult") + + cls.subject_tags = tagging_api.create_taxonomy(name="Subject", orgs=[cls.org], allow_multiple=True) + tagging_api.add_tag_to_taxonomy(cls.subject_tags, tag="Linguistics") + tagging_api.add_tag_to_taxonomy(cls.subject_tags, tag="Asian Languages", parent_tag_value="Linguistics") + tagging_api.add_tag_to_taxonomy(cls.subject_tags, tag="Chinese", parent_tag_value="Asian Languages") + tagging_api.add_tag_to_taxonomy(cls.subject_tags, tag="Hypertext") + tagging_api.add_tag_to_taxonomy(cls.subject_tags, tag="Jump Links", parent_tag_value="Hypertext") + + # Tag stuff: + tagging_api.tag_object(cls.problem_block.usage_key, cls.difficulty_tags, tags=["Easy"]) + tagging_api.tag_object(cls.html_block_key, cls.subject_tags, tags=["Chinese", "Jump Links"]) + tagging_api.tag_object(cls.html_block_key, cls.difficulty_tags, tags=["Normal"]) + + def test_problem_block(self): + """ + Test how a problem block gets represented in the search index + """ + block = self.store.get_item(self.problem_block.usage_key) + doc = searchable_doc_for_course_block(block) + assert doc == { + # Note the 'id' has been stripped of special characters to meet Meilisearch requirements. + # The '-8516ed8' suffix is deterministic based on the original usage key. 
+ "id": "block-v1edxtoy2012_falltypeproblemblocktest_problem-f46b6f1e", + "type": "course_block", + "block_type": "problem", + "usage_key": "block-v1:edX+toy+2012_Fall+type@problem+block@Test_Problem", + "block_id": "Test_Problem", + "context_key": "course-v1:edX+toy+2012_Fall", + "org": "edX", + "display_name": "Test Problem", + "breadcrumbs": [ + {"display_name": "Toy Course"}, + {"display_name": "chapter"}, + {"display_name": "sequential"}, + {"display_name": "vertical"}, + ], + "content": { + "capa_content": "What is a test?", + "problem_types": ["multiplechoiceresponse"], + }, + # See https://blog.meilisearch.com/nested-hierarchical-facets-guide/ + # and https://www.algolia.com/doc/api-reference/widgets/hierarchical-menu/js/ + # For details on why the hierarchical tag data is in this format. + "tags": { + "taxonomy": ["Difficulty"], + "level0": ["Difficulty > Easy"], + }, + } + + def test_html_block(self): + """ + Test how an HTML block gets represented in the search index + """ + block = self.store.get_item(self.html_block_key) + doc = searchable_doc_for_course_block(block) + assert doc == { + "id": "block-v1edxtoy2012_falltypehtmlblocktoyjumpto-efb9c601", + "type": "course_block", + "block_type": "html", + "usage_key": "block-v1:edX+toy+2012_Fall+type@html+block@toyjumpto", + "block_id": "toyjumpto", + "context_key": "course-v1:edX+toy+2012_Fall", + "org": "edX", + "display_name": "Text", + "breadcrumbs": [ + {"display_name": "Toy Course"}, + {"display_name": "Overview"}, + {"display_name": "Toy Videos"}, + ], + "content": { + "html_content": ( + "This is a link to another page and some Chinese 四節比分和七年前 Some more Chinese 四節比分和七年前 " + ), + }, + "tags": { + "taxonomy": ["Difficulty", "Subject"], + "level0": ["Difficulty > Normal", "Subject > Hypertext", "Subject > Linguistics"], + "level1": ["Subject > Hypertext > Jump Links", "Subject > Linguistics > Asian Languages"], + "level2": ["Subject > Linguistics > Asian Languages > Chinese"], + }, + } + + def 
test_video_block_untagged(self): + """ + Test how a video block gets represented in the search index. + """ + block_usage_key = self.toy_course_key.make_usage_key("video", "Welcome") + block = self.store.get_item(block_usage_key) + doc = searchable_doc_for_course_block(block) + assert doc == { + "id": "block-v1edxtoy2012_falltypevideoblockwelcome-0c9fd626", + "type": "course_block", + "block_type": "video", + "usage_key": "block-v1:edX+toy+2012_Fall+type@video+block@Welcome", + "block_id": "Welcome", + "context_key": "course-v1:edX+toy+2012_Fall", + "org": "edX", + "display_name": "Welcome", + "breadcrumbs": [ + {"display_name": "Toy Course"}, + {"display_name": "Overview"}, + ], + "content": {}, + # This video has no tags. + } diff --git a/openedx/core/djangoapps/content/search/tests/test_views.py b/openedx/core/djangoapps/content/search/tests/test_views.py new file mode 100644 index 000000000000..e117cf1b969a --- /dev/null +++ b/openedx/core/djangoapps/content/search/tests/test_views.py @@ -0,0 +1,82 @@ +""" +Tests for the Studio content search REST API. +""" +from django.test import override_settings +from rest_framework.test import APITestCase, APIClient +from unittest import mock + +from common.djangoapps.student.tests.factories import UserFactory +from openedx.core.djangolib.testing.utils import skip_unless_cms + +STUDIO_SEARCH_ENDPOINT_URL = "/api/content_search/v2/studio/" + + +@skip_unless_cms +class StudioSearchViewTest(APITestCase): + """ + General tests for the Studio search REST API. 
+ """ + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.staff = UserFactory.create( + username='staff', email='staff@example.com', is_staff=True, password='staff_pass' + ) + cls.student = UserFactory.create( + username='student', email='student@example.com', is_staff=False, password='student_pass' + ) + + def setUp(self): + super().setUp() + self.client = APIClient() + + @override_settings(MEILISEARCH_ENABLED=False) + def test_studio_search_unathenticated_disabled(self): + """ + Whether or not Meilisearch is enabled, the API endpoint requires authentication. + """ + result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) + assert result.status_code == 401 + + @override_settings(MEILISEARCH_ENABLED=True) + def test_studio_search_unathenticated_enabled(self): + """ + Whether or not Meilisearch is enabled, the API endpoint requires authentication. + """ + result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) + assert result.status_code == 401 + + @override_settings(MEILISEARCH_ENABLED=False) + def test_studio_search_disabled(self): + """ + When Meilisearch is disabled, the Studio search endpoint gives a 404 + """ + self.client.login(username='student', password='student_pass') + result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) + assert result.status_code == 404 + + @override_settings(MEILISEARCH_ENABLED=True) + def test_studio_search_student_forbidden(self): + """ + Until we implement fine-grained permissions, only global staff can use + the Studio search endpoint. + """ + self.client.login(username='student', password='student_pass') + result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) + assert result.status_code == 403 + + @override_settings(MEILISEARCH_ENABLED=True) + @mock.patch('openedx.core.djangoapps.content.search.views._get_meili_api_key_uid') + def test_studio_search_staff(self, mock_get_api_key_uid): + """ + Global staff can get a restricted API key for Meilisearch using the REST + API. 
+ """ + self.client.login(username='staff', password='staff_pass') + mock_get_api_key_uid.return_value = "3203d764-370f-4e99-a917-d47ab7f29739" + result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) + assert result.status_code == 200 + assert result.data["index_name"] == "studio_content" + assert result.data["url"].startswith("http") + assert result.data["api_key"] and isinstance(result.data["api_key"], str) diff --git a/openedx/core/djangoapps/content/search/urls.py b/openedx/core/djangoapps/content/search/urls.py new file mode 100644 index 000000000000..c69be33d5bb0 --- /dev/null +++ b/openedx/core/djangoapps/content/search/urls.py @@ -0,0 +1,11 @@ +""" +URLs for content sesarch +""" +from django.urls import path + +from .views import StudioSearchView + + +urlpatterns = [ + path('api/content_search/v2/studio/', StudioSearchView.as_view(), name='studio_content_search') +] diff --git a/openedx/core/djangoapps/content/search/views.py b/openedx/core/djangoapps/content/search/views.py new file mode 100644 index 000000000000..69b686a3434f --- /dev/null +++ b/openedx/core/djangoapps/content/search/views.py @@ -0,0 +1,71 @@ +""" +REST API for content search +""" +from datetime import datetime, timedelta, timezone +import logging + +from django.conf import settings +from django.contrib.auth import get_user_model +import meilisearch +from rest_framework.exceptions import NotFound, PermissionDenied +from rest_framework.response import Response +from rest_framework.views import APIView + +from common.djangoapps.student.roles import GlobalStaff +from openedx.core.lib.api.view_utils import view_auth_classes +from openedx.core.djangoapps.content.search.documents import STUDIO_INDEX_NAME + +User = get_user_model() +log = logging.getLogger(__name__) + + +def _get_meili_api_key_uid(): + """ + Helper method to get the UID of the API key we're using for Meilisearch + """ + if not hasattr(_get_meili_api_key_uid, "uid"): + client = meilisearch.Client(settings.MEILISEARCH_URL, 
settings.MEILISEARCH_API_KEY) + _get_meili_api_key_uid.uid = client.get_key(settings.MEILISEARCH_API_KEY).uid + return _get_meili_api_key_uid.uid + + +@view_auth_classes(is_authenticated=True) +class StudioSearchView(APIView): + """ + Give user details on how they can search studio content + """ + + def get(self, request): + """ + Give user details on how they can search studio content + """ + if not settings.MEILISEARCH_ENABLED: + raise NotFound("Meilisearch features are not enabled.") + if not GlobalStaff().has_user(request.user): + # Until we enforce permissions properly (see below), this endpoint is restricted to global staff, + # because it lets you search data from any course/library. + raise PermissionDenied("For the moment, use of this search preview is restricted to global staff.") + client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) + index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME + + # Create an API key that only allows the user to search content that they have permission to view: + expires_at = datetime.now(tz=timezone.utc) + timedelta(days=7) + search_rules = { + index_name: { + # TODO: Apply filters here based on the user's permissions, so they can only search for content + # that they have permission to view. Example: + # 'filter': 'org = BradenX' + } + } + # Note: the following is just generating a JWT. It doesn't actually make an API call to Meilisearch. 
+ restricted_api_key = client.generate_tenant_token( + api_key_uid=_get_meili_api_key_uid(), + search_rules=search_rules, + expires_at=expires_at, + ) + + return Response({ + "url": settings.MEILISEARCH_PUBLIC_URL, + "index_name": index_name, + "api_key": restricted_api_key, + }) diff --git a/openedx/core/djangoapps/content_tagging/api.py b/openedx/core/djangoapps/content_tagging/api.py index c167eba3a629..e50d96fe2ce2 100644 --- a/openedx/core/djangoapps/content_tagging/api.py +++ b/openedx/core/djangoapps/content_tagging/api.py @@ -200,6 +200,9 @@ def set_object_tags( # Expose the oel_tagging APIs +add_tag_to_taxonomy = oel_tagging.add_tag_to_taxonomy +update_tag_in_taxonomy = oel_tagging.update_tag_in_taxonomy +delete_tags_from_taxonomy = oel_tagging.delete_tags_from_taxonomy get_taxonomy = oel_tagging.get_taxonomy get_taxonomies = oel_tagging.get_taxonomies get_tags = oel_tagging.get_tags diff --git a/openedx/core/djangoapps/xblock/api.py b/openedx/core/djangoapps/xblock/api.py index 98ba9e222dd0..f54464de9f4e 100644 --- a/openedx/core/djangoapps/xblock/api.py +++ b/openedx/core/djangoapps/xblock/api.py @@ -180,7 +180,7 @@ def get_block_display_name(block: XBlock) -> str: if display_name is not None: return display_name else: - return xblock_type_display_name(block.block_type) + return xblock_type_display_name(block.scope_ids.block_type) def get_component_from_usage_key(usage_key: UsageKeyV2) -> Component: diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index 88474a7383cf..4cde6fbf33d4 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -22,6 +22,8 @@ analytics-python==1.4.post1 # via -r requirements/edx/kernel.in aniso8601==9.0.1 # via edx-tincan-py35 +annotated-types==0.6.0 + # via pydantic appdirs==1.4.4 # via fs asgiref==3.7.2 @@ -87,6 +89,8 @@ botocore==1.34.45 # s3transfer bridgekeeper==0.9 # via -r requirements/edx/kernel.in +camel-converter[pydantic]==3.1.1 + # via meilisearch celery==5.3.6 # via # -c 
requirements/edx/../constraints.txt @@ -719,6 +723,8 @@ markupsafe==2.1.5 # xblock maxminddb==2.5.2 # via geoip2 +meilisearch==0.30.0 + # via -r requirements/edx/kernel.in mock==5.1.0 # via -r requirements/edx/paver.txt mongoengine==0.27.0 @@ -863,6 +869,10 @@ pycryptodomex==3.20.0 # edx-proctoring # lti-consumer-xblock # pyjwkest +pydantic==2.6.3 + # via camel-converter +pydantic-core==2.16.3 + # via pydantic pygments==2.17.2 # via # -r requirements/edx/bundled.in @@ -1002,6 +1012,7 @@ requests==2.31.0 # edx-rest-api-client # geoip2 # mailsnake + # meilisearch # openai # optimizely-sdk # pyjwkest @@ -1144,12 +1155,15 @@ tqdm==4.66.2 typing-extensions==4.9.0 # via # -r requirements/edx/paver.txt + # annotated-types # asgiref # django-countries # drf-spectacular # edx-opaque-keys # jwcrypto # kombu + # pydantic + # pydantic-core # pylti1p3 # snowflake-connector-python tzdata==2024.1 diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index 8571145b3bb9..3657252fcfd0 100644 --- a/requirements/edx/development.txt +++ b/requirements/edx/development.txt @@ -51,6 +51,7 @@ aniso8601==9.0.1 # edx-tincan-py35 annotated-types==0.6.0 # via + # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt # pydantic anyio==4.3.0 @@ -168,6 +169,11 @@ cachetools==5.3.2 # via # -r requirements/edx/testing.txt # tox +camel-converter[pydantic]==3.1.1 + # via + # -r requirements/edx/doc.txt + # -r requirements/edx/testing.txt + # meilisearch celery==5.3.6 # via # -c requirements/edx/../constraints.txt @@ -1189,6 +1195,10 @@ mccabe==0.7.0 # via # -r requirements/edx/testing.txt # pylint +meilisearch==0.30.0 + # via + # -r requirements/edx/doc.txt + # -r requirements/edx/testing.txt mistune==2.0.5 # via # -r requirements/edx/doc.txt @@ -1461,12 +1471,15 @@ pycryptodomex==3.20.0 # edx-proctoring # lti-consumer-xblock # pyjwkest -pydantic==2.6.1 +pydantic==2.6.3 # via + # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt + # camel-converter # 
fastapi -pydantic-core==2.16.2 +pydantic-core==2.16.3 # via + # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt # pydantic pydata-sphinx-theme==0.14.4 @@ -1738,6 +1751,7 @@ requests==2.31.0 # edx-rest-api-client # geoip2 # mailsnake + # meilisearch # openai # optimizely-sdk # pact-python diff --git a/requirements/edx/doc.txt b/requirements/edx/doc.txt index 2c13926f8c31..92e8b53acf89 100644 --- a/requirements/edx/doc.txt +++ b/requirements/edx/doc.txt @@ -33,6 +33,10 @@ aniso8601==9.0.1 # via # -r requirements/edx/base.txt # edx-tincan-py35 +annotated-types==0.6.0 + # via + # -r requirements/edx/base.txt + # pydantic appdirs==1.4.4 # via # -r requirements/edx/base.txt @@ -114,6 +118,10 @@ botocore==1.34.45 # s3transfer bridgekeeper==0.9 # via -r requirements/edx/base.txt +camel-converter[pydantic]==3.1.1 + # via + # -r requirements/edx/base.txt + # meilisearch celery==5.3.6 # via # -c requirements/edx/../constraints.txt @@ -842,6 +850,8 @@ maxminddb==2.5.2 # via # -r requirements/edx/base.txt # geoip2 +meilisearch==0.30.0 + # via -r requirements/edx/base.txt mistune==2.0.5 # via sphinx-mdinclude mock==5.1.0 @@ -1018,6 +1028,14 @@ pycryptodomex==3.20.0 # edx-proctoring # lti-consumer-xblock # pyjwkest +pydantic==2.6.3 + # via + # -r requirements/edx/base.txt + # camel-converter +pydantic-core==2.16.3 + # via + # -r requirements/edx/base.txt + # pydantic pydata-sphinx-theme==0.14.4 # via sphinx-book-theme pygments==2.17.2 @@ -1180,6 +1198,7 @@ requests==2.31.0 # edx-rest-api-client # geoip2 # mailsnake + # meilisearch # openai # optimizely-sdk # pyjwkest @@ -1391,12 +1410,15 @@ tqdm==4.66.2 typing-extensions==4.9.0 # via # -r requirements/edx/base.txt + # annotated-types # asgiref # django-countries # drf-spectacular # edx-opaque-keys # jwcrypto # kombu + # pydantic + # pydantic-core # pydata-sphinx-theme # pylti1p3 # snowflake-connector-python diff --git a/requirements/edx/kernel.in b/requirements/edx/kernel.in index 1f2b7a6c5bfb..0c2f110c84b9 100644 
--- a/requirements/edx/kernel.in +++ b/requirements/edx/kernel.in @@ -107,6 +107,7 @@ lxml # XML parser lti-consumer-xblock>=7.3.0 mako # Primary template language used for server-side page rendering Markdown # Convert text markup to HTML; used in capa problems, forums, and course wikis +meilisearch # Library to access Meilisearch search engine (will replace ElasticSearch) mongoengine # Object-document mapper for MongoDB, used in the LMS dashboard mysqlclient # Driver for the default production relational database nodeenv # Utility for managing Node.js environments; we use this for deployments and testing diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt index 70257a6dbe80..8e27c6eced8d 100644 --- a/requirements/edx/testing.txt +++ b/requirements/edx/testing.txt @@ -30,7 +30,9 @@ aniso8601==9.0.1 # -r requirements/edx/base.txt # edx-tincan-py35 annotated-types==0.6.0 - # via pydantic + # via + # -r requirements/edx/base.txt + # pydantic anyio==4.3.0 # via starlette appdirs==1.4.4 @@ -118,6 +120,10 @@ bridgekeeper==0.9 # via -r requirements/edx/base.txt cachetools==5.3.2 # via tox +camel-converter[pydantic]==3.1.1 + # via + # -r requirements/edx/base.txt + # meilisearch celery==5.3.6 # via # -c requirements/edx/../constraints.txt @@ -901,6 +907,8 @@ maxminddb==2.5.2 # geoip2 mccabe==0.7.0 # via pylint +meilisearch==0.30.0 + # via -r requirements/edx/base.txt mock==5.1.0 # via -r requirements/edx/base.txt mongoengine==0.27.0 @@ -1094,10 +1102,15 @@ pycryptodomex==3.20.0 # edx-proctoring # lti-consumer-xblock # pyjwkest -pydantic==2.6.1 - # via fastapi -pydantic-core==2.16.2 - # via pydantic +pydantic==2.6.3 + # via + # -r requirements/edx/base.txt + # camel-converter + # fastapi +pydantic-core==2.16.3 + # via + # -r requirements/edx/base.txt + # pydantic pygments==2.17.2 # via # -r requirements/edx/base.txt @@ -1306,6 +1319,7 @@ requests==2.31.0 # edx-rest-api-client # geoip2 # mailsnake + # meilisearch # openai # optimizely-sdk # pact-python