From 463549f9711e66180189d15aeb441174f79b2902 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Tue, 27 Feb 2024 14:36:38 -0800 Subject: [PATCH 01/28] feat: management command to index content libraries using Meilisearch --- cms/envs/common.py | 11 ++ .../management/commands/reindex_libraries.py | 132 ++++++++++++++++++ .../djangoapps/content_libraries/search.py | 56 ++++++++ requirements/edx/base.txt | 14 ++ requirements/edx/development.txt | 18 ++- requirements/edx/doc.txt | 22 +++ requirements/edx/kernel.in | 1 + requirements/edx/testing.txt | 24 +++- 8 files changed, 271 insertions(+), 7 deletions(-) create mode 100644 openedx/core/djangoapps/content_libraries/management/commands/reindex_libraries.py create mode 100644 openedx/core/djangoapps/content_libraries/search.py diff --git a/cms/envs/common.py b/cms/envs/common.py index 754e6e9755a5..98b28b45ab24 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -2869,3 +2869,14 @@ def _should_send_xblock_events(settings): REST_FRAMEWORK['DEFAULT_SCHEMA_CLASS'] = 'drf_spectacular.openapi.AutoSchema' BEAMER_PRODUCT_ID = "" + +################### Search ################### + +# To support multi-tenancy, you can prefix all indexes with a common key like "sandbox7-" +# and use a restricted tenant token in place of an API key, so that this Open edX instance +# can only use the index(es) that start with this prefix. 
+# See https://www.meilisearch.com/docs/learn/security/tenant_tokens +MEILISEARCH_INDEX_PREFIX = "" +# Set this to None to disable search functionality +MEILISEARCH_URL = "http://meilisearch" +MEILISEARCH_API_KEY = "devkey" diff --git a/openedx/core/djangoapps/content_libraries/management/commands/reindex_libraries.py b/openedx/core/djangoapps/content_libraries/management/commands/reindex_libraries.py new file mode 100644 index 000000000000..fd982665f793 --- /dev/null +++ b/openedx/core/djangoapps/content_libraries/management/commands/reindex_libraries.py @@ -0,0 +1,132 @@ +""" +Command to build or re-build the search index for content libraries. +""" +import logging +import time + +from django.conf import settings +from django.core.management import BaseCommand, CommandError +import meilisearch +from meilisearch.errors import MeilisearchError +from meilisearch.models.task import TaskInfo + +from openedx.core.djangoapps.content_libraries import api as lib_api +from openedx.core.djangoapps.content_libraries.search import searchable_doc_for_library_block +from openedx.core.djangoapps.content_libraries.models import ContentLibrary + + +log = logging.getLogger(__name__) + +LIBRARIES_INDEX_NAME = "content_libraries" + + +class Command(BaseCommand): + """ + Build or re-build the search index for content libraries. + """ + + def handle(self, *args, **options): + """ + Build a new search index + """ + + # Connect to Meilisearch + if not settings.MEILISEARCH_URL: + raise CommandError("MEILISEARCH_URL is not set - search functionality disabled.") + # TODO: put this into a helper lib? + client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) + try: + client.health() + except MeilisearchError as err: + self.stderr.write(err.message) # print this because 'raise...from...' 
doesn't print the details + raise CommandError("Unable to connect to Meilisearch") from err + + # Get the list of libraries + self.stdout.write("Counting libraries...") + lib_keys = [lib.library_key for lib in ContentLibrary.objects.select_related('org').only('org', 'slug')] + blocks_by_lib_key = {} + num_blocks = 0 + for lib_key in lib_keys: + blocks_by_lib_key[lib_key] = [] + for component in lib_api.get_library_components(lib_key): + blocks_by_lib_key[lib_key].append(lib_api.LibraryXBlockMetadata.from_component(lib_key, component)) + num_blocks += 1 + + self.stdout.write(f"Found {num_blocks} XBlocks among {len(lib_keys)} libraries.") + + # Check if the index exists already: + self.stdout.write("Checking index...") + index_name = settings.MEILISEARCH_INDEX_PREFIX + LIBRARIES_INDEX_NAME + temp_index_name = index_name + "_new" + try: + client.get_index(temp_index_name) + except MeilisearchError as err: + pass + else: + self.stdout.write("Index already exists. Deleting it...") + self._wait_for_meili_task(client, client.delete_index(temp_index_name)) + + self.stdout.write("Creating new index...") + self._wait_for_meili_task( + client, + client.create_index(temp_index_name, {'primaryKey': 'id'}) + ) + + self.stdout.write("Indexing documents...") + num_done = 0 + for lib_key in lib_keys: + self.stdout.write(f"{num_done}/{num_blocks}. 
Now indexing {lib_key}") + docs = [] + for metadata in blocks_by_lib_key[lib_key]: + doc = searchable_doc_for_library_block(metadata) + docs.append(doc) + # Add all the docs in this library at once (usually faster than adding one at a time): + self._wait_for_meili_task(client, client.index(temp_index_name).add_documents(docs)) + num_done += len(docs) + + new_index_created = client.get_index(temp_index_name).created_at + if not self._index_exists(index_name, client): + # We have to create the "target" index before we can successfully swap the new one into it: + self.stdout.write(f"Preparing to swap into index (first time)...") + self._wait_for_meili_task(client, client.create_index(index_name)) + self.stdout.write(f"Swapping index...") + client.swap_indexes([{'indexes': [temp_index_name, index_name]}]) + # If we're using an API key that's restricted to certain index prefix(es), we won't be able to get the status + # of this request unfortunately. https://github.com/meilisearch/meilisearch/issues/4103 + while True: + time.sleep(1) + if client.get_index(index_name).created_at != new_index_created: + self.stdout.write("Waiting for swap completion...") + else: + break + self.stdout.write("Deleting old index...") + self._wait_for_meili_task(client, client.delete_index(temp_index_name)) + + self.stdout.write(f"Done! 
{num_blocks} blocks indexed.") + + def _wait_for_meili_task(self, client: meilisearch.Client, info: TaskInfo): + """ + Simple helper method to wait for a Meilisearch task to complete + """ + current_status = client.get_task(info.task_uid) + while current_status.status in ("enqueued", "processing"): + self.stdout.write("...") + time.sleep(1) + current_status = client.get_task(info.task_uid) + if current_status.status != "succeeded": + self.stderr.write(f"Task has status: {current_status.status}") + self.stderr.write(str(current_status.error)) + try: + raise MeilisearchError(current_status.error['message']) + except (TypeError, KeyError): + raise MeilisearchError("Unknown error") + + def _index_exists(self, index_name: str, client: meilisearch.Client) -> bool: + """ + Check if an index exists + """ + try: + client.get_index(index_name) + except MeilisearchError as err: + return False + return True diff --git a/openedx/core/djangoapps/content_libraries/search.py b/openedx/core/djangoapps/content_libraries/search.py new file mode 100644 index 000000000000..e5931c5c4c34 --- /dev/null +++ b/openedx/core/djangoapps/content_libraries/search.py @@ -0,0 +1,56 @@ +import logging + +from django.utils.text import slugify + +from openedx.core.djangoapps.content_libraries import api as lib_api +from openedx.core.djangoapps.xblock import api as xblock_api + +log = logging.getLogger(__name__) + + +def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> dict: + """ + Generate a dictionary document suitable for ingestion into a search engine + like Meilisearch or Elasticsearch, so that the given library block can be + found using faceted search. 
+ """ + doc = {} + try: + block = xblock_api.load_block(metadata.usage_key, user=None) + block_data = block.index_dictionary() + # Will be something like: + # { + # 'content': {'display_name': '...', 'capa_content': '...'}, + # 'content_type': 'CAPA', + # 'problem_types': ['multiplechoiceresponse'] + # } + # Which we need to flatten: + if "content_type" in block_data: + del block_data["content_type"] # Redundant with our "type" field + if "content" in block_data and isinstance(block_data["content"], dict): + content = block_data["content"] + if "display_name" in content: + del content["display_name"] + del block_data["content"] + block_data.update(content) + # Now we have + # { 'capa_content': '...', 'problem_types': ['multiplechoiceresponse'] } + doc.update(block_data) + except Exception as err: + log.exception(f"Failed to get index_dictionary for {metadata.usage_key}: {err}") + # The data below must always override any values from index_dictionary: + doc.update({ + # A Meilisearch document identifier can be of type integer or string, + # only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_). + # So our usage keys with ":" characters cannot be used as primary keys. + "id": slugify(str(metadata.usage_key)) + "-" + str(hash(str(metadata.usage_key)) % 1_000), + "usage_key": str(metadata.usage_key), + "block_id": str(metadata.usage_key.block_id), + "display_name": metadata.display_name, + "type": metadata.usage_key.block_type, + # This is called contextKey not libKey so we can use the same keys with courses, and maybe search + # both courses and libraries together in the future? 
+ "context_key": str(metadata.usage_key.context_key), # same as lib_key + "org": str(metadata.usage_key.context_key.org), + }) + return doc diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index 4fc911d74c43..2b0d648ec5ac 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -22,6 +22,8 @@ analytics-python==1.4.post1 # via -r requirements/edx/kernel.in aniso8601==9.0.1 # via edx-tincan-py35 +annotated-types==0.6.0 + # via pydantic appdirs==1.4.4 # via fs asgiref==3.7.2 @@ -87,6 +89,8 @@ botocore==1.34.45 # s3transfer bridgekeeper==0.9 # via -r requirements/edx/kernel.in +camel-converter[pydantic]==3.1.1 + # via meilisearch celery==5.3.6 # via # -c requirements/edx/../constraints.txt @@ -716,6 +720,8 @@ markupsafe==2.1.5 # xblock maxminddb==2.5.2 # via geoip2 +meilisearch==0.30.0 + # via -r requirements/edx/kernel.in mock==5.1.0 # via -r requirements/edx/paver.txt mongoengine==0.27.0 @@ -860,6 +866,10 @@ pycryptodomex==3.20.0 # edx-proctoring # lti-consumer-xblock # pyjwkest +pydantic==2.6.3 + # via camel-converter +pydantic-core==2.16.3 + # via pydantic pygments==2.17.2 # via # -r requirements/edx/bundled.in @@ -999,6 +1009,7 @@ requests==2.31.0 # edx-rest-api-client # geoip2 # mailsnake + # meilisearch # openai # optimizely-sdk # pyjwkest @@ -1141,12 +1152,15 @@ tqdm==4.66.2 typing-extensions==4.9.0 # via # -r requirements/edx/paver.txt + # annotated-types # asgiref # django-countries # drf-spectacular # edx-opaque-keys # jwcrypto # kombu + # pydantic + # pydantic-core # pylti1p3 # snowflake-connector-python tzdata==2024.1 diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index 78a4bfb532ea..8dc79ee4fe03 100644 --- a/requirements/edx/development.txt +++ b/requirements/edx/development.txt @@ -51,6 +51,7 @@ aniso8601==9.0.1 # edx-tincan-py35 annotated-types==0.6.0 # via + # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt # pydantic anyio==4.3.0 @@ -168,6 +169,11 @@ cachetools==5.3.2 # 
via # -r requirements/edx/testing.txt # tox +camel-converter[pydantic]==3.1.1 + # via + # -r requirements/edx/doc.txt + # -r requirements/edx/testing.txt + # meilisearch celery==5.3.6 # via # -c requirements/edx/../constraints.txt @@ -1187,6 +1193,10 @@ mccabe==0.7.0 # via # -r requirements/edx/testing.txt # pylint +meilisearch==0.30.0 + # via + # -r requirements/edx/doc.txt + # -r requirements/edx/testing.txt mistune==2.0.5 # via # -r requirements/edx/doc.txt @@ -1459,12 +1469,15 @@ pycryptodomex==3.20.0 # edx-proctoring # lti-consumer-xblock # pyjwkest -pydantic==2.6.1 +pydantic==2.6.3 # via + # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt + # camel-converter # fastapi -pydantic-core==2.16.2 +pydantic-core==2.16.3 # via + # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt # pydantic pydata-sphinx-theme==0.14.4 @@ -1736,6 +1749,7 @@ requests==2.31.0 # edx-rest-api-client # geoip2 # mailsnake + # meilisearch # openai # optimizely-sdk # pact-python diff --git a/requirements/edx/doc.txt b/requirements/edx/doc.txt index c06a83d0b185..790b7c26f51f 100644 --- a/requirements/edx/doc.txt +++ b/requirements/edx/doc.txt @@ -33,6 +33,10 @@ aniso8601==9.0.1 # via # -r requirements/edx/base.txt # edx-tincan-py35 +annotated-types==0.6.0 + # via + # -r requirements/edx/base.txt + # pydantic appdirs==1.4.4 # via # -r requirements/edx/base.txt @@ -114,6 +118,10 @@ botocore==1.34.45 # s3transfer bridgekeeper==0.9 # via -r requirements/edx/base.txt +camel-converter[pydantic]==3.1.1 + # via + # -r requirements/edx/base.txt + # meilisearch celery==5.3.6 # via # -c requirements/edx/../constraints.txt @@ -840,6 +848,8 @@ maxminddb==2.5.2 # via # -r requirements/edx/base.txt # geoip2 +meilisearch==0.30.0 + # via -r requirements/edx/base.txt mistune==2.0.5 # via sphinx-mdinclude mock==5.1.0 @@ -1016,6 +1026,14 @@ pycryptodomex==3.20.0 # edx-proctoring # lti-consumer-xblock # pyjwkest +pydantic==2.6.3 + # via + # -r requirements/edx/base.txt + # 
camel-converter +pydantic-core==2.16.3 + # via + # -r requirements/edx/base.txt + # pydantic pydata-sphinx-theme==0.14.4 # via sphinx-book-theme pygments==2.17.2 @@ -1178,6 +1196,7 @@ requests==2.31.0 # edx-rest-api-client # geoip2 # mailsnake + # meilisearch # openai # optimizely-sdk # pyjwkest @@ -1389,12 +1408,15 @@ tqdm==4.66.2 typing-extensions==4.9.0 # via # -r requirements/edx/base.txt + # annotated-types # asgiref # django-countries # drf-spectacular # edx-opaque-keys # jwcrypto # kombu + # pydantic + # pydantic-core # pydata-sphinx-theme # pylti1p3 # snowflake-connector-python diff --git a/requirements/edx/kernel.in b/requirements/edx/kernel.in index 1f2b7a6c5bfb..0c2f110c84b9 100644 --- a/requirements/edx/kernel.in +++ b/requirements/edx/kernel.in @@ -107,6 +107,7 @@ lxml # XML parser lti-consumer-xblock>=7.3.0 mako # Primary template language used for server-side page rendering Markdown # Convert text markup to HTML; used in capa problems, forums, and course wikis +meilisearch # Library to access Meilisearch search engine (will replace ElasticSearch) mongoengine # Object-document mapper for MongoDB, used in the LMS dashboard mysqlclient # Driver for the default production relational database nodeenv # Utility for managing Node.js environments; we use this for deployments and testing diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt index 6fcc7f7adab3..b622680b27a6 100644 --- a/requirements/edx/testing.txt +++ b/requirements/edx/testing.txt @@ -30,7 +30,9 @@ aniso8601==9.0.1 # -r requirements/edx/base.txt # edx-tincan-py35 annotated-types==0.6.0 - # via pydantic + # via + # -r requirements/edx/base.txt + # pydantic anyio==4.3.0 # via starlette appdirs==1.4.4 @@ -118,6 +120,10 @@ bridgekeeper==0.9 # via -r requirements/edx/base.txt cachetools==5.3.2 # via tox +camel-converter[pydantic]==3.1.1 + # via + # -r requirements/edx/base.txt + # meilisearch celery==5.3.6 # via # -c requirements/edx/../constraints.txt @@ -899,6 +905,8 @@ 
maxminddb==2.5.2 # geoip2 mccabe==0.7.0 # via pylint +meilisearch==0.30.0 + # via -r requirements/edx/base.txt mock==5.1.0 # via -r requirements/edx/base.txt mongoengine==0.27.0 @@ -1092,10 +1100,15 @@ pycryptodomex==3.20.0 # edx-proctoring # lti-consumer-xblock # pyjwkest -pydantic==2.6.1 - # via fastapi -pydantic-core==2.16.2 - # via pydantic +pydantic==2.6.3 + # via + # -r requirements/edx/base.txt + # camel-converter + # fastapi +pydantic-core==2.16.3 + # via + # -r requirements/edx/base.txt + # pydantic pygments==2.17.2 # via # -r requirements/edx/base.txt @@ -1304,6 +1317,7 @@ requests==2.31.0 # edx-rest-api-client # geoip2 # mailsnake + # meilisearch # openai # optimizely-sdk # pact-python From 81daab2b60a31720d31a63d3132e9afe31fe9bba Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Wed, 28 Feb 2024 17:55:17 -0800 Subject: [PATCH 02/28] feat: include tags in library content index --- .../core/djangoapps/content_libraries/search.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/openedx/core/djangoapps/content_libraries/search.py b/openedx/core/djangoapps/content_libraries/search.py index e5931c5c4c34..74624b4e7f94 100644 --- a/openedx/core/djangoapps/content_libraries/search.py +++ b/openedx/core/djangoapps/content_libraries/search.py @@ -1,8 +1,12 @@ +""" +Utilities related to searching content libraries +""" import logging from django.utils.text import slugify from openedx.core.djangoapps.content_libraries import api as lib_api +from openedx.core.djangoapps.content_tagging import api as tagging_api from openedx.core.djangoapps.xblock import api as xblock_api log = logging.getLogger(__name__) @@ -36,7 +40,7 @@ def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> # Now we have # { 'capa_content': '...', 'problem_types': ['multiplechoiceresponse'] } doc.update(block_data) - except Exception as err: + except Exception as err: # pylint: disable=broad-except log.exception(f"Failed to get 
index_dictionary for {metadata.usage_key}: {err}") # The data below must always override any values from index_dictionary: doc.update({ @@ -53,4 +57,15 @@ def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> "context_key": str(metadata.usage_key.context_key), # same as lib_key "org": str(metadata.usage_key.context_key.org), }) + # Add tags. Note that we could improve performance for indexing many components from the same library, + # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the + # tags for each component separately. + for obj_tag in tagging_api.get_object_tags(metadata.usage_key).all(): + key = f"tags:{obj_tag.name}" # Taxonomy name + if key not in doc: + doc[key] = [] + # Add the tag and all its parent tags, which are implied + for tag_value in obj_tag.get_lineage(): + if tag_value not in doc[key]: + doc[key].append(tag_value) return doc From ea01c7084968c1678f748f6b75df2e58ac19015d Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Wed, 28 Feb 2024 17:55:23 -0800 Subject: [PATCH 03/28] refactor: cleanup --- .../management/commands/reindex_libraries.py | 110 ++++++++++-------- 1 file changed, 61 insertions(+), 49 deletions(-) diff --git a/openedx/core/djangoapps/content_libraries/management/commands/reindex_libraries.py b/openedx/core/djangoapps/content_libraries/management/commands/reindex_libraries.py index fd982665f793..d8a8139d4f0e 100644 --- a/openedx/core/djangoapps/content_libraries/management/commands/reindex_libraries.py +++ b/openedx/core/djangoapps/content_libraries/management/commands/reindex_libraries.py @@ -20,26 +20,70 @@ LIBRARIES_INDEX_NAME = "content_libraries" -class Command(BaseCommand): +class MeiliCommandMixin: """ - Build or re-build the search index for content libraries. 
+ Mixin for Django management commands that interact with Meilisearch """ - def handle(self, *args, **options): + def get_meilisearch_client(self): """ - Build a new search index + Get the Meiliesearch client """ - + if hasattr(self, "_meili_client"): + return self._meili_client # Connect to Meilisearch if not settings.MEILISEARCH_URL: raise CommandError("MEILISEARCH_URL is not set - search functionality disabled.") - # TODO: put this into a helper lib? - client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) + + self._meili_client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) try: - client.health() + self._meili_client.health() except MeilisearchError as err: self.stderr.write(err.message) # print this because 'raise...from...' doesn't print the details raise CommandError("Unable to connect to Meilisearch") from err + return self._meili_client + + def wait_for_meili_task(self, info: TaskInfo): + """ + Simple helper method to wait for a Meilisearch task to complete + """ + client = self.get_meilisearch_client() + current_status = client.get_task(info.task_uid) + while current_status.status in ("enqueued", "processing"): + self.stdout.write("...") + time.sleep(1) + current_status = client.get_task(info.task_uid) + if current_status.status != "succeeded": + self.stderr.write(f"Task has status: {current_status.status}") + self.stderr.write(str(current_status.error)) + try: + err_reason = current_status.error['message'] + except (TypeError, KeyError): + err_reason = "Unknown error" + raise MeilisearchError(err_reason) + + def index_exists(self, index_name: str) -> bool: + """ + Check if an index exists + """ + client = self.get_meilisearch_client() + try: + client.get_index(index_name) + except MeilisearchError as err: + return False + return True + + +class Command(MeiliCommandMixin, BaseCommand): + """ + Build or re-build the search index for content libraries. 
+ """ + + def handle(self, *args, **options): + """ + Build a new search index + """ + client = self.get_meilisearch_client() # Get the list of libraries self.stdout.write("Counting libraries...") @@ -58,17 +102,12 @@ def handle(self, *args, **options): self.stdout.write("Checking index...") index_name = settings.MEILISEARCH_INDEX_PREFIX + LIBRARIES_INDEX_NAME temp_index_name = index_name + "_new" - try: - client.get_index(temp_index_name) - except MeilisearchError as err: - pass - else: + if self.index_exists(temp_index_name): self.stdout.write("Index already exists. Deleting it...") - self._wait_for_meili_task(client, client.delete_index(temp_index_name)) + self.wait_for_meili_task(client.delete_index(temp_index_name)) self.stdout.write("Creating new index...") - self._wait_for_meili_task( - client, + self.wait_for_meili_task( client.create_index(temp_index_name, {'primaryKey': 'id'}) ) @@ -81,15 +120,15 @@ def handle(self, *args, **options): doc = searchable_doc_for_library_block(metadata) docs.append(doc) # Add all the docs in this library at once (usually faster than adding one at a time): - self._wait_for_meili_task(client, client.index(temp_index_name).add_documents(docs)) + self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) num_done += len(docs) new_index_created = client.get_index(temp_index_name).created_at - if not self._index_exists(index_name, client): + if not self.index_exists(index_name): # We have to create the "target" index before we can successfully swap the new one into it: - self.stdout.write(f"Preparing to swap into index (first time)...") - self._wait_for_meili_task(client, client.create_index(index_name)) - self.stdout.write(f"Swapping index...") + self.stdout.write("Preparing to swap into index (first time)...") + self.wait_for_meili_task(client.create_index(index_name)) + self.stdout.write("Swapping index...") client.swap_indexes([{'indexes': [temp_index_name, index_name]}]) # If we're using an API key that's 
restricted to certain index prefix(es), we won't be able to get the status # of this request unfortunately. https://github.com/meilisearch/meilisearch/issues/4103 @@ -100,33 +139,6 @@ def handle(self, *args, **options): else: break self.stdout.write("Deleting old index...") - self._wait_for_meili_task(client, client.delete_index(temp_index_name)) + self.wait_for_meili_task(client.delete_index(temp_index_name)) self.stdout.write(f"Done! {num_blocks} blocks indexed.") - - def _wait_for_meili_task(self, client: meilisearch.Client, info: TaskInfo): - """ - Simple helper method to wait for a Meilisearch task to complete - """ - current_status = client.get_task(info.task_uid) - while current_status.status in ("enqueued", "processing"): - self.stdout.write("...") - time.sleep(1) - current_status = client.get_task(info.task_uid) - if current_status.status != "succeeded": - self.stderr.write(f"Task has status: {current_status.status}") - self.stderr.write(str(current_status.error)) - try: - raise MeilisearchError(current_status.error['message']) - except (TypeError, KeyError): - raise MeilisearchError("Unknown error") - - def _index_exists(self, index_name: str, client: meilisearch.Client) -> bool: - """ - Check if an index exists - """ - try: - client.get_index(index_name) - except MeilisearchError as err: - return False - return True From 9f75b4c53c494eaf3e2bc53d364b55e3c48507c1 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 29 Feb 2024 13:25:01 -0800 Subject: [PATCH 04/28] feat: index courseware too --- cms/envs/common.py | 3 + .../djangoapps/content/search/__init__.py | 0 .../core/djangoapps/content/search/apps.py | 16 +++ .../djangoapps/content/search/documents.py | 126 ++++++++++++++++++ .../djangoapps/content/search/handlers.py | 0 .../content/search/management/__init__.py | 0 .../search/management/commands/__init__.py | 0 .../management/commands/meili_mixin.py} | 72 +++------- .../commands/reindex_courses_studio.py | 82 ++++++++++++ 
.../management/commands/reindex_libraries.py | 57 ++++++++ .../djangoapps/content_libraries/search.py | 71 ---------- 11 files changed, 299 insertions(+), 128 deletions(-) create mode 100644 openedx/core/djangoapps/content/search/__init__.py create mode 100644 openedx/core/djangoapps/content/search/apps.py create mode 100644 openedx/core/djangoapps/content/search/documents.py create mode 100644 openedx/core/djangoapps/content/search/handlers.py create mode 100644 openedx/core/djangoapps/content/search/management/__init__.py create mode 100644 openedx/core/djangoapps/content/search/management/commands/__init__.py rename openedx/core/djangoapps/{content_libraries/management/commands/reindex_libraries.py => content/search/management/commands/meili_mixin.py} (57%) create mode 100644 openedx/core/djangoapps/content/search/management/commands/reindex_courses_studio.py create mode 100644 openedx/core/djangoapps/content/search/management/commands/reindex_libraries.py delete mode 100644 openedx/core/djangoapps/content_libraries/search.py diff --git a/cms/envs/common.py b/cms/envs/common.py index 98b28b45ab24..d721535785c6 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -1776,6 +1776,9 @@ 'openedx_tagging.core.tagging.apps.TaggingConfig', 'openedx.core.djangoapps.content_tagging', + # Search + 'openedx.core.djangoapps.content.search.apps.ContentSearchConfig', + 'openedx.features.course_duration_limits', 'openedx.features.content_type_gating', 'openedx.features.discounts', diff --git a/openedx/core/djangoapps/content/search/__init__.py b/openedx/core/djangoapps/content/search/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/openedx/core/djangoapps/content/search/apps.py b/openedx/core/djangoapps/content/search/apps.py new file mode 100644 index 000000000000..13a83a11a6a6 --- /dev/null +++ b/openedx/core/djangoapps/content/search/apps.py @@ -0,0 +1,16 @@ +""" +Define the content search Django App. 
+""" + +from django.apps import AppConfig + + +class ContentSearchConfig(AppConfig): + """App config for the content search feature""" + + default_auto_field = "django.db.models.BigAutoField" + name = "openedx.core.djangoapps.content.search" + + def ready(self): + # Connect signal handlers + from . import handlers # pylint: disable=unused-import diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py new file mode 100644 index 000000000000..4bccd406f7aa --- /dev/null +++ b/openedx/core/djangoapps/content/search/documents.py @@ -0,0 +1,126 @@ +""" +Utilities related to searching content libraries +""" +import logging + +from django.utils.text import slugify +from opaque_keys.edx.keys import UsageKey + +from openedx.core.djangoapps.content_libraries import api as lib_api +from openedx.core.djangoapps.content_tagging import api as tagging_api +from openedx.core.djangoapps.xblock import api as xblock_api + +log = logging.getLogger(__name__) + + +def _meili_id_from_opaque_key(usage_key: UsageKey) -> str: + """ + Meilisearch requires each document to have a primary key that's either an + integer or a string composed of alphanumeric characters (a-z A-Z 0-9), + hyphens (-) and underscores (_). Since our opaque keys don't meet this + requirement, we transform them to a similar slug ID string that does. + """ + # The slugified key _may_ not be unique so we append a hashed number too + return slugify(str(usage_key)) + "-" + str(hash(str(usage_key)) % 1_000) + + +def _fields_from_block(block) -> dict: + """ + Given an XBlock instance, call its index_dictionary() method to load any + data that it wants included in the search index. Format into a flat dict. + + Note: the format of index_dictionary() depends on the block type. 
The base + class implementation returns only: + {"content": {"display_name": "..."}, "content_type": "..."} + """ + try: + block_data = block.index_dictionary() + # Will be something like: + # { + # 'content': {'display_name': '...', 'capa_content': '...'}, + # 'content_type': 'CAPA', + # 'problem_types': ['multiplechoiceresponse'] + # } + # Which we need to flatten: + if "content_type" in block_data: + del block_data["content_type"] # Redundant with our "type" field that we add later + if "content" in block_data and isinstance(block_data["content"], dict): + content = block_data["content"] + if "display_name" in content: + del content["display_name"] + del block_data["content"] + block_data.update(content) + # Now we have something like: + # { 'capa_content': '...', 'problem_types': ['multiplechoiceresponse'] } + except Exception as err: # pylint: disable=broad-except + log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}") + block_data = {} + block_data.update({ + "id": _meili_id_from_opaque_key(block.usage_key), + "usage_key": str(block.usage_key), + "block_id": str(block.usage_key.block_id), + "display_name": block.display_name, # TODO: there is some function to get the fallback display_name + "type": block.scope_ids.block_type, + # This is called context_key so it's the same for courses and libraries + "context_key": str(block.usage_key.context_key), # same as lib_key + "org": str(block.usage_key.context_key.org), + }) + return block_data + + +def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> dict: + """ + Generate a dictionary document suitable for ingestion into a search engine + like Meilisearch or Elasticsearch, so that the given library block can be + found using faceted search. 
+ """ + doc = {} + try: + block = xblock_api.load_block(metadata.usage_key, user=None) + except Exception as err: # pylint: disable=broad-except + log.exception(f"Failed to load XBlock {metadata.usage_key}: {err}") + # Even though we couldn't load the block, we can still include basic data about it in the index, from 'metadata' + doc.update({ + "id": _meili_id_from_opaque_key(metadata.usage_key), + "usage_key": str(metadata.usage_key), + "block_id": str(metadata.usage_key.block_id), + "display_name": metadata.display_name, + "type": metadata.usage_key.block_type, + "context_key": str(metadata.usage_key.context_key), + "org": str(metadata.usage_key.context_key.org), + }) + else: + doc.update(_fields_from_block(block)) + # Add tags. Note that we could improve performance for indexing many components from the same library, + # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the + # tags for each component separately. + for obj_tag in tagging_api.get_object_tags(metadata.usage_key).all(): + key = f"tags:{obj_tag.name}" # Taxonomy name + if key not in doc: + doc[key] = [] + # Add the tag and all its parent tags, which are implied + for tag_value in obj_tag.get_lineage(): + if tag_value not in doc[key]: + doc[key].append(tag_value) + return doc + + +def searchable_doc_for_course_block(block) -> dict: + """ + Generate a dictionary document suitable for ingestion into a search engine + like Meilisearch or Elasticsearch, so that the given library block can be + found using faceted search. + """ + doc = _fields_from_block(block) + # Add tags. Note that we could improve performance for indexing many components from the same library, + # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the + # tags for each component separately. 
+ for obj_tag in tagging_api.get_object_tags(block.usage_key).all(): + key = f"tags:{obj_tag.name}" # Taxonomy name + if key not in doc: + doc[key] = [] + # Add the tag and all its parent tags, which are implied + for tag_value in obj_tag.get_lineage(): + if tag_value not in doc[key]: + doc[key].append(tag_value) + return doc diff --git a/openedx/core/djangoapps/content/search/handlers.py b/openedx/core/djangoapps/content/search/handlers.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/openedx/core/djangoapps/content/search/management/__init__.py b/openedx/core/djangoapps/content/search/management/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/openedx/core/djangoapps/content/search/management/commands/__init__.py b/openedx/core/djangoapps/content/search/management/commands/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/openedx/core/djangoapps/content_libraries/management/commands/reindex_libraries.py b/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py similarity index 57% rename from openedx/core/djangoapps/content_libraries/management/commands/reindex_libraries.py rename to openedx/core/djangoapps/content/search/management/commands/meili_mixin.py index d8a8139d4f0e..8fb85a598ef2 100644 --- a/openedx/core/djangoapps/content_libraries/management/commands/reindex_libraries.py +++ b/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py @@ -1,30 +1,20 @@ """ -Command to build or re-build the search index for content libraries. 
+Mixin for Django management commands that interact with Meilisearch """ -import logging +from contextlib import contextmanager import time from django.conf import settings -from django.core.management import BaseCommand, CommandError +from django.core.management import CommandError import meilisearch from meilisearch.errors import MeilisearchError from meilisearch.models.task import TaskInfo -from openedx.core.djangoapps.content_libraries import api as lib_api -from openedx.core.djangoapps.content_libraries.search import searchable_doc_for_library_block -from openedx.core.djangoapps.content_libraries.models import ContentLibrary - - -log = logging.getLogger(__name__) - -LIBRARIES_INDEX_NAME = "content_libraries" - class MeiliCommandMixin: """ Mixin for Django management commands that interact with Meilisearch """ - def get_meilisearch_client(self): """ Get the Meiliesearch client @@ -73,72 +63,40 @@ def index_exists(self, index_name: str) -> bool: return False return True - -class Command(MeiliCommandMixin, BaseCommand): - """ - Build or re-build the search index for content libraries. - """ - - def handle(self, *args, **options): + @contextmanager + def using_temp_index(self, target_index): """ - Build a new search index + Create a new temporary Meilisearch index, populate it, then swap it to + become the active index. 
""" client = self.get_meilisearch_client() - - # Get the list of libraries - self.stdout.write("Counting libraries...") - lib_keys = [lib.library_key for lib in ContentLibrary.objects.select_related('org').only('org', 'slug')] - blocks_by_lib_key = {} - num_blocks = 0 - for lib_key in lib_keys: - blocks_by_lib_key[lib_key] = [] - for component in lib_api.get_library_components(lib_key): - blocks_by_lib_key[lib_key].append(lib_api.LibraryXBlockMetadata.from_component(lib_key, component)) - num_blocks += 1 - - self.stdout.write(f"Found {num_blocks} XBlocks among {len(lib_keys)} libraries.") - - # Check if the index exists already: self.stdout.write("Checking index...") - index_name = settings.MEILISEARCH_INDEX_PREFIX + LIBRARIES_INDEX_NAME - temp_index_name = index_name + "_new" + temp_index_name = target_index + "_new" if self.index_exists(temp_index_name): - self.stdout.write("Index already exists. Deleting it...") + self.stdout.write("Temporary index already exists. Deleting it...") self.wait_for_meili_task(client.delete_index(temp_index_name)) self.stdout.write("Creating new index...") self.wait_for_meili_task( client.create_index(temp_index_name, {'primaryKey': 'id'}) ) + new_index_created = client.get_index(temp_index_name).created_at - self.stdout.write("Indexing documents...") - num_done = 0 - for lib_key in lib_keys: - self.stdout.write(f"{num_done}/{num_blocks}. 
Now indexing {lib_key}") - docs = [] - for metadata in blocks_by_lib_key[lib_key]: - doc = searchable_doc_for_library_block(metadata) - docs.append(doc) - # Add all the docs in this library at once (usually faster than adding one at a time): - self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) - num_done += len(docs) + yield temp_index_name - new_index_created = client.get_index(temp_index_name).created_at - if not self.index_exists(index_name): + if not self.index_exists(target_index): # We have to create the "target" index before we can successfully swap the new one into it: self.stdout.write("Preparing to swap into index (first time)...") - self.wait_for_meili_task(client.create_index(index_name)) + self.wait_for_meili_task(client.create_index(target_index)) self.stdout.write("Swapping index...") - client.swap_indexes([{'indexes': [temp_index_name, index_name]}]) + client.swap_indexes([{'indexes': [temp_index_name, target_index]}]) # If we're using an API key that's restricted to certain index prefix(es), we won't be able to get the status # of this request unfortunately. https://github.com/meilisearch/meilisearch/issues/4103 while True: time.sleep(1) - if client.get_index(index_name).created_at != new_index_created: + if client.get_index(target_index).created_at != new_index_created: self.stdout.write("Waiting for swap completion...") else: break self.stdout.write("Deleting old index...") self.wait_for_meili_task(client.delete_index(temp_index_name)) - - self.stdout.write(f"Done! 
{num_blocks} blocks indexed.") diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_courses_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_courses_studio.py new file mode 100644 index 000000000000..d05975257acd --- /dev/null +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_courses_studio.py @@ -0,0 +1,82 @@ +""" +Command to build or re-build the search index for courses (in Studio, i.e. Draft +mode), in Meilisearch. + +See also cms/djangoapps/contentstore/management/commands/reindex_course.py which +indexes LMS (published) courses in ElasticSearch. +""" +import logging + +from django.conf import settings +from django.core.management import BaseCommand + +from openedx.core.djangoapps.content.search.documents import searchable_doc_for_course_block +from xmodule.modulestore import ModuleStoreEnum +from xmodule.modulestore.django import modulestore +from .meili_mixin import MeiliCommandMixin + + +log = logging.getLogger(__name__) + +STUDIO_COURSES_INDEX_NAME = "courseware_draft" + + +class Command(MeiliCommandMixin, BaseCommand): + """ + Build or re-build the search index for courses (in Studio, i.e. Draft mode) + """ + + def handle(self, *args, **options): + """ + Build a new search index + """ + client = self.get_meilisearch_client() + store = modulestore() + + # Get the list of libraries + self.stdout.write("Counting courses...") + with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): + all_courses = store.get_courses() + num_courses = len(all_courses) + self.stdout.write(f"Found {num_courses} courses.") + + index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_COURSES_INDEX_NAME + with self.using_temp_index(index_name) as temp_index_name: + self.stdout.write("Indexing documents...") + num_courses_done = 0 + num_blocks_done = 0 + for course in all_courses: + self.stdout.write(f"{num_courses_done}/{num_courses}. 
Now indexing {course.display_name} ({course.id})") + docs = [] + + def add_with_children(block): + """ Recursively index the given XBlock/component """ + doc = searchable_doc_for_course_block(block) + docs.append(doc) # pylint: disable=cell-var-from-loop + self.recurse_children(block, add_with_children) # pylint: disable=cell-var-from-loop + + self.recurse_children(course, add_with_children) + + # Add all the docs in this library at once (usually faster than adding one at a time): + self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) + num_courses_done += 1 + num_blocks_done += len(docs) + + self.stdout.write(f"Done! {num_blocks_done} blocks indexed across {num_courses_done} courses.") + + def recurse_children(self, block, fn): + """ + Recurse the children of an XBlock and call the given function for each + + The main purpose of this is just to wrap the loading of each child in + try...except. Otherwise block.get_children() would do what we need. + """ + if block.has_children: + for child_id in block.children: + try: + child = block.get_child(child_id) + except Exception as err: # pylint: disable=broad-except + log.exception(err) + self.stderr.write(f"Unable to load block {child_id}") + else: + fn(child) diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_libraries.py b/openedx/core/djangoapps/content/search/management/commands/reindex_libraries.py new file mode 100644 index 000000000000..61677eb13939 --- /dev/null +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_libraries.py @@ -0,0 +1,57 @@ +""" +Command to build or re-build the search index for content libraries. 
+""" +import logging + +from django.conf import settings +from django.core.management import BaseCommand + +from openedx.core.djangoapps.content_libraries import api as lib_api +from openedx.core.djangoapps.content.search.documents import searchable_doc_for_library_block +from .meili_mixin import MeiliCommandMixin + + +log = logging.getLogger(__name__) + +LIBRARIES_INDEX_NAME = "content_libraries" + + +class Command(MeiliCommandMixin, BaseCommand): + """ + Build or re-build the search index for content libraries. + """ + + def handle(self, *args, **options): + """ + Build a new search index + """ + client = self.get_meilisearch_client() + + # Get the list of libraries + self.stdout.write("Counting libraries...") + lib_keys = [lib.library_key for lib in lib_api.ContentLibrary.objects.select_related('org').only('org', 'slug')] + blocks_by_lib_key = {} + num_blocks = 0 + for lib_key in lib_keys: + blocks_by_lib_key[lib_key] = [] + for component in lib_api.get_library_components(lib_key): + blocks_by_lib_key[lib_key].append(lib_api.LibraryXBlockMetadata.from_component(lib_key, component)) + num_blocks += 1 + + self.stdout.write(f"Found {num_blocks} XBlocks among {len(lib_keys)} libraries.") + + index_name = settings.MEILISEARCH_INDEX_PREFIX + LIBRARIES_INDEX_NAME + with self.using_temp_index(index_name) as temp_index_name: + self.stdout.write("Indexing documents...") + num_done = 0 + for lib_key in lib_keys: + self.stdout.write(f"{num_done}/{num_blocks}. Now indexing {lib_key}") + docs = [] + for metadata in blocks_by_lib_key[lib_key]: + doc = searchable_doc_for_library_block(metadata) + docs.append(doc) + # Add all the docs in this library at once (usually faster than adding one at a time): + self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) + num_done += len(docs) + + self.stdout.write(f"Done! 
{num_done} blocks indexed.") diff --git a/openedx/core/djangoapps/content_libraries/search.py b/openedx/core/djangoapps/content_libraries/search.py deleted file mode 100644 index 74624b4e7f94..000000000000 --- a/openedx/core/djangoapps/content_libraries/search.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -Utilities related to searching content libraries -""" -import logging - -from django.utils.text import slugify - -from openedx.core.djangoapps.content_libraries import api as lib_api -from openedx.core.djangoapps.content_tagging import api as tagging_api -from openedx.core.djangoapps.xblock import api as xblock_api - -log = logging.getLogger(__name__) - - -def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> dict: - """ - Generate a dictionary document suitable for ingestion into a search engine - like Meilisearch or Elasticsearch, so that the given library block can be - found using faceted search. - """ - doc = {} - try: - block = xblock_api.load_block(metadata.usage_key, user=None) - block_data = block.index_dictionary() - # Will be something like: - # { - # 'content': {'display_name': '...', 'capa_content': '...'}, - # 'content_type': 'CAPA', - # 'problem_types': ['multiplechoiceresponse'] - # } - # Which we need to flatten: - if "content_type" in block_data: - del block_data["content_type"] # Redundant with our "type" field - if "content" in block_data and isinstance(block_data["content"], dict): - content = block_data["content"] - if "display_name" in content: - del content["display_name"] - del block_data["content"] - block_data.update(content) - # Now we have - # { 'capa_content': '...', 'problem_types': ['multiplechoiceresponse'] } - doc.update(block_data) - except Exception as err: # pylint: disable=broad-except - log.exception(f"Failed to get index_dictionary for {metadata.usage_key}: {err}") - # The data below must always override any values from index_dictionary: - doc.update({ - # A Meilisearch document identifier can be of type 
integer or string, - # only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_). - # So our usage keys with ":" characters cannot be used as primary keys. - "id": slugify(str(metadata.usage_key)) + "-" + str(hash(str(metadata.usage_key)) % 1_000), - "usage_key": str(metadata.usage_key), - "block_id": str(metadata.usage_key.block_id), - "display_name": metadata.display_name, - "type": metadata.usage_key.block_type, - # This is called contextKey not libKey so we can use the same keys with courses, and maybe search - # both courses and libraries together in the future? - "context_key": str(metadata.usage_key.context_key), # same as lib_key - "org": str(metadata.usage_key.context_key.org), - }) - # Add tags. Note that we could improve performance for indexing many components from the same library, - # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the - # tags for each component separately. - for obj_tag in tagging_api.get_object_tags(metadata.usage_key).all(): - key = f"tags:{obj_tag.name}" # Taxonomy name - if key not in doc: - doc[key] = [] - # Add the tag and all its parent tags, which are implied - for tag_value in obj_tag.get_lineage(): - if tag_value not in doc[key]: - doc[key].append(tag_value) - return doc From b9eddb8e654e280d03e3545f0ad70f48f5702049 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 29 Feb 2024 14:12:47 -0800 Subject: [PATCH 05/28] refactor: combine indexes into one --- .../djangoapps/content/search/documents.py | 54 +++++++++++----- .../management/commands/reindex_libraries.py | 57 ----------------- ...ex_courses_studio.py => reindex_studio.py} | 63 +++++++++++++++---- 3 files changed, 91 insertions(+), 83 deletions(-) delete mode 100644 openedx/core/djangoapps/content/search/management/commands/reindex_libraries.py rename openedx/core/djangoapps/content/search/management/commands/{reindex_courses_studio.py => reindex_studio.py} (50%) diff 
--git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 4bccd406f7aa..af53d1b43613 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -13,6 +13,30 @@ log = logging.getLogger(__name__) +class Fields: + """ + Fields that exist on the documents in our search index + """ + # Meilisearch primary key. String. + id = "id" + usage_key = "usage_key" + block_id = "block_id" + display_name = "display_name" + block_type = "block_type" + context_key = "context_key" + org = "org" + # type: "course_block", "library_block" + type = "type" + + +class DocType: + """ + Values for the 'type' field on each doc in the search index + """ + course_block = "course_block" + library_block = "library_block" + + def _meili_id_from_opaque_key(usage_key: UsageKey) -> str: """ Meilisearch requires each document to have a primary key that's either an @@ -56,14 +80,14 @@ class implementation returns only: log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}") block_data = {} block_data.update({ - "id": _meili_id_from_opaque_key(block.usage_key), - "usage_key": str(block.usage_key), - "block_id": str(block.usage_key.block_id), - "display_name": block.display_name, # TODO: there is some function to get the fallback display_name - "type": block.scope_ids.block_type, + Fields.id: _meili_id_from_opaque_key(block.usage_key), + Fields.usage_key: str(block.usage_key), + Fields.block_id: str(block.usage_key.block_id), + Fields.display_name: block.display_name, # TODO: there is some function to get the fallback display_name + Fields.block_type: block.scope_ids.block_type, # This is called context_key so it's the same for courses and libraries - "context_key": str(block.usage_key.context_key), # same as lib_key - "org": str(block.usage_key.context_key.org), + Fields.context_key: str(block.usage_key.context_key), # same as lib_key + Fields.org: 
str(block.usage_key.context_key.org), }) return block_data @@ -81,16 +105,17 @@ def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> log.exception(f"Failed to load XBlock {metadata.usage_key}: {err}") # Even though we couldn't load the block, we can still include basic data about it in the index, from 'metadata' doc.update({ - "id": _meili_id_from_opaque_key(metadata.usage_key), - "usage_key": str(metadata.usage_key), - "block_id": str(metadata.usage_key.block_id), - "display_name": metadata.display_name, - "type": metadata.usage_key.block_type, - "context_key": str(metadata.usage_key.context_key), - "org": str(metadata.usage_key.context_key.org), + Fields.id: _meili_id_from_opaque_key(metadata.usage_key), + Fields.usage_key: str(metadata.usage_key), + Fields.block_id: str(metadata.usage_key.block_id), + Fields.display_name: metadata.display_name, + Fields.type: metadata.usage_key.block_type, + Fields.context_key: str(metadata.usage_key.context_key), + Fields.org: str(metadata.usage_key.context_key.org), }) else: doc.update(_fields_from_block(block)) + doc[Fields.type] = DocType.library_block # Add tags. Note that we could improve performance for indexing many components from the same library, # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the # tags for each component separately. @@ -112,6 +137,7 @@ def searchable_doc_for_course_block(block) -> dict: found using faceted search. """ doc = _fields_from_block(block) + doc[Fields.type] = DocType.course_block # Add tags. Note that we could improve performance for indexing many components from the same library, # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the # tags for each component separately. 
diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_libraries.py b/openedx/core/djangoapps/content/search/management/commands/reindex_libraries.py deleted file mode 100644 index 61677eb13939..000000000000 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_libraries.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -Command to build or re-build the search index for content libraries. -""" -import logging - -from django.conf import settings -from django.core.management import BaseCommand - -from openedx.core.djangoapps.content_libraries import api as lib_api -from openedx.core.djangoapps.content.search.documents import searchable_doc_for_library_block -from .meili_mixin import MeiliCommandMixin - - -log = logging.getLogger(__name__) - -LIBRARIES_INDEX_NAME = "content_libraries" - - -class Command(MeiliCommandMixin, BaseCommand): - """ - Build or re-build the search index for content libraries. - """ - - def handle(self, *args, **options): - """ - Build a new search index - """ - client = self.get_meilisearch_client() - - # Get the list of libraries - self.stdout.write("Counting libraries...") - lib_keys = [lib.library_key for lib in lib_api.ContentLibrary.objects.select_related('org').only('org', 'slug')] - blocks_by_lib_key = {} - num_blocks = 0 - for lib_key in lib_keys: - blocks_by_lib_key[lib_key] = [] - for component in lib_api.get_library_components(lib_key): - blocks_by_lib_key[lib_key].append(lib_api.LibraryXBlockMetadata.from_component(lib_key, component)) - num_blocks += 1 - - self.stdout.write(f"Found {num_blocks} XBlocks among {len(lib_keys)} libraries.") - - index_name = settings.MEILISEARCH_INDEX_PREFIX + LIBRARIES_INDEX_NAME - with self.using_temp_index(index_name) as temp_index_name: - self.stdout.write("Indexing documents...") - num_done = 0 - for lib_key in lib_keys: - self.stdout.write(f"{num_done}/{num_blocks}. 
Now indexing {lib_key}") - docs = [] - for metadata in blocks_by_lib_key[lib_key]: - doc = searchable_doc_for_library_block(metadata) - docs.append(doc) - # Add all the docs in this library at once (usually faster than adding one at a time): - self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) - num_done += len(docs) - - self.stdout.write(f"Done! {num_done} blocks indexed.") diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_courses_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py similarity index 50% rename from openedx/core/djangoapps/content/search/management/commands/reindex_courses_studio.py rename to openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index d05975257acd..4af2aa5540e9 100644 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_courses_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -10,7 +10,12 @@ from django.conf import settings from django.core.management import BaseCommand -from openedx.core.djangoapps.content.search.documents import searchable_doc_for_course_block +from openedx.core.djangoapps.content_libraries import api as lib_api +from openedx.core.djangoapps.content.search.documents import ( + Fields, + searchable_doc_for_course_block, + searchable_doc_for_library_block, +) from xmodule.modulestore import ModuleStoreEnum from xmodule.modulestore.django import modulestore from .meili_mixin import MeiliCommandMixin @@ -18,7 +23,7 @@ log = logging.getLogger(__name__) -STUDIO_COURSES_INDEX_NAME = "courseware_draft" +STUDIO_INDEX_NAME = "studio_content" class Command(MeiliCommandMixin, BaseCommand): @@ -28,25 +33,59 @@ class Command(MeiliCommandMixin, BaseCommand): def handle(self, *args, **options): """ - Build a new search index + Build a new search index for Studio, containing content from courses and libraries """ client = self.get_meilisearch_client() 
store = modulestore() - # Get the list of libraries + # Get the lists of libraries + self.stdout.write("Counting libraries...") + lib_keys = [lib.library_key for lib in lib_api.ContentLibrary.objects.select_related('org').only('org', 'slug')] + num_libraries = len(lib_keys) + + # Get the list of courses self.stdout.write("Counting courses...") with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): all_courses = store.get_courses() num_courses = len(all_courses) - self.stdout.write(f"Found {num_courses} courses.") + num_contexts = num_courses + num_libraries + num_contexts_done = 0 # How many courses/libraries we've done + num_blocks_done = 0 - index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_COURSES_INDEX_NAME + self.stdout.write(f"Found {num_courses} courses and {num_libraries} libraries.") + index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME with self.using_temp_index(index_name) as temp_index_name: - self.stdout.write("Indexing documents...") - num_courses_done = 0 - num_blocks_done = 0 + ############## Configure the index ############## + # usage_key is not the primary key but nevertheless must be unique: + client.index(temp_index_name).update_distinct_attribute(Fields.usage_key) + client.index(temp_index_name).update_filterable_attributes([ + Fields.block_type, + Fields.context_key, + Fields.org, + Fields.type, + # TODO: tags + ]) + + ############## Libraries ############## + self.stdout.write("Indexing libraries...") + for lib_key in lib_keys: + self.stdout.write(f"{num_contexts_done + 1}/{num_contexts}. 
Now indexing library {lib_key}") + docs = [] + for component in lib_api.get_library_components(lib_key): + metadata = lib_api.LibraryXBlockMetadata.from_component(lib_key, component) + doc = searchable_doc_for_library_block(metadata) + docs.append(doc) + num_blocks_done += 1 + # Add all the docs in this library at once (usually faster than adding one at a time): + self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) + num_contexts_done += 1 + + ############## Courses ############## + self.stdout.write("Indexing courses...") for course in all_courses: - self.stdout.write(f"{num_courses_done}/{num_courses}. Now indexing {course.display_name} ({course.id})") + self.stdout.write( + f"{num_contexts_done + 1}/{num_contexts}. Now indexing course {course.display_name} ({course.id})" + ) docs = [] def add_with_children(block): @@ -59,10 +98,10 @@ def add_with_children(block): # Add all the docs in this library at once (usually faster than adding one at a time): self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) - num_courses_done += 1 + num_contexts_done += 1 num_blocks_done += len(docs) - self.stdout.write(f"Done! {num_blocks_done} blocks indexed across {num_courses_done} courses.") + self.stdout.write(f"Done! 
{num_blocks_done} blocks indexed across {num_contexts_done} courses and libraries.") def recurse_children(self, block, fn): """ From adbf3576f3a59e51598119236253c3acf871070c Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 29 Feb 2024 14:51:34 -0800 Subject: [PATCH 06/28] feat: index tags (in a way compatible with InstantSearch) --- .../djangoapps/content/search/documents.py | 84 ++++++++++++++----- .../management/commands/reindex_studio.py | 2 +- 2 files changed, 62 insertions(+), 24 deletions(-) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index af53d1b43613..12034428fdf5 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -1,10 +1,11 @@ """ Utilities related to searching content libraries """ +from __future__ import annotations import logging from django.utils.text import slugify -from opaque_keys.edx.keys import UsageKey +from opaque_keys.edx.keys import UsageKey, LearningContextKey from openedx.core.djangoapps.content_libraries import api as lib_api from openedx.core.djangoapps.content_tagging import api as tagging_api @@ -27,6 +28,16 @@ class Fields: org = "org" # type: "course_block", "library_block" type = "type" + # tags (dictionary) + # See https://blog.meilisearch.com/nested-hierarchical-facets-guide/ + # and https://www.algolia.com/doc/api-reference/widgets/hierarchical-menu/js/ + # For details on the format of the hierarchical tag data. + tags = "tags" + tags_taxonomy = "taxonomy" + tags_level0 = "level0" + tags_level1 = "level1" + tags_level2 = "level2" + tags_level3 = "level3" class DocType: @@ -92,6 +103,53 @@ class implementation returns only: return block_data +def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict: + """ + Given an XBlock, course, library, etc., get the tag data for its index doc. 
+ + See the comments above on "Field.tags" for an explanation of the format. + + e.g. for something tagged "Difficulty: Hard" and "Location: Vancouver" this + would return: + { + "tags": { + "taxonomy": ["Location", "Difficulty"], + "level0": ["Location\tNorth America", "Difficulty\tHard"], + "level1": ["Location\tNorth America\tCanada"], + "level2": ["Location\tNorth America\tCanada\tVancouver"], + } + } + """ + # Note that we could improve performance for indexing many components from the same library/course, + # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the + # tags for each component separately. + all_tags = tagging_api.get_object_tags(object_id).all() + if not all_tags: + return {} + result = { + Fields.tags_taxonomy: [], + Fields.tags_level0: [], + # ... other levels added as needed + } + for obj_tag in all_tags: + # Add the taxonomy name: + if obj_tag.name not in result[Fields.tags_taxonomy]: + result[Fields.tags_taxonomy].append(obj_tag.name) + # Taxonomy name plus each level of tags, in a list: + parts = [obj_tag.name] + obj_tag.get_lineage() # e.g. 
["Location", "North America", "Canada", "Vancouver"] + parts = [part.replace(" > ", " _ ") for part in parts] # Escape our separator + for level in range(0, 4): + new_value = " > ".join(parts[0:level + 2]) + if not f"level{level}" in result: + result[f"level{level}"] = [new_value] + elif new_value not in result[f"level{level}"]: + result[f"level{level}"].append(new_value) + if len(parts) == level + 2: + break + + return {Fields.tags: result} + + def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> dict: """ Generate a dictionary document suitable for ingestion into a search engine @@ -115,18 +173,8 @@ def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> }) else: doc.update(_fields_from_block(block)) + doc.update(_tags_for_content_object(metadata.usage_key)) doc[Fields.type] = DocType.library_block - # Add tags. Note that we could improve performance for indexing many components from the same library, - # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the - # tags for each component separately. - for obj_tag in tagging_api.get_object_tags(metadata.usage_key).all(): - key = f"tags:{obj_tag.name}" # Taxonomy name - if key not in doc: - doc[key] = [] - # Add the tag and all its parent tags, which are implied - for tag_value in obj_tag.get_lineage(): - if tag_value not in doc[key]: - doc[key].append(tag_value) return doc @@ -137,16 +185,6 @@ def searchable_doc_for_course_block(block) -> dict: found using faceted search. """ doc = _fields_from_block(block) + doc.update(_tags_for_content_object(block.usage_key)) doc[Fields.type] = DocType.course_block - # Add tags. Note that we could improve performance for indexing many components from the same library, - # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the - # tags for each component separately. 
- for obj_tag in tagging_api.get_object_tags(block.usage_key).all(): - key = f"tags:{obj_tag.name}" # Taxonomy name - if key not in doc: - doc[key] = [] - # Add the tag and all its parent tags, which are implied - for tag_value in obj_tag.get_lineage(): - if tag_value not in doc[key]: - doc[key].append(tag_value) return doc diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index 4af2aa5540e9..7bddccd7ae97 100644 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -62,8 +62,8 @@ def handle(self, *args, **options): Fields.block_type, Fields.context_key, Fields.org, + Fields.tags, Fields.type, - # TODO: tags ]) ############## Libraries ############## From 47dc3c1b8c00607e2fde3a361cec6a8c2e73a4ec Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Tue, 5 Mar 2024 11:00:44 -0800 Subject: [PATCH 07/28] feat: REST API to retrieve a user-specific API key --- cms/envs/common.py | 4 +- cms/urls.py | 4 ++ .../djangoapps/content/search/documents.py | 1 + .../management/commands/reindex_studio.py | 3 +- .../core/djangoapps/content/search/urls.py | 11 +++ .../core/djangoapps/content/search/views.py | 70 +++++++++++++++++++ 6 files changed, 90 insertions(+), 3 deletions(-) create mode 100644 openedx/core/djangoapps/content/search/urls.py create mode 100644 openedx/core/djangoapps/content/search/views.py diff --git a/cms/envs/common.py b/cms/envs/common.py index d721535785c6..a85c85787f8a 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -2880,6 +2880,8 @@ def _should_send_xblock_events(settings): # can only use the index(es) that start with this prefix. 
# See https://www.meilisearch.com/docs/learn/security/tenant_tokens MEILISEARCH_INDEX_PREFIX = "" -# Set this to None to disable search functionality +# Meilisearch URL that the python backend can use. Often points to another docker container or k8s service. MEILISEARCH_URL = "http://meilisearch" +# URL that browsers (end users) can user to reach Meilisearch. Should be HTTPS in production. +MEILISEARCH_PUBLIC_URL = "http://meilisearch.example.com" MEILISEARCH_API_KEY = "devkey" diff --git a/cms/urls.py b/cms/urls.py index 8c3588b04960..9828e9d0fbf0 100644 --- a/cms/urls.py +++ b/cms/urls.py @@ -307,6 +307,10 @@ ), ) +urlpatterns.append( + path('', include(('openedx.core.djangoapps.content.search.urls', 'content_search'), namespace='content_search')), +) + # display error page templates, for testing purposes urlpatterns += [ path('404', handler404), diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 12034428fdf5..7cb61e5dca06 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -12,6 +12,7 @@ from openedx.core.djangoapps.xblock import api as xblock_api log = logging.getLogger(__name__) +STUDIO_INDEX_NAME = "studio_content" class Fields: diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index 7bddccd7ae97..3b46370ebf21 100644 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -15,6 +15,7 @@ Fields, searchable_doc_for_course_block, searchable_doc_for_library_block, + STUDIO_INDEX_NAME, ) from xmodule.modulestore import ModuleStoreEnum from xmodule.modulestore.django import modulestore @@ -23,8 +24,6 @@ log = logging.getLogger(__name__) -STUDIO_INDEX_NAME = "studio_content" - class 
Command(MeiliCommandMixin, BaseCommand): """ diff --git a/openedx/core/djangoapps/content/search/urls.py b/openedx/core/djangoapps/content/search/urls.py new file mode 100644 index 000000000000..c69be33d5bb0 --- /dev/null +++ b/openedx/core/djangoapps/content/search/urls.py @@ -0,0 +1,11 @@ +""" +URLs for content search +""" +from django.urls import path + +from .views import StudioSearchView + + +urlpatterns = [ + path('api/content_search/v2/studio/', StudioSearchView.as_view(), name='studio_content_search') +] diff --git a/openedx/core/djangoapps/content/search/views.py b/openedx/core/djangoapps/content/search/views.py new file mode 100644 index 000000000000..807e716ccff7 --- /dev/null +++ b/openedx/core/djangoapps/content/search/views.py @@ -0,0 +1,70 @@ +""" +REST API for content search +""" +from datetime import datetime, timedelta, timezone +import logging + +from django.conf import settings +from django.contrib.auth import get_user_model +from django.contrib.auth.models import AnonymousUser +from opaque_keys.edx.keys import CourseKey +import meilisearch +from rest_framework import serializers +from rest_framework.exceptions import NotAuthenticated, NotFound, PermissionDenied +from rest_framework.response import Response +from rest_framework.views import APIView + +from lms.djangoapps.courseware.access import has_access +from lms.djangoapps.courseware.masquerade import setup_masquerade +from openedx.core import types +from openedx.core.lib.api.view_utils import validate_course_key, view_auth_classes +from openedx.core.djangoapps.content.search.documents import STUDIO_INDEX_NAME + +User = get_user_model() +log = logging.getLogger(__name__) + + +def _get_meili_api_key_uid(): + """ + Helper method to get the UID of the API key we're using for Meilisearch + """ + if not hasattr(_get_meili_api_key_uid, "uid"): + client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) + _get_meili_api_key_uid.uid = 
client.get_key(settings.MEILISEARCH_API_KEY).uid + return _get_meili_api_key_uid.uid + + +@view_auth_classes(is_authenticated=True) +class StudioSearchView(APIView): + """ + Give user details on how they can search studio content + """ + + def get(self, request): + """ + Give user details on how they can search studio content + """ + client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) + index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME + + # Create an API key that only allows the user to search content that they have permission to view: + expires_at = datetime.now(tz=timezone.utc) + timedelta(days=7) + search_rules = { + index_name: { + # TODO: Apply filters here based on the user's permissions, so they can only search for content + # that they have permission to view. Example: + # 'filter': 'org = BradenX' + } + } + # Note: the following is just generating a JWT. It doesn't actually make an API call to Meilisearch. + restricted_api_key = client.generate_tenant_token( + api_key_uid=_get_meili_api_key_uid(), + search_rules=search_rules, + expires_at=expires_at, + ) + + return Response({ + "url": settings.MEILISEARCH_PUBLIC_URL, + "index_name": index_name, + "api_key": restricted_api_key, + }) From 6cf9e04f10f1d1cad810529dab3e48a45c2e42c7 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Tue, 5 Mar 2024 14:38:19 -0800 Subject: [PATCH 08/28] fix: better handling index existence check, better comments --- .../content/search/management/commands/meili_mixin.py | 5 ++++- .../search/management/commands/reindex_studio.py | 11 +++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py b/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py index 8fb85a598ef2..798dfd8a7e47 100644 --- a/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py +++ 
b/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py @@ -60,7 +60,10 @@ def index_exists(self, index_name: str) -> bool: try: client.get_index(index_name) except MeilisearchError as err: - return False + if err.code == "index_not_found": + return False + else: + raise err return True @contextmanager diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index 3b46370ebf21..fe0981e24ca4 100644 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -47,16 +47,19 @@ def handle(self, *args, **options): with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): all_courses = store.get_courses() num_courses = len(all_courses) + + # Some counters so we can track our progress as indexing progresses: num_contexts = num_courses + num_libraries - num_contexts_done = 0 # How many courses/libraries we've done - num_blocks_done = 0 + num_contexts_done = 0 # How many courses/libraries we've indexed + num_blocks_done = 0 # How many individual components/XBlocks we've indexed self.stdout.write(f"Found {num_courses} courses and {num_libraries} libraries.") index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME with self.using_temp_index(index_name) as temp_index_name: ############## Configure the index ############## - # usage_key is not the primary key but nevertheless must be unique: + # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique): client.index(temp_index_name).update_distinct_attribute(Fields.usage_key) + # Mark which attributes can be used for filtering/faceted search: client.index(temp_index_name).update_filterable_attributes([ Fields.block_type, Fields.context_key, @@ -95,7 +98,7 @@ def add_with_children(block): self.recurse_children(course, 
add_with_children) - # Add all the docs in this library at once (usually faster than adding one at a time): + # Add all the docs in this course at once (usually faster than adding one at a time): self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) num_contexts_done += 1 num_blocks_done += len(docs) From 8637e16328865be25d35461dc77725305a483dba Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 14 Mar 2024 19:12:33 -0700 Subject: [PATCH 09/28] feat: disable studio search (w/ Meilisearch) by default --- cms/envs/common.py | 13 ++++++++----- .../search/management/commands/meili_mixin.py | 4 ++-- openedx/core/djangoapps/content/search/views.py | 2 ++ 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/cms/envs/common.py b/cms/envs/common.py index a85c85787f8a..b8c892145d34 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -2873,15 +2873,18 @@ def _should_send_xblock_events(settings): BEAMER_PRODUCT_ID = "" -################### Search ################### +################### Studio Search (alpha, using Meilisearch) ################### +# Enable Studio search features (powered by Meilisearch) (beta, off by default) +MEILISEARCH_ENABLED = False +# Meilisearch URL that the python backend can use. Often points to another docker container or k8s service. +# Set this to None (default) to disable +MEILISEARCH_URL = "http://meilisearch" +# URL that browsers (end users) can user to reach Meilisearch. Should be HTTPS in production. +MEILISEARCH_PUBLIC_URL = "http://meilisearch.example.com" # To support multi-tenancy, you can prefix all indexes with a common key like "sandbox7-" # and use a restricted tenant token in place of an API key, so that this Open edX instance # can only use the index(es) that start with this prefix. # See https://www.meilisearch.com/docs/learn/security/tenant_tokens MEILISEARCH_INDEX_PREFIX = "" -# Meilisearch URL that the python backend can use. Often points to another docker container or k8s service. 
-MEILISEARCH_URL = "http://meilisearch" -# URL that browsers (end users) can user to reach Meilisearch. Should be HTTPS in production. -MEILISEARCH_PUBLIC_URL = "http://meilisearch.example.com" MEILISEARCH_API_KEY = "devkey" diff --git a/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py b/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py index 798dfd8a7e47..18c7681879cb 100644 --- a/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py +++ b/openedx/core/djangoapps/content/search/management/commands/meili_mixin.py @@ -22,8 +22,8 @@ def get_meilisearch_client(self): if hasattr(self, "_meili_client"): return self._meili_client # Connect to Meilisearch - if not settings.MEILISEARCH_URL: - raise CommandError("MEILISEARCH_URL is not set - search functionality disabled.") + if not settings.MEILISEARCH_ENABLED: + raise CommandError("MEILISEARCH_ENABLED is not set - search functionality disabled.") self._meili_client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) try: diff --git a/openedx/core/djangoapps/content/search/views.py b/openedx/core/djangoapps/content/search/views.py index 807e716ccff7..76e5eb91781e 100644 --- a/openedx/core/djangoapps/content/search/views.py +++ b/openedx/core/djangoapps/content/search/views.py @@ -44,6 +44,8 @@ def get(self, request): """ Give user details on how they can search studio content """ + if not settings.MEILISEARCH_ENABLED: + raise NotFound("Meilisearch features are not enabled.") client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME From 64319488c4e587a62b1705c0d0144bd70aaf31a5 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 14 Mar 2024 19:12:44 -0700 Subject: [PATCH 10/28] feat: limit search endpoint usage to global staff for now --- openedx/core/djangoapps/content/search/views.py | 10 ++++++---- 1 file changed, 6 insertions(+), 
4 deletions(-) diff --git a/openedx/core/djangoapps/content/search/views.py b/openedx/core/djangoapps/content/search/views.py index 76e5eb91781e..4a19a023ad63 100644 --- a/openedx/core/djangoapps/content/search/views.py +++ b/openedx/core/djangoapps/content/search/views.py @@ -6,14 +6,12 @@ from django.conf import settings from django.contrib.auth import get_user_model -from django.contrib.auth.models import AnonymousUser -from opaque_keys.edx.keys import CourseKey import meilisearch -from rest_framework import serializers -from rest_framework.exceptions import NotAuthenticated, NotFound, PermissionDenied +from rest_framework.exceptions import NotFound, PermissionDenied from rest_framework.response import Response from rest_framework.views import APIView +from common.djangoapps.student.roles import GlobalStaff from lms.djangoapps.courseware.access import has_access from lms.djangoapps.courseware.masquerade import setup_masquerade from openedx.core import types @@ -46,6 +44,10 @@ def get(self, request): """ if not settings.MEILISEARCH_ENABLED: raise NotFound("Meilisearch features are not enabled.") + if not GlobalStaff().has_user(request.user): + # Until we enforce permissions properly (see below), this endpoint is restricted to global staff, + # because it lets you search data from any course/library. 
+ raise PermissionDenied("For the moment, use of this search preview is restricted to global staff.") client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME From 9c058062c3350fd792a4de999ea133105fd389d0 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Fri, 15 Mar 2024 11:13:04 -0700 Subject: [PATCH 11/28] chore: fix quality issues --- cms/envs/common.py | 1 - openedx/core/djangoapps/content/search/documents.py | 5 ++--- .../content/search/management/commands/reindex_studio.py | 2 +- openedx/core/djangoapps/content/search/views.py | 5 +---- 4 files changed, 4 insertions(+), 9 deletions(-) diff --git a/cms/envs/common.py b/cms/envs/common.py index b8c892145d34..d38b156a0691 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -2878,7 +2878,6 @@ def _should_send_xblock_events(settings): # Enable Studio search features (powered by Meilisearch) (beta, off by default) MEILISEARCH_ENABLED = False # Meilisearch URL that the python backend can use. Often points to another docker container or k8s service. -# Set this to None (default) to disable MEILISEARCH_URL = "http://meilisearch" # URL that browsers (end users) can user to reach Meilisearch. Should be HTTPS in production. 
MEILISEARCH_PUBLIC_URL = "http://meilisearch.example.com" diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 7cb61e5dca06..d09892981c96 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -27,8 +27,7 @@ class Fields: block_type = "block_type" context_key = "context_key" org = "org" - # type: "course_block", "library_block" - type = "type" + type = "type" # "course_block" or "library_block" # tags (dictionary) # See https://blog.meilisearch.com/nested-hierarchical-facets-guide/ # and https://www.algolia.com/doc/api-reference/widgets/hierarchical-menu/js/ @@ -141,7 +140,7 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict: parts = [part.replace(" > ", " _ ") for part in parts] # Escape our separator for level in range(0, 4): new_value = " > ".join(parts[0:level + 2]) - if not f"level{level}" in result: + if f"level{level}" not in result: result[f"level{level}"] = [new_value] elif new_value not in result[f"level{level}"]: result[f"level{level}"].append(new_value) diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index fe0981e24ca4..e672fcc60203 100644 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -41,7 +41,7 @@ def handle(self, *args, **options): self.stdout.write("Counting libraries...") lib_keys = [lib.library_key for lib in lib_api.ContentLibrary.objects.select_related('org').only('org', 'slug')] num_libraries = len(lib_keys) - + # Get the list of courses self.stdout.write("Counting courses...") with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): diff --git a/openedx/core/djangoapps/content/search/views.py 
b/openedx/core/djangoapps/content/search/views.py index 4a19a023ad63..69b686a3434f 100644 --- a/openedx/core/djangoapps/content/search/views.py +++ b/openedx/core/djangoapps/content/search/views.py @@ -12,10 +12,7 @@ from rest_framework.views import APIView from common.djangoapps.student.roles import GlobalStaff -from lms.djangoapps.courseware.access import has_access -from lms.djangoapps.courseware.masquerade import setup_masquerade -from openedx.core import types -from openedx.core.lib.api.view_utils import validate_course_key, view_auth_classes +from openedx.core.lib.api.view_utils import view_auth_classes from openedx.core.djangoapps.content.search.documents import STUDIO_INDEX_NAME User = get_user_model() From af1402faac50040ab2b26d486d6dc6d737413cc7 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Fri, 15 Mar 2024 11:32:46 -0700 Subject: [PATCH 12/28] refactor: put all "content" data into the "content" field --- .../djangoapps/content/search/documents.py | 42 ++++++++++--------- .../management/commands/reindex_studio.py | 8 ++++ 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index d09892981c96..b2ec0907fa4c 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -27,7 +27,7 @@ class Fields: block_type = "block_type" context_key = "context_key" org = "org" - type = "type" # "course_block" or "library_block" + type = "type" # DocType.course_block or DocType.library_block (see below) # tags (dictionary) # See https://blog.meilisearch.com/nested-hierarchical-facets-guide/ # and https://www.algolia.com/doc/api-reference/widgets/hierarchical-menu/js/ @@ -38,6 +38,10 @@ class Fields: tags_level1 = "level1" tags_level2 = "level2" tags_level3 = "level3" + # The "content" field is a dictionary of arbitrary data, depending on the block_type. 
+ # It comes from each XBlock's index_dictionary() method (if present) plus some processing. + # Text (html) blocks have an "html_content" key in here, capa has "capa_content" and "problem_types", and so on. + content = "content" class DocType: @@ -68,8 +72,18 @@ def _fields_from_block(block) -> dict: class implementation returns only: {"content": {"display_name": "..."}, "content_type": "..."} """ + block_data = { + Fields.id: _meili_id_from_opaque_key(block.usage_key), + Fields.usage_key: str(block.usage_key), + Fields.block_id: str(block.usage_key.block_id), + Fields.display_name: block.display_name, # TODO: there is some function to get the fallback display_name + Fields.block_type: block.scope_ids.block_type, + # This is called context_key so it's the same for courses and libraries + Fields.context_key: str(block.usage_key.context_key), # same as lib_key + Fields.org: str(block.usage_key.context_key.org), + } try: - block_data = block.index_dictionary() + content_data = block.index_dictionary() # Will be something like: # { # 'content': {'display_name': '...', 'capa_content': '...'}, @@ -77,29 +91,19 @@ class implementation returns only: # 'problem_types': ['multiplechoiceresponse'] # } # Which we need to flatten: - if "content_type" in block_data: - del block_data["content_type"] # Redundant with our "type" field that we add later - if "content" in block_data and isinstance(block_data["content"], dict): - content = block_data["content"] + if "content_type" in content_data: + del content_data["content_type"] # Redundant with our standard Fields.block_type field. 
+ if "content" in content_data and isinstance(content_data["content"], dict): + content = content_data["content"] if "display_name" in content: del content["display_name"] - del block_data["content"] - block_data.update(content) + del content_data["content"] + content_data.update(content) # Now we have something like: # { 'capa_content': '...', 'problem_types': ['multiplechoiceresponse'] } + block_data[Fields.content] = content_data except Exception as err: # pylint: disable=broad-except log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}") - block_data = {} - block_data.update({ - Fields.id: _meili_id_from_opaque_key(block.usage_key), - Fields.usage_key: str(block.usage_key), - Fields.block_id: str(block.usage_key.block_id), - Fields.display_name: block.display_name, # TODO: there is some function to get the fallback display_name - Fields.block_type: block.scope_ids.block_type, - # This is called context_key so it's the same for courses and libraries - Fields.context_key: str(block.usage_key.context_key), # same as lib_key - Fields.org: str(block.usage_key.context_key.org), - }) return block_data diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index e672fcc60203..623f01bc05cc 100644 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -67,6 +67,14 @@ def handle(self, *args, **options): Fields.tags, Fields.type, ]) + # Mark which attributes are used for keyword search, in order of importance: + client.index(temp_index_name).update_searchable_attributes([ + Fields.display_name, + Fields.block_id, + Fields.content, + Fields.tags, + # Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields. 
+ ]) ############## Libraries ############## self.stdout.write("Indexing libraries...") From 7c5cd7fa40823347320e2c1c39f0c389620f8d0a Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Fri, 15 Mar 2024 11:35:43 -0700 Subject: [PATCH 13/28] chore: fix lint issue and comment --- openedx/core/djangoapps/content/search/documents.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index b2ec0907fa4c..fc608688d9f5 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -118,9 +118,9 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict: { "tags": { "taxonomy": ["Location", "Difficulty"], - "level0": ["Location\tNorth America", "Difficulty\tHard"], - "level1": ["Location\tNorth America\tCanada"], - "level2": ["Location\tNorth America\tCanada\tVancouver"], + "level0": ["Location > North America", "Difficulty > Hard"], + "level1": ["Location > North America > Canada"], + "level2": ["Location > North America > Canada > Vancouver"], } } """ @@ -142,7 +142,7 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict: # Taxonomy name plus each level of tags, in a list: parts = [obj_tag.name] + obj_tag.get_lineage() # e.g. 
["Location", "North America", "Canada", "Vancouver"] parts = [part.replace(" > ", " _ ") for part in parts] # Escape our separator - for level in range(0, 4): + for level in range(4): new_value = " > ".join(parts[0:level + 2]) if f"level{level}" not in result: result[f"level{level}"] = [new_value] From 4c66d068a180e2724e5e9757624e6d6b96d22c32 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Fri, 15 Mar 2024 12:13:43 -0700 Subject: [PATCH 14/28] feat: add breadcrumbs to index --- .../djangoapps/content/search/documents.py | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index fc608688d9f5..8e69ee08a763 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -22,12 +22,16 @@ class Fields: # Meilisearch primary key. String. id = "id" usage_key = "usage_key" - block_id = "block_id" + type = "type" # DocType.course_block or DocType.library_block (see below) + block_id = "block_id" # The block_id part of the usage key. Sometimes human-readable, sometimes a random hex ID display_name = "display_name" block_type = "block_type" context_key = "context_key" org = "org" - type = "type" # DocType.course_block or DocType.library_block (see below) + # breadcrumbs: an array of {"display_name": "..."} entries. First one is the name of the course/library itself. + # After that is the name of any parent Section/Subsection/Unit/etc. + # It's a list of dictionaries because for now we just include the name of each but in future we may add their IDs. 
+ breadcrumbs = "breadcrumbs" # tags (dictionary) # See https://blog.meilisearch.com/nested-hierarchical-facets-guide/ # and https://www.algolia.com/doc/api-reference/widgets/hierarchical-menu/js/ @@ -81,7 +85,18 @@ class implementation returns only: # This is called context_key so it's the same for courses and libraries Fields.context_key: str(block.usage_key.context_key), # same as lib_key Fields.org: str(block.usage_key.context_key.org), + Fields.breadcrumbs: [] } + # Get the breadcrumbs (course, section, subsection, etc.): + if block.usage_key.context_key.is_course: # Getting parent is not yet implemented in Learning Core (for libraries). + cur_block = block + while cur_block.parent: + if not cur_block.has_cached_parent: + # This is not a big deal, but if you're updating many blocks in the same course at once, + # this would be very inefficient. Better to recurse the tree top-down with the parent blocks loaded. + log.warning(f"Updating Studio search index for XBlock {block.usage_key} but ancestors weren't cached.") + cur_block = cur_block.get_parent() + block_data[Fields.breadcrumbs].insert(0, {"display_name": cur_block.display_name}) try: content_data = block.index_dictionary() # Will be something like: @@ -160,6 +175,7 @@ def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> like Meilisearch or Elasticsearch, so that the given library block can be found using faceted search. """ + library_name = lib_api.get_library(metadata.usage_key.context_key).title doc = {} try: block = xblock_api.load_block(metadata.usage_key, user=None) @@ -179,6 +195,8 @@ def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> doc.update(_fields_from_block(block)) doc.update(_tags_for_content_object(metadata.usage_key)) doc[Fields.type] = DocType.library_block + # Add the breadcrumbs. 
In v2 libraries, the library itself is not a "parent" of the XBlocks so we add it here: + doc[Fields.breadcrumbs] = [{"display_name": library_name}] return doc From 67cf0ccbc6a3670e993946f19a5d77912100d88f Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Fri, 15 Mar 2024 12:13:55 -0700 Subject: [PATCH 15/28] feat: use fallback display_name if needed --- openedx/core/djangoapps/content/search/documents.py | 2 +- openedx/core/djangoapps/xblock/api.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 8e69ee08a763..aee51a53e65e 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -80,7 +80,7 @@ class implementation returns only: Fields.id: _meili_id_from_opaque_key(block.usage_key), Fields.usage_key: str(block.usage_key), Fields.block_id: str(block.usage_key.block_id), - Fields.display_name: block.display_name, # TODO: there is some function to get the fallback display_name + Fields.display_name: xblock_api.get_block_display_name(block), Fields.block_type: block.scope_ids.block_type, # This is called context_key so it's the same for courses and libraries Fields.context_key: str(block.usage_key.context_key), # same as lib_key diff --git a/openedx/core/djangoapps/xblock/api.py b/openedx/core/djangoapps/xblock/api.py index 98ba9e222dd0..f54464de9f4e 100644 --- a/openedx/core/djangoapps/xblock/api.py +++ b/openedx/core/djangoapps/xblock/api.py @@ -180,7 +180,7 @@ def get_block_display_name(block: XBlock) -> str: if display_name is not None: return display_name else: - return xblock_type_display_name(block.block_type) + return xblock_type_display_name(block.scope_ids.block_type) def get_component_from_usage_key(usage_key: UsageKeyV2) -> Component: From c53963ed470ac94a60951ba0576831fe3d7d8c37 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Fri, 15 Mar 2024 
12:14:07 -0700 Subject: [PATCH 16/28] feat: state how long the reindex_studio command took --- .../content/search/management/commands/reindex_studio.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index 623f01bc05cc..462c0a732e5f 100644 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -6,6 +6,7 @@ indexes LMS (published) courses in ElasticSearch. """ import logging +import time from django.conf import settings from django.core.management import BaseCommand @@ -34,6 +35,7 @@ def handle(self, *args, **options): """ Build a new search index for Studio, containing content from courses and libraries """ + start_time = time.perf_counter() client = self.get_meilisearch_client() store = modulestore() @@ -111,7 +113,11 @@ def add_with_children(block): num_contexts_done += 1 num_blocks_done += len(docs) - self.stdout.write(f"Done! {num_blocks_done} blocks indexed across {num_contexts_done} courses and libraries.") + elapsed_time = time.perf_counter() - start_time + self.stdout.write( + f"Done! {num_blocks_done} blocks indexed across {num_contexts_done} courses " + f"and libraries in {elapsed_time:.0f}s." 
+ ) def recurse_children(self, block, fn): """ From 3884fa2fe89bff4f9012fe2f552e051a706e338f Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Sun, 17 Mar 2024 18:57:31 -0700 Subject: [PATCH 17/28] test: add tests for the Studio search REST API --- .../content/search/tests/__init__.py | 0 .../content/search/tests/test_views.py | 81 +++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 openedx/core/djangoapps/content/search/tests/__init__.py create mode 100644 openedx/core/djangoapps/content/search/tests/test_views.py diff --git a/openedx/core/djangoapps/content/search/tests/__init__.py b/openedx/core/djangoapps/content/search/tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/openedx/core/djangoapps/content/search/tests/test_views.py b/openedx/core/djangoapps/content/search/tests/test_views.py new file mode 100644 index 000000000000..825d1e2924d5 --- /dev/null +++ b/openedx/core/djangoapps/content/search/tests/test_views.py @@ -0,0 +1,81 @@ +""" +Tests for the Studio content search REST API. +""" +from django.test import override_settings +from rest_framework.test import APITestCase, APIClient +from unittest import mock + +from common.djangoapps.student.tests.factories import UserFactory +from openedx.core.djangolib.testing.utils import CacheIsolationTestCase, skip_unless_cms + +STUDIO_SEARCH_ENDPOINT_URL = "/api/content_search/v2/studio/" + + +@skip_unless_cms +class StudioSearchViewTest(CacheIsolationTestCase, APITestCase): + """ + General tests for the Studio search REST API. 
+ """ + + @classmethod + def setUpTestData(cls): # lint-amnesty, pylint: disable=super-method-not-called + cls.staff = UserFactory.create( + username='staff', email='staff@example.com', is_staff=True, password='staff_pass' + ) + cls.student = UserFactory.create( + username='student', email='student@example.com', is_staff=False, password='student_pass' + ) + + def setUp(self): + super().setUp() + self.client = APIClient() + + @override_settings(MEILISEARCH_ENABLED=False) + def test_studio_search_unauthenticated_disabled(self): + """ + Whether or not Meilisearch is enabled, the API endpoint requires authentication. + """ + result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) + assert result.status_code == 401 + + @override_settings(MEILISEARCH_ENABLED=True) + def test_studio_search_unauthenticated_enabled(self): + """ + Whether or not Meilisearch is enabled, the API endpoint requires authentication. + """ + result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) + assert result.status_code == 401 + + @override_settings(MEILISEARCH_ENABLED=False) + def test_studio_search_disabled(self): + """ + When Meilisearch is disabled, the Studio search endpoint gives a 404 + """ + self.client.login(username='student', password='student_pass') + result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) + assert result.status_code == 404 + + @override_settings(MEILISEARCH_ENABLED=True) + def test_studio_search_student_forbidden(self): + """ + Until we implement fine-grained permissions, only global staff can use + the Studio search endpoint. + """ + self.client.login(username='student', password='student_pass') + result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) + assert result.status_code == 403 + + @override_settings(MEILISEARCH_ENABLED=True) + @mock.patch('openedx.core.djangoapps.content.search.views._get_meili_api_key_uid') + def test_studio_search_staff(self, mock_get_api_key_uid): + """ + Global staff can get a restricted API key for Meilisearch using the REST + API. 
+ """ + self.client.login(username='staff', password='staff_pass') + mock_get_api_key_uid.return_value = "3203d764-370f-4e99-a917-d47ab7f29739" + result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) + assert result.status_code == 200 + assert result.data["index_name"] == "studio_content" + assert result.data["url"].startswith("http") + assert result.data["api_key"] and isinstance(result.data["api_key"], str) From 6be622e3a8563070e553bbfe61c5adf01a35176f Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Sun, 17 Mar 2024 19:47:37 -0700 Subject: [PATCH 18/28] docs: added ADR --- .../docs/decisions/0001-meilisearch.rst | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst diff --git a/openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst b/openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst new file mode 100644 index 000000000000..1f355409da0e --- /dev/null +++ b/openedx/core/djangoapps/content/search/docs/decisions/0001-meilisearch.rst @@ -0,0 +1,151 @@ +Studio Content Search Powered by Meilisearch +############################################ + +Status +****** + +Draft + + +Context +******* + +Existing search functionality +============================= + +The Open edX platform currently implements many different forms of search. For +example, users can search for course content, library content, forum posts, and +more. Most of the search functionality in the core platform is powered by the +Elasticsearch search engine (though other functionality developed by 2U, such as +in edx-enterprise, is powered by Algolia). + +Most uses of Elasticsearch in Open edX use +`edx-search `_ which provides a partial +abstraction over Elasticsearch. 
The edx-search library formerly used +`django-haystack `_ as an abstraction +layer across search engines, but "that was ripped out after the package was +abandoned upstream and it became an obstacle to upgrades and efficiently +utilizing Elasticsearch (the abstraction layer imposed significant limits)" +(thanks to Jeremy Bowman for this context). Due to these changes, the current +edx-search API is a mix of abstractions and direct usage of the Elasticsearch +API, which makes it confusing and difficult to work with. In addition, each +usage of edx-search has been implemented fairly differently. See +`State of edx-search `_ +for details (thanks to Andy Shultz). + +Other platform components use Elasticsearch more directly: + +* ``course-discovery`` and ``edx-notes-api`` do not use ``edx-search``, but are + very tied to Elasticsearch via the use of ``django-elasticsearch-dsl`` and + ``django-elasticsearch-drf``. +* ``cs_comments_service`` uses Elasticsearch via the official ruby gems. + +Problems with Elasticsearch +=========================== + +At the same time, there are many problems with the current reliance on +Elasticsearch: + +1. In 2021, the license of Elasticsearch changed from Apache 2.0 to a more + restrictive license that prohibits providing "the products to others as a + managed service". Consequently, AWS forked the search engine to create + OpenSearch and no longer offers Elasticsearch as a service. This is + problematic for many Open edX operators that use AWS and prefer to avoid + any third-party services. +2. Elasticsearch is very resource-intensive and often uses more than a gigabyte + of memory just for small search use cases. +3. Elasticsearch has poor support for multi-tenancy, which multiplies the + problem of resource usage for organizations with many small Open edX sites. +4. 
The existing usage of edx-search/Elasticsearch routes all search requests and + result processing through edxapp (the LMS) or other IDAs, increasing the + load on those applications. + +Need for Studio Search +====================== + +At the time of this ADR, we have a goal to implement new search functionality in +Studio, to support various course authoring workflows. + +Meilisearch +=========== + +Meilisearch ("MAY-lee search") is a new, promising search engine that offers a +compelling alternative to Elasticsearch. It is open source, feature rich, and +very fast and memory efficient (written in Rust, uses orders of magnitude less +memory than Elasticsearch for small datasets). It has a simple API with an +official python driver, and has official integrations with the popular +Instantsearch frontend library from Algolia. It has strong support for +multi-tenancy, and allows creating restricted API keys that incorporate a user's +permissions, so that search requests can be made directly from the user to +Meilisearch, rather than routing them through Django. Initial testing has shown +it to be much more developer friendly than Elasticsearch/OpenSearch. + +At the time of writing, there are only two known concerns with Meilisearch: + +1. It doesn't (yet) support High Availability via replication, although this is + planned and under development. It does have other features to support high + availability, such as very low restart time (in ms). +2. It doesn't support boolean operators in keyword search ("red AND panda"), + though it does of course support boolean operators in filters. This is a + product decision aimed at keeping the user experience simple, and is unlikely + to change. + + +Decision +******** + +1. We will implement the new Studio search functionality using Meilisearch, + as an experiment and to evaluate it more thoroughly. +2. 
The Studio search functionality will be disabled by default in the next + Open edX release (Redwood), so that Meilisearch will not be a requirement + for any default nor existing features. This will also allow us to evaluate it + before deciding to embrace it or replace it. +3. We will keep the Meilisearch-specific code isolated to the + new ``content/search`` Django app, so it's relatively easy to swap out later + if this experiment doesn't pan out. +4. We will not use ``edx-search`` for the new search functionality. + + +Consequences +************ + +1. Organizations that wish to try out the new Studio Search functionality in + the Redwood release will have to install and configure Meilisearch. +2. Building both the backend and frontend components of the Studio search + project will be much faster and simpler than if we used ElasticSearch, + edx-search, OpenSearch, django-haystack, etc. +3. Keyword search with boolean operators will not be supported in any of the new + search features. + + +Alternatives Considered +*********************** + +OpenSearch Only +=============== + +Moving existing search functionality to OpenSearch is a possibility, but though +it mostly addresses the licensing issue, it doesn't solve the problems of +resource usage, API complexity, edx-search API complexity, lack of Instantsearch +integration, and poor multi-tenancy. + +OpenSearch and Elasticsearch +============================ + +When OpenSearch was originally forked from Elasticsearch, it was completely API +compatible, but over time they have developed along divergent paths. Regardless +of whether ElasticSearch and OpenSearch are actually wire-compatible, recent +versions of all the official ElasticSearch clients have been made to actively +reject connections to OpenSearch, which is why you generally won't find client +libraries that work with both engines, and why there are OpenSearch forks of +everything on the client side as well as the server side. 
+ +As there is no ready-to-use abstraction layer that would allow us to comfortably +support both, and no interest in maintaining one ourselves, this is not an +appealing option. + +Algolia +======= + +Algolia is a great search engine service, but as it is a proprietary product, it +is not suitable as a requirement for an open source platform like Open edX. From 9b77442587ac23bbbdcc37f69a54288770f04bbc Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Mon, 18 Mar 2024 09:40:05 -0700 Subject: [PATCH 19/28] chore: fix quality issue --- openedx/core/djangoapps/content/search/tests/test_views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openedx/core/djangoapps/content/search/tests/test_views.py b/openedx/core/djangoapps/content/search/tests/test_views.py index 825d1e2924d5..198971609e97 100644 --- a/openedx/core/djangoapps/content/search/tests/test_views.py +++ b/openedx/core/djangoapps/content/search/tests/test_views.py @@ -54,7 +54,7 @@ def test_studio_search_disabled(self): self.client.login(username='student', password='student_pass') result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) assert result.status_code == 404 - + @override_settings(MEILISEARCH_ENABLED=True) def test_studio_search_student_forbidden(self): """ From 91d0358f7b420f523055e072e9c634ccdd082386 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Mon, 18 Mar 2024 14:17:25 -0700 Subject: [PATCH 20/28] fix: handle case where there are no library/course blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Rômulo Penido --- .../content/search/management/commands/reindex_studio.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index 462c0a732e5f..34f182b211a3 100644 --- 
a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -88,7 +88,9 @@ def handle(self, *args, **options): doc = searchable_doc_for_library_block(metadata) docs.append(doc) num_blocks_done += 1 - # Add all the docs in this library at once (usually faster than adding one at a time): + if docs: + # Add all the docs in this library at once (usually faster than adding one at a time): + self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) num_contexts_done += 1 @@ -108,8 +110,9 @@ def add_with_children(block): self.recurse_children(course, add_with_children) - # Add all the docs in this course at once (usually faster than adding one at a time): - self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) + if docs: + # Add all the docs in this course at once (usually faster than adding one at a time): + self.wait_for_meili_task(client.index(temp_index_name).add_documents(docs)) num_contexts_done += 1 num_blocks_done += len(docs) From 0af6939608941235c44ef5e481b3f47f4d322400 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Mon, 18 Mar 2024 15:01:17 -0700 Subject: [PATCH 21/28] fix: hash used for ID wasn't stable --- openedx/core/djangoapps/content/search/documents.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index aee51a53e65e..3e8f42848cfd 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -1,7 +1,8 @@ """ -Utilities related to searching content libraries +Utilities related to indexing content for search """ from __future__ import annotations +import hashlib import logging from django.utils.text import slugify @@ -64,7 +65,13 @@ 
def _meili_id_from_opaque_key(usage_key: UsageKey) -> str: requirement, we transform them to a similar slug ID string that does. """ # The slugified key _may_ not be unique so we append a hashed number too - return slugify(str(usage_key)) + "-" + str(hash(str(usage_key)) % 1_000) + key_bin = str(usage_key).encode() + try: + suffix = hashlib.sha1(key_bin, usedforsecurity=False).hexdigest()[:7] + except TypeError: + # Remove this when we don't need to support Python 3.8 and older + suffix = hashlib.sha1(key_bin).hexdigest()[:7] + return slugify(str(usage_key)) + "-" + suffix def _fields_from_block(block) -> dict: From 106cfd071c874b2d08efaa6d9533f0cb2cd8500a Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Mon, 18 Mar 2024 15:01:28 -0700 Subject: [PATCH 22/28] fix: breadcrumbs weren't using fallback display_names --- openedx/core/djangoapps/content/search/documents.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 3e8f42848cfd..7c299e05ea2d 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -103,7 +103,7 @@ class implementation returns only: # this would be very inefficient. Better to recurse the tree top-down with the parent blocks loaded. 
log.warning(f"Updating Studio search index for XBlock {block.usage_key} but ancestors weren't cached.") cur_block = cur_block.get_parent() - block_data[Fields.breadcrumbs].insert(0, {"display_name": cur_block.display_name}) + block_data[Fields.breadcrumbs].insert(0, {"display_name": xblock_api.get_block_display_name(cur_block)}) try: content_data = block.index_dictionary() # Will be something like: From 1393d4139f4ee18692cbf242df8b32d1a303909c Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Mon, 18 Mar 2024 15:01:39 -0700 Subject: [PATCH 23/28] test: expand tests --- .../content/search/tests/test_documents.py | 66 +++++++++++++++++++ .../content/search/tests/test_views.py | 7 +- 2 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 openedx/core/djangoapps/content/search/tests/test_documents.py diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py new file mode 100644 index 000000000000..3f31111f7280 --- /dev/null +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -0,0 +1,66 @@ +""" +Tests for the Studio content search documents (what gets stored in the index) +""" +from openedx.core.djangolib.testing.utils import skip_unless_cms +from xmodule.modulestore.django import modulestore +from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase +from xmodule.modulestore.tests.factories import BlockFactory, ToyCourseFactory + +from ..documents import searchable_doc_for_course_block + +STUDIO_SEARCH_ENDPOINT_URL = "/api/content_search/v2/studio/" + + +@skip_unless_cms +class StudioDocumentsTest(SharedModuleStoreTestCase): + """ + Tests for the Studio content search documents (what gets stored in the + search index) + """ + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.store = modulestore() + cls.toy_course = ToyCourseFactory.create() # See xmodule/modulestore/tests/sample_courses.py + cls.toy_course_key = 
cls.toy_course.id + # Create a problem in library + cls.problem_block = BlockFactory.create( + category="problem", + parent_location=cls.toy_course_key.make_usage_key("vertical", "vertical_test"), + display_name='Test Problem', + data="What is a test?", + ) + + def setUp(self): + super().setUp() + + def test_problem_block(self): + """ + Test how a problem block gets represented in the search index + """ + # block_usage_key = self.toy_course_key.make_usage_key("problem", "test_problem") + block = self.store.get_item(self.problem_block.usage_key) + doc = searchable_doc_for_course_block(block) + assert doc == { + # Note the 'id' has been stripped of special characters to meet Meilisearch requirements. + # The '-8516ed8' suffix is deterministic based on the original usage key. + "id": "block-v1edxtoy2012_falltypeproblemblocktest_problem-8516ed8", + "type": "course_block", + "block_type": "problem", + "usage_key": "block-v1:edX+toy+2012_Fall+type@problem+block@Test_Problem", + "block_id": "Test_Problem", + "context_key": "course-v1:edX+toy+2012_Fall", + "org": "edX", + "display_name": "Test Problem", + "breadcrumbs": [ + {"display_name": "Toy Course"}, + {"display_name": "chapter"}, + {"display_name": "sequential"}, + {"display_name": "vertical"}, + ], + "content": { + "capa_content": "What is a test?", + "problem_types": ["multiplechoiceresponse"], + }, + } diff --git a/openedx/core/djangoapps/content/search/tests/test_views.py b/openedx/core/djangoapps/content/search/tests/test_views.py index 198971609e97..e117cf1b969a 100644 --- a/openedx/core/djangoapps/content/search/tests/test_views.py +++ b/openedx/core/djangoapps/content/search/tests/test_views.py @@ -6,19 +6,20 @@ from unittest import mock from common.djangoapps.student.tests.factories import UserFactory -from openedx.core.djangolib.testing.utils import CacheIsolationTestCase, skip_unless_cms +from openedx.core.djangolib.testing.utils import skip_unless_cms STUDIO_SEARCH_ENDPOINT_URL = 
"/api/content_search/v2/studio/" @skip_unless_cms -class StudioSearchViewTest(CacheIsolationTestCase, APITestCase): +class StudioSearchViewTest(APITestCase): """ General tests for the Studio search REST API. """ @classmethod - def setUpTestData(cls): # lint-amnesty, pylint: disable=super-method-not-called + def setUpClass(cls): + super().setUpClass() cls.staff = UserFactory.create( username='staff', email='staff@example.com', is_staff=True, password='staff_pass' ) From 5a1429e713ddb61b3fabd3e1503b1aafdbc0a9e2 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Mon, 18 Mar 2024 18:34:16 -0700 Subject: [PATCH 24/28] fix: lint issues --- openedx/core/djangoapps/content/search/documents.py | 8 ++------ .../djangoapps/content/search/tests/test_documents.py | 3 --- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 7c299e05ea2d..68eebcfa0d76 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -64,13 +64,9 @@ def _meili_id_from_opaque_key(usage_key: UsageKey) -> str: hyphens (-) and underscores (_). Since our opaque keys don't meet this requirement, we transform them to a similar slug ID string that does. """ - # The slugified key _may_ not be unique so we append a hashed number too + # The slugified key _may_ not be unique so we append a hashed string to make it unique: key_bin = str(usage_key).encode() - try: - suffix = hashlib.sha1(key_bin, usedforsecurity=False).hexdigest()[:7] - except TypeError: - # Remove this when we don't need to support Python 3.8 and older - suffix = hashlib.sha1(key_bin).hexdigest()[:7] + suffix = hashlib.sha1(key_bin).hexdigest()[:7] # When we use Python 3.9+, should add usedforsecurity=False here. 
return slugify(str(usage_key)) + "-" + suffix diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py index 3f31111f7280..9394ca254bb2 100644 --- a/openedx/core/djangoapps/content/search/tests/test_documents.py +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -32,9 +32,6 @@ def setUpClass(cls): data="What is a test?", ) - def setUp(self): - super().setUp() - def test_problem_block(self): """ Test how a problem block gets represented in the search index From 69879e090c78ddcd9f842d90d0e45bd255264fc3 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Mon, 18 Mar 2024 19:21:59 -0700 Subject: [PATCH 25/28] test: expand tests --- .../content/search/tests/test_documents.py | 88 ++++++++++++++++++- .../core/djangoapps/content_tagging/api.py | 3 + 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py index 9394ca254bb2..78ed68039c51 100644 --- a/openedx/core/djangoapps/content/search/tests/test_documents.py +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -1,6 +1,8 @@ """ Tests for the Studio content search documents (what gets stored in the index) """ +from organizations.models import Organization +from openedx.core.djangoapps.content_tagging import api as tagging_api from openedx.core.djangolib.testing.utils import skip_unless_cms from xmodule.modulestore.django import modulestore from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase @@ -24,6 +26,8 @@ def setUpClass(cls): cls.store = modulestore() cls.toy_course = ToyCourseFactory.create() # See xmodule/modulestore/tests/sample_courses.py cls.toy_course_key = cls.toy_course.id + # Get references to some blocks in the toy course + cls.html_block_key = cls.toy_course_key.make_usage_key("html", "toyjumpto") # Create a problem in library 
cls.problem_block = BlockFactory.create( category="problem", @@ -32,11 +36,29 @@ def setUpClass(cls): data="What is a test?", ) + # Create a couple taxonomies and some tags + cls.org = Organization.objects.create(name="edX", short_name="edX") + cls.difficulty_tags = tagging_api.create_taxonomy(name="Difficulty", orgs=[cls.org], allow_multiple=False) + tagging_api.add_tag_to_taxonomy(cls.difficulty_tags, tag="Easy") + tagging_api.add_tag_to_taxonomy(cls.difficulty_tags, tag="Normal") + tagging_api.add_tag_to_taxonomy(cls.difficulty_tags, tag="Difficult") + + cls.subject_tags = tagging_api.create_taxonomy(name="Subject", orgs=[cls.org], allow_multiple=True) + tagging_api.add_tag_to_taxonomy(cls.subject_tags, tag="Linguistics") + tagging_api.add_tag_to_taxonomy(cls.subject_tags, tag="Asian Languages", parent_tag_value="Linguistics") + tagging_api.add_tag_to_taxonomy(cls.subject_tags, tag="Chinese", parent_tag_value="Asian Languages") + tagging_api.add_tag_to_taxonomy(cls.subject_tags, tag="Hypertext") + tagging_api.add_tag_to_taxonomy(cls.subject_tags, tag="Jump Links", parent_tag_value="Hypertext") + + # Tag stuff: + tagging_api.tag_object(cls.problem_block.usage_key, cls.difficulty_tags, tags=["Easy"]) + tagging_api.tag_object(cls.html_block_key, cls.subject_tags, tags=["Chinese", "Jump Links"]) + tagging_api.tag_object(cls.html_block_key, cls.difficulty_tags, tags=["Normal"]) + def test_problem_block(self): """ Test how a problem block gets represented in the search index """ - # block_usage_key = self.toy_course_key.make_usage_key("problem", "test_problem") block = self.store.get_item(self.problem_block.usage_key) doc = searchable_doc_for_course_block(block) assert doc == { @@ -60,4 +82,68 @@ def test_problem_block(self): "capa_content": "What is a test?", "problem_types": ["multiplechoiceresponse"], }, + # See https://blog.meilisearch.com/nested-hierarchical-facets-guide/ + # and https://www.algolia.com/doc/api-reference/widgets/hierarchical-menu/js/ + # For 
details on why the hierarchical tag data is in this format. + "tags": { + "taxonomy": ["Difficulty"], + "level0": ["Difficulty > Easy"], + }, + } + + def test_html_block(self): + """ + Test how an HTML block gets represented in the search index + """ + block = self.store.get_item(self.html_block_key) + doc = searchable_doc_for_course_block(block) + assert doc == { + "id": "block-v1edxtoy2012_falltypehtmlblocktoyjumpto-b0b4a10", + "type": "course_block", + "block_type": "html", + "usage_key": "block-v1:edX+toy+2012_Fall+type@html+block@toyjumpto", + "block_id": "toyjumpto", + "context_key": "course-v1:edX+toy+2012_Fall", + "org": "edX", + "display_name": "Text", + "breadcrumbs": [ + {"display_name": "Toy Course"}, + {"display_name": "Overview"}, + {"display_name": "Toy Videos"}, + ], + "content": { + "html_content": ( + "This is a link to another page and some Chinese 四節比分和七年前 Some more Chinese 四節比分和七年前 " + ), + }, + "tags": { + "taxonomy": ["Difficulty", "Subject"], + "level0": ["Difficulty > Normal", "Subject > Hypertext", "Subject > Linguistics"], + "level1": ["Subject > Hypertext > Jump Links", "Subject > Linguistics > Asian Languages"], + "level2": ["Subject > Linguistics > Asian Languages > Chinese"], + }, + } + + def test_video_block_untagged(self): + """ + Test how a video block gets represented in the search index. + """ + block_usage_key = self.toy_course_key.make_usage_key("video", "Welcome") + block = self.store.get_item(block_usage_key) + doc = searchable_doc_for_course_block(block) + assert doc == { + "id": "block-v1edxtoy2012_falltypevideoblockwelcome-b47fb14", + "type": "course_block", + "block_type": "video", + "usage_key": "block-v1:edX+toy+2012_Fall+type@video+block@Welcome", + "block_id": "Welcome", + "context_key": "course-v1:edX+toy+2012_Fall", + "org": "edX", + "display_name": "Welcome", + "breadcrumbs": [ + {"display_name": "Toy Course"}, + {"display_name": "Overview"}, + ], + "content": {}, + # This video has no tags. 
} diff --git a/openedx/core/djangoapps/content_tagging/api.py b/openedx/core/djangoapps/content_tagging/api.py index c167eba3a629..e50d96fe2ce2 100644 --- a/openedx/core/djangoapps/content_tagging/api.py +++ b/openedx/core/djangoapps/content_tagging/api.py @@ -200,6 +200,9 @@ def set_object_tags( # Expose the oel_tagging APIs +add_tag_to_taxonomy = oel_tagging.add_tag_to_taxonomy +update_tag_in_taxonomy = oel_tagging.update_tag_in_taxonomy +delete_tags_from_taxonomy = oel_tagging.delete_tags_from_taxonomy get_taxonomy = oel_tagging.get_taxonomy get_taxonomies = oel_tagging.get_taxonomies get_tags = oel_tagging.get_tags From 1b5dfdf3f89058e542fdb6de8be1cd8bbe78b074 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Tue, 19 Mar 2024 10:58:09 -0700 Subject: [PATCH 26/28] docs: fix typo pointed out in review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Rômulo Penido --- openedx/core/djangoapps/content/search/documents.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 68eebcfa0d76..e149854dde57 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -206,7 +206,7 @@ def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> def searchable_doc_for_course_block(block) -> dict: """ Generate a dictionary document suitable for ingestion into a search engine - like Meilisearch or Elasticsearch, so that the given library block can be + like Meilisearch or Elasticsearch, so that the given course block can be found using faceted search. 
""" doc = _fields_from_block(block) From ec0c4a875d7bc4766ff02c50a5e36d8ec6cd3237 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 21 Mar 2024 11:17:56 -0700 Subject: [PATCH 27/28] docs: clarify code with comments, indicate command is experimental --- cms/envs/common.py | 2 +- .../djangoapps/content/search/documents.py | 49 ++++++++++++------- .../management/commands/reindex_studio.py | 20 +++++++- .../content/search/tests/test_documents.py | 6 +-- 4 files changed, 53 insertions(+), 24 deletions(-) diff --git a/cms/envs/common.py b/cms/envs/common.py index 1459d3bda4a8..e3e68df36d3c 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -1777,7 +1777,7 @@ 'openedx.core.djangoapps.content_tagging', # Search - 'openedx.core.djangoapps.content.search.apps.ContentSearchConfig', + 'openedx.core.djangoapps.content.search', 'openedx.features.course_duration_limits', 'openedx.features.content_type_gating', diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index e149854dde57..fc1388f82455 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -2,7 +2,7 @@ Utilities related to indexing content for search """ from __future__ import annotations -import hashlib +from hashlib import blake2b import logging from django.utils.text import slugify @@ -37,9 +37,10 @@ class Fields: # See https://blog.meilisearch.com/nested-hierarchical-facets-guide/ # and https://www.algolia.com/doc/api-reference/widgets/hierarchical-menu/js/ # For details on the format of the hierarchical tag data. + # We currently have a hard-coded limit of 4 levels of tags in the search index (level0..level3). tags = "tags" - tags_taxonomy = "taxonomy" - tags_level0 = "level0" + tags_taxonomy = "taxonomy" # subfield of tags, i.e. tags.taxonomy + tags_level0 = "level0" # subfield of tags, i.e. 
tags.level0 tags_level1 = "level1" tags_level2 = "level2" tags_level3 = "level3" @@ -48,6 +49,10 @@ class Fields: # Text (html) blocks have an "html_content" key in here, capa has "capa_content" and "problem_types", and so on. content = "content" + # Note: new fields or values can be added at any time, but if they need to be indexed for filtering or keyword + # search, the index configuration will need to be changed, which is only done as part of the 'reindex_studio' + # command (changing those settings on an large active index is not recommended). + class DocType: """ @@ -63,10 +68,13 @@ def _meili_id_from_opaque_key(usage_key: UsageKey) -> str: integer or a string composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_). Since our opaque keys don't meet this requirement, we transform them to a similar slug ID string that does. + + In the future, with Learning Core's data models in place for courseware, + we could use PublishableEntity's primary key / UUID instead. """ # The slugified key _may_ not be unique so we append a hashed string to make it unique: key_bin = str(usage_key).encode() - suffix = hashlib.sha1(key_bin).hexdigest()[:7] # When we use Python 3.9+, should add usedforsecurity=False here. + suffix = blake2b(key_bin, digest_size=4).hexdigest() # When we use Python 3.9+, should add usedforsecurity=False return slugify(str(usage_key)) + "-" + suffix @@ -141,6 +149,13 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict: "level2": ["Location > North America > Canada > Vancouver"], } } + + Note: despite what you might expect, because this is only used for the + filtering/refinement UI, it's fine if this is a one-way transformation. + It's not necessary to be able to re-construct the exact tag IDs nor taxonomy + IDs from this data that's stored in the search index. It's just a bunch of + strings in a particular format that the frontend knows how to render to + support hierarchical refinement by tag. 
""" # Note that we could improve performance for indexing many components from the same library/course, # if we used get_all_object_tags() to load all the tags for the library in a single query rather than loading the @@ -159,15 +174,24 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict: result[Fields.tags_taxonomy].append(obj_tag.name) # Taxonomy name plus each level of tags, in a list: parts = [obj_tag.name] + obj_tag.get_lineage() # e.g. ["Location", "North America", "Canada", "Vancouver"] - parts = [part.replace(" > ", " _ ") for part in parts] # Escape our separator + parts = [part.replace(" > ", " _ ") for part in parts] # Escape our separator. + # Now we build each level (tags.level0, tags.level1, etc.) as applicable. + # We have a hard-coded limit of 4 levels of tags for now (see Fields.tags above). + # A tag like "Difficulty: Hard" will only result in one level (tags.level0) + # But a tag like "Location: North America > Canada > Vancouver" would result in three levels (tags.level0: + # "North America", tags.level1: "North America > Canada", tags.level2: "North America > Canada > Vancouver") + # See the comments above on "Field.tags" for an explanation of why we use this format (basically it's the format + # required by the Instantsearch frontend). for level in range(4): + # We use '>' as a separator because it's the default for the Instantsearch frontend library, and our + # preferred separator (\t) used in the database is ignored by Meilisearch since it's whitespace. new_value = " > ".join(parts[0:level + 2]) if f"level{level}" not in result: result[f"level{level}"] = [new_value] elif new_value not in result[f"level{level}"]: result[f"level{level}"].append(new_value) if len(parts) == level + 2: - break + break # We have all the levels for this tag now (e.g. 
parts=["Difficulty", "Hard"] -> need level0 only) return {Fields.tags: result} @@ -184,18 +208,7 @@ def searchable_doc_for_library_block(metadata: lib_api.LibraryXBlockMetadata) -> block = xblock_api.load_block(metadata.usage_key, user=None) except Exception as err: # pylint: disable=broad-except log.exception(f"Failed to load XBlock {metadata.usage_key}: {err}") - # Even though we couldn't load the block, we can still include basic data about it in the index, from 'metadata' - doc.update({ - Fields.id: _meili_id_from_opaque_key(metadata.usage_key), - Fields.usage_key: str(metadata.usage_key), - Fields.block_id: str(metadata.usage_key.block_id), - Fields.display_name: metadata.display_name, - Fields.type: metadata.usage_key.block_type, - Fields.context_key: str(metadata.usage_key.context_key), - Fields.org: str(metadata.usage_key.context_key.org), - }) - else: - doc.update(_fields_from_block(block)) + doc.update(_fields_from_block(block)) doc.update(_tags_for_content_object(metadata.usage_key)) doc[Fields.type] = DocType.library_block # Add the breadcrumbs. In v2 libraries, the library itself is not a "parent" of the XBlocks so we add it here: diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index 34f182b211a3..d2df30b2a016 100644 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -9,7 +9,7 @@ import time from django.conf import settings -from django.core.management import BaseCommand +from django.core.management import BaseCommand, CommandError from openedx.core.djangoapps.content_libraries import api as lib_api from openedx.core.djangoapps.content.search.documents import ( @@ -28,13 +28,24 @@ class Command(MeiliCommandMixin, BaseCommand): """ - Build or re-build the search index for courses (in Studio, i.e. 
Draft mode) + Build or re-build the search index for courses and libraries (in Studio, i.e. Draft mode) + + This is experimental and not recommended for production use. """ + def add_arguments(self, parser): + parser.add_argument('--experimental', action='store_true') + parser.set_defaults(experimental=False) + def handle(self, *args, **options): """ Build a new search index for Studio, containing content from courses and libraries """ + if not options["experimental"]: + raise CommandError( + "This command is experimental and not recommended for production. " + "Use the --experimental argument to acknowledge and run it." + ) start_time = time.perf_counter() client = self.get_meilisearch_client() store = modulestore() @@ -59,6 +70,11 @@ def handle(self, *args, **options): index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME with self.using_temp_index(index_name) as temp_index_name: ############## Configure the index ############## + + # The following index settings are best changed on an empty index. + # Changing them on a populated index will "re-index all documents in the index, which can take some time" + # and use more RAM. Instead, we configure an empty index then populate it one course/library at a time. + # Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique): client.index(temp_index_name).update_distinct_attribute(Fields.usage_key) # Mark which attributes can be used for filtering/faceted search: diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py index 78ed68039c51..97b2fb5d033f 100644 --- a/openedx/core/djangoapps/content/search/tests/test_documents.py +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -64,7 +64,7 @@ def test_problem_block(self): assert doc == { # Note the 'id' has been stripped of special characters to meet Meilisearch requirements. 
# The '-8516ed8' suffix is deterministic based on the original usage key. - "id": "block-v1edxtoy2012_falltypeproblemblocktest_problem-8516ed8", + "id": "block-v1edxtoy2012_falltypeproblemblocktest_problem-f46b6f1e", "type": "course_block", "block_type": "problem", "usage_key": "block-v1:edX+toy+2012_Fall+type@problem+block@Test_Problem", @@ -98,7 +98,7 @@ def test_html_block(self): block = self.store.get_item(self.html_block_key) doc = searchable_doc_for_course_block(block) assert doc == { - "id": "block-v1edxtoy2012_falltypehtmlblocktoyjumpto-b0b4a10", + "id": "block-v1edxtoy2012_falltypehtmlblocktoyjumpto-efb9c601", "type": "course_block", "block_type": "html", "usage_key": "block-v1:edX+toy+2012_Fall+type@html+block@toyjumpto", @@ -132,7 +132,7 @@ def test_video_block_untagged(self): block = self.store.get_item(block_usage_key) doc = searchable_doc_for_course_block(block) assert doc == { - "id": "block-v1edxtoy2012_falltypevideoblockwelcome-b47fb14", + "id": "block-v1edxtoy2012_falltypevideoblockwelcome-0c9fd626", "type": "course_block", "block_type": "video", "usage_key": "block-v1:edX+toy+2012_Fall+type@video+block@Welcome", From 1ff4b756074de333e31bb2b041c17f7c6612d52a Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 21 Mar 2024 12:08:50 -0700 Subject: [PATCH 28/28] perf: pre-fetch all of the blocks in the course --- .../content/search/management/commands/reindex_studio.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index d2df30b2a016..0f52eea51d4d 100644 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -118,6 +118,9 @@ def handle(self, *args, **options): ) docs = [] + # Pre-fetch the course with all of its children: + course = store.get_course(course.id, depth=None) + 
def add_with_children(block): """ Recursively index the given XBlock/component """ doc = searchable_doc_for_course_block(block)