Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport: Redwood] fix: reindex_studio was crashing if instance had too many courses #34936

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 30 additions & 29 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.cache import cache
from django.core.paginator import Paginator
from meilisearch import Client as MeilisearchClient
from meilisearch.errors import MeilisearchError
from meilisearch.models.task import TaskInfo
Expand All @@ -21,10 +22,9 @@
from common.djangoapps.student.roles import GlobalStaff
from rest_framework.request import Request
from common.djangoapps.student.role_helpers import get_course_roles
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from openedx.core.djangoapps.content.search.models import get_access_ids_for_request

from openedx.core.djangoapps.content_libraries import api as lib_api
from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.django import modulestore

from .documents import (
Expand Down Expand Up @@ -292,9 +292,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:

# Get the list of courses
status_cb("Counting courses...")
with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred):
all_courses = store.get_courses()
num_courses = len(all_courses)
num_courses = CourseOverview.objects.count()

# Some counters so we can track our progress as indexing progresses:
num_contexts = num_courses + num_libraries
Expand Down Expand Up @@ -358,30 +356,33 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:

############## Courses ##############
status_cb("Indexing courses...")
for course in all_courses:
status_cb(
f"{num_contexts_done + 1}/{num_contexts}. Now indexing course {course.display_name} ({course.id})"
)
docs = []

# Pre-fetch the course with all of its children:
course = store.get_course(course.id, depth=None)

def add_with_children(block):
""" Recursively index the given XBlock/component """
doc = searchable_doc_for_course_block(block)
doc.update(searchable_doc_tags(block.usage_key))
docs.append(doc) # pylint: disable=cell-var-from-loop
_recurse_children(block, add_with_children) # pylint: disable=cell-var-from-loop

# Index course children
_recurse_children(course, add_with_children)

if docs:
# Add all the docs in this course at once (usually faster than adding one at a time):
_wait_for_meili_task(client.index(temp_index_name).add_documents(docs))
num_contexts_done += 1
num_blocks_done += len(docs)
# To reduce memory usage on large instances, split up the CourseOverviews into pages of 1,000 courses:
paginator = Paginator(CourseOverview.objects.only('id', 'display_name'), 1000)
for p in paginator.page_range:
for course in paginator.page(p).object_list:
status_cb(
f"{num_contexts_done + 1}/{num_contexts}. Now indexing course {course.display_name} ({course.id})"
)
docs = []

# Pre-fetch the course with all of its children:
course = store.get_course(course.id, depth=None)

def add_with_children(block):
""" Recursively index the given XBlock/component """
doc = searchable_doc_for_course_block(block)
doc.update(searchable_doc_tags(block.usage_key))
docs.append(doc) # pylint: disable=cell-var-from-loop
_recurse_children(block, add_with_children) # pylint: disable=cell-var-from-loop

# Index course children
_recurse_children(course, add_with_children)

if docs:
# Add all the docs in this course at once (usually faster than adding one at a time):
_wait_for_meili_task(client.index(temp_index_name).add_documents(docs))
num_contexts_done += 1
num_blocks_done += len(docs)

status_cb(f"Done! {num_blocks_done} blocks indexed across {num_contexts_done} courses and libraries.")

Expand Down
3 changes: 3 additions & 0 deletions openedx/core/djangoapps/content/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from common.djangoapps.student.tests.factories import UserFactory
from openedx.core.djangoapps.content_libraries import api as library_api
from openedx.core.djangoapps.content_tagging import api as tagging_api
from openedx.core.djangoapps.content.course_overviews.api import CourseOverview
from openedx.core.djangolib.testing.utils import skip_unless_cms
from xmodule.modulestore.tests.django_utils import TEST_DATA_SPLIT_MODULESTORE, ModuleStoreTestCase

Expand Down Expand Up @@ -106,6 +107,8 @@ def setUp(self):
"content": {},
"access_id": course_access.id,
}
# Make sure the CourseOverview for the course is created:
CourseOverview.get_from_id(self.course.id)

# Create a content library:
self.library = library_api.create_library(
Expand Down
Loading