Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restrict search results based on user permissions [FC-0040] #34471

Merged
merged 17 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from django.utils.text import slugify
from opaque_keys.edx.keys import UsageKey, LearningContextKey

from openedx.core.djangoapps.content.search.models import SearchAccess
from openedx.core.djangoapps.content_libraries import api as lib_api
from openedx.core.djangoapps.content_tagging import api as tagging_api
from openedx.core.djangoapps.xblock import api as xblock_api
Expand All @@ -29,6 +30,7 @@ class Fields:
block_type = "block_type"
context_key = "context_key"
org = "org"
access_id = "access_id" # .models.SearchAccess.id
# breadcrumbs: an array of {"display_name": "..."} entries. First one is the name of the course/library itself.
# After that is the name of any parent Section/Subsection/Unit/etc.
# It's a list of dictionaries because for now we just include the name of each but in future we may add their IDs.
Expand Down Expand Up @@ -78,6 +80,14 @@ def _meili_id_from_opaque_key(usage_key: UsageKey) -> str:
return slugify(str(usage_key)) + "-" + suffix


def _meili_access_id_from_context_key(context_key: LearningContextKey) -> int:
"""
Retrieve the numeric access id for the given course/library context.
"""
access, _ = SearchAccess.objects.get_or_create(context_key=context_key)
return access.id


def _fields_from_block(block) -> dict:
"""
Given an XBlock instance, call its index_dictionary() method to load any
Expand All @@ -96,6 +106,7 @@ class implementation returns only:
# This is called context_key so it's the same for courses and libraries
Fields.context_key: str(block.usage_key.context_key), # same as lib_key
Fields.org: str(block.usage_key.context_key.org),
Fields.access_id: _meili_access_id_from_context_key(block.usage_key.context_key),
Fields.breadcrumbs: []
}
# Get the breadcrumbs (course, section, subsection, etc.):
Expand Down
35 changes: 35 additions & 0 deletions openedx/core/djangoapps/content/search/handlers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""
Signal/event handlers for content search
"""
from django.db.models.signals import post_delete
from django.dispatch import receiver
from openedx_events.content_authoring.data import ContentLibraryData, CourseData
from openedx_events.content_authoring.signals import CONTENT_LIBRARY_CREATED, CONTENT_LIBRARY_DELETED, COURSE_CREATED

from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from openedx.core.djangoapps.content.search.models import SearchAccess


@receiver(COURSE_CREATED)
def create_course_search_access(course: CourseData, **kwargs):
bradenmacdonald marked this conversation as resolved.
Show resolved Hide resolved
"""Creates a SearchAccess instance for new courses."""
SearchAccess.objects.get_or_create(context_key=course.course_key)


# Using post_delete here because there is no COURSE_DELETED event defined.
@receiver(post_delete, sender=CourseOverview)
def delete_course_search_access(sender, instance, **kwargs): # pylint: disable=unused-argument
"""Deletes the SearchAccess instance for deleted CourseOverview"""
SearchAccess.objects.filter(context_key=instance.id).delete()


@receiver(CONTENT_LIBRARY_CREATED)
def create_library_search_access(content_library: ContentLibraryData, **kwargs):
"""Creates a SearchAccess instance for new content libraries"""
SearchAccess.objects.get_or_create(context_key=content_library.library_key)


@receiver(CONTENT_LIBRARY_DELETED)
def delete_library_search_access(content_library: ContentLibraryData, **kwargs):
"""Deletes the SearchAccess instance for deleted content libraries"""
SearchAccess.objects.filter(context_key=content_library.library_key).delete()
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def handle(self, *args, **options):
Fields.org,
Fields.tags,
Fields.type,
Fields.access_id,
])
# Mark which attributes are used for keyword search, in order of importance:
client.index(temp_index_name).update_searchable_attributes([
Expand Down
49 changes: 49 additions & 0 deletions openedx/core/djangoapps/content/search/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Generated by Django 4.2.10 on 2024-04-02 04:55

from django.db import migrations, models
from opaque_keys.edx.django.models import LearningContextKeyField
from opaque_keys.edx.locator import LibraryLocatorV2


def populate_search_access(apps, schema_editor):
"""
Adds SearchAccess rows for all existing CourseOverview and ContentLibrary instances.
"""
if schema_editor.connection.alias != "default":
return

SearchAccess = apps.get_model("search", "SearchAccess")

CourseOverview = apps.get_model("course_overviews", "CourseOverview")
for course in CourseOverview.objects.only('id'):
SearchAccess.objects.create(context_key=course.id)

ContentLibrary = apps.get_model("content_libraries", "ContentLibrary")
for lib in ContentLibrary.objects.select_related("org").only("org", "slug"):
# ContentLibrary.library_key property is not available here, so we have to assemble it.
lib_key = LibraryLocatorV2(org=lib.org.short_name, slug=lib.slug)
SearchAccess.objects.create(context_key=lib_key)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we're concerned about the performance of this data migration, I can remove it and add a management command instead.

Copy link
Contributor Author

@pomegranited pomegranited Apr 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI @rpenido -- I decided to remove the data migration (ef89591): running reindex_studio handles this for us.



class Migration(migrations.Migration):

initial = True

dependencies = [
('course_overviews', '0001_initial'),
('content_libraries', '0001_initial'),
]

operations = [
migrations.CreateModel(
name='SearchAccess',
fields=[
('id', models.BigAutoField(help_text='Numeric ID for each Course / Library context. This ID will generally require fewer bits than the full LearningContextKey, allowing more courses and libraries to be represented in content search filters.', primary_key=True, serialize=False)),
('context_key', LearningContextKeyField(max_length=255, unique=True)),
],
),
migrations.RunPython(
populate_search_access,
reverse_code=migrations.RunPython.noop,
),
]
Empty file.
56 changes: 56 additions & 0 deletions openedx/core/djangoapps/content/search/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Database models for content search"""

from __future__ import annotations

from typing import List

from django.db import models
from django.utils.translation import gettext_lazy as _
from opaque_keys.edx.django.models import LearningContextKeyField
from rest_framework.request import Request

from cms.djangoapps.contentstore.views.course import get_courses_accessible_to_user
from openedx.core.djangoapps.content_libraries.api import get_libraries_for_user


class SearchAccess(models.Model):
pomegranited marked this conversation as resolved.
Show resolved Hide resolved
"""
Stores a numeric ID for each ContextKey.

We use this shorter ID instead of the full ContextKey when determining a user's access to search-indexed course and
library content because:

a) in some deployments, users may be granted access to more than 1_000 individual courses, and
b) the search filter request is stored in the JWT, which is limited to 8Kib.
"""
id = models.BigAutoField(
primary_key=True,
help_text=_(
"Numeric ID for each Course / Library context. This ID will generally require fewer bits than the full "
"LearningContextKey, allowing more courses and libraries to be represented in content search filters."
),
)
context_key = LearningContextKeyField(
max_length=255, unique=True, null=False,
)


def get_access_ids_for_request(request: Request) -> List[int]:
"""
Returns the SearchAccess.id values that the user has read access to.
"""
courses, _ = get_courses_accessible_to_user(request)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we want to get the IDs only for course-level assignments here. If the user is staff in orgA and doesn't have any course-level role, this list should be empty. I'm not sure if we have a function for that.

Maybe use what we did in _get_course_user_orgs in the tagging rules, but return the course ids?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done with 6a3c30a.

course_clause = models.Q(context_key__in=[
course.id for course in courses
])
libraries = get_libraries_for_user(user=request.user)
library_clause = models.Q(context_key__in=[
lib.library_key for lib in libraries
])

# Sort by descending access ID to simulate prioritizing the "most recently created context keys".
return list(
SearchAccess.objects.filter(
course_clause | library_clause
).order_by('-id').values_list("id", flat=True)
)
14 changes: 13 additions & 1 deletion openedx/core/djangoapps/content/search/tests/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,14 @@
from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase
from xmodule.modulestore.tests.factories import BlockFactory, ToyCourseFactory

from ..documents import searchable_doc_for_course_block
try:
# This import errors in the lms because content.search is not an installed app there.
from ..documents import searchable_doc_for_course_block
from ..models import SearchAccess
except RuntimeError:
searchable_doc_for_course_block = lambda x: x
SearchAccess = {}


STUDIO_SEARCH_ENDPOINT_URL = "/api/content_search/v2/studio/"

Expand All @@ -26,6 +33,8 @@ def setUpClass(cls):
cls.store = modulestore()
cls.toy_course = ToyCourseFactory.create() # See xmodule/modulestore/tests/sample_courses.py
cls.toy_course_key = cls.toy_course.id
cls.toy_course_access_id = SearchAccess.objects.get(context_key=cls.toy_course_key).id

# Get references to some blocks in the toy course
cls.html_block_key = cls.toy_course_key.make_usage_key("html", "toyjumpto")
# Create a problem in library
Expand Down Expand Up @@ -71,6 +80,7 @@ def test_problem_block(self):
"block_id": "Test_Problem",
"context_key": "course-v1:edX+toy+2012_Fall",
"org": "edX",
"access_id": self.toy_course_access_id,
"display_name": "Test Problem",
"breadcrumbs": [
{"display_name": "Toy Course"},
Expand Down Expand Up @@ -105,6 +115,7 @@ def test_html_block(self):
"block_id": "toyjumpto",
"context_key": "course-v1:edX+toy+2012_Fall",
"org": "edX",
"access_id": self.toy_course_access_id,
"display_name": "Text",
"breadcrumbs": [
{"display_name": "Toy Course"},
Expand Down Expand Up @@ -139,6 +150,7 @@ def test_video_block_untagged(self):
"block_id": "Welcome",
"context_key": "course-v1:edX+toy+2012_Fall",
"org": "edX",
"access_id": self.toy_course_access_id,
"display_name": "Welcome",
"breadcrumbs": [
{"display_name": "Toy Course"},
Expand Down
Loading
Loading