Skip to content

Commit

Permalink
feat: restrict Studio search results based on user permissions (#34471)
Browse files Browse the repository at this point in the history
* feat: adds SearchAccess model

Stores a numeric ID for each course + library, which will generally be
shorter than the full context_key, so we can pack more of them into the
the Meilisearch search filter.

Also:

* Adds data migration pre-populates the SearchAccess model from the existing
  CourseOverview and ContentLibrary records
* Adds signal handlers to add/remove SearchAccess entries when content
  is created or deleted.
* Adds get_access_ids_for_request() helper method for use in views.
* Adds tests.

* test: can't import content.search in lms tests

* feat: use SearchAccess in documents and views

* Adds an access_id field to the document, which stores the
  SearchAccess.id for the block's context.
* Use the requesting user's allowed access_ids to filter search results
  to documents with those access_ids.
* Since some users have a lot of individual access granted, limit the
  number of access_ids in the filter to a large number (1_000)
* Updates tests to demonstrate.

* test: can't import content.search or content_staging in lms tests

* fix: make access_id field filterable

* fix: use SearchAccess.get_or_create in signal handlers

In theory, we shouldn't have to do this, because the CREATE and DELETE
events should keep the SearchAccess table up-to-date.

But in practice, signals can be missed (or in tests, they may be
disabled). So we assume that it's ok to re-use a SearchAccess.id created
for a given course or library context_key.

* refactor: refactors the view tests to make them clearer

Uses helper methods and decorators to wrap the settings and patches used
by multiple view tests.

* feat: adds org filters to meilisearch filter

* Uses content_tagging.rules.get_user_orgs to fetch the user's
  content-related orgs for use in the meilisearch filter.
* Limits the number of orgs used to 1_000 to keep token size down

* refactor: removes data migration

Users should use the reindex_studio management command to populate SearchAccess.

* refactor: adds functions to common.djangoapps.student.role_helpers

to allow general access to the user's RoleCache without having to access
private attributes of User or RoleCache.

Related changes:

* Moves some functionality from openedx.core.djangoapps.enrollments.data.get_user_roles
  to this new helper method.
* Use these new helper method in content_tagging.rules

* fix: get_access_ids_for_request only returns individual access

instead of all course keys that the user can read.

Org- and GlobalStaff access checks will handle the rest.

* fix: use org-level permissions when generating search filter

Also refactors tests to demonstrate this change for OrgStaff and
OrgInstructor users.

* refactor: remove SearchAccess creation signal handlers

Lets SearchAccess entries be created on demand during search indexing.

* feat: omit access_ids from the search filter that are covered by the user's org roles

---------

Co-authored-by: Rômulo Penido <[email protected]>
  • Loading branch information
pomegranited and rpenido authored Apr 17, 2024
1 parent 96f6349 commit d672110
Show file tree
Hide file tree
Showing 14 changed files with 718 additions and 59 deletions.
38 changes: 37 additions & 1 deletion common/djangoapps/student/role_helpers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""
Helpers for student roles
"""
from __future__ import annotations

from django.contrib.auth import get_user_model

from openedx.core.djangoapps.django_comment_common.models import (
FORUM_ROLE_ADMINISTRATOR,
Expand All @@ -12,15 +14,20 @@
)
from openedx.core.lib.cache_utils import request_cached
from common.djangoapps.student.roles import (
CourseAccessRole,
CourseBetaTesterRole,
CourseInstructorRole,
CourseStaffRole,
GlobalStaff,
OrgInstructorRole,
OrgStaffRole
OrgStaffRole,
RoleCache,
)


User = get_user_model()


@request_cached()
def has_staff_roles(user, course_key):
"""
Expand All @@ -40,3 +47,32 @@ def has_staff_roles(user, course_key):
is_org_instructor, is_global_staff, has_forum_role]):
return True
return False


@request_cached()
def get_role_cache(user: User) -> RoleCache:
"""
Returns a populated RoleCache for the given user.
The returned RoleCache is also cached on the provided `user` to improve performance on future roles checks.
:param user: User
:return: All roles for all courses that this user has access to.
"""
# pylint: disable=protected-access
if not hasattr(user, '_roles'):
user._roles = RoleCache(user)
return user._roles


@request_cached()
def get_course_roles(user: User) -> list[CourseAccessRole]:
"""
Returns a list of all course-level roles that this user has.
:param user: User
:return: All roles for all courses that this user has access to.
"""
# pylint: disable=protected-access
role_cache = get_role_cache(user)
return list(role_cache._roles)
28 changes: 28 additions & 0 deletions common/djangoapps/student/tests/test_roles.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from opaque_keys.edx.keys import CourseKey

from common.djangoapps.student.roles import (
CourseAccessRole,
CourseBetaTesterRole,
CourseInstructorRole,
CourseRole,
Expand All @@ -23,6 +24,7 @@
OrgStaffRole,
RoleCache
)
from common.djangoapps.student.role_helpers import get_course_roles, has_staff_roles
from common.djangoapps.student.tests.factories import AnonymousUserFactory, InstructorFactory, StaffFactory, UserFactory


Expand All @@ -48,6 +50,32 @@ def test_global_staff(self):
assert not GlobalStaff().has_user(self.course_instructor)
assert GlobalStaff().has_user(self.global_staff)

def test_has_staff_roles(self):
assert has_staff_roles(self.global_staff, self.course_key)
assert has_staff_roles(self.course_staff, self.course_key)
assert has_staff_roles(self.course_instructor, self.course_key)
assert not has_staff_roles(self.student, self.course_key)

def test_get_course_roles(self):
assert not list(get_course_roles(self.student))
assert not list(get_course_roles(self.global_staff))
assert list(get_course_roles(self.course_staff)) == [
CourseAccessRole(
user=self.course_staff,
course_id=self.course_key,
org=self.course_key.org,
role=CourseStaffRole.ROLE,
)
]
assert list(get_course_roles(self.course_instructor)) == [
CourseAccessRole(
user=self.course_instructor,
course_id=self.course_key,
org=self.course_key.org,
role=CourseInstructorRole.ROLE,
)
]

def test_group_name_case_sensitive(self):
uppercase_course_id = "ORG/COURSE/NAME"
lowercase_course_id = uppercase_course_id.lower()
Expand Down
11 changes: 11 additions & 0 deletions openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from django.utils.text import slugify
from opaque_keys.edx.keys import UsageKey, LearningContextKey

from openedx.core.djangoapps.content.search.models import SearchAccess
from openedx.core.djangoapps.content_libraries import api as lib_api
from openedx.core.djangoapps.content_tagging import api as tagging_api
from openedx.core.djangoapps.xblock import api as xblock_api
Expand All @@ -29,6 +30,7 @@ class Fields:
block_type = "block_type"
context_key = "context_key"
org = "org"
access_id = "access_id" # .models.SearchAccess.id
# breadcrumbs: an array of {"display_name": "..."} entries. First one is the name of the course/library itself.
# After that is the name of any parent Section/Subsection/Unit/etc.
# It's a list of dictionaries because for now we just include the name of each but in future we may add their IDs.
Expand Down Expand Up @@ -78,6 +80,14 @@ def _meili_id_from_opaque_key(usage_key: UsageKey) -> str:
return slugify(str(usage_key)) + "-" + suffix


def _meili_access_id_from_context_key(context_key: LearningContextKey) -> int:
"""
Retrieve the numeric access id for the given course/library context.
"""
access, _ = SearchAccess.objects.get_or_create(context_key=context_key)
return access.id


def _fields_from_block(block) -> dict:
"""
Given an XBlock instance, call its index_dictionary() method to load any
Expand All @@ -96,6 +106,7 @@ class implementation returns only:
# This is called context_key so it's the same for courses and libraries
Fields.context_key: str(block.usage_key.context_key), # same as lib_key
Fields.org: str(block.usage_key.context_key.org),
Fields.access_id: _meili_access_id_from_context_key(block.usage_key.context_key),
Fields.breadcrumbs: []
}
# Get the breadcrumbs (course, section, subsection, etc.):
Expand Down
23 changes: 23 additions & 0 deletions openedx/core/djangoapps/content/search/handlers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
Signal/event handlers for content search
"""
from django.db.models.signals import post_delete
from django.dispatch import receiver
from openedx_events.content_authoring.data import ContentLibraryData
from openedx_events.content_authoring.signals import CONTENT_LIBRARY_DELETED

from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from openedx.core.djangoapps.content.search.models import SearchAccess


# Using post_delete here because there is no COURSE_DELETED event defined.
@receiver(post_delete, sender=CourseOverview)
def delete_course_search_access(sender, instance, **kwargs): # pylint: disable=unused-argument
"""Deletes the SearchAccess instance for deleted CourseOverview"""
SearchAccess.objects.filter(context_key=instance.id).delete()


@receiver(CONTENT_LIBRARY_DELETED)
def delete_library_search_access(content_library: ContentLibraryData, **kwargs):
"""Deletes the SearchAccess instance for deleted content libraries"""
SearchAccess.objects.filter(context_key=content_library.library_key).delete()
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def handle(self, *args, **options):
Fields.org,
Fields.tags,
Fields.type,
Fields.access_id,
])
# Mark which attributes are used for keyword search, in order of importance:
client.index(temp_index_name).update_searchable_attributes([
Expand Down
25 changes: 25 additions & 0 deletions openedx/core/djangoapps/content/search/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Generated by Django 4.2.10 on 2024-04-02 04:55

from django.db import migrations, models
from opaque_keys.edx.django.models import LearningContextKeyField
from opaque_keys.edx.locator import LibraryLocatorV2


class Migration(migrations.Migration):

initial = True

dependencies = [
('course_overviews', '0001_initial'),
('content_libraries', '0001_initial'),
]

operations = [
migrations.CreateModel(
name='SearchAccess',
fields=[
('id', models.BigAutoField(help_text='Numeric ID for each Course / Library context. This ID will generally require fewer bits than the full LearningContextKey, allowing more courses and libraries to be represented in content search filters.', primary_key=True, serialize=False)),
('context_key', LearningContextKeyField(max_length=255, unique=True)),
],
),
]
Empty file.
67 changes: 67 additions & 0 deletions openedx/core/djangoapps/content/search/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Database models for content search"""

from __future__ import annotations

from django.db import models
from django.utils.translation import gettext_lazy as _
from opaque_keys.edx.django.models import LearningContextKeyField
from rest_framework.request import Request

from common.djangoapps.student.role_helpers import get_course_roles
from common.djangoapps.student.roles import CourseInstructorRole, CourseStaffRole
from openedx.core.djangoapps.content_libraries.api import get_libraries_for_user


class SearchAccess(models.Model):
"""
Stores a numeric ID for each ContextKey.
We use this shorter ID instead of the full ContextKey when determining a user's access to search-indexed course and
library content because:
a) in some deployments, users may be granted access to more than 1_000 individual courses, and
b) the search filter request is stored in the JWT, which is limited to 8Kib.
"""
id = models.BigAutoField(
primary_key=True,
help_text=_(
"Numeric ID for each Course / Library context. This ID will generally require fewer bits than the full "
"LearningContextKey, allowing more courses and libraries to be represented in content search filters."
),
)
context_key = LearningContextKeyField(
max_length=255, unique=True, null=False,
)


def get_access_ids_for_request(request: Request, omit_orgs: list[str] = None) -> list[int]:
"""
Returns a list of SearchAccess.id values for courses and content libraries that the requesting user has been
individually grated access to.
Omits any courses/libraries with orgs in the `omit_orgs` list.
"""
omit_orgs = omit_orgs or []

course_roles = get_course_roles(request.user)
course_clause = models.Q(context_key__in=[
role.course_id
for role in course_roles
if (
role.role in [CourseInstructorRole.ROLE, CourseStaffRole.ROLE]
and role.org not in omit_orgs
)
])

libraries = get_libraries_for_user(user=request.user)
library_clause = models.Q(context_key__in=[
lib.library_key for lib in libraries
if lib.library_key.org not in omit_orgs
])

# Sort by descending access ID to simulate prioritizing the "most recently created context keys".
return list(
SearchAccess.objects.filter(
course_clause | library_clause
).order_by('-id').values_list("id", flat=True)
)
23 changes: 22 additions & 1 deletion openedx/core/djangoapps/content/search/tests/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,14 @@
from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase
from xmodule.modulestore.tests.factories import BlockFactory, ToyCourseFactory

from ..documents import searchable_doc_for_course_block
try:
# This import errors in the lms because content.search is not an installed app there.
from ..documents import searchable_doc_for_course_block
from ..models import SearchAccess
except RuntimeError:
searchable_doc_for_course_block = lambda x: x
SearchAccess = {}


STUDIO_SEARCH_ENDPOINT_URL = "/api/content_search/v2/studio/"

Expand All @@ -26,6 +33,7 @@ def setUpClass(cls):
cls.store = modulestore()
cls.toy_course = ToyCourseFactory.create() # See xmodule/modulestore/tests/sample_courses.py
cls.toy_course_key = cls.toy_course.id

# Get references to some blocks in the toy course
cls.html_block_key = cls.toy_course_key.make_usage_key("html", "toyjumpto")
# Create a problem in library
Expand Down Expand Up @@ -55,6 +63,16 @@ def setUpClass(cls):
tagging_api.tag_object(str(cls.html_block_key), cls.subject_tags, tags=["Chinese", "Jump Links"])
tagging_api.tag_object(str(cls.html_block_key), cls.difficulty_tags, tags=["Normal"])

@property
def toy_course_access_id(self):
"""
Returns the SearchAccess.id created for the toy course.
This SearchAccess object is created when documents are added to the search index, so this method must be called
after this step, or risk a DoesNotExist error.
"""
return SearchAccess.objects.get(context_key=self.toy_course_key).id

def test_problem_block(self):
"""
Test how a problem block gets represented in the search index
Expand All @@ -71,6 +89,7 @@ def test_problem_block(self):
"block_id": "Test_Problem",
"context_key": "course-v1:edX+toy+2012_Fall",
"org": "edX",
"access_id": self.toy_course_access_id,
"display_name": "Test Problem",
"breadcrumbs": [
{"display_name": "Toy Course"},
Expand Down Expand Up @@ -105,6 +124,7 @@ def test_html_block(self):
"block_id": "toyjumpto",
"context_key": "course-v1:edX+toy+2012_Fall",
"org": "edX",
"access_id": self.toy_course_access_id,
"display_name": "Text",
"breadcrumbs": [
{"display_name": "Toy Course"},
Expand Down Expand Up @@ -139,6 +159,7 @@ def test_video_block_untagged(self):
"block_id": "Welcome",
"context_key": "course-v1:edX+toy+2012_Fall",
"org": "edX",
"access_id": self.toy_course_access_id,
"display_name": "Welcome",
"breadcrumbs": [
{"display_name": "Toy Course"},
Expand Down
Loading

0 comments on commit d672110

Please sign in to comment.