Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: export tagged course as csv #34091

Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
52bc665
feat: export tagged course as csv
rpenido Jan 25, 2024
f275908
docs: add comment
rpenido Jan 25, 2024
a870dfd
fix: add select_related to ObjectTag query
rpenido Jan 26, 2024
4dd027a
fix: always use objecttag.value
rpenido Jan 26, 2024
5fb03aa
docs: change comment position
rpenido Jan 26, 2024
8f1238e
refactor: rename serializer to a more sane name
rpenido Jan 26, 2024
bdedf93
refactor: remove download param
rpenido Jan 26, 2024
04ca072
refactor: create a new view to export objecttags
rpenido Jan 26, 2024
a8a6e7e
Merge branch 'master' into rpenido/fal-3610-download-course-tag-sprea…
rpenido Jan 26, 2024
b01d6d4
refactor: change api and view structure
rpenido Jan 29, 2024
0871537
Merge branch 'master' into rpenido/fal-3610-download-course-tag-sprea…
rpenido Jan 29, 2024
18a8425
docs: remove old comment
rpenido Jan 29, 2024
38ae353
docs: revert view docstring
rpenido Jan 30, 2024
b65a6c8
fix: remove include_children query param
rpenido Jan 30, 2024
f5ac3a6
fix: removing .all() call from queryset
rpenido Jan 30, 2024
debf254
refactor: method rename
rpenido Jan 30, 2024
9726d6d
fix: filter deleted tags
rpenido Jan 30, 2024
c8c12bb
fix: pylint
rpenido Jan 30, 2024
1f11b17
fix: quote string in csv export
rpenido Jan 30, 2024
484c042
test: add querycount
rpenido Jan 30, 2024
6b6ba34
fix: pylint
rpenido Jan 30, 2024
9096454
fix: pylint..
rpenido Jan 30, 2024
a688689
fix: pylint
rpenido Jan 30, 2024
30e06d0
test: compare results to hardcoded strings
pomegranited Jan 30, 2024
35a3d2b
test: Adds "deleted" object tags to ensure they are omitted from results
pomegranited Jan 30, 2024
1e13f54
test: adds untagged blocks with children
pomegranited Jan 30, 2024
e9335c8
revert: undo removed property
rpenido Jan 30, 2024
ab1a69e
style: fix camelCase
rpenido Jan 30, 2024
db9116d
refactor: remove xblock from TaggedContent and include_children param
rpenido Jan 30, 2024
9b3dee8
fix: remove UsageKey
rpenido Jan 30, 2024
821e216
fix: removing unused import
rpenido Jan 31, 2024
6207915
refactor: cleaning code
rpenido Jan 31, 2024
35bc860
test: refactors tests so shared data can be re-used
pomegranited Jan 31, 2024
7a28742
refactor: refactoring api, helper and view code
rpenido Jan 31, 2024
fabb729
docs: add comment about ObjectTag query
rpenido Jan 31, 2024
548d57c
test: use CourseFactory and BlockFactory
pomegranited Jan 30, 2024
a1d41fd
test: fix variable name
rpenido Feb 1, 2024
f07b841
fix: delete unwanted file
rpenido Feb 1, 2024
233135a
test: fix query count
rpenido Feb 1, 2024
ac98812
test: fix expected value with new tags
rpenido Feb 1, 2024
309ce94
fix: use variables from outer function
rpenido Feb 1, 2024
32cdf93
test: use UserFactory
rpenido Feb 1, 2024
4ed7570
style: removed unused imports
rpenido Feb 1, 2024
ee5bc3d
chore: trigger CI
rpenido Feb 1, 2024
4bc8ca7
Merge branch 'master' into rpenido/fal-3610-download-course-tag-sprea…
rpenido Feb 2, 2024
6d4c23a
fix: disable default staff user from module store mixin
rpenido Feb 2, 2024
da3fdf9
style: fix case
rpenido Feb 5, 2024
726b7ef
Merge branch 'jill/rpenido/fal-3610-download-course-tag-spreadsheet' …
rpenido Feb 6, 2024
fd5a542
docs: adds removed docstring
rpenido Feb 6, 2024
79a1786
Merge branch 'master' into rpenido/fal-3610-download-course-tag-sprea…
rpenido Feb 8, 2024
dfe43be
fix: cleaning merged code
rpenido Feb 8, 2024
5245264
style: run isort
rpenido Feb 8, 2024
c82e9cb
refactor: use taxonomy.export_id in header
rpenido Feb 9, 2024
01b9b5f
refactor: change `object_id` to `context_id`
rpenido Feb 15, 2024
f87fc4c
Merge branch 'master' into rpenido/fal-3610-download-course-tag-sprea…
rpenido Feb 15, 2024
779cc98
chore: trigger CI
rpenido Feb 15, 2024
4a3d092
Merge branch 'master' into rpenido/fal-3610-download-course-tag-sprea…
rpenido Feb 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 126 additions & 5 deletions openedx/core/djangoapps/content_tagging/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,22 @@
"""
from __future__ import annotations

from itertools import groupby

import openedx_tagging.core.tagging.api as oel_tagging
from django.db.models import Q, QuerySet, Exists, OuterRef
from openedx_tagging.core.tagging.models import Taxonomy
from opaque_keys.edx.keys import CourseKey, UsageKey
from openedx_tagging.core.tagging.models import ObjectTag, Taxonomy
from organizations.models import Organization
from xmodule.modulestore.django import modulestore

from .models import ContentObjectTag, TaxonomyOrg
from .types import ContentKey
from .types import (
ContentKey,
ObjectTagByObjectIdDict,
TaggedContent,
TaxonomyDict,
)


def create_taxonomy(
Expand Down Expand Up @@ -128,24 +137,30 @@ def get_unassigned_taxonomies(enabled=True) -> QuerySet:
def get_content_tags(
object_key: ContentKey,
taxonomy_id: int | None = None,
) -> QuerySet:
) -> QuerySet[ContentObjectTag]:
"""
Generates a list of content tags for a given object.

Pass taxonomy to limit the returned object_tags to a specific taxonomy.
"""
return oel_tagging.get_object_tags(

tags = oel_tagging.get_object_tags(
object_id=str(object_key),
taxonomy_id=taxonomy_id,
object_tag_class=ContentObjectTag,
)

# Add a generic type to get_object_tags to fix this
return tags # type: ignore


# FixMe: The following method (tag_content_object) is only used in tasks.py for auto-tagging. To tag object we are
# using oel_tagging.tag_object and checking permissions via rule overrides.
def tag_content_object(
pomegranited marked this conversation as resolved.
Show resolved Hide resolved
object_key: ContentKey,
taxonomy: Taxonomy,
tags: list,
) -> QuerySet:
) -> QuerySet[ContentObjectTag]:
"""
This is the main API to use when you want to add/update/delete tags from a content object (e.g. an XBlock or
course).
Expand Down Expand Up @@ -175,6 +190,112 @@ def tag_content_object(
return get_content_tags(object_key, taxonomy_id=taxonomy.id)


def get_content_tags_for_object(
content_key: ContentKey,
include_children: bool,
) -> tuple[TaggedContent, TaxonomyDict]:
pomegranited marked this conversation as resolved.
Show resolved Hide resolved
"""
Returns the object with the tags associated with it. If include_children is True, then it will also include
the children of the object and their tags.
"""

def _get_object_tags(content_key: ContentKey, include_children: bool) -> QuerySet[ObjectTag]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not good practice to have an inner function with parameters that have the same names as the outer function. It's unclear if they have the same or different values.

"""
Return the tags for the object and its children using a single db query.
"""
content_key_str = str(content_key)
if not include_children:
return ObjectTag.objects.filter(object_id=content_key_str).select_related("tag__taxonomy").all()

# We use a block_id_prefix (i.e. the modified course id) to get the tags for the children of the Content
# (course) in a single db query.
# ToDo: Add support for other content types (like LibraryContent and LibraryBlock)
if isinstance(content_key, UsageKey):
course_key_str = str(content_key.course_key)
block_id_prefix = course_key_str.replace("course-v1:", "block-v1:", 1)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't make sense to me. Given a random leaf XBlock A in a course, this will still try to load the ObjectTags for all other XBlocks in the course, none of which are children of XBlock A. I think you can just remove this whole branch of the if.

Copy link
Contributor Author

@rpenido rpenido Jan 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This optimization attempted to load all required ObjectTags beforehand with a single query. The other alternatives were:

  • run a query for each xblock to get its tags
  • create an id list and query the object tags after catching all blocks and iterate over the tree again to associate ObjectTags -> XBlock

This is not necessary for a leaf XBlock, but it would help if the XBlock has children.

This should go anyway because we will remove the UsageKey option.

elif isinstance(content_key, CourseKey):
course_key_str = str(content_key)
block_id_prefix = str(content_key).replace("course-v1:", "block-v1:", 1)
else:
raise NotImplementedError(f"Invalid content_key: {type(content_key)} -> {content_key}")

return ObjectTag.objects.filter(Q(object_id__startswith=block_id_prefix) | Q(object_id=course_key_str)) \
pomegranited marked this conversation as resolved.
Show resolved Hide resolved
.select_related("tag__taxonomy").all()
pomegranited marked this conversation as resolved.
Show resolved Hide resolved

def _group_object_tags_by_objectid_taxonomy(
all_object_tags: list[ObjectTag]
) -> tuple[ObjectTagByObjectIdDict, TaxonomyDict]:
"""
Returns a tuple with a dictionary of grouped object tags for all blocks and a dictionary of taxonomies.
"""

groupedObjectTags: ObjectTagByObjectIdDict = {}
bradenmacdonald marked this conversation as resolved.
Show resolved Hide resolved
taxonomies: TaxonomyDict = {}

for object_id, block_tags in groupby(all_object_tags, lambda x: x.object_id):
groupedObjectTags[object_id] = {}
for taxonomy_id, taxonomy_tags in groupby(block_tags, lambda x: x.tag.taxonomy_id):
object_tags_list = list(taxonomy_tags)
groupedObjectTags[object_id][taxonomy_id] = object_tags_list

if taxonomy_id not in taxonomies:
taxonomies[taxonomy_id] = object_tags_list[0].tag.taxonomy

return groupedObjectTags, taxonomies

def _get_object_with_tags(
content_key: ContentKey,
include_children: bool,
objectTagCache: ObjectTagByObjectIdDict,
store
) -> TaggedContent:
"""
Returns the object with the tags associated with it. If include_children is True, then it will also include
the children of the object and their tags.
"""
if isinstance(content_key, CourseKey):
xblock = store.get_course(content_key)
elif isinstance(content_key, UsageKey):
xblock = store.get_item(content_key)
else:
raise NotImplementedError(f"Invalid content_key: {type(content_key)} -> {content_key}")

tagged_xblock = TaggedContent(
xblock=xblock,
object_tags=objectTagCache.get(str(content_key), {}),
children=None,
)

if not include_children:
return tagged_xblock

blocks = [tagged_xblock]

while blocks:
block = blocks.pop()
block.children = []

if block.xblock.has_children:
for child_id in block.xblock.children:
child = TaggedContent(
xblock=store.get_item(child_id),
object_tags=objectTagCache.get(str(child_id), {}),
children=None,
)
block.children.append(child)

blocks.append(child)

return tagged_xblock

store = modulestore()

all_blocks_tag_records = list(_get_object_tags(content_key, include_children))
objectTagCache, taxonomies = _group_object_tags_by_objectid_taxonomy(all_blocks_tag_records)

return _get_object_with_tags(content_key, include_children, objectTagCache, store), taxonomies


# Expose the oel_tagging APIs

get_taxonomy = oel_tagging.get_taxonomy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,10 @@ def get_all_orgs(self, obj) -> bool:
class Meta:
model = TaxonomySerializer.Meta.model
fields = TaxonomySerializer.Meta.fields + ["orgs", "all_orgs"]
read_only_fields = ["orgs", "all_orgs"]
bradenmacdonald marked this conversation as resolved.
Show resolved Hide resolved


class ExportContentTagsQueryParamsSerializer(serializers.Serializer): # pylint: disable=abstract-method
"""
Serializer for the query params for the export objecttags GET view
"""
include_children = serializers.BooleanField(required=False, default=True)
pomegranited marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

from __future__ import annotations

from urllib.parse import parse_qs, urlparse
import json
from io import BytesIO
from urllib.parse import parse_qs, urlparse
from unittest.mock import MagicMock

import abc
Expand Down Expand Up @@ -39,12 +40,15 @@
from openedx.core.djangolib.testing.utils import skip_unless_cms
from openedx.core.lib import blockstore_api

from ....tests.test_api import TaggedCourseMixin

User = get_user_model()

TAXONOMY_ORG_LIST_URL = "/api/content_tagging/v1/taxonomies/"
TAXONOMY_ORG_DETAIL_URL = "/api/content_tagging/v1/taxonomies/{pk}/"
TAXONOMY_ORG_UPDATE_ORG_URL = "/api/content_tagging/v1/taxonomies/{pk}/orgs/"
OBJECT_TAG_UPDATE_URL = "/api/content_tagging/v1/object_tags/{object_id}/?taxonomy={taxonomy_id}"
OBJECT_TAGS_EXPORT_URL = "/api/content_tagging/v1/object_tags/{object_id}/export/"
OBJECT_TAGS_URL = "/api/content_tagging/v1/object_tags/{object_id}/"
TAXONOMY_TEMPLATE_URL = "/api/content_tagging/v1/taxonomies/import/{filename}"
TAXONOMY_CREATE_IMPORT_URL = "/api/content_tagging/v1/taxonomies/import/"
Expand Down Expand Up @@ -1624,6 +1628,91 @@ def test_object_tags_query_count(self):
assert response.data[object_id]["taxonomies"][0]["tags"] == expected_tags


@skip_unless_cms
@ddt.ddt
class TestContentObjectChildrenExportView(TaggedCourseMixin, APITestCase): # type: ignore[misc]
"""
Tests exporting course children with tags
"""
def setUp(self):
super().setUp()
self.user = User.objects.create(
username="user",
email="[email protected]",
)
self.staff = User.objects.create(
username="staff",
email="[email protected]",
is_staff=True,
)

self.staffA = User.objects.create(
username="staffA",
email="[email protected]",
)
update_org_role(self.staff, OrgStaffRole, self.staffA, [self.orgA.short_name])

@ddt.data(
"staff",
"staffA",
)
def test_export_course(self, user_attr) -> None:
url = OBJECT_TAGS_EXPORT_URL.format(object_id=str(self.course.id))

user = getattr(self, user_attr)
self.client.force_authenticate(user=user)
response = self.client.get(url)
assert response.status_code == status.HTTP_200_OK
assert response.headers['Content-Type'] == 'text/csv'

expected_csv = (
"Name,Type,ID,Taxonomy 1,Taxonomy 2\r\n"
'Test Course,course,course-v1:orgA+test_course+test_run,Tag 1.1,\r\n'
' test sequential,sequential,block-v1:orgA+test_course+test_run+type@sequential+block@test_sequential,'
'"Tag 1.1, Tag 1.2",Tag 2.1\r\n'
' test vertical1,vertical,block-v1:orgA+test_course+test_run+type@vertical+block@test_vertical1,'
',Tag 2.2\r\n'
' test vertical2,vertical,block-v1:orgA+test_course+test_run+type@vertical+block@test_vertical2,,\r\n'
' Text,html,block-v1:orgA+test_course+test_run+type@html+block@test_html,,Tag 2.1\r\n'
)

zip_content = BytesIO(b"".join(response.streaming_content)).getvalue() # type: ignore[attr-defined]
assert zip_content == expected_csv.encode()

def test_export_course_no_children(self) -> None:
url = OBJECT_TAGS_EXPORT_URL.format(object_id=str(self.course.id))

self.client.force_authenticate(user=self.staff)
response = self.client.get(url, {"include_children": False})
assert response.status_code == status.HTTP_200_OK
assert response.headers['Content-Type'] == 'text/csv'

expected_csv = (
"Name,Type,ID,Taxonomy 1\r\n"
'Test Course,course,course-v1:orgA+test_course+test_run,Tag 1.1\r\n'
)

zip_content = BytesIO(b"".join(response.streaming_content)).getvalue() # type: ignore[attr-defined]
assert zip_content == expected_csv.encode()

def test_export_course_anoymous_forbidden(self) -> None:
url = OBJECT_TAGS_EXPORT_URL.format(object_id=str(self.course.id))
response = self.client.get(url)
assert response.status_code == status.HTTP_403_FORBIDDEN

def test_export_course_user_forbidden(self) -> None:
url = OBJECT_TAGS_EXPORT_URL.format(object_id=str(self.course.id))
self.client.force_authenticate(user=self.user)
response = self.client.get(url)
assert response.status_code == status.HTTP_403_FORBIDDEN

def test_export_course_invalid_id(self) -> None:
url = OBJECT_TAGS_EXPORT_URL.format(object_id="invalid")
self.client.force_authenticate(user=self.staff)
response = self.client.get(url)
assert response.status_code == status.HTTP_400_BAD_REQUEST


@skip_unless_cms
@ddt.ddt
class TestDownloadTemplateView(APITestCase):
Expand All @@ -1635,20 +1724,20 @@ class TestDownloadTemplateView(APITestCase):
("template.json", "application/json"),
)
@ddt.unpack
def test_download(self, filename, content_type):
def test_download(self, filename, content_type) -> None:
url = TAXONOMY_TEMPLATE_URL.format(filename=filename)
response = self.client.get(url)
assert response.status_code == status.HTTP_200_OK
assert response.headers['Content-Type'] == content_type
assert response.headers['Content-Disposition'] == f'attachment; filename="{filename}"'
assert int(response.headers['Content-Length']) > 0

def test_download_not_found(self):
def test_download_not_found(self) -> None:
url = TAXONOMY_TEMPLATE_URL.format(filename="template.txt")
response = self.client.get(url)
assert response.status_code == status.HTTP_404_NOT_FOUND

def test_download_method_not_allowed(self):
def test_download_method_not_allowed(self) -> None:
url = TAXONOMY_TEMPLATE_URL.format(filename="template.txt")
response = self.client.post(url)
assert response.status_code == status.HTTP_405_METHOD_NOT_ALLOWED
Expand Down
4 changes: 4 additions & 0 deletions openedx/core/djangoapps/content_tagging/rest_api/v1/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,9 @@
oel_tagging_views_import.TemplateView.as_view(),
name="taxonomy-import-template",
),
path(
"object_tags/<str:object_id>/export/",
rpenido marked this conversation as resolved.
Show resolved Hide resolved
views.ObjectTagExportView.as_view(),
),
path('', include(router.urls))
]
Loading
Loading