Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve the "get object tags" API [FC-0036] #111

Merged
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions openedx_tagging/core/tagging/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,14 @@
"""
from __future__ import annotations

from django.db import transaction
from django.db.models import QuerySet
from django.db import models, transaction
from django.db.models import F, QuerySet, Value
from django.db.models.functions import Coalesce, Concat, Lower
from django.utils.translation import gettext as _

from .data import TagDataQuerySet
from .models import ObjectTag, Tag, Taxonomy
from .models.utils import ConcatNull

# Export this as part of the API
TagDoesNotExist = Tag.DoesNotExist
Expand Down Expand Up @@ -165,8 +167,20 @@ def get_object_tags(
filters = {"taxonomy_id": taxonomy_id} if taxonomy_id else {}
tags = (
object_tag_class.objects.filter(object_id=object_id, **filters)
.select_related("tag", "taxonomy")
.order_by("id")
# Preload related objects, including data for the "get_lineage" method on ObjectTag/Tag:
.select_related("taxonomy", "tag", "tag__parent", "tag__parent__parent")
# Sort the tags within each taxonomy in "tree order". See Taxonomy._get_filtered_tags_deep for details on this:
.annotate(sort_key=Lower(Concat(
ConcatNull(F("tag__parent__parent__parent__value"), Value("\t")),
ConcatNull(F("tag__parent__parent__value"), Value("\t")),
ConcatNull(F("tag__parent__value"), Value("\t")),
Coalesce(F("tag__value"), F("_value")),
Value("\t"),
output_field=models.CharField(),
)))
.annotate(taxonomy_name=Coalesce(F("taxonomy__name"), F("_name")))
# Sort first by taxonomy name, then by tag value in tree order:
.order_by("taxonomy_name", "sort_key")
)
return tags

Expand Down
19 changes: 19 additions & 0 deletions openedx_tagging/core/tagging/migrations/0013_tag_parent_blank.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 3.2.22 on 2023-10-30 21:51

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('oel_tagging', '0012_language_taxonomy'),
]

operations = [
migrations.AlterField(
model_name='tag',
name='parent',
field=models.ForeignKey(blank=True, default=None, help_text='Tag that lives one level up from the current tag, forming a hierarchy.', null=True, on_delete=django.db.models.deletion.CASCADE, related_name='children', to='oel_tagging.tag'),
),
]
49 changes: 40 additions & 9 deletions openedx_tagging/core/tagging/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class Tag(models.Model):
parent = models.ForeignKey(
"self",
null=True,
blank=True,
default=None,
on_delete=models.CASCADE,
related_name="children",
Expand Down Expand Up @@ -100,18 +101,30 @@ def get_lineage(self) -> Lineage:
Queries and returns the lineage of the current tag as a list of Tag.value strings.

The root Tag.value is first, followed by its child.value, and on down to self.value.

Performance note: may perform as many as TAXONOMY_MAX_DEPTH select queries.
"""
lineage: Lineage = []
tag: Tag | None = self
depth = TAXONOMY_MAX_DEPTH
while tag and depth > 0:
lineage.insert(0, tag.value)
tag = tag.parent
depth -= 1
lineage: Lineage = [self.value]
next_ancestor = self.get_next_ancestor()
while next_ancestor:
lineage.insert(0, next_ancestor.value)
next_ancestor = next_ancestor.get_next_ancestor()
return lineage

def get_next_ancestor(self) -> Tag | None:
"""
Fetch the parent of this Tag.

While doing so, preload several ancestors at the same time, so we can
use fewer database queries than the basic approach of iterating through
parent.parent.parent...
"""
if self.parent_id is None:
return None
if not Tag.parent.is_cached(self): # pylint: disable=no-member
# Parent is not yet loaded. Retrieve our parent, grandparent, and great-grandparent in one query.
# This is not actually changing the parent, just loading it and caching it.
self.parent = Tag.objects.select_related("parent", "parent__parent").get(pk=self.parent_id)
return self.parent

@cached_property
def depth(self) -> int:
"""
Expand Down Expand Up @@ -149,6 +162,20 @@ def child_count(self) -> int:
return self.taxonomy.tag_set.filter(parent=self).count()
return 0

def clean(self):
"""
Validate this tag before saving
"""
# Don't allow leading or trailing whitespace:
self.value = self.value.strip()
if self.external_id:
self.external_id = self.external_id.strip()
# Don't allow \t (tab) character at all, as we use it for lineage in database queries
if "\t" in self.value:
raise ValidationError("Tags in a taxonomy cannot contain a TAB character.")
if self.external_id and "\t" in self.external_id:
raise ValidationError("Tag external ID cannot contain a TAB character.")


class Taxonomy(models.Model):
"""
Expand Down Expand Up @@ -534,6 +561,7 @@ def add_tag(
tag = Tag.objects.create(
taxonomy=self, value=tag_value, parent=parent, external_id=external_id
)
tag.full_clean()

return tag

Expand Down Expand Up @@ -802,6 +830,9 @@ def clean(self):
raise ValidationError("Invalid _value - empty string")
if self.taxonomy and self.taxonomy.name != self._name:
raise ValidationError("ObjectTag's _name is out of sync with Taxonomy.name")
if "," in self.object_id or "*" in self.object_id:
# Some APIs may use these characters to allow wildcard matches or multiple matches in the future.
raise ValidationError("Object ID contains invalid characters")

def get_lineage(self) -> Lineage:
"""
Expand Down
48 changes: 44 additions & 4 deletions openedx_tagging/core/tagging/rest_api/v1/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
"""
from __future__ import annotations

from typing import Any

from rest_framework import serializers
from rest_framework.reverse import reverse

Expand Down Expand Up @@ -61,22 +63,60 @@ class ObjectTagListQueryParamsSerializer(serializers.Serializer): # pylint: dis
)


class ObjectTagSerializer(serializers.ModelSerializer):
class ObjectTagMinimalSerializer(serializers.ModelSerializer):
"""
Serializer for the ObjectTag model.
Minimal serializer for the ObjectTag model.
"""

class Meta:
model = ObjectTag
fields = [
fields = ["value", "lineage"]

lineage = serializers.ListField(child=serializers.CharField(), source="get_lineage")


class ObjectTagSerializer(ObjectTagMinimalSerializer):
"""
Serializer for the ObjectTag model.
"""
class Meta:
bradenmacdonald marked this conversation as resolved.
Show resolved Hide resolved
fields = ObjectTagMinimalSerializer.Meta.fields + [
# The taxonomy name
"name",
"value",
"taxonomy_id",
# If the Tag or Taxonomy has been deleted, this ObjectTag shouldn't be shown to users.
"is_deleted",
]


class ObjectTagsByTaxonomySerializer(serializers.ModelSerializer):
"""
Serialize a group of ObjectTags, grouped by taxonomy
"""
def to_representation(self, instance: list[ObjectTag]) -> dict:
"""
Convert this list of ObjectTags to the serialized dictionary, grouped by Taxonomy
"""
by_object: dict[str, dict[str, Any]] = {}
for obj_tag in instance:
if obj_tag.object_id not in by_object:
by_object[obj_tag.object_id] = {
"taxonomies": []
}
taxonomies = by_object[obj_tag.object_id]["taxonomies"]
tax_entry = next((t for t in taxonomies if t["taxonomy_id"] == obj_tag.taxonomy_id), None)
if tax_entry is None:
tax_entry = {
"name": obj_tag.name,
"taxonomy_id": obj_tag.taxonomy_id,
"editable": (not obj_tag.taxonomy.cast().system_defined) if obj_tag.taxonomy else False,
"tags": []
}
taxonomies.append(tax_entry)
tax_entry["tags"].append(ObjectTagMinimalSerializer(obj_tag).data)
return by_object


class ObjectTagUpdateBodySerializer(serializers.Serializer): # pylint: disable=abstract-method
"""
Serializer of the body for the ObjectTag UPDATE view
Expand Down
1 change: 1 addition & 0 deletions openedx_tagging/core/tagging/rest_api/v1/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
router = DefaultRouter()
router.register("taxonomies", views.TaxonomyView, basename="taxonomy")
router.register("object_tags", views.ObjectTagView, basename="object_tag")
router.register("object_tag_counts", views.ObjectTagCountsView, basename="object_tag_counts")

urlpatterns = [
path("", include(router.urls)),
Expand Down
92 changes: 71 additions & 21 deletions openedx_tagging/core/tagging/rest_api/v1/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
"""
from __future__ import annotations

from typing import Any

from django.db import models
from django.http import Http404, HttpResponse
from rest_framework import mixins, status
Expand All @@ -26,12 +28,13 @@
from ...data import TagDataQuerySet
from ...import_export.api import export_tags
from ...import_export.parsers import ParserFormat
from ...models import Taxonomy
from ...models import ObjectTag, Taxonomy
from ...rules import ObjectTagPermissionItem
from ..paginators import TAGS_THRESHOLD, DisabledTagsPagination, TagsPagination
from .permissions import ObjectTagObjectPermissions, TagObjectPermissions, TaxonomyObjectPermissions
from .serializers import (
ObjectTagListQueryParamsSerializer,
ObjectTagsByTaxonomySerializer,
ObjectTagSerializer,
ObjectTagUpdateBodySerializer,
ObjectTagUpdateQueryParamsSerializer,
Expand Down Expand Up @@ -265,10 +268,6 @@ class ObjectTagView(
* 400 - Invalid query parameter
* 403 - Permission denied

**Create Query Returns**
* 403 - Permission denied
* 405 - Method not allowed

**Update Parameters**
* object_id (required): - The Object ID to add ObjectTags for.

Expand Down Expand Up @@ -306,20 +305,20 @@ def get_queryset(self) -> models.QuerySet:
taxonomy = taxonomy.cast()
taxonomy_id = taxonomy.id

perm = "oel_tagging.view_objecttag"
perm_obj = ObjectTagPermissionItem(
taxonomy=taxonomy,
object_id=object_id,
)

if not self.request.user.has_perm(
perm,
# The obj arg expects a model, but we are passing an object
perm_obj, # type: ignore[arg-type]
):
raise PermissionDenied(
"You do not have permission to view object tags for this taxonomy or object_id."
)
if object_id.endswith("*") or "," in object_id: # pylint: disable=no-else-raise
raise ValidationError("Retrieving tags from multiple objects is not yet supported.")
# Note: This API is actually designed so that in the future it can be extended to return tags for multiple
# objects, e.g. if object_id.endswith("*") then it results in a object_id__startswith query. However, for
# now we have no use case for that so we retrieve tags for one object at a time.
else:
if not self.request.user.has_perm(
"oel_tagging.view_objecttag",
# The obj arg expects a model, but we are passing an object
ObjectTagPermissionItem(taxonomy=taxonomy, object_id=object_id), # type: ignore[arg-type]
):
raise PermissionDenied(
"You do not have permission to view object tags for this taxonomy or object_id."
)

return get_object_tags(object_id, taxonomy_id)

Expand All @@ -335,8 +334,13 @@ def retrieve(self, request, *args, **kwargs) -> Response:
behavior we want.
"""
object_tags = self.filter_queryset(self.get_queryset())
serializer = ObjectTagSerializer(object_tags, many=True)
return Response(serializer.data)
serializer = ObjectTagsByTaxonomySerializer(list(object_tags))
response_data = serializer.data
if self.kwargs["object_id"] not in response_data:
# For consistency, the key with the object_id should always be present in the response, even if there
# are no tags at all applied to this object.
response_data[self.kwargs["object_id"]] = {"taxonomies": []}
return Response(response_data)

def update(self, request, *args, **kwargs) -> Response:
"""
Expand Down Expand Up @@ -405,6 +409,52 @@ def update(self, request, *args, **kwargs) -> Response:
return self.retrieve(request, object_id)


@view_auth_classes
class ObjectTagCountsView(
mixins.RetrieveModelMixin,
GenericViewSet,
):
"""
View to retrieve the count of ObjectTags for all matching object IDs.

This API does NOT bother doing any permission checks as the "# of tags" is not considered sensitive information.

**Retrieve Parameters**
* object_id_pattern (required): - The Object ID to retrieve ObjectTags for. Can contain '*' at the end
for wildcard matching, or use ',' to separate multiple object IDs.

**Retrieve Example Requests**
GET api/tagging/v1/object_tag_counts/:object_id_pattern

**Retrieve Query Returns**
* 200 - Success
"""

serializer_class = ObjectTagSerializer
lookup_field = "object_id_pattern"

def retrieve(self, request, *args, **kwargs) -> Response:
"""
Retrieve the counts of object tags that belong to a given object_id pattern

Note: We override `retrieve` here instead of `list` because we are
passing in the Object ID (object_id) in the path (as opposed to passing
it in as a query_param) to retrieve the ObjectTag counts.
"""
# This API does NOT bother doing any permission checks as the # of tags is not considered sensitive information.
object_id_pattern = self.kwargs["object_id_pattern"]
qs: Any = ObjectTag.objects
if object_id_pattern.endswith("*"):
qs = qs.filter(object_id__startswith=object_id_pattern[0:len(object_id_pattern) - 1])
elif "*" in object_id_pattern:
raise ValidationError("Wildcard matches are only supported if the * is at the end.")
else:
qs = qs.filter(object_id__in=object_id_pattern.split(","))

qs = qs.values("object_id").annotate(num_tags=models.Count("id")).order_by("object_id")
return Response({row["object_id"]: row["num_tags"] for row in qs})


@view_auth_classes
class TaxonomyTagsView(ListAPIView, RetrieveUpdateDestroyAPIView):
"""
Expand Down
Loading
Loading