From e9a2ff5588297cf0d1498a6293bce198e718ca93 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 9 Nov 2023 12:29:00 -0800 Subject: [PATCH 1/3] feat: Make get_object_tag_counts available as a python API --- openedx_tagging/core/tagging/api.py | 20 +++++++++++++++++++ .../core/tagging/rest_api/v1/views.py | 19 ++++++------------ .../openedx_tagging/core/tagging/test_api.py | 18 +++++++++++++++++ .../core/tagging/test_views.py | 8 ++++++++ 4 files changed, 52 insertions(+), 13 deletions(-) diff --git a/openedx_tagging/core/tagging/api.py b/openedx_tagging/core/tagging/api.py index 8c5ed764..c9dc1b58 100644 --- a/openedx_tagging/core/tagging/api.py +++ b/openedx_tagging/core/tagging/api.py @@ -12,6 +12,8 @@ """ from __future__ import annotations +from typing import Any + from django.db import models, transaction from django.db.models import F, QuerySet, Value from django.db.models.functions import Coalesce, Concat, Lower @@ -185,6 +187,24 @@ def get_object_tags( return tags +def get_object_tag_counts(object_id_pattern: str) -> dict[str, int]: + """ + Given an object ID, a "starts with" glob pattern like + "course-v1:foo+bar+baz@*", or a list of "comma,separated,IDs", return a + dict of matching object IDs and how many tags each object has. + """ + qs: Any = ObjectTag.objects + if object_id_pattern.endswith("*"): + qs = qs.filter(object_id__startswith=object_id_pattern[0:len(object_id_pattern) - 1]) + elif "*" in object_id_pattern: + raise ValueError("Wildcard matches are only supported if the * is at the end.") + else: + qs = qs.filter(object_id__in=object_id_pattern.split(",")) + + qs = qs.values("object_id").annotate(num_tags=models.Count("id")).order_by("object_id") + return {row["object_id"]: row["num_tags"] for row in qs} + + def delete_object_tags(object_id: str): """ Delete all ObjectTag entries for a given object. diff --git a/openedx_tagging/core/tagging/rest_api/v1/views.py b/openedx_tagging/core/tagging/rest_api/v1/views.py index 170a80c5..a531e5bc 100644 --- a/openedx_tagging/core/tagging/rest_api/v1/views.py +++ b/openedx_tagging/core/tagging/rest_api/v1/views.py @@ -3,8 +3,6 @@ """ from __future__ import annotations -from typing import Any - from django.db import models from django.http import Http404, HttpResponse from rest_framework import mixins, status @@ -20,6 +18,7 @@ add_tag_to_taxonomy, create_taxonomy, delete_tags_from_taxonomy, + get_object_tag_counts, get_object_tags, get_taxonomies, get_taxonomy, @@ -29,7 +28,7 @@ from ...data import TagDataQuerySet from ...import_export.api import export_tags, get_last_import_log, import_tags from ...import_export.parsers import ParserFormat -from ...models import ObjectTag, Taxonomy +from ...models import Taxonomy from ...rules import ObjectTagPermissionItem from ..paginators import TAGS_THRESHOLD, DisabledTagsPagination, TagsPagination from .permissions import ObjectTagObjectPermissions, TagObjectPermissions, TaxonomyObjectPermissions @@ -517,16 +516,10 @@ def retrieve(self, request, *args, **kwargs) -> Response: """ # This API does NOT bother doing any permission checks as the # of tags is not considered sensitive information. object_id_pattern = self.kwargs["object_id_pattern"] - qs: Any = ObjectTag.objects - if object_id_pattern.endswith("*"): - qs = qs.filter(object_id__startswith=object_id_pattern[0:len(object_id_pattern) - 1]) - elif "*" in object_id_pattern: - raise ValidationError("Wildcard matches are only supported if the * is at the end.") - else: - qs = qs.filter(object_id__in=object_id_pattern.split(",")) - - qs = qs.values("object_id").annotate(num_tags=models.Count("id")).order_by("object_id") - return Response({row["object_id"]: row["num_tags"] for row in qs}) + try: + return Response(get_object_tag_counts(object_id_pattern)) + except ValueError as err: + raise ValidationError(err.args[0]) from err @view_auth_classes diff --git a/tests/openedx_tagging/core/tagging/test_api.py b/tests/openedx_tagging/core/tagging/test_api.py index a8c2e756..665a6b8e 100644 --- a/tests/openedx_tagging/core/tagging/test_api.py +++ b/tests/openedx_tagging/core/tagging/test_api.py @@ -673,3 +673,21 @@ def test_autocomplete_tags_closed_omit_object(self) -> None: # "Bacteria (children: 2)", # does not contain "cha" but a child does # " Archaebacteria (children: 0)", ] + + def test_get_object_tag_counts(self) -> None: + """ + Smoke test of get_object_tag_counts + """ + obj1 = "object_id1" + obj2 = "object_id2" + other = "other_object" + # Give each object 1-2 tags: + tagging_api.tag_object(object_id=obj1, taxonomy=self.taxonomy, tags=["DPANN"]) + tagging_api.tag_object(object_id=obj2, taxonomy=self.taxonomy, tags=["Chordata"]) + tagging_api.tag_object(object_id=obj2, taxonomy=self.free_text_taxonomy, tags=["has a notochord"]) + tagging_api.tag_object(object_id=other, taxonomy=self.free_text_taxonomy, tags=["other"]) + + assert tagging_api.get_object_tag_counts(obj1) == {obj1: 1} + assert tagging_api.get_object_tag_counts(obj2) == {obj2: 2} + assert tagging_api.get_object_tag_counts(f"{obj1},{obj2}") == {obj1: 1, obj2: 2} + assert tagging_api.get_object_tag_counts("object_*") == {obj1: 1, obj2: 2} diff --git a/tests/openedx_tagging/core/tagging/test_views.py b/tests/openedx_tagging/core/tagging/test_views.py index c11de1a2..a6886ae1 100644 --- a/tests/openedx_tagging/core/tagging/test_views.py +++ b/tests/openedx_tagging/core/tagging/test_views.py @@ -1025,6 +1025,14 @@ def check(object_id_pattern: str): "course07-unit02-problem02": 2, } + def test_get_counts_invalid_spec(self): + """ + Test handling of an invalid object tag pattern + """ + result = self.client.get(OBJECT_TAG_COUNTS_URL.format(object_id_pattern="abc*def")) + assert result.status_code == status.HTTP_400_BAD_REQUEST + assert "Wildcard matches are only supported if the * is at the end." in str(result.content) + class TestTaxonomyTagsView(TestTaxonomyViewMixin): """ From b42dfabd00389133d7e03447ccb3970071156f74 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 9 Nov 2023 14:04:05 -0800 Subject: [PATCH 2/3] feat: APIs should not return deleted/disabled tags by default --- openedx_tagging/core/tagging/api.py | 20 +++- .../openedx_tagging/core/tagging/test_api.py | 112 ++++++++++++------ .../core/tagging/test_models.py | 6 +- .../core/tagging/test_views.py | 3 + 4 files changed, 101 insertions(+), 40 deletions(-) diff --git a/openedx_tagging/core/tagging/api.py b/openedx_tagging/core/tagging/api.py index c9dc1b58..64d5c6a8 100644 --- a/openedx_tagging/core/tagging/api.py +++ b/openedx_tagging/core/tagging/api.py @@ -159,6 +159,7 @@ def resync_object_tags(object_tags: QuerySet | None = None) -> int: def get_object_tags( object_id: str, taxonomy_id: int | None = None, + include_deleted: bool = False, object_tag_class: type[ObjectTag] = ObjectTag ) -> QuerySet[ObjectTag]: """ @@ -167,8 +168,16 @@ def get_object_tags( Pass taxonomy_id to limit the returned object_tags to a specific taxonomy. """ filters = {"taxonomy_id": taxonomy_id} if taxonomy_id else {} + base_qs = ( + object_tag_class.objects + .filter(object_id=object_id, **filters) + .exclude(taxonomy__enabled=False) # Exclude if the whole taxonomy is disabled + ) + if not include_deleted: + base_qs = base_qs.exclude(taxonomy_id=None) # Exclude if the whole taxonomy was deleted + base_qs = base_qs.exclude(tag_id=None, taxonomy__allow_free_text=False) # Exclude if just the tag is deleted tags = ( - object_tag_class.objects.filter(object_id=object_id, **filters) + base_qs # Preload related objects, including data for the "get_lineage" method on ObjectTag/Tag: .select_related("taxonomy", "tag", "tag__parent", "tag__parent__parent") # Sort the tags within each taxonomy in "tree order". See Taxonomy._get_filtered_tags_deep for details on this: @@ -192,7 +201,11 @@ def get_object_tag_counts(object_id_pattern: str) -> dict[str, int]: Given an object ID, a "starts with" glob pattern like "course-v1:foo+bar+baz@*", or a list of "comma,separated,IDs", return a dict of matching object IDs and how many tags each object has. + + Deleted tags and disabled taxonomies are excluded from the counts, even if + ObjectTag data about them is present. """ + # Note: in the future we may add an option to exclude system taxonomies from the count. qs: Any = ObjectTag.objects if object_id_pattern.endswith("*"): qs = qs.filter(object_id__startswith=object_id_pattern[0:len(object_id_pattern) - 1]) @@ -200,7 +213,10 @@ def get_object_tag_counts(object_id_pattern: str) -> dict[str, int]: raise ValueError("Wildcard matches are only supported if the * is at the end.") else: qs = qs.filter(object_id__in=object_id_pattern.split(",")) - + # Don't include deleted tags or disabled taxonomies: + qs = qs.exclude(taxonomy_id=None) # The whole taxonomy was deleted + qs = qs.exclude(taxonomy__enabled=False) # The whole taxonomy is disabled + qs = qs.exclude(tag_id=None, taxonomy__allow_free_text=False) # The taxonomy exists but the tag is deleted qs = qs.values("object_id").annotate(num_tags=models.Count("id")).order_by("object_id") return {row["object_id"]: row["num_tags"] for row in qs} diff --git a/tests/openedx_tagging/core/tagging/test_api.py b/tests/openedx_tagging/core/tagging/test_api.py index 665a6b8e..4173c5a1 100644 --- a/tests/openedx_tagging/core/tagging/test_api.py +++ b/tests/openedx_tagging/core/tagging/test_api.py @@ -265,7 +265,7 @@ def test_resync_object_tags(self) -> None: tagging_api.tag_object(open_taxonomy, ["foo", "bar"], object_id) # Free text tags # At first, none of these will be deleted: - assert [(t.value, t.is_deleted) for t in tagging_api.get_object_tags(object_id)] == [ + assert [(t.value, t.is_deleted) for t in tagging_api.get_object_tags(object_id, include_deleted=True)] == [ ("bar", False), ("foo", False), (self.archaea.value, False), @@ -275,7 +275,7 @@ def test_resync_object_tags(self) -> None: # Delete "bacteria" from the taxonomy: tagging_api.delete_tags_from_taxonomy(self.taxonomy, [self.bacteria.value], with_subtags=True) - assert [(t.value, t.is_deleted) for t in tagging_api.get_object_tags(object_id)] == [ + assert [(t.value, t.is_deleted) for t in tagging_api.get_object_tags(object_id, include_deleted=True)] == [ ("bar", False), ("foo", False), (self.archaea.value, False), @@ -293,7 +293,7 @@ def test_resync_object_tags(self) -> None: assert changed == 1 # Now the tag is not deleted: - assert [(t.value, t.is_deleted) for t in tagging_api.get_object_tags(object_id)] == [ + assert [(t.value, t.is_deleted) for t in tagging_api.get_object_tags(object_id, include_deleted=True)] == [ ("bar", False), ("foo", False), (self.archaea.value, False), @@ -342,19 +342,21 @@ def test_tag_object(self): assert object_tag.name == self.taxonomy.name assert object_tag.object_id == "biology101" - def test_tag_object_free_text(self): - self.taxonomy.allow_free_text = True + def test_tag_object_free_text(self) -> None: + """ + Test tagging an object using a free text taxonomy + """ tagging_api.tag_object( - self.taxonomy, - ["Eukaryota Xenomorph"], - "biology101", + object_id="biology101", + taxonomy=self.free_text_taxonomy, + tags=["Eukaryota Xenomorph"], ) object_tags = tagging_api.get_object_tags("biology101") assert len(object_tags) == 1 object_tag = object_tags[0] object_tag.full_clean() # Should not raise any ValidationErrors - assert object_tag.taxonomy == self.taxonomy - assert object_tag.name == self.taxonomy.name + assert object_tag.taxonomy == self.free_text_taxonomy + assert object_tag.name == self.free_text_taxonomy.name assert object_tag._value == "Eukaryota Xenomorph" # pylint: disable=protected-access assert object_tag.get_lineage() == ["Eukaryota Xenomorph"] assert object_tag.object_id == "biology101" @@ -561,32 +563,42 @@ def test_tag_object_limit(self) -> None: assert exc.exception assert "Cannot add more than 100 tags to" in str(exc.exception) - def test_get_object_tags(self) -> None: - # Alpha tag has no taxonomy (as if the taxonomy had been deleted) - alpha = ObjectTag(object_id="abc") - alpha.name = self.taxonomy.name - alpha.value = "alpha" - alpha.save() - # Beta tag has a closed taxonomy - beta = ObjectTag.objects.create( - object_id="abc", - taxonomy=self.taxonomy, - tag=self.taxonomy.tag_set.get(value="Protista"), - ) - - # Fetch all the tags for a given object ID - assert list( - tagging_api.get_object_tags(object_id="abc") - ) == [ - alpha, - beta, + def test_get_object_tags_deleted_disabled(self) -> None: + """ + Test that get_object_tags doesn't return tags from disabled taxonomies + or tags that have been deleted or taxonomies that have been deleted. + """ + obj_id = "object_id1" + self.taxonomy.allow_multiple = True + self.taxonomy.save() + disabled_taxonomy = tagging_api.create_taxonomy("Disabled Taxonomy", allow_free_text=True) + tagging_api.tag_object(object_id=obj_id, taxonomy=self.taxonomy, tags=["DPANN", "Chordata"]) + tagging_api.tag_object(object_id=obj_id, taxonomy=self.language_taxonomy, tags=["English"]) + tagging_api.tag_object(object_id=obj_id, taxonomy=self.free_text_taxonomy, tags=["has a notochord"]) + tagging_api.tag_object(object_id=obj_id, taxonomy=disabled_taxonomy, tags=["disabled tag"]) + + def get_object_tags(): + return [f"{ot.name}: {'>'.join(ot.get_lineage())}" for ot in tagging_api.get_object_tags(obj_id)] + + # Before deleting/disabling: + assert get_object_tags() == [ + "Disabled Taxonomy: disabled tag", + "Free Text: has a notochord", + "Languages: English", + "Life on Earth: Archaea>DPANN", + "Life on Earth: Eukaryota>Animalia>Chordata", ] - # Fetch all the tags for a given object ID + taxonomy - assert list( - tagging_api.get_object_tags(object_id="abc", taxonomy_id=self.taxonomy.pk) - ) == [ - beta, + # Now delete and disable things: + disabled_taxonomy.enabled = False + disabled_taxonomy.save() + self.free_text_taxonomy.delete() + tagging_api.delete_tags_from_taxonomy(self.taxonomy, ["DPANN"], with_subtags=False) + + # Now retrieve the tags again: + assert get_object_tags() == [ + "Languages: English", + "Life on Earth: Eukaryota>Animalia>Chordata", ] @ddt.data( @@ -676,7 +688,7 @@ def test_autocomplete_tags_closed_omit_object(self) -> None: def test_get_object_tag_counts(self) -> None: """ - Smoke test of get_object_tag_counts + Basic test of get_object_tag_counts """ obj1 = "object_id1" obj2 = "object_id2" @@ -691,3 +703,33 @@ def test_get_object_tag_counts(self) -> None: assert tagging_api.get_object_tag_counts(obj2) == {obj2: 2} assert tagging_api.get_object_tag_counts(f"{obj1},{obj2}") == {obj1: 1, obj2: 2} assert tagging_api.get_object_tag_counts("object_*") == {obj1: 1, obj2: 2} + + def test_get_object_tag_counts_deleted_disabled(self) -> None: + """ + Test that get_object_tag_counts doesn't "count" disabled taxonomies or + deleted tags. + """ + obj1 = "object_id1" + obj2 = "object_id2" + # Give each object 2 tags: + tagging_api.tag_object(object_id=obj1, taxonomy=self.taxonomy, tags=["DPANN"]) + tagging_api.tag_object(object_id=obj1, taxonomy=self.language_taxonomy, tags=["English"]) + tagging_api.tag_object(object_id=obj2, taxonomy=self.taxonomy, tags=["Chordata"]) + tagging_api.tag_object(object_id=obj2, taxonomy=self.free_text_taxonomy, tags=["has a notochord"]) + + # Before we delete tags / disable taxonomies, the counts are two each: + assert tagging_api.get_object_tag_counts("object_*") == {obj1: 2, obj2: 2} + # Delete the "DPANN" tag from self.taxonomy: + tagging_api.delete_tags_from_taxonomy(self.taxonomy, tags=["DPANN"], with_subtags=False) + assert tagging_api.get_object_tag_counts("object_*") == {obj1: 1, obj2: 2} + # Disable the free text taxonomy: + self.free_text_taxonomy.enabled = False + self.free_text_taxonomy.save() + assert tagging_api.get_object_tag_counts("object_*") == {obj1: 1, obj2: 1} + + # But, by the way, if we re-enable the taxonomy and restore the tag, the counts return: + self.free_text_taxonomy.enabled = True + self.free_text_taxonomy.save() + assert tagging_api.get_object_tag_counts("object_*") == {obj1: 1, obj2: 2} + tagging_api.add_tag_to_taxonomy(self.taxonomy, "DPANN", parent_tag_value="Archaea") + assert tagging_api.get_object_tag_counts("object_*") == {obj1: 2, obj2: 2} diff --git a/tests/openedx_tagging/core/tagging/test_models.py b/tests/openedx_tagging/core/tagging/test_models.py index 88bfeaad..367b735b 100644 --- a/tests/openedx_tagging/core/tagging/test_models.py +++ b/tests/openedx_tagging/core/tagging/test_models.py @@ -766,7 +766,7 @@ def test_is_deleted(self): api.tag_object(self.free_text_taxonomy, ["foo", "bar", "tribble"], object_id) # Free text tags # At first, none of these will be deleted: - assert [(t.value, t.is_deleted) for t in api.get_object_tags(object_id)] == [ + assert [(t.value, t.is_deleted) for t in api.get_object_tags(object_id, include_deleted=True)] == [ ("bar", False), ("foo", False), ("tribble", False), @@ -777,7 +777,7 @@ def test_is_deleted(self): # Delete "bacteria" from the taxonomy: api.delete_tags_from_taxonomy(self.taxonomy, ["Bacteria"], with_subtags=True) - assert [(t.value, t.is_deleted) for t in api.get_object_tags(object_id)] == [ + assert [(t.value, t.is_deleted) for t in api.get_object_tags(object_id, include_deleted=True)] == [ ("bar", False), ("foo", False), ("tribble", False), @@ -788,7 +788,7 @@ def test_is_deleted(self): # Then delete the whole free text taxonomy self.free_text_taxonomy.delete() - assert [(t.value, t.is_deleted) for t in api.get_object_tags(object_id)] == [ + assert [(t.value, t.is_deleted) for t in api.get_object_tags(object_id, include_deleted=True)] == [ ("bar", True), # <--- Deleted, but the value is preserved ("foo", True), # <--- Deleted, but the value is preserved ("tribble", True), # <--- Deleted, but the value is preserved diff --git a/tests/openedx_tagging/core/tagging/test_views.py b/tests/openedx_tagging/core/tagging/test_views.py index a6886ae1..2dd3593d 100644 --- a/tests/openedx_tagging/core/tagging/test_views.py +++ b/tests/openedx_tagging/core/tagging/test_views.py @@ -926,6 +926,9 @@ def test_tag_object_clear(self, user_attr, taxonomy_flags, expected_status): assert response.data[object_id]["taxonomies"] == [] else: # Make sure the object tags are unchanged: + if not self.taxonomy.enabled: # The taxonomy has to be enabled for us to see the tags though: + self.taxonomy.enabled = True + self.taxonomy.save() assert [ot.value for ot in api.get_object_tags(object_id=object_id)] == ["Fungi"] @ddt.data( From 9c19cf3816dd7f3adfa5e2bca05ec89647ce675a Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Mon, 13 Nov 2023 12:27:25 -0800 Subject: [PATCH 3/3] chore: Version bump --- openedx_learning/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py index a5b838c5..ade40e2c 100644 --- a/openedx_learning/__init__.py +++ b/openedx_learning/__init__.py @@ -1,4 +1,4 @@ """ Open edX Learning ("Learning Core"). """ -__version__ = "0.3.3" +__version__ = "0.3.4"