diff --git a/anonymiser/models.py b/anonymiser/models.py
index 5efac38..46e4180 100644
--- a/anonymiser/models.py
+++ b/anonymiser/models.py
@@ -7,6 +7,8 @@
 
 from django.db import models
 
+from .settings import AUTO_REDACT_FIELD_FUNCS
+
 # (old_value, new_value) tuple
 AnonymisationResult: TypeAlias = tuple[Any, Any]
 
@@ -27,6 +29,21 @@ def get_model_fields(model: type[models.Model]) -> list[models.Field]:
     ]
 
 
+def auto_redact(field: models.Field) -> Any:
+    """
+    Return the default redaction value for a field.
+
+    Walks the field type's MRO so that a subclass of a registered field
+    type (e.g. EmailField -> CharField) uses the function registered for
+    its nearest ancestor. Returns None if no function is registered.
+
+    """
+    for klass in type(field).__mro__:
+        if func := AUTO_REDACT_FIELD_FUNCS.get(klass):
+            return func(field)
+    return None
+
+
 class _ModelBase:
     # Override with the model to be anonymised
     model: type[models.Model]
@@ -37,6 +54,19 @@ def get_model_fields(self) -> list[models.Field]:
             raise NotImplementedError("model must be set")
         return get_model_fields(self.model)
 
+    def exclude_from_anonymisation(self, queryset: models.QuerySet) -> models.QuerySet:
+        """
+        Override in subclasses to exclude any objects from anonymisation.
+
+        Canonical example is to exclude certain users from anonymisation
+        - in this case the UserAnonymiser would override this method to
+        exclude e.g. is_staff=True users.
+
+        Default is a noop.
+
+        """
+        return queryset
+
 
 class AnonymiserBase(_ModelBase):
     """Base class for anonymisation functions."""
@@ -93,7 +123,7 @@ def anonymise_object(self, obj: models.Model) -> None:
     def anonymise_queryset(self, queryset: Iterator[models.Model]) -> int:
         """Anonymise all objects in the queryset (and SAVE)."""
         count = 0
-        for obj in queryset:
+        for obj in self.exclude_from_anonymisation(queryset):
             self.anonymise_object(obj)
             obj.save()
             count += 1
@@ -139,13 +169,18 @@ def is_field_redaction_auto(self, field: models.Field) -> bool:
         keys, unique fields, or in the auto_redact_exclude list.
 
         """
-        return (
-            self.auto_redact
-            and isinstance(field, (models.CharField, models.TextField))
-            and not field.choices
-            and not field.primary_key
-            and not getattr(field, "unique", False)
-            and field.name not in self.auto_redact_exclude
+        if not self.auto_redact:
+            return False
+        if field.name in self.auto_redact_exclude:
+            return False
+        if field.primary_key:
+            return False
+        if field.choices:
+            return False
+        if isinstance(field, models.UUIDField):
+            return self.auto_redact
+        return isinstance(field, tuple(AUTO_REDACT_FIELD_FUNCS.keys())) and not getattr(
+            field, "unique", False
         )
 
     def is_field_redaction_custom(self, field: models.Field) -> bool:
@@ -158,7 +193,7 @@ def is_field_redacted(self, field: models.Field) -> bool:
             field
         )
 
-    def auto_field_redactions(self) -> dict[str, str]:
+    def auto_field_redactions(self) -> dict[str, object | None]:
         """
-        Return a dict of redaction_values for all text fields.
+        Return a dict of redaction_values for all auto-redacted fields.
 
@@ -167,16 +202,8 @@ def auto_field_redactions(self) -> dict[str, str]:
         unique field.
 
         """
-
-        def _max_length(f: models.Field) -> int:
-            if isinstance(f, models.CharField):
-                return f.max_length
-            if isinstance(f, models.TextField):
-                return 400
-            raise ValueError("Field must be CharField or TextField")
-
         return {
-            f.name: _max_length(f) * "X"
+            f.name: auto_redact(f)
             for f in self.get_model_fields()
             if self.is_field_redaction_auto(f)
         }
diff --git a/anonymiser/settings.py b/anonymiser/settings.py
new file mode 100644
index 0000000..7d7f1c2
--- /dev/null
+++ b/anonymiser/settings.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+from typing import Any, Callable
+
+from django.conf import settings as django_settings
+from django.db import models
+from django.utils import timezone
+
+from .db.functions import GenerateUuid4
+
+
+def default_redact_charfield(field: models.CharField) -> str:
+    """Return a run of "X" characters as long as the field's max_length."""
+    return "X" * field.max_length
+
+
+def default_redact_textfield(field: models.TextField) -> str:
+    """Return a fixed run of 400 "X" characters."""
+    return "X" * 400
+
+
+def default_redact_datefield(field: models.DateField) -> str:
+    """Return the current date (per timezone.now()) as an ISO 8601 string."""
+    return timezone.now().date().isoformat()
+
+
+def default_redact_datetimefield(field: models.DateTimeField) -> str:
+    """Return the current timestamp as an ISO 8601 string."""
+    return timezone.now().isoformat()
+
+
+def default_redact_jsonfield(field: models.JSONField) -> dict[str, Any]:
+    """Return an empty dict."""
+    return {}
+
+
+def default_redact_uuidfield(field: models.UUIDField) -> GenerateUuid4:
+    """Return a GenerateUuid4 expression rather than a literal value."""
+    return GenerateUuid4()
+
+
+# Map of field class -> function returning that field's default redaction
+# value. Functions are called with the field instance; the parameter is typed
+# loosely so the per-field-type signatures above are all admissible.
+AUTO_REDACT_FIELD_FUNCS: dict[
+    type[models.Field],
+    Callable[[Any], Any],
+] = {
+    models.CharField: default_redact_charfield,
+    models.TextField: default_redact_textfield,
+    models.DateField: default_redact_datefield,
+    models.DateTimeField: default_redact_datetimefield,
+    models.JSONField: default_redact_jsonfield,
+    models.UUIDField: default_redact_uuidfield,
+}
+
+# update map with any new field types or overrides declared in settings
+AUTO_REDACT_FIELD_FUNCS.update(
+    getattr(django_settings, "ANONYMISER_AUTO_REDACT_FIELD_FUNCS", {})
+)
diff --git a/tests/test_models.py b/tests/test_models.py
index 146bbe2..6fba8db 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -1,5 +1,6 @@
 from unittest import mock
 
+import freezegun
 import pytest
 from django.db import models
 
@@ -19,8 +20,8 @@
         ("biography", UserAnonymiser.FieldRedactionStrategy.AUTO),
         ("location", UserAnonymiser.FieldRedactionStrategy.AUTO),
-        # date / UUID not redacted automatically
-        ("date_of_birth", UserAnonymiser.FieldRedactionStrategy.NONE),
-        ("uuid", UserAnonymiser.FieldRedactionStrategy.NONE),
+        # date / UUID are redacted automatically
+        ("date_of_birth", UserAnonymiser.FieldRedactionStrategy.AUTO),
+        ("uuid", UserAnonymiser.FieldRedactionStrategy.AUTO),
     ],
 )
 def test_model_fields_redaction_strategy(
@@ -121,7 +122,7 @@ def test_redact_queryset_two(
             (False, "London", "I am a test user"),
         ],
     )
-    def test_redact_queryset__auto_redact(
+    def test_redact_queryset__auto_redact_with_override(
         self,
         user: User,
         user_redacter: UserRedacter,
@@ -154,6 +155,7 @@ def test_redact_queryset__field_overrides__postgres(
         user.refresh_from_db()
         assert user.uuid != uuid
 
+    @freezegun.freeze_time("2021-01-01")
     @mock.patch.object(UserRedacter, "get_model_fields")
     def test_auto_redact(
         self, mock_get_fields: mock.Mock, user_redacter: UserRedacter
@@ -163,6 +165,8 @@ def test_auto_redact(
             models.CharField(name="char_field", max_length=255),
             # redact to 400 chars
             models.TextField(name="text_field"),
+            # redact to the current (frozen) date
+            models.DateField(name="date_field"),
             # don't redact (choices)
             models.CharField(name="choices", max_length=255, choices=[("a", "A")]),
             # don't redact (unique)
@@ -176,4 +180,5 @@ def test_auto_redact(
         assert user_redacter.auto_field_redactions() == {
             "char_field": 255 * "X",
             "text_field": 400 * "X",
+            "date_field": "2021-01-01",
         }