Skip to content

Commit

Permalink
Add default redaction for more field types
Browse files Browse the repository at this point in the history
  • Loading branch information
hugorodgerbrown committed Sep 30, 2023
1 parent d6e4897 commit e1b68d5
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 21 deletions.
54 changes: 36 additions & 18 deletions anonymiser/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

from django.db import models

from .settings import AUTO_REDACT_FIELD_FUNCS

# (old_value, new_value) tuple
AnonymisationResult: TypeAlias = tuple[Any, Any]

Expand All @@ -27,6 +29,12 @@ def get_model_fields(model: type[models.Model]) -> list[models.Field]:
]


def auto_redact(field: models.Field) -> Any:
    """
    Return the default redaction value for a model field instance.

    The redaction function is looked up in AUTO_REDACT_FIELD_FUNCS by
    walking the MRO of the field's class, most specific class first. A
    field whose exact class is not registered therefore falls back to
    the function registered for its nearest registered base class.

    Returns None if no class in the MRO has a registered function.
    """
    # is_field_redaction_auto selects fields using isinstance(), so
    # subclasses of registered field types do reach this function; an
    # exact-class lookup would silently redact them to None.
    for klass in type(field).__mro__:
        if func := AUTO_REDACT_FIELD_FUNCS.get(klass):
            return func(field)
    return None


class _ModelBase:
# Override with the model to be anonymised
model: type[models.Model]
Expand All @@ -37,6 +45,19 @@ def get_model_fields(self) -> list[models.Field]:
raise NotImplementedError("model must be set")
return get_model_fields(self.model)

def exclude_from_anonymisation(self, queryset: models.QuerySet) -> models.QuerySet:
    """
    Hook for removing objects that must not be anonymised.

    The base implementation is a no-op: it returns the queryset it was
    given, unchanged. Subclasses override it to filter objects out -
    the canonical example being a UserAnonymiser that excludes e.g.
    is_staff=True users.
    """
    return queryset


class AnonymiserBase(_ModelBase):
"""Base class for anonymisation functions."""
Expand Down Expand Up @@ -93,7 +114,7 @@ def anonymise_object(self, obj: models.Model) -> None:
def anonymise_queryset(self, queryset: Iterator[models.Model]) -> int:
"""Anonymise all objects in the queryset (and SAVE)."""
count = 0
for obj in queryset:
for obj in self.exclude_from_anonymisation(queryset):
self.anonymise_object(obj)
obj.save()
count += 1
Expand Down Expand Up @@ -139,13 +160,18 @@ def is_field_redaction_auto(self, field: models.Field) -> bool:
keys, unique fields, or in the auto_redact_exclude list.
"""
return (
self.auto_redact
and isinstance(field, (models.CharField, models.TextField))
and not field.choices
and not field.primary_key
and not getattr(field, "unique", False)
and field.name not in self.auto_redact_exclude
if not self.auto_redact:
return False
if field.name in self.auto_redact_exclude:
return False
if field.primary_key:
return False
if field.choices:
return False
if isinstance(field, models.UUIDField):
return self.auto_redact
return isinstance(field, tuple(AUTO_REDACT_FIELD_FUNCS.keys())) and not getattr(
field, "unique", False
)

def is_field_redaction_custom(self, field: models.Field) -> bool:
Expand All @@ -158,7 +184,7 @@ def is_field_redacted(self, field: models.Field) -> bool:
field
)

def auto_field_redactions(self) -> dict[str, str]:
def auto_field_redactions(self) -> dict[str, object | None]:
"""
Return a dict of redaction values for all fields that are auto-redacted.
Expand All @@ -167,16 +193,8 @@ def auto_field_redactions(self) -> dict[str, str]:
unique field.
"""

def _max_length(f: models.Field) -> int:
if isinstance(f, models.CharField):
return f.max_length
if isinstance(f, models.TextField):
return 400
raise ValueError("Field must be CharField or TextField")

return {
f.name: _max_length(f) * "X"
f.name: auto_redact(f)
for f in self.get_model_fields()
if self.is_field_redaction_auto(f)
}
Expand Down
51 changes: 51 additions & 0 deletions anonymiser/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from __future__ import annotations

from typing import Any, Callable

from django.conf import settings as django_settings
from django.db import models
from django.utils import timezone

from .db.functions import GenerateUuid4


def default_redact_charfield(field: models.CharField) -> str:
    """
    Return a string of "X" characters that fills the field.

    The string is `field.max_length` characters long. If the field has
    no max_length (None), a 400 character string is returned instead -
    the same fixed length that is used for TextField redaction.
    """
    max_length = field.max_length
    if max_length is None:
        # NOTE(review): Django can allow a CharField without max_length
        # on some backends - multiplying a str by None would raise
        # TypeError, so fall back to a fixed length.
        return "X" * 400
    return "X" * max_length


def default_redact_textfield(field: models.TextField) -> str:
    """Return a fixed 400 character string of "X"s (the field is not inspected)."""
    redacted_length = 400
    return redacted_length * "X"


def default_redact_datefield(field: models.DateField) -> str:
    """Return the date part of timezone.now() as an ISO format string."""
    current_date = timezone.now().date()
    return current_date.isoformat()


def default_redact_datetimefield(field: models.DateTimeField) -> str:
    """Return timezone.now() as an ISO format string."""
    current_timestamp = timezone.now()
    return current_timestamp.isoformat()


def default_redact_jsonfield(field: models.JSONField) -> dict[str, Any]:
    """Return a new, empty dict (the field is not inspected)."""
    return dict()


def default_redact_uuidfield(field: models.UUIDField) -> GenerateUuid4:
    """
    Return a new GenerateUuid4 instance as the redaction value.

    The return value is a GenerateUuid4 object, not a concrete UUID
    string (the field is not inspected).
    """
    # NOTE(review): GenerateUuid4 is imported from .db.functions -
    # presumably a database function expression that is resolved to a
    # UUID by the database when the update runs; confirm.
    return GenerateUuid4()


# Map of model field class -> function that returns the default
# redaction value for a field of that class. Keys are Field classes
# (not Model classes), and each function is called with the field
# instance being redacted. The argument is typed as Any because each
# function accepts its own specific Field subclass.
AUTO_REDACT_FIELD_FUNCS: dict[
    type[models.Field],
    Callable[[Any], Any],
] = {
    models.CharField: default_redact_charfield,
    models.TextField: default_redact_textfield,
    models.DateField: default_redact_datefield,
    models.DateTimeField: default_redact_datetimefield,
    models.JSONField: default_redact_jsonfield,
    models.UUIDField: default_redact_uuidfield,
}

# update map with any new field types or overrides declared in settings
AUTO_REDACT_FIELD_FUNCS.update(
    getattr(django_settings, "ANONYMISER_AUTO_REDACT_FIELD_FUNCS", {})
)
11 changes: 8 additions & 3 deletions tests/test_models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from unittest import mock

import freezegun
import pytest
from django.db import models

Expand All @@ -19,8 +20,8 @@
("biography", UserAnonymiser.FieldRedactionStrategy.AUTO),
("location", UserAnonymiser.FieldRedactionStrategy.AUTO),
# date / UUID are redacted automatically
("date_of_birth", UserAnonymiser.FieldRedactionStrategy.NONE),
("uuid", UserAnonymiser.FieldRedactionStrategy.NONE),
("date_of_birth", UserAnonymiser.FieldRedactionStrategy.AUTO),
("uuid", UserAnonymiser.FieldRedactionStrategy.AUTO),
],
)
def test_model_fields_redaction_strategy(
Expand Down Expand Up @@ -121,7 +122,7 @@ def test_redact_queryset_two(
(False, "London", "I am a test user"),
],
)
def test_redact_queryset__auto_redact(
def test_redact_queryset__auto_redact_with_override(
self,
user: User,
user_redacter: UserRedacter,
Expand Down Expand Up @@ -154,6 +155,7 @@ def test_redact_queryset__field_overrides__postgres(
user.refresh_from_db()
assert user.uuid != uuid

@freezegun.freeze_time("2021-01-01")
@mock.patch.object(UserRedacter, "get_model_fields")
def test_auto_redact(
self, mock_get_fields: mock.Mock, user_redacter: UserRedacter
Expand All @@ -163,6 +165,8 @@ def test_auto_redact(
models.CharField(name="char_field", max_length=255),
# redact to 400 chars
models.TextField(name="text_field"),
# redact to the current (frozen) date/time
models.DateTimeField(name="date_field"),
# don't redact (choices)
models.CharField(name="choices", max_length=255, choices=[("a", "A")]),
# don't redact (unique)
Expand All @@ -176,6 +180,7 @@ def test_auto_redact(
assert user_redacter.auto_field_redactions() == {
"char_field": 255 * "X",
"text_field": 400 * "X",
"date_field": "2021-01-01",
}


Expand Down

0 comments on commit e1b68d5

Please sign in to comment.