Skip to content

Commit

Permalink
Refactor model field summary data
Browse files Browse the repository at this point in the history
  • Loading branch information
hugorodgerbrown committed Sep 24, 2023
1 parent 789257a commit 4b3baa5
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 156 deletions.
78 changes: 10 additions & 68 deletions anonymiser/models.py
Original file line number Diff line number Diff line change
@@ -1,76 +1,29 @@
from __future__ import annotations

import logging
from collections import namedtuple
from dataclasses import dataclass
from enum import StrEnum
from enum import StrEnum # 3.11 only
from typing import Any, Iterator, TypeAlias

from django.db import models

# (old_value, new_value) tuple
AnonymisationResult: TypeAlias = tuple[Any, Any]

# Store info about the field and whether it is anonymisable
FieldSummaryTuple = namedtuple(
"FieldSummaryTuple", ("app", "model", "field", "type", "is_anonymisable")
)

logger = logging.getLogger(__name__)


@dataclass
class ModelFieldSummary:
def get_model_fields(model: type[models.Model]) -> list[models.Field]:
"""
Store info about the field and whether it is anonymisable.
Return a list of fields on the model.
This is used to generate a summary of the fields on a model, and how
they are anonymised / redacted - used to generate the documentation.
Removes any related_name fields.
"""

# python rejects "model" as a field name, so we use "app_model"
app_model: models.Model
model_field: models.Field
anonymiser: ModelAnonymiser | None

@property
def model_label(self) -> str:
return self.app_model._meta.label

@property
def app(self) -> str:
return self.app_model._meta.app_label

@property
def model(self) -> str:
return self.app_model._meta.object_name or ""

@property
def field_name(self) -> str:
return self.model_field.name

@property
def field_type(self) -> str:
return self.model_field.__class__.__name__

@property
def is_anonymised(self) -> bool:
if self.anonymiser:
return self.anonymiser.is_field_anonymised(self.model_field)
return False

@property
def is_redacted(self) -> bool:
if self.anonymiser:
return self.anonymiser.is_field_redacted(self.model_field)
return False

@property
def redaction_strategy(self) -> RedacterBase.FieldRedactionStratgy:
if self.anonymiser:
return self.anonymiser.field_redaction_strategy(self.model_field)
return RedacterBase.FieldRedactionStratgy.NONE
return [
f
for f in model._meta.get_fields()
if not isinstance(f, models.ForeignObjectRel)
]


class _ModelBase:
Expand All @@ -81,11 +34,7 @@ def get_model_fields(self) -> list[models.Field]:
"""Return a list of fields on the model."""
if not self.model:
raise NotImplementedError("model must be set")
return [
f
for f in self.model._meta.get_fields()
if not isinstance(f, models.ForeignObjectRel)
]
return get_model_fields(self.model)


class AnonymiserBase(_ModelBase):
Expand Down Expand Up @@ -277,10 +226,3 @@ class ModelAnonymiser(AnonymiserBase, RedacterBase):
for models that do not need to be anonymised.
"""

def get_model_field_summary(self) -> list[ModelFieldSummary]:
"""Return a list of ModelFieldSummary objects for the model."""
return [
ModelFieldSummary(app_model=self.model, model_field=f, anonymiser=self)
for f in self.get_model_fields()
]
80 changes: 74 additions & 6 deletions anonymiser/registry.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import dataclasses
import logging
import threading
from collections import defaultdict

from django.apps import apps
from django.db import models

from .models import ModelAnonymiser, ModelFieldSummary
from .models import ModelAnonymiser

lock = threading.Lock()
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -35,22 +36,89 @@ def register(anonymiser: type[ModelAnonymiser]) -> None:


def anonymisable_models() -> list[type[models.Model]]:
return list(_registry.keys())
with lock:
return list(_registry.keys())


def not_anonymisable_models() -> list[type[models.Model]]:
    """Return the models from `apps.get_models()` that are not in the registry."""
    with lock:
        unregistered = []
        for candidate in apps.get_models():
            if candidate in _registry:
                continue
            unregistered.append(candidate)
        return unregistered


def anonymisers() -> list[type[ModelAnonymiser]]:
return list(_registry.values())
with lock:
return list(_registry.values())


def get_model_anonymiser(
model: type[models.Model],
) -> ModelAnonymiser | None:
"""Return newly instantiated anonymiser for model."""
if anonymiser := _registry.get(model):
return anonymiser()
with lock:
if anonymiser := _registry.get(model):
return anonymiser()
return None


@dataclasses.dataclass
class ModelFieldSummary:
    """
    Store info about the field and whether it is anonymisable.

    This is used to generate a summary of the fields on a model, and how
    they are anonymised / redacted - used to generate the documentation.

    Only `field` is supplied by the caller; the anonymiser is looked up
    via `get_model_anonymiser` using the model the field belongs to.
    """

    field: models.Field
    # Derived from `field` in __post_init__, so it is excluded from the
    # generated __eq__: get_model_anonymiser returns a newly instantiated
    # anonymiser on every call, which would otherwise make two summaries
    # of the same field compare unequal (unless the anonymiser class
    # defines its own __eq__ - not visible from here).
    anonymiser: ModelAnonymiser | None = dataclasses.field(
        init=False, compare=False
    )

    def __post_init__(self) -> None:
        """Look up the anonymiser (if any) for the field's model."""
        self.anonymiser = get_model_anonymiser(self.model)

    @property
    def model(self) -> type[models.Model]:
        """Return the model class that the field reports as its owner."""
        return self.field.model

    @property
    def app_name(self) -> str:
        """Return the app label of the field's model."""
        return self.model._meta.app_label

    @property
    def model_name(self) -> str:
        """Return `_meta.model_name` of the field's model, or "" if unset."""
        # Same `or ""` fallback the previous implementation applied to
        # object_name; keeps the declared `str` return type honest.
        return self.model._meta.model_name or ""

    @property
    def model_label(self) -> str:
        """Return `_meta.label` of the field's model."""
        return self.model._meta.label

    @property
    def field_name(self) -> str:
        """Return the name of the field."""
        return self.field.name

    @property
    def field_type(self) -> str:
        """Return the class name of the field, e.g. "CharField"."""
        return self.field.__class__.__name__

    @property
    def is_anonymised(self) -> bool:
        """Return the anonymiser's verdict for the field; False if none."""
        if not self.anonymiser:
            return False
        return self.anonymiser.is_field_anonymised(self.field)

    @property
    def is_redacted(self) -> bool:
        """Return the anonymiser's redaction verdict; False if none."""
        if not self.anonymiser:
            return False
        return self.anonymiser.is_field_redacted(self.field)

    @property
    def redaction_strategy(self) -> ModelAnonymiser.FieldRedactionStratgy:
        """Return the anonymiser's strategy for the field; NONE if none."""
        if not self.anonymiser:
            return ModelAnonymiser.FieldRedactionStratgy.NONE
        return self.anonymiser.field_redaction_strategy(self.field)


def get_all_model_fields(
anonymised_only: bool = False,
) -> dict[str, list[ModelFieldSummary]]:
Expand All @@ -70,7 +138,7 @@ def get_all_model_fields(
if anonymised_only and not anonymiser:
continue
for f in m._meta.get_fields():
output[m._meta.label].append(ModelFieldSummary(m, f, anonymiser))
output[m._meta.label].append(ModelFieldSummary(f))
# sort fields by type then name - easier to scan.
output[m._meta.label].sort(key=lambda d: f"{d.field_type}.{d.field_name}")
return dict(output)
4 changes: 2 additions & 2 deletions anonymiser/templates/display_model_anonymisation.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
**DEMO PURPOSES ONLY**
## Model field anonymisation
App | Model | Field | Type | Anonymise | Redact
--- | --- | --- | --- | --- | ---{% for model,fields in model_fields.items %}{% for field in fields %}
{{ field.app }} | {{ field.model }} | {{ field.field_name }} | {{ field.field_type }} | {{ field.is_anonymised|default:"-" }} | {{ field.redaction_strategy|default:"-"|upper }}{% endfor %}{% endfor %}
--- | --- | --- | --- | --- | ---{% for model,fields in model_fields.items %}{% for field in fields %}
{{ field.app_name }} | {{ field.model_name }} | {{ field.field_name }} | {{ field.field_type }} | {{ field.is_anonymised|default:"-" }} | {{ field.redaction_strategy|default:"-"|upper }}{% endfor %}{% endfor %}
90 changes: 45 additions & 45 deletions tests/model_anonymisation.md
Original file line number Diff line number Diff line change
@@ -1,48 +1,48 @@
**DEMO PURPOSES ONLY**
## Model field anonymisation
App | Model | Field | Type | Anonymise | Redact
--- | --- | --- | --- | --- | ---
admin | LogEntry | id | AutoField | - | -
admin | LogEntry | object_repr | CharField | - | -
admin | LogEntry | action_time | DateTimeField | - | -
admin | LogEntry | content_type | ForeignKey | - | -
admin | LogEntry | user | ForeignKey | - | -
admin | LogEntry | action_flag | PositiveSmallIntegerField | - | -
admin | LogEntry | change_message | TextField | - | -
admin | LogEntry | object_id | TextField | - | -
auth | Group | id | AutoField | - | -
auth | Group | name | CharField | - | -
auth | Group | permissions | ManyToManyField | - | -
auth | Group | user | ManyToManyRel | - | -
auth | Permission | id | AutoField | - | -
auth | Permission | codename | CharField | - | -
auth | Permission | name | CharField | - | -
auth | Permission | content_type | ForeignKey | - | -
auth | Permission | group | ManyToManyRel | - | -
auth | Permission | user | ManyToManyRel | - | -
contenttypes | ContentType | id | AutoField | - | -
contenttypes | ContentType | app_label | CharField | - | -
contenttypes | ContentType | model | CharField | - | -
contenttypes | ContentType | logentry | ManyToOneRel | - | -
contenttypes | ContentType | permission | ManyToOneRel | - | -
sessions | Session | session_key | CharField | - | -
sessions | Session | expire_date | DateTimeField | - | -
sessions | Session | session_data | TextField | - | -
tests | User | id | AutoField | - | -
tests | User | is_active | BooleanField | - | -
tests | User | is_staff | BooleanField | - | -
tests | User | is_superuser | BooleanField | - | -
tests | User | first_name | CharField | True | CUSTOM
tests | User | last_name | CharField | - | AUTO
tests | User | location | CharField | - | AUTO
tests | User | password | CharField | - | AUTO
tests | User | username | CharField | - | -
tests | User | date_of_birth | DateField | - | -
tests | User | date_joined | DateTimeField | - | -
tests | User | last_login | DateTimeField | - | -
tests | User | email | EmailField | - | AUTO
tests | User | groups | ManyToManyField | - | -
tests | User | user_permissions | ManyToManyField | - | -
tests | User | logentry | ManyToOneRel | - | -
tests | User | biography | TextField | - | AUTO
tests | User | uuid | UUIDField | - | -
--- | --- | --- | --- | --- | ---
admin | logentry | id | AutoField | - | -
admin | logentry | object_repr | CharField | - | -
admin | logentry | action_time | DateTimeField | - | -
admin | logentry | content_type | ForeignKey | - | -
admin | logentry | user | ForeignKey | - | -
admin | logentry | action_flag | PositiveSmallIntegerField | - | -
admin | logentry | change_message | TextField | - | -
admin | logentry | object_id | TextField | - | -
auth | group | id | AutoField | - | -
auth | group | name | CharField | - | -
auth | group | permissions | ManyToManyField | - | -
auth | group | user | ManyToManyRel | - | -
auth | permission | id | AutoField | - | -
auth | permission | codename | CharField | - | -
auth | permission | name | CharField | - | -
auth | permission | content_type | ForeignKey | - | -
auth | permission | group | ManyToManyRel | - | -
auth | permission | user | ManyToManyRel | - | -
contenttypes | contenttype | id | AutoField | - | -
contenttypes | contenttype | app_label | CharField | - | -
contenttypes | contenttype | model | CharField | - | -
contenttypes | contenttype | logentry | ManyToOneRel | - | -
contenttypes | contenttype | permission | ManyToOneRel | - | -
sessions | session | session_key | CharField | - | -
sessions | session | expire_date | DateTimeField | - | -
sessions | session | session_data | TextField | - | -
tests | user | id | AutoField | - | -
tests | user | is_active | BooleanField | - | -
tests | user | is_staff | BooleanField | - | -
tests | user | is_superuser | BooleanField | - | -
tests | user | first_name | CharField | True | CUSTOM
tests | user | last_name | CharField | - | AUTO
tests | user | location | CharField | - | AUTO
tests | user | password | CharField | - | AUTO
tests | user | username | CharField | - | -
tests | user | date_of_birth | DateField | - | -
tests | user | date_joined | DateTimeField | - | -
tests | user | last_login | DateTimeField | - | -
tests | user | email | EmailField | - | AUTO
tests | user | groups | ManyToManyField | - | -
tests | user | user_permissions | ManyToManyField | - | -
tests | user | logentry | ManyToOneRel | - | -
tests | user | biography | TextField | - | AUTO
tests | user | uuid | UUIDField | - | -
47 changes: 13 additions & 34 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,44 +4,23 @@
from django.db import models

from anonymiser.db.functions import GenerateUuid4
from anonymiser.models import ModelFieldSummary
from anonymiser.registry import ModelFieldSummary

from .anonymisers import BadUserAnonymiser, UserAnonymiser, UserRedacter
from .models import User


def test_model_fields_summary(user_anonymiser: UserAnonymiser) -> None:
f = lambda field_name: User._meta.get_field(field_name)
assert user_anonymiser.get_model_field_summary() == [
ModelFieldSummary(User, f("id"), user_anonymiser),
ModelFieldSummary(User, f("password"), user_anonymiser),
ModelFieldSummary(User, f("last_login"), user_anonymiser),
ModelFieldSummary(User, f("is_superuser"), user_anonymiser),
ModelFieldSummary(User, f("username"), user_anonymiser),
ModelFieldSummary(User, f("first_name"), user_anonymiser),
ModelFieldSummary(User, f("last_name"), user_anonymiser),
ModelFieldSummary(User, f("email"), user_anonymiser),
ModelFieldSummary(User, f("is_staff"), user_anonymiser),
ModelFieldSummary(User, f("is_active"), user_anonymiser),
ModelFieldSummary(User, f("date_joined"), user_anonymiser),
ModelFieldSummary(User, f("uuid"), user_anonymiser),
ModelFieldSummary(User, f("location"), user_anonymiser),
ModelFieldSummary(User, f("biography"), user_anonymiser),
ModelFieldSummary(User, f("date_of_birth"), user_anonymiser),
ModelFieldSummary(User, f("groups"), user_anonymiser),
ModelFieldSummary(User, f("user_permissions"), user_anonymiser),
]


def test_model_fields_data(user_anonymiser: UserAnonymiser) -> None:
mfs = ModelFieldSummary(User, User._meta.get_field("first_name"), user_anonymiser)
assert mfs.app == "tests"
assert mfs.model == "User"
assert mfs.field_name == "first_name"
assert mfs.field_type == "CharField"
assert mfs.is_anonymised is True
assert mfs.is_redacted is True
assert mfs.redaction_strategy == user_anonymiser.FieldRedactionStratgy.CUSTOM
# def test_model_fields_data() -> None:
# mfs = ModelFieldSummary(User._meta.get_field("first_name"))
# assert mfs.app_name == "tests"
# assert mfs.model == User
# assert mfs.model_label == "tests.User"
# assert mfs.field_name == "first_name"
# assert mfs.field_type == "CharField"
# assert mfs.anonymiser.__class__ == UserAnonymiser
# assert mfs.is_anonymised is True
# assert mfs.is_redacted is True
# assert mfs.redaction_strategy == UserAnonymiser.FieldRedactionStratgy.CUSTOM


@pytest.mark.parametrize(
Expand All @@ -61,7 +40,7 @@ def test_model_fields_redaction_strategy(
field_name: str, strategy: str, user_anonymiser: UserAnonymiser
) -> None:
field = User._meta.get_field(field_name)
mfs = ModelFieldSummary(User, field, user_anonymiser)
mfs = ModelFieldSummary(field)
assert mfs.redaction_strategy == strategy


Expand Down
Loading

0 comments on commit 4b3baa5

Please sign in to comment.