From bf377001b39b6cb77b8cbb382e809547bf47440d Mon Sep 17 00:00:00 2001 From: Hugo Rodger-Brown Date: Mon, 18 Sep 2023 12:30:09 +0100 Subject: [PATCH] Update redaction --- anonymiser/models.py | 68 ++++++++++++++++++++++++++++++++--------- pyproject.toml | 2 ++ tests/test_models_pg.py | 3 +- 3 files changed, 57 insertions(+), 16 deletions(-) diff --git a/anonymiser/models.py b/anonymiser/models.py index 610c199..3679f55 100644 --- a/anonymiser/models.py +++ b/anonymiser/models.py @@ -69,7 +69,7 @@ class BaseAnonymiser: # override with a list of fields to exclude from anonymisation report exclude_rules = (lambda f: f.is_relation or isinstance(f, models.AutoField),) - # field_name: redaction_value. redactoin_value can be a static value or a + # field_name: redaction_value. redaction_value can be a static value or a # callable, such as a function (e.g. F expression) or a class (e.g. Func). field_redactions: dict[str, Any] = {} @@ -160,20 +160,58 @@ def post_anonymise_object( """ pass - # def collect_redactions(self) -> dict[str, Any]: - # """ - # Return a dict of field names to redaction functions. + def auto_field_redactions(self) -> dict[str, str]: + """ + Return a dict of redaction_values for all text fields. + + This is used to "auto-redact" all char/text fields with "X" - if + the field does not use choices, and is not a primary key or + unique field. + + """ - # This is used by the redact_queryset method to redact fields that - # support redaction. Each value can be a static value or a callable, - # such as a function (e.g. F expression) or a class (e.g. Func). + def _max_length(f: models.Field) -> int: + if isinstance(f, models.CharField): + return f.max_length + if isinstance(f, models.TextField): + return 400 + raise ValueError("Field must be CharField or TextField") - # """ - # return { - # f.name: getattr(self, f"redact_{f.name}") - # for f in self.get_model_fields() - # if hasattr(self, f"redact_{f.name}") - # } + return { + f.name: _max_length(f) * "X" + for f in self.get_model_fields() + if isinstance(f, (models.CharField, models.TextField)) + and not f.choices + and not f.primary_key + and not getattr(f, "unique", False) + } + + def redact_queryset( + self, + queryset: models.QuerySet[models.Model], + auto_redact: bool = True, + **field_overrides: Any, + ) -> int: + """ + Redact a queryset (and SAVE). - def redact_queryset(self, queryset: models.QuerySet[models.Model]) -> int: - return queryset.update(**self.field_redactions) + The `auto_redact` parameter will automatically redact all text + fields with "X" if they are not already covered in the + field_redactions dict. + + The `field_overrides` parameter allows you to pass in a dict of + field_name: redaction_value to override any other redactions. + + The redactions cascade in the following order: + + - auto_redactions (all non-choice text fields) + - field_redactions (static values set on the anonymiser) + - field_overrides (values passed in to method) + + """ + redactions: dict[str, Any] = {} + if auto_redact: + redactions.update(self.auto_field_redactions()) + redactions.update(self.field_redactions) + redactions.update(field_overrides) + return queryset.update(**redactions) diff --git a/pyproject.toml b/pyproject.toml index abec7ac..b980b3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,8 @@ python = "^3.10" django = "^3.2 || ^4.0 || ^5.0" [tool.poetry.dev-dependencies] +dj-database-url = "*" +psycopg2-binary = "*" black = "*" coverage = "*" freezegun = "*" diff --git a/tests/test_models_pg.py b/tests/test_models_pg.py index 11e9619..47a5ffb 100644 --- a/tests/test_models_pg.py +++ b/tests/test_models_pg.py @@ -45,7 +45,8 @@ def test_redact_queryset_one( uuid = user.uuid assert user_anonymiser.redact_queryset(User.objects.all()) == 1 user.refresh_from_db() - assert user.first_name == user_anonymiser.field_redactions["first_name"] + assert user.first_name == "FIRST_NAME" + assert user.last_name == "LAST_NAME" assert user.uuid != uuid def test_redact_queryset_two(