From 6d5bbacc2e956891d8cbd2b2481df8c80dabbabb Mon Sep 17 00:00:00 2001 From: brayo Date: Thu, 4 Jul 2024 14:21:41 +0300 Subject: [PATCH] docs: replace renaming events with a redaction use case --- src/examples/querying-data.rst | 3 +-- src/examples/raw_events.py | 48 +++++++++++++++++++++++++++++----- 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/src/examples/querying-data.rst b/src/examples/querying-data.rst index accbadb..a060dc1 100644 --- a/src/examples/querying-data.rst +++ b/src/examples/querying-data.rst @@ -84,10 +84,9 @@ It is possible to fetch the raw events from a bucket. This is useful if you want Example fetching raw events from the "aw-watcher-window_" bucket: This is an example that you can run in a Python to fetch raw events posted by the window watcher. - The scripts sums the time spent on each window title and later renames some of the titles. + The script sums the time spent on each window title and showcases a data redaction use case. .. literalinclude:: raw_events.py - .. note:: Inserting events with the same id, will overwrite the previous event with the same id. This is useful if you want to update an event, but if you want to keep both events you should use a different id. .. 
TODO `Bucket REST API <./rest.html#get-events>`_ diff --git a/src/examples/raw_events.py b/src/examples/raw_events.py index 7c2d210..93013e3 100644 --- a/src/examples/raw_events.py +++ b/src/examples/raw_events.py @@ -1,6 +1,9 @@ #!/usr/bin/env python3 from aw_client import ActivityWatchClient from aw_core.models import Event +import re +from typing import Pattern, cast, List, Set +from copy import deepcopy client = ActivityWatchClient("test-client", testing=True) @@ -23,10 +26,43 @@ print(time_spent) -# Rename app "Code" to "VS Code" in the events -for event in events: - if event.data["app"] == "Code": - event.data["app"] = "VS Code" - new_event = Event(timestamp=event.timestamp, duration=event.duration, data=event.data, id=event.id) # Reuses the same id - client.insert_event(bucket_id, new_event) # replace the old event with the new one +# sensitive data pattern, compiled case-insensitively so e.g. "binance" and "BINANCE" are caught as well +pattern = re.compile(r"Binance|Metamask|TrustWallet|Trust Wallet", re.IGNORECASE) + +# what to replace sensitive data with +REDACTED = "REDACTED" + +def _redact_event(e: Event, pattern: Pattern) -> Event:  # returns a redacted deep copy; the event passed in is left untouched + e = deepcopy(e) + for k, v in e.data.items(): + if isinstance(v, str): + if pattern.search(v): + e.data[k] = REDACTED + return e + +def _find_sensitive(el: List[Event], pattern: Pattern) -> Set:  # ids of the events in el that _check_event flags + sensitive_ids = set() + for e in el: + if _check_event(e, pattern): + sensitive_ids.add(e.id) + return sensitive_ids + +def _check_event(e: Event, pattern: Pattern) -> bool:  # True if any string value in e.data matches pattern + for v in e.data.values(): + if isinstance(v, str): + if pattern.search(v): + return True + return False + +sensitive_ids = _find_sensitive(events, pattern) +for e in events: + print(f"Event id: {e.id}") + if e.id in sensitive_ids: + e_before = e + e = _redact_event(e, pattern) + print(f"\nData before: {e_before.data}") + print(f"Data after: {e.data}") + client.delete_event(bucket_id, cast(int, e_before.id)) + client.insert_event(bucket_id, e) + print("Redacted event")