Skip to content

Commit

Permalink
adding tests for rulenodeprocessor. move fallback logic for rule repl…
Browse files Browse the repository at this point in the history
…ace to initialization
  • Loading branch information
davidx33 committed Nov 22, 2024
1 parent 4774c97 commit 9a46b6c
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 10 deletions.
13 changes: 4 additions & 9 deletions python/langsmith/anonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,9 @@ def __init__(self, rules: List[StringNodeRule]):
"pattern": rule["pattern"]
if isinstance(rule["pattern"], re.Pattern)
else re.compile(rule["pattern"]),
"replace": rule.get("replace"),
"replace": rule["replace"]
if isinstance(rule.get("replace"), str)
else "[redacted]",
}
for rule in rules
]
Expand All @@ -106,14 +108,7 @@ def mask_nodes(self, nodes: List[StringNode]) -> List[StringNode]:
for item in nodes:
new_value = item["value"]
for rule in self.rules:
new_value = rule["pattern"].sub(
(
rule["replace"]
if isinstance(rule["replace"], str)
else "[redacted]"
),
new_value,
)
new_value = rule["pattern"].sub(rule["replace"], new_value)
if new_value != item["value"]:
result.append(StringNode(value=new_value, path=item["path"]))
return result
Expand Down
48 changes: 47 additions & 1 deletion python/tests/unit_tests/test_anonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pydantic import BaseModel

from langsmith import Client, traceable, tracing_context
from langsmith.anonymizer import StringNodeRule, create_anonymizer
from langsmith.anonymizer import StringNodeRule, create_anonymizer, RuleNodeProcessor

EMAIL_REGEX = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
UUID_REGEX = re.compile(
Expand Down Expand Up @@ -139,3 +139,49 @@ def my_func(body: str, from_: MyInput) -> MyOutput:
if "inputs" in patched_data:
assert patched_data["inputs"] == expected_inputs
assert patched_data["outputs"] == expected_outputs

def test_rule_node_processor_scrub_sensitive_info():
    """Check that RuleNodeProcessor applies each rule's own replacement text.

    Three rules (SSN, email, phone) are run over three nodes, each holding one
    kind of sensitive value. Every node is expected back with that value
    swapped for the matching rule's placeholder and with its path unchanged.
    """
    rules = [
        StringNodeRule(pattern=re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), replace="[ssn]"),
        StringNodeRule(
            pattern=re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"),
            replace="[email]",
        ),
        StringNodeRule(
            pattern=re.compile(r"\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b"), replace="[phone]"
        ),
    ]
    processor = RuleNodeProcessor(rules)

    nodes = [
        {"value": "My SSN is 123-45-6789.", "path": ["field1"]},
        # Must be a syntactically valid address: the expected output below
        # requires the email rule to match and rewrite this node.
        {"value": "Contact me at user@example.com.", "path": ["field2"]},
        {"value": "Call me on 123-456-7890.", "path": ["field3"]},
    ]

    expected = [
        {"value": "My SSN is [ssn].", "path": ["field1"]},
        {"value": "Contact me at [email].", "path": ["field2"]},
        {"value": "Call me on [phone].", "path": ["field3"]},
    ]

    result = processor.mask_nodes(nodes)

    assert result == expected

def test_rule_node_processor_default_replace():
    """A rule created without ``replace`` should mask matches as "[redacted]"."""
    processor = RuleNodeProcessor(
        [StringNodeRule(pattern=re.compile(r"sensitive"))]
    )

    masked = processor.mask_nodes(
        [{"value": "This contains sensitive data", "path": ["field1"]}]
    )

    assert masked == [
        {"value": "This contains [redacted] data", "path": ["field1"]},
    ]

0 comments on commit 9a46b6c

Please sign in to comment.