From ee3ef60c90a80d9d22f08f49cd5f8f71ac6da2fb Mon Sep 17 00:00:00 2001 From: cmdoret Date: Mon, 5 Aug 2024 17:23:32 +0200 Subject: [PATCH 01/20] test: update example config --- tests/data/config.yaml | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tests/data/config.yaml b/tests/data/config.yaml index d61b42a..159412d 100644 --- a/tests/data/config.yaml +++ b/tests/data/config.yaml @@ -1,15 +1,21 @@ +# Invert the matching rules for subjects and objects. +invert: false + # hash URIs of people and online accounts -replace_uri_of_nodes_with_type: +subjects: + of_type: - "http://xmlns.com/foaf/0.1/Person" # All nodes which are rdf:type Person - "http://xmlns.com/foaf/OnlineAccount" # "" OnlineAccount -# hash name only for instances of person and online account -replace_values_of_subject_predicate: - "http://xmlns.com/foaf/OnlineAccount": +objects: + # hash accesscode values for all nodes + on_predicate: + - "http://schema.org/accessCode" + #on_type: # NOTE: not currently supported + #- "http://example.org/UserAccount" + # hash name only for instances of person and online account + on_type_predicate: + "http://xmlns.com/foaf/OnlineAccount": - "http://schema.org/name" - "http://xmlns.com/foaf/0.1/Person": + "http://xmlns.com/foaf/0.1/Person": - "http://schema.org/name" - -# hash accesscode values for all nodes -replace_value_of_predicate: - - "http://schema.org/accessCode" From 1a2a7d68eb47351c134127f0b56800e69a30fc7f Mon Sep 17 00:00:00 2001 From: cmdoret Date: Mon, 5 Aug 2024 18:44:09 +0200 Subject: [PATCH 02/20] refactor(rules): rework matching + adapt to new fmt --- src/rules.rs | 186 ++++++++++++++++++++++++++++----------------------- 1 file changed, 103 insertions(+), 83 deletions(-) diff --git a/src/rules.rs b/src/rules.rs index a3ffdca..371eb90 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -5,109 +5,118 @@ use serde::{Deserialize, Serialize}; use crate::model::TripleMask; #[derive(Serialize, Deserialize, Debug, 
Default)] -pub struct Rules { +struct SubjectRules { // Replace values of nodes with a certain type. - pub replace_uri_of_nodes_with_type: HashSet, - - // Replace values of `subject` & `predicate`. - pub replace_values_of_subject_predicate: HashMap>, + of_type: HashSet, +} +#[derive(Serialize, Deserialize, Debug, Default)] +struct ObjectRules { // Replace values in matched `predicates`. - pub replace_value_of_predicate: HashSet, + on_predicate: HashSet, + // Replace values of predicates for specific types + on_type_predicate: HashMap>, } -pub fn match_type_rule_named_node( - is_subject: bool, - n: &NamedNode, - mask: TripleMask, - rules: &Rules, - type_map: &HashMap, -) -> TripleMask { - let iri_type = if let Some(v) = type_map.get(&n.iri) { - v - } else { - // Not in the type map. - return mask; - }; +#[derive(Serialize, Deserialize, Debug, Default)] +pub struct Rules { + // Invert all matchings + pub invert: bool, - if !rules.replace_uri_of_nodes_with_type.contains(iri_type) { - // Not in the rules. - return mask; + pub subjects: SubjectRules, + + pub objects: ObjectRules, + +} + +/// Check if the type of input instance URI is in the rules. +fn match_type(subject: &str, rules: &Rules, type_map: &HashMap) -> bool { + match type_map.get(subject) { + Some(v) => rules.subjects.of_type.contains(v), + None => false, } +} - return if is_subject { - mask | TripleMask::SUBJECT - } else { - mask | TripleMask::OBJECT - }; +/// Check if the predicate URI is in the rules. +fn match_predicate(predicate: &str, rules: &Rules) -> bool { + rules.objects.on_predicate.contains(predicate) } -pub fn match_type_rule_subject( - subject: &Subject, - mask: TripleMask, +/// Check if the combination of subject type and predicate URIs is in the rules. 
+fn match_type_predicate( + subject: &str, + predicate: &str, type_map: &HashMap, rules: &Rules, -) -> TripleMask { - match subject { - Subject::NamedNode(n) => { - return mask | match_type_rule_named_node(true, n, mask, rules, type_map); - } - Subject::BlankNode(_) => return mask, +) -> bool { + + let subject_type = match type_map.get(subject) { + None => return false, + Some(v) => v + }; + let preds = rules.objects.on_type_predicate.get(subject_type); + if preds.is_none() || !preds.unwrap().contains(predicate) { + return false } + + return true } -pub fn match_type_rule_object( - object: &Term, - mask: TripleMask, - type_map: &HashMap, +pub fn match_subject_rules( + triple: &Triple, rules: &Rules, + type_map: &HashMap, ) -> TripleMask { - match object { + let pseudo_subject = match &triple.subject { + Subject::NamedNode(n) => { + match_type(&n.iri, rules, type_map) + }, + _ => false, + }; + let pseudo_object = match &triple.object { Term::NamedNode(n) => { - return mask | match_type_rule_named_node(false, n, mask, rules, type_map); - } - _ => return mask, - } -} + match_type(&n.iri, rules, type_map) + }, + _ => false, + }; -pub fn match_predicate_rule(predicate: &NamedNode, mask: TripleMask, rules: &Rules) -> TripleMask { - let NamedNode { iri: i } = predicate; + let mut mask = TripleMask::default(); + if pseudo_subject { + mask = mask | TripleMask::SUBJECT; + }; + if pseudo_object { + mask = mask | TripleMask::OBJECT; + }; - if rules.replace_value_of_predicate.contains(i) { - return mask | TripleMask::OBJECT; - } else { - return mask; - } + return mask } -pub fn match_subject_predicate_rule( - subject: &Subject, - predicate: &NamedNode, - mask: TripleMask, - type_map: &HashMap, +pub fn match_object_rules( + triple: &Triple, rules: &Rules, + type_map: &HashMap, ) -> TripleMask { - match subject { - Subject::NamedNode(n) => { - let subject_type = if let Some(v) = type_map.get(&n.iri) { - v + let pseudo_object = match &triple.object { + Term::NamedNode(n) => { + if 
match_predicate(&n.iri, rules) { + true } else { - // Not in the type map. - return mask; - }; - - let preds = rules.replace_values_of_subject_predicate.get(subject_type); - if preds.is_none() || !preds.unwrap().contains(&predicate.iri) { - // Not in the rules. - return mask; + match_type_predicate(&n.iri, &triple.predicate.iri, type_map, rules) } + }, + _ => false, + }; - return mask | TripleMask::OBJECT; - } - Subject::BlankNode(_) => return mask, - } + let mask = if pseudo_object { + TripleMask::OBJECT + } else { + TripleMask::default() + }; + + return mask } + #[cfg(test)] mod tests { use super::*; @@ -116,32 +125,43 @@ mod tests { fn set_type_rule(t: &str) -> Rules { let mut rules = Rules::default(); - rules.replace_uri_of_nodes_with_type.insert(t.to_string()); + rules.subjects.of_type.insert(t.to_string()); return rules; } fn set_predicate_rule(p: &str) -> Rules { let mut rules = Rules { - replace_uri_of_nodes_with_type: HashSet::new(), - replace_values_of_subject_predicate: HashMap::new(), - replace_value_of_predicate: HashSet::new(), + invert: false, + subjects: SubjectRules { + of_type: HashSet::new(), + }, + objects: ObjectRules { + on_predicate: HashSet::new(), + on_type_predicate: HashMap::new(), + }, }; - rules.replace_value_of_predicate.insert(p.to_string()); + rules.objects.on_predicate.insert(p.to_string()); return rules; } fn set_subject_predicate_rule(s: &str, p: &str) -> Rules { let mut rules = Rules { - replace_uri_of_nodes_with_type: HashSet::new(), - replace_values_of_subject_predicate: HashMap::new(), - replace_value_of_predicate: HashSet::new(), + invert: false, + subjects: SubjectRules { + of_type: HashSet::new(), + }, + objects: ObjectRules { + on_predicate: HashSet::new(), + on_type_predicate: HashMap::new(), + }, }; let mut set = HashSet::new(); set.insert(p.to_string()); rules - .replace_values_of_subject_predicate + .objects + .on_type_predicate .insert(s.to_string(), set); return rules; From 79fe75c1c3ed3cfa72cfe953118e92c41096d942 
Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 02:43:41 +0200 Subject: [PATCH 03/20] refactor(cli): drop invert flag, use config field instead --- src/main.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/main.rs b/src/main.rs index 7559823..bb847a8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -51,11 +51,6 @@ struct PseudoArgs { #[arg(default_value = "-")] input: PathBuf, - /// Invert the matching rules for the subject and the object. - /// Disabled by default - #[arg(short = 'v', long)] - invert_match: bool, - /// The config file descriptor to use for defining RDF elements to pseudonymize. /// Format: yaml #[arg(short, long)] @@ -103,7 +98,6 @@ fn main() { &args.output, &args.index, &args.secret, - &args.invert_match, ) } } From 52fec4817cdd0da4f3303a05a9cc082de99739ec Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 02:44:22 +0200 Subject: [PATCH 04/20] refactor(pseudo): move match_rules into rules module --- src/pass_second.rs | 31 ++----------------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/src/pass_second.rs b/src/pass_second.rs index eefe2da..60afeaf 100644 --- a/src/pass_second.rs +++ b/src/pass_second.rs @@ -10,34 +10,12 @@ use crate::{ crypto::{new_pseudonymizer, Pseudonymize}, io, log::Logger, - model::TripleMask, rdf_types::*, rules::{ - match_predicate_rule, match_subject_predicate_rule, match_type_rule_object, - match_type_rule_subject, Rules, + match_rules, Rules, }, }; -fn match_rules( - triple: Triple, - rules: &Rules, - type_map: &HashMap, - invert_match: &bool, -) -> TripleMask { - // Check each field of the triple against the rules - let mut mask = TripleMask::default(); - - mask = match_type_rule_subject(&triple.subject, mask, type_map, rules); - mask = match_type_rule_object(&triple.object, mask, type_map, rules); - mask = match_predicate_rule(&triple.predicate, mask, rules); - mask = match_subject_predicate_rule(&triple.subject, &triple.predicate, mask, type_map, rules); 
- - if *invert_match { - mask = mask.invert(); - } - - return mask; -} // mask and encode input triple // NOTE: This will need the type-map to perform masking @@ -47,9 +25,8 @@ fn process_triple( node_to_type: &HashMap, out: &mut impl Write, hasher: &dyn Pseudonymize, - invert_match: &bool, ) { - let mask = match_rules(triple.clone(), rules_config, node_to_type, invert_match); + let mask = match_rules(&triple, rules_config, node_to_type); let r = || -> std::io::Result<()> { out.write_all(hasher.pseudo_triple(&triple, mask).to_string().as_bytes())?; @@ -86,7 +63,6 @@ pub fn pseudonymize_graph( output: &Path, index: &Path, secret_path: &Option, - invert_match: &bool, ) { let buf_input = io::get_reader(input); let buf_index = io::get_reader(index); @@ -110,7 +86,6 @@ pub fn pseudonymize_graph( &node_to_type, &mut buf_output, &pseudonymizer, - invert_match, ); Result::<(), TurtleError>::Ok(()) }) @@ -139,7 +114,6 @@ mod tests { let output_path = dir.path().join("output.nt"); let type_map_path = Path::new("tests/data/type_map.nt"); let key = None; - let invert_match = false; pseudonymize_graph( &logger, &input_path, @@ -147,7 +121,6 @@ mod tests { &output_path, &type_map_path, &key, - &invert_match, ); } } From 8733fd09b923be1a3f9028c477008e71be6bd18e Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 02:45:28 +0200 Subject: [PATCH 05/20] refactor(rules): simplify matching logic --- src/rules.rs | 84 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 29 deletions(-) diff --git a/src/rules.rs b/src/rules.rs index 371eb90..fa3b2f1 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -4,12 +4,14 @@ use serde::{Deserialize, Serialize}; use crate::model::TripleMask; +/// Rules for pseudonymizing subjects #[derive(Serialize, Deserialize, Debug, Default)] struct SubjectRules { // Replace values of nodes with a certain type. 
of_type: HashSet, } +/// Rules for pseudonymizing objects #[derive(Serialize, Deserialize, Debug, Default)] struct ObjectRules { // Replace values in matched `predicates`. @@ -18,6 +20,7 @@ struct ObjectRules { on_type_predicate: HashMap>, } +/// Rules for pseudonymizing triples #[derive(Serialize, Deserialize, Debug, Default)] pub struct Rules { // Invert all matchings @@ -29,39 +32,25 @@ pub struct Rules { } -/// Check if the type of input instance URI is in the rules. -fn match_type(subject: &str, rules: &Rules, type_map: &HashMap) -> bool { - match type_map.get(subject) { - Some(v) => rules.subjects.of_type.contains(v), - None => false, - } -} - -/// Check if the predicate URI is in the rules. -fn match_predicate(predicate: &str, rules: &Rules) -> bool { - rules.objects.on_predicate.contains(predicate) -} - -/// Check if the combination of subject type and predicate URIs is in the rules. -fn match_type_predicate( - subject: &str, - predicate: &str, - type_map: &HashMap, +/// Check all parts of the triple against rules. +pub fn match_rules( + triple: &Triple, rules: &Rules, -) -> bool { + type_map: &HashMap, +) -> TripleMask { - let subject_type = match type_map.get(subject) { - None => return false, - Some(v) => v - }; - let preds = rules.objects.on_type_predicate.get(subject_type); - if preds.is_none() || !preds.unwrap().contains(predicate) { - return false + let mut mask = + match_subject_rules(triple, rules, type_map) + | match_object_rules(triple, rules, type_map); + + if rules.invert { + mask = mask.invert(); } - return true + return mask; } +/// Checks subject and object against subject-rules. 
pub fn match_subject_rules( triple: &Triple, rules: &Rules, @@ -82,15 +71,16 @@ pub fn match_subject_rules( let mut mask = TripleMask::default(); if pseudo_subject { - mask = mask | TripleMask::SUBJECT; + mask |= TripleMask::SUBJECT; }; if pseudo_object { - mask = mask | TripleMask::OBJECT; + mask |= TripleMask::OBJECT; }; return mask } +/// Checks triple against object rules pub fn match_object_rules( triple: &Triple, rules: &Rules, @@ -116,11 +106,47 @@ pub fn match_object_rules( return mask } +/// Check if the type of input instance URI is in the rules. +fn match_type(subject: &str, rules: &Rules, type_map: &HashMap) -> bool { + match type_map.get(subject) { + Some(v) => rules.subjects.of_type.contains(v), + None => false, + } +} + +/// Check if the predicate URI is in the rules. +fn match_predicate(predicate: &str, rules: &Rules) -> bool { + rules.objects.on_predicate.contains(predicate) +} + +/// Check if the combination of subject type and predicate URIs is in the rules. +fn match_type_predicate( + subject: &str, + predicate: &str, + type_map: &HashMap, + rules: &Rules, +) -> bool { + + let subject_type = match type_map.get(subject) { + None => return false, + Some(v) => v + }; + let preds = rules.objects.on_type_predicate.get(subject_type); + if preds.is_none() || !preds.unwrap().contains(predicate) { + return false + } + + return true +} + + #[cfg(test)] mod tests { use super::*; use rstest::rstest; +use crate::model::TripleMask; + use crate::rdf_types::Triple; fn set_type_rule(t: &str) -> Rules { let mut rules = Rules::default(); From a134fec987960adecc5406d23c2d1df5c3092a04 Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 02:45:45 +0200 Subject: [PATCH 06/20] test(rules): adapt tests [WIP] --- src/rules.rs | 44 +++++++++++++++----------------------------- 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/src/rules.rs b/src/rules.rs index fa3b2f1..01965c2 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -170,7 +170,7 @@ use 
crate::model::TripleMask; return rules; } - fn set_subject_predicate_rule(s: &str, p: &str) -> Rules { + fn set_type_predicate_rule(s: &str, p: &str) -> Rules { let mut rules = Rules { invert: false, subjects: SubjectRules { @@ -195,39 +195,25 @@ use crate::model::TripleMask; #[rstest] // Subject is in the rules & type index - #[case(true, "Alice", "Alice", "Person", "Person", true, false)] + #[case("Alice", HashMap::from([("Alice", "Person")]), "Person", true)] // Subject is in the type index, not in the rules - #[case(true, "Alice", "Alice", "Person", "Bank", false, false)] + #[case("Alice", HashMap::from([("Alice", "Person")]), "Bank", false)] // Subject is not in the type index - #[case(true, "Alice", "BankName", "Bank", "Bank", false, false)] - // Object is in the rules & type index - #[case(false, "Alice", "Alice", "Person", "Person", false, true)] + #[case("Alice", HashMap::from([("BankName", "Bank")]), "Bank", false)] fn type_rule( - #[case] is_subject: bool, - #[case] node_iri: &str, - #[case] index_subject: &str, - #[case] index_object: &str, + #[case] subject: &str, + #[case] index: HashMap<&str, &str>, #[case] rule_type: &str, - #[case] expected_s: bool, - #[case] expected_o: bool, + #[case] match_expected: bool, ) { + // convert index key/values into Strings + let type_index: HashMap = index + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + let rules = set_type_rule(rule_type); - let mut type_index = HashMap::new(); - type_index.insert(index_subject.to_string(), index_object.to_string()); - let mut mask = TripleMask::default(); - mask = if is_subject { - let node = Subject::NamedNode(NamedNode { - iri: node_iri.to_string(), - }); - match_type_rule_subject(&node, mask, &type_index, &rules) - } else { - let node = Term::NamedNode(NamedNode { - iri: node_iri.to_string(), - }); - match_type_rule_object(&node, mask, &type_index, &rules) - }; - assert_eq!(mask.is_set(&TripleMask::SUBJECT), expected_s); - 
assert_eq!(mask.is_set(&TripleMask::OBJECT), expected_o); + assert_eq!(match_type(subject, &rules, &type_index), match_expected); } #[rstest] @@ -273,7 +259,7 @@ use crate::model::TripleMask; iri: predicate_iri.to_string(), }; - let rules = set_subject_predicate_rule(rule_subject, rule_predicate); + let rules = set_type_predicate_rule(rule_subject, rule_predicate); let mut mask = TripleMask::default(); let mut type_map = HashMap::new(); From 38e53cbba9033964b6a696701b49f2e611e0e76b Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 02:46:05 +0200 Subject: [PATCH 07/20] style: fmt --- src/pass_second.rs | 5 +---- src/rules.rs | 40 +++++++++++++--------------------------- 2 files changed, 14 insertions(+), 31 deletions(-) diff --git a/src/pass_second.rs b/src/pass_second.rs index 60afeaf..1343885 100644 --- a/src/pass_second.rs +++ b/src/pass_second.rs @@ -11,12 +11,9 @@ use crate::{ io, log::Logger, rdf_types::*, - rules::{ - match_rules, Rules, - }, + rules::{match_rules, Rules}, }; - // mask and encode input triple // NOTE: This will need the type-map to perform masking fn process_triple( diff --git a/src/rules.rs b/src/rules.rs index 01965c2..d5b224b 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -29,7 +29,6 @@ pub struct Rules { pub subjects: SubjectRules, pub objects: ObjectRules, - } /// Check all parts of the triple against rules. 
@@ -38,10 +37,8 @@ pub fn match_rules( rules: &Rules, type_map: &HashMap, ) -> TripleMask { - - let mut mask = - match_subject_rules(triple, rules, type_map) - | match_object_rules(triple, rules, type_map); + let mut mask = + match_subject_rules(triple, rules, type_map) | match_object_rules(triple, rules, type_map); if rules.invert { mask = mask.invert(); @@ -57,27 +54,23 @@ pub fn match_subject_rules( type_map: &HashMap, ) -> TripleMask { let pseudo_subject = match &triple.subject { - Subject::NamedNode(n) => { - match_type(&n.iri, rules, type_map) - }, + Subject::NamedNode(n) => match_type(&n.iri, rules, type_map), _ => false, }; let pseudo_object = match &triple.object { - Term::NamedNode(n) => { - match_type(&n.iri, rules, type_map) - }, + Term::NamedNode(n) => match_type(&n.iri, rules, type_map), _ => false, }; let mut mask = TripleMask::default(); if pseudo_subject { - mask |= TripleMask::SUBJECT; + mask |= TripleMask::SUBJECT; }; if pseudo_object { mask |= TripleMask::OBJECT; }; - return mask + return mask; } /// Checks triple against object rules @@ -93,7 +86,7 @@ pub fn match_object_rules( } else { match_type_predicate(&n.iri, &triple.predicate.iri, type_map, rules) } - }, + } _ => false, }; @@ -103,7 +96,7 @@ pub fn match_object_rules( TripleMask::default() }; - return mask + return mask; } /// Check if the type of input instance URI is in the rules. 
@@ -126,27 +119,23 @@ fn match_type_predicate( type_map: &HashMap, rules: &Rules, ) -> bool { - let subject_type = match type_map.get(subject) { None => return false, - Some(v) => v + Some(v) => v, }; let preds = rules.objects.on_type_predicate.get(subject_type); if preds.is_none() || !preds.unwrap().contains(predicate) { - return false + return false; } - return true + return true; } - - #[cfg(test)] mod tests { use super::*; + use crate::{model::TripleMask, rdf_types::Triple}; use rstest::rstest; -use crate::model::TripleMask; - use crate::rdf_types::Triple; fn set_type_rule(t: &str) -> Rules { let mut rules = Rules::default(); @@ -185,10 +174,7 @@ use crate::model::TripleMask; let mut set = HashSet::new(); set.insert(p.to_string()); - rules - .objects - .on_type_predicate - .insert(s.to_string(), set); + rules.objects.on_type_predicate.insert(s.to_string(), set); return rules; } From 6bf35411fd4d792bda90c8902e81f46d4b36e585 Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 14:23:04 +0200 Subject: [PATCH 08/20] test(rules): index macro + remove constant cases --- src/rules.rs | 89 +++++++++++++++++++++++----------------------------- 1 file changed, 39 insertions(+), 50 deletions(-) diff --git a/src/rules.rs b/src/rules.rs index d5b224b..55224e3 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -6,14 +6,14 @@ use crate::model::TripleMask; /// Rules for pseudonymizing subjects #[derive(Serialize, Deserialize, Debug, Default)] -struct SubjectRules { +pub struct SubjectRules { // Replace values of nodes with a certain type. of_type: HashSet, } /// Rules for pseudonymizing objects #[derive(Serialize, Deserialize, Debug, Default)] -struct ObjectRules { +pub struct ObjectRules { // Replace values in matched `predicates`. 
on_predicate: HashSet, // Replace values of predicates for specific types @@ -134,9 +134,11 @@ fn match_type_predicate( #[cfg(test)] mod tests { use super::*; - use crate::{model::TripleMask, rdf_types::Triple}; use rstest::rstest; + const SUBJECT_IRI: &str = "Alice"; + const PREDICATE_IRI: &str = "hasName"; + fn set_type_rule(t: &str) -> Rules { let mut rules = Rules::default(); @@ -144,6 +146,20 @@ mod tests { return rules; } + // Helper macro to create a HashMap from pairs + #[macro_export] + macro_rules! index { + () => { + ::std::collections::HashMap::new() + }; + + ($($key:expr => $value:expr),+ $(,)?) => { + ::std::collections::HashMap::from([ + $((String::from($key), String::from($value))),* + ]) + }; + } + fn set_predicate_rule(p: &str) -> Rules { let mut rules = Rules { invert: false, @@ -181,79 +197,52 @@ mod tests { #[rstest] // Subject is in the rules & type index - #[case("Alice", HashMap::from([("Alice", "Person")]), "Person", true)] + #[case(index! { "Alice" => "Person" }, "Person", true)] // Subject is in the type index, not in the rules - #[case("Alice", HashMap::from([("Alice", "Person")]), "Bank", false)] + #[case(index! { "Alice" => "Person" }, "Bank", false)] // Subject is not in the type index - #[case("Alice", HashMap::from([("BankName", "Bank")]), "Bank", false)] + #[case(index! 
{ "BankName" => "Bank" }, "Bank", false)] fn type_rule( - #[case] subject: &str, - #[case] index: HashMap<&str, &str>, + #[case] index: HashMap, #[case] rule_type: &str, #[case] match_expected: bool, ) { // convert index key/values into Strings - let type_index: HashMap = index - .iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(); let rules = set_type_rule(rule_type); - assert_eq!(match_type(subject, &rules, &type_index), match_expected); + assert_eq!(match_type(SUBJECT_IRI, &rules, &index), match_expected); } #[rstest] // Predicate is in the rules - #[case("hasName", "hasName", true)] + #[case("hasName", true)] // Predicate is not in the rules - #[case("hasName", "hasAge", false)] - fn predicate_rule(#[case] node_iri: &str, #[case] rule_type: &str, #[case] expected_o: bool) { - let predicate = NamedNode { - iri: node_iri.to_string(), - }; - let rules = set_predicate_rule(rule_type); - let mut mask = TripleMask::default(); - - mask = match_predicate_rule(&predicate, mask, &rules); - - assert!(!mask.is_set(&TripleMask::SUBJECT)); - assert_eq!(mask.is_set(&TripleMask::OBJECT), expected_o); + #[case("hasAge", false)] + fn predicate_rule(#[case] rule_predicate: &str, #[case] match_expected: bool) { + let rules = set_predicate_rule(rule_predicate); + assert_eq!(match_predicate(PREDICATE_IRI, &rules), match_expected); } #[rstest] // Subject predicate in config - #[case("Alice", "hasName", "Person", "hasName", "Alice", "Person", true)] + #[case("Person", "hasName", index! { "Alice" => "Person" }, true)] // Subject in config, predicate not - #[case("Alice", "hasName", "Person", "hasAge", "Alice", "Person", false)] + #[case("Person", "hasAge", index! { "Alice" => "Person" }, false)] // Subject predicate not in config - #[case("Alice", "hasName", "Bob", "hasAge", "Alice", "Person", false)] + #[case("Bob", "hasAge", index! 
{ "Alice" => "Person" }, false)] // Subject not in type index - #[case("Alice", "hasName", "Bob", "hasAge", "Bob", "Person", false)] - fn subject_predicate_rule( - #[case] subject_iri: &str, - #[case] predicate_iri: &str, - #[case] rule_subject: &str, + #[case("Bob", "hasAge", index! { "Bob" => "Person" }, false)] + fn type_predicate_rule( + #[case] rule_type: &str, #[case] rule_predicate: &str, - #[case] index_subject: &str, - #[case] index_object: &str, - #[case] expected_o: bool, + #[case] index: HashMap, + #[case] match_expected: bool, ) { - let subject = Subject::NamedNode(NamedNode { - iri: subject_iri.to_string(), - }); - let predicate = NamedNode { - iri: predicate_iri.to_string(), - }; - let rules = set_type_predicate_rule(rule_subject, rule_predicate); + let rules = set_type_predicate_rule(rule_type, rule_predicate); - let mut mask = TripleMask::default(); - let mut type_map = HashMap::new(); - type_map.insert(index_subject.to_string(), index_object.to_string()); - mask = match_subject_predicate_rule(&subject, &predicate, mask, &type_map, &rules); + assert_eq!(match_type_predicate(SUBJECT_IRI, PREDICATE_IRI, &index, &rules), match_expected); - assert!(!mask.is_set(&TripleMask::SUBJECT)); - assert_eq!(mask.is_set(&TripleMask::OBJECT), expected_o); } } From 98dc0719dbbbc500825a80a31c6e9fe7084fdea5 Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 14:24:50 +0200 Subject: [PATCH 09/20] test(rules): use Rules::default() for init --- src/rules.rs | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/src/rules.rs b/src/rules.rs index 55224e3..2a2d1ad 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -161,31 +161,13 @@ mod tests { } fn set_predicate_rule(p: &str) -> Rules { - let mut rules = Rules { - invert: false, - subjects: SubjectRules { - of_type: HashSet::new(), - }, - objects: ObjectRules { - on_predicate: HashSet::new(), - on_type_predicate: HashMap::new(), - }, - }; + let mut rules = Rules::default(); 
rules.objects.on_predicate.insert(p.to_string()); return rules; } fn set_type_predicate_rule(s: &str, p: &str) -> Rules { - let mut rules = Rules { - invert: false, - subjects: SubjectRules { - of_type: HashSet::new(), - }, - objects: ObjectRules { - on_predicate: HashSet::new(), - on_type_predicate: HashMap::new(), - }, - }; + let mut rules = Rules::default(); let mut set = HashSet::new(); set.insert(p.to_string()); From d44511306acb595e8f830f1351c23a3e2bebe6cb Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 14:50:56 +0200 Subject: [PATCH 10/20] fix(rules): make all fields optional in serde --- src/rules.rs | 51 ++++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/src/rules.rs b/src/rules.rs index 2a2d1ad..2094d55 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -8,6 +8,7 @@ use crate::model::TripleMask; #[derive(Serialize, Deserialize, Debug, Default)] pub struct SubjectRules { // Replace values of nodes with a certain type. + #[serde(default)] of_type: HashSet, } @@ -15,8 +16,10 @@ pub struct SubjectRules { #[derive(Serialize, Deserialize, Debug, Default)] pub struct ObjectRules { // Replace values in matched `predicates`. 
+ #[serde(default)] on_predicate: HashSet, // Replace values of predicates for specific types + #[serde(default)] on_type_predicate: HashMap>, } @@ -24,10 +27,13 @@ pub struct ObjectRules { #[derive(Serialize, Deserialize, Debug, Default)] pub struct Rules { // Invert all matchings + #[serde(default)] pub invert: bool, + #[serde(default)] pub subjects: SubjectRules, + #[serde(default)] pub objects: ObjectRules, } @@ -135,16 +141,12 @@ fn match_type_predicate( mod tests { use super::*; use rstest::rstest; + use serde_yml; + // Instance used in tests const SUBJECT_IRI: &str = "Alice"; const PREDICATE_IRI: &str = "hasName"; - fn set_type_rule(t: &str) -> Rules { - let mut rules = Rules::default(); - - rules.subjects.of_type.insert(t.to_string()); - return rules; - } // Helper macro to create a HashMap from pairs #[macro_export] @@ -160,22 +162,10 @@ mod tests { }; } - fn set_predicate_rule(p: &str) -> Rules { - let mut rules = Rules::default(); - rules.objects.on_predicate.insert(p.to_string()); - return rules; + fn parse_rules(yml: &str) -> Rules { + serde_yml::from_str(yml).unwrap() } - fn set_type_predicate_rule(s: &str, p: &str) -> Rules { - let mut rules = Rules::default(); - - let mut set = HashSet::new(); - set.insert(p.to_string()); - - rules.objects.on_type_predicate.insert(s.to_string(), set); - - return rules; - } #[rstest] // Subject is in the rules & type index @@ -189,9 +179,12 @@ mod tests { #[case] rule_type: &str, #[case] match_expected: bool, ) { - // convert index key/values into Strings + let rules = parse_rules(&format!(" + subjects: + of_type: + - {rule_type} + ")); - let rules = set_type_rule(rule_type); assert_eq!(match_type(SUBJECT_IRI, &rules, &index), match_expected); } @@ -201,7 +194,11 @@ mod tests { // Predicate is not in the rules #[case("hasAge", false)] fn predicate_rule(#[case] rule_predicate: &str, #[case] match_expected: bool) { - let rules = set_predicate_rule(rule_predicate); + let rules = parse_rules(&format!(" + objects: + 
on_predicate: + - {rule_predicate} + ")); assert_eq!(match_predicate(PREDICATE_IRI, &rules), match_expected); } @@ -221,8 +218,12 @@ mod tests { #[case] match_expected: bool, ) { - let rules = set_type_predicate_rule(rule_type, rule_predicate); - + let rules = parse_rules(&format!(" + objects: + on_type_predicate: + {rule_type}: + - {rule_predicate} + ")); assert_eq!(match_type_predicate(SUBJECT_IRI, PREDICATE_IRI, &index, &rules), match_expected); From 53f5e59328e979351d03a85c13262cde3053aed1 Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 15:20:31 +0200 Subject: [PATCH 11/20] style: fmt --- src/rules.rs | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/rules.rs b/src/rules.rs index 2094d55..4ece1ae 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -147,7 +147,6 @@ mod tests { const SUBJECT_IRI: &str = "Alice"; const PREDICATE_IRI: &str = "hasName"; - // Helper macro to create a HashMap from pairs #[macro_export] macro_rules! index { @@ -157,7 +156,7 @@ mod tests { ($($key:expr => $value:expr),+ $(,)?) => { ::std::collections::HashMap::from([ - $((String::from($key), String::from($value))),* + $((String::from($key), String::from($value))),* ]) }; } @@ -166,7 +165,6 @@ mod tests { serde_yml::from_str(yml).unwrap() } - #[rstest] // Subject is in the rules & type index #[case(index! 
{ "Alice" => "Person" }, "Person", true)] @@ -179,11 +177,13 @@ mod tests { #[case] rule_type: &str, #[case] match_expected: bool, ) { - let rules = parse_rules(&format!(" + let rules = parse_rules(&format!( + " subjects: of_type: - {rule_type} - ")); + " + )); assert_eq!(match_type(SUBJECT_IRI, &rules, &index), match_expected); } @@ -194,11 +194,13 @@ mod tests { // Predicate is not in the rules #[case("hasAge", false)] fn predicate_rule(#[case] rule_predicate: &str, #[case] match_expected: bool) { - let rules = parse_rules(&format!(" + let rules = parse_rules(&format!( + " objects: on_predicate: - {rule_predicate} - ")); + " + )); assert_eq!(match_predicate(PREDICATE_IRI, &rules), match_expected); } @@ -217,15 +219,18 @@ mod tests { #[case] index: HashMap, #[case] match_expected: bool, ) { - - let rules = parse_rules(&format!(" + let rules = parse_rules(&format!( + " objects: on_type_predicate: {rule_type}: - {rule_predicate} - ")); - - assert_eq!(match_type_predicate(SUBJECT_IRI, PREDICATE_IRI, &index, &rules), match_expected); + " + )); + assert_eq!( + match_type_predicate(SUBJECT_IRI, PREDICATE_IRI, &index, &rules), + match_expected + ); } } From 28a856b46e3ca56e5a96e4a9d924c24cb53eeb37 Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 15:27:54 +0200 Subject: [PATCH 12/20] test(rules): use constant in cases --- src/rules.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rules.rs b/src/rules.rs index 4ece1ae..dc4d834 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -167,9 +167,9 @@ mod tests { #[rstest] // Subject is in the rules & type index - #[case(index! { "Alice" => "Person" }, "Person", true)] + #[case(index! { SUBJECT_IRI => "Person" }, "Person", true)] // Subject is in the type index, not in the rules - #[case(index! { "Alice" => "Person" }, "Bank", false)] + #[case(index! { SUBJECT_IRI => "Person" }, "Bank", false)] // Subject is not in the type index #[case(index! 
{ "BankName" => "Bank" }, "Bank", false)] fn type_rule( @@ -190,7 +190,7 @@ mod tests { #[rstest] // Predicate is in the rules - #[case("hasName", true)] + #[case(PREDICATE_IRI, true)] // Predicate is not in the rules #[case("hasAge", false)] fn predicate_rule(#[case] rule_predicate: &str, #[case] match_expected: bool) { @@ -206,11 +206,11 @@ mod tests { #[rstest] // Subject predicate in config - #[case("Person", "hasName", index! { "Alice" => "Person" }, true)] + #[case("Person", "hasName", index! { SUBJECT_IRI => "Person" }, true)] // Subject in config, predicate not - #[case("Person", "hasAge", index! { "Alice" => "Person" }, false)] + #[case("Person", "hasAge", index! { SUBJECT_IRI => "Person" }, false)] // Subject predicate not in config - #[case("Bob", "hasAge", index! { "Alice" => "Person" }, false)] + #[case("Bob", "hasAge", index! { SUBJECT_IRI => "Person" }, false)] // Subject not in type index #[case("Bob", "hasAge", index! { "Bob" => "Person" }, false)] fn type_predicate_rule( From 544fceb54f507b42c437c923069ee57cefac7eb9 Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 17:24:32 +0200 Subject: [PATCH 13/20] fix(rules): object rules matching --- src/rules.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rules.rs b/src/rules.rs index dc4d834..05b246c 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -53,7 +53,7 @@ pub fn match_rules( return mask; } -/// Checks subject and object against subject-rules. +/// Check triple against subject-pseudonymization rules. 
pub fn match_subject_rules( triple: &Triple, rules: &Rules, @@ -79,15 +79,15 @@ pub fn match_subject_rules( return mask; } -/// Checks triple against object rules +/// Checks triple against object-pseudonymization rules pub fn match_object_rules( triple: &Triple, rules: &Rules, type_map: &HashMap, ) -> TripleMask { - let pseudo_object = match &triple.object { - Term::NamedNode(n) => { - if match_predicate(&n.iri, rules) { + let pseudo_object = match &triple.subject { + Subject::NamedNode(n) => { + if match_predicate(&triple.predicate.iri, rules) { true } else { match_type_predicate(&n.iri, &triple.predicate.iri, type_map, rules) From 352d2a1d14b287abb97db4b13e094a5b1d46aaca Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 17:36:16 +0200 Subject: [PATCH 14/20] fix(rules): match object rules on blanknodes --- src/rules.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/rules.rs b/src/rules.rs index 05b246c..e7d7843 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -85,15 +85,18 @@ pub fn match_object_rules( rules: &Rules, type_map: &HashMap, ) -> TripleMask { + + if match_predicate(&triple.predicate.iri, rules) { + return TripleMask::OBJECT; + } + let pseudo_object = match &triple.subject { Subject::NamedNode(n) => { - if match_predicate(&triple.predicate.iri, rules) { - true - } else { - match_type_predicate(&n.iri, &triple.predicate.iri, type_map, rules) - } - } - _ => false, + match_type_predicate(&n.iri, &triple.predicate.iri, type_map, rules) + }, + Subject::BlankNode(b) => { + match_type_predicate(&b.id, &triple.predicate.iri, type_map, rules) + }, }; let mask = if pseudo_object { From 725595f04533e5aab05ad445f830e81c0b474e8d Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 17:51:09 +0200 Subject: [PATCH 15/20] refactor(rules): explicit matches --- src/rules.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/rules.rs b/src/rules.rs index 
e7d7843..d338989 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -61,11 +61,12 @@ pub fn match_subject_rules( ) -> TripleMask { let pseudo_subject = match &triple.subject { Subject::NamedNode(n) => match_type(&n.iri, rules, type_map), - _ => false, + Subject::BlankNode(_) => false, }; let pseudo_object = match &triple.object { Term::NamedNode(n) => match_type(&n.iri, rules, type_map), - _ => false, + Term::BlankNode(_) => false, + Term::Literal(_) => false, }; let mut mask = TripleMask::default(); @@ -99,20 +100,19 @@ pub fn match_object_rules( }, }; - let mask = if pseudo_object { - TripleMask::OBJECT - } else { - TripleMask::default() - }; + if pseudo_object { + return TripleMask::OBJECT + } - return mask; + return TripleMask::default() } /// Check if the type of input instance URI is in the rules. fn match_type(subject: &str, rules: &Rules, type_map: &HashMap) -> bool { - match type_map.get(subject) { - Some(v) => rules.subjects.of_type.contains(v), - None => false, + if let Some(v) = type_map.get(subject) { + rules.subjects.of_type.contains(v) + } else { + false } } From 2edf744d103f7d291b20677d89017425a0bcec82 Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 18:06:11 +0200 Subject: [PATCH 16/20] refactor: rename pass_first/second to index/pseudo --- src/{pass_first.rs => index.rs} | 0 src/main.rs | 8 ++++---- src/{pass_second.rs => pseudo.rs} | 0 3 files changed, 4 insertions(+), 4 deletions(-) rename src/{pass_first.rs => index.rs} (100%) rename src/{pass_second.rs => pseudo.rs} (100%) diff --git a/src/pass_first.rs b/src/index.rs similarity index 100% rename from src/pass_first.rs rename to src/index.rs diff --git a/src/main.rs b/src/main.rs index bb847a8..b5edbd7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,16 +3,16 @@ mod crypto; mod io; mod log; mod model; -mod pass_first; -mod pass_second; +mod index; +mod pseudo; mod rdf_types; mod rules; // Define the imports. 
use crate::{ log::{create_logger, info}, - pass_first::create_type_map, - pass_second::pseudonymize_graph, + index::create_type_map, + pseudo::pseudonymize_graph, }; use clap::{Args, Parser, Subcommand}; diff --git a/src/pass_second.rs b/src/pseudo.rs similarity index 100% rename from src/pass_second.rs rename to src/pseudo.rs From d2f3a705beb8d5db5123d90a51d1bd1c9053e352 Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 18:09:05 +0200 Subject: [PATCH 17/20] style: fmt --- src/main.rs | 4 ++-- src/rules.rs | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/main.rs b/src/main.rs index b5edbd7..d754fef 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,17 +1,17 @@ // Define the module. mod crypto; +mod index; mod io; mod log; mod model; -mod index; mod pseudo; mod rdf_types; mod rules; // Define the imports. use crate::{ - log::{create_logger, info}, index::create_type_map, + log::{create_logger, info}, pseudo::pseudonymize_graph, }; diff --git a/src/rules.rs b/src/rules.rs index d338989..6b3a039 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -86,7 +86,6 @@ pub fn match_object_rules( rules: &Rules, type_map: &HashMap, ) -> TripleMask { - if match_predicate(&triple.predicate.iri, rules) { return TripleMask::OBJECT; } @@ -94,17 +93,17 @@ pub fn match_object_rules( let pseudo_object = match &triple.subject { Subject::NamedNode(n) => { match_type_predicate(&n.iri, &triple.predicate.iri, type_map, rules) - }, + } Subject::BlankNode(b) => { match_type_predicate(&b.id, &triple.predicate.iri, type_map, rules) - }, + } }; if pseudo_object { - return TripleMask::OBJECT + return TripleMask::OBJECT; } - return TripleMask::default() + return TripleMask::default(); } /// Check if the type of input instance URI is in the rules. 
From f1f7bd338e3b436140140da17fd461f93f6c2900 Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 19:33:23 +0200 Subject: [PATCH 18/20] test(rules): test top-level matching function --- src/rules.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/rules.rs b/src/rules.rs index 6b3a039..4f0c019 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -142,6 +142,8 @@ fn match_type_predicate( #[cfg(test)] mod tests { use super::*; + use rio_api::parser::TriplesParser; + use rio_turtle::{TurtleError, TurtleParser}; use rstest::rstest; use serde_yml; @@ -235,4 +237,37 @@ mod tests { match_expected ); } + + #[rstest] + // sensitive subject, sensitive literal object + #[case(r#" "42" ."#, 0b101)] + // sensitive subject, non-sensitive object + #[case(r#" 174 ."#, 0b100)] + // sensitive subject, sensitive named node object + #[case(r#" ."#, 0b101)] + // non-sensitive subject, sensitive named node object + #[case(r#" ."#, 0b001)] + // Test the parsing of different triples against fixed rules/index. + fn individual_triple(#[case] triple: &str, #[case] expected_mask: u8) { + let rules: Rules = parse_rules( + r#" + subjects: + of_type: ["urn:Person"] + objects: + on_predicate: ["urn:hasAge"] + "#, + ); + let index = index! 
{ + "urn:Alice" => "urn:Person", + "urn:Bob" => "urn:Person", + "urn:ACME" => "urn:Organization" + }; + TurtleParser::new(triple.as_ref(), None) + .parse_all(&mut |t| { + let mask = match_rules(&t.into(), &rules, &index); + assert_eq!(mask.bits(), expected_mask); + Ok(()) as Result<(), TurtleError> + }) + .unwrap(); + } } From 87883da0d4d4b6f6395eec8f976b0f19cfd57e23 Mon Sep 17 00:00:00 2001 From: cmdoret Date: Tue, 6 Aug 2024 19:55:04 +0200 Subject: [PATCH 19/20] tets(rules): add cases for on_type_predicate --- src/rules.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/rules.rs b/src/rules.rs index 4f0c019..f87bafe 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -239,14 +239,16 @@ mod tests { } #[rstest] - // sensitive subject, sensitive literal object + // sensitive subject, on-type sensitive object #[case(r#" "42" ."#, 0b101)] - // sensitive subject, non-sensitive object - #[case(r#" 174 ."#, 0b100)] + // sensitive subject, sensitive literal object + #[case(r#" "Foobar" ."#, 0b101)] // sensitive subject, sensitive named node object #[case(r#" ."#, 0b101)] // non-sensitive subject, sensitive named node object #[case(r#" ."#, 0b001)] + // non-sensitive subject, non-sensitive object + #[case(r#" "200" ."#, 0b000)] // Test the parsing of different triples against fixed rules/index. fn individual_triple(#[case] triple: &str, #[case] expected_mask: u8) { let rules: Rules = parse_rules( @@ -254,7 +256,9 @@ mod tests { subjects: of_type: ["urn:Person"] objects: - on_predicate: ["urn:hasAge"] + on_predicate: ["urn:hasLastName"] + on_type_predicate: + "urn:Person": ["urn:hasAge"] "#, ); let index = index! 
{ From 63fcebbef430a58c2c43855194f90920b8ccf830 Mon Sep 17 00:00:00 2001 From: cmdoret Date: Thu, 8 Aug 2024 11:14:09 +0200 Subject: [PATCH 20/20] docs(tutorial): update config fields --- docs/tutorial.md | 49 +++++++++++++++--------------------------------- 1 file changed, 15 insertions(+), 34 deletions(-) diff --git a/docs/tutorial.md b/docs/tutorial.md index 04a21a6..ab10218 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -23,11 +23,7 @@ There are three possible ways to pseudonymize RDF triples: 2. Pseudonymize values for specific subject-predicate combinations. 3. Pseudonymize any value for a given predicate. -By using all three ways together, we're able to get an RDF file with sensitive -information: - -
- Click to show input +By combining these, we can process an RDF file with sensitive information: ```ntriples . @@ -40,15 +36,12 @@ information: "Bank" . ``` -
-And pseudonymize the sensitive information such as people's names, personal and -secret information while keeping the rest as is: +into a pseudonymized file where the sensitive information such as people's names, personal and +secret information is hashed to protect privacy: -
- Click to show output -``` +```ntriples . . . @@ -59,55 +52,49 @@ secret information while keeping the rest as is: "Bank" . ``` -
- The next subsections break down each of the three pseudonymization approaches to better understand how they operate. ### 1. Pseudonymize the URI of nodes with `rdf:type` -
- Click to show Given the following config: ```yaml -replace_uri_of_nodes_with_type: +subjects: + of_type: - "http://xmlns.com/foaf/0.1/Person" ``` The goal is to pseudonymize all instaces of `rdf:type` Person. The following input file: -``` +```ntriples . ``` Would become: -``` +```ntriples . ``` -
### 2. Pseudonymize values for specific subject-predicate combinations -
- Click to show - Given the following config: ```yaml -replace_values_of_subject_predicate: - "http://xmlns.com/foaf/0.1/Person": +objects: + on_type_predicate: + "http://xmlns.com/foaf/0.1/Person": - "http://schema.org/name" ``` The goal is to pseudonymize only the instances of names when they're associated to Person. The following input file: -``` +```ntriples . "Alice" . . @@ -116,19 +103,15 @@ to Person. The following input file: Would become: -``` +```ntriples . "af321bbc" . . "Bank" . ``` -
- ### 3. Pseudonymize any value for a given predicate -
- Click to show Given the following config: @@ -140,7 +123,7 @@ replace_value_of_predicate: The goal is to pseudonymize any values associated to name. The following input file: -``` +```ntriples . "Alice" . . @@ -149,11 +132,9 @@ file: Would become: -``` +```ntriples . "af321bbc" . . "38a3dd71" . ``` - -