From c0198fe2182093310c86bc6f025aca0971191184 Mon Sep 17 00:00:00 2001 From: Benoit Chevallier-Mames Date: Fri, 5 Jul 2024 17:56:08 +0200 Subject: [PATCH] docs(frontend): making an Alphabet class --- .../levenshtein_distance.py | 122 ++++++++++-------- 1 file changed, 71 insertions(+), 51 deletions(-) diff --git a/frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.py b/frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.py index 9e4c9d6f84..30db34e4b0 100644 --- a/frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.py +++ b/frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.py @@ -17,24 +17,31 @@ class Alphabet: my_module = None def set_lowercase(self): + """Set lower case alphabet.""" self.letters = "".join([chr(97 + i) for i in range(26)]) def set_uppercase(self): + """Set upper case alphabet.""" self.letters = "".join([chr(65 + i) for i in range(26)]) def set_anycase(self): + """Set any-case alphabet.""" self.letters = "".join([chr(97 + i) for i in range(26)] + [chr(65 + i) for i in range(26)]) def set_dna(self): + """Set DNA alphabet.""" self.letters = "ACTG" def return_available_alphabets(): + """Return available alphabets.""" return ["string", "STRING", "StRiNg", "ACTG"] def check_alphabet(self, alphabet): + """Check an alphabet is available.""" assert alphabet in Alphabet.return_available_alphabets(), f"Unknown alphabet {alphabet}" def set_alphabet(self, alphabet, verbose=True): + """Set the alphabet.""" self.check_alphabet(alphabet) if alphabet == "string": @@ -53,12 +60,69 @@ def set_alphabet(self, alphabet, verbose=True): for i, c in enumerate(self.letters): self.mapping_to_int[c] = i + def check_string_is_in_alphabet(self, string): + """Check a string is a valid string of an alphabet.""" + assert len(self.mapping_to_int) > 0, "Mapping not defined" + + for c in string: + if c not in self.mapping_to_int: + raise ValueError( + f"Char {c} of {string} is not in alphabet {list(self.mapping_to_int.keys())}, please choose the right --alphabet" + ) + + def _random_pick_in_values(self): + """Pick the integer-encoding of a random char in an alphabet.""" + return numpy.random.randint(len(self.mapping_to_int)) + + def _random_pick_in_keys(self): + """Pick a random char in an alphabet.""" + return random.choice(list(self.mapping_to_int)) + + def _random_string(self, l): + """Pick a random string in the alphabet.""" + return "".join([self._random_pick_in_keys() for _ in range(l)]) + + def prepare_random_patterns(self, len_min, len_max, nb_strings): + """Prepare random patterns of different lengths.""" + assert len(self.mapping_to_int) > 0, "Mapping not defined" + + list_patterns = [] + for _ in range(nb_strings): + for length_1 in range(len_min, len_max + 1): + for length_2 in range(len_min, len_max + 1): + list_patterns += [ + ( + self._random_string(length_1), + self._random_string(length_2), + ) + for _ in range(1) + ] + + return list_patterns + + def encode_string(self, string): + """Encode a string, ie map it to integers using the alphabet.""" + return tuple([self.mapping_to_int[si] for si in string]) + + def encode_and_encrypt_strings(self, a, b): + """Encode a string, ie map it to integers using the alphabet, and then encrypt the integers.""" + a_as_int = self.encode_string(a) + b_as_int = self.encode_string(b) + + a_enc = tuple(self.my_module.equal.encrypt(ai, None)[0] for ai in a_as_int) + b_enc = tuple(self.my_module.equal.encrypt(None, bi)[1] for bi in b_as_int) + + return a_enc, b_enc + def compile_module(self, args): """Compile the FHE module.""" assert len(self.mapping_to_int) > 0, "Mapping not defined" inputset_equal = [ - (random_pick_in_values(self.mapping_to_int), random_pick_in_values(self.mapping_to_int)) + ( + self._random_pick_in_values(), + self._random_pick_in_values(), + ) for _ in range(1000) ] inputset_mix = [ @@ -81,34 +145,6 @@ def compile_module(self, args): min_max_strategy_preference=fhe.MinMaxStrategy.ONE_TLU_PROMOTED, ) - def check_string_is_in_alphabet(self, string): - """Check a string is a valid string of an alphabet.""" - assert len(self.mapping_to_int) > 0, "Mapping not defined" - - for c in string: - if c not in self.mapping_to_int: - raise ValueError( - f"Char {c} of {string} is not in alphabet {list(self.mapping_to_int.keys())}, please choose the right --alphabet" - ) - - def prepare_random_patterns(self, len_min, len_max, nb_strings): - """Prepare random patterns of different lengths.""" - assert len(self.mapping_to_int) > 0, "Mapping not defined" - - list_patterns = [] - for _ in range(nb_strings): - for length_1 in range(len_min, len_max + 1): - for length_2 in range(len_min, len_max + 1): - list_patterns += [ - ( - random_string(self.mapping_to_int, length_1), - random_string(self.mapping_to_int, length_2), - ) - for _ in range(1) - ] - - return list_patterns - def compute_in_simulation(self, list_patterns): """Check equality between distance in simulation and clear distance.""" print("Computations in simulation\n") @@ -117,8 +153,8 @@ def compute_in_simulation(self, list_patterns): print(f" Computing Levenshtein between strings '{a}' and '{b}'", end="") - a_as_int = tuple([self.mapping_to_int[ai] for ai in a]) - b_as_int = tuple([self.mapping_to_int[bi] for bi in b]) + a_as_int = self.encode_string(a) + b_as_int = self.encode_string(b) l1_simulate = levenshtein_simulate(self.my_module, a_as_int, b_as_int) l1_clear = levenshtein_clear(a_as_int, b_as_int) @@ -138,11 +174,7 @@ def compute_in_fhe(self, list_patterns, verbose=True, show_distance=False): print(f" Computing Levenshtein between strings '{a}' and '{b}'", end="") - a_as_int = [self.mapping_to_int[ai] for ai in a] - b_as_int = [self.mapping_to_int[bi] for bi in b] - - a_enc = tuple(self.my_module.equal.encrypt(ai, None)[0] for ai in a_as_int) - b_enc = tuple(self.my_module.equal.encrypt(None, bi)[1] for bi in b_as_int) + a_enc, b_enc = self.encode_and_encrypt_strings(a, b) time_begin = time.time() l1_fhe_enc = levenshtein_fhe(self.my_module, a_enc, b_enc) @@ -160,26 +192,12 @@ def compute_in_fhe(self, list_patterns, verbose=True, show_distance=False): print(f" - distance is {l1_fhe}, computed in {time_end - time_begin:.2f} seconds") -def random_pick_in_values(mapping_to_int): - """Pick the integer-encoding of a random char in an alphabet.""" - return numpy.random.randint(len(mapping_to_int)) - - -def random_pick_in_keys(mapping_to_int): - """Pick a random char in an alphabet.""" - return random.choice(list(mapping_to_int)) - - -def random_string(mapping_to_int, l): - """Pick a random string in the alphabet.""" - return "".join([random_pick_in_keys(mapping_to_int) for _ in range(l)]) - - # Module FHE @fhe.module() class LevenshsteinModule: @fhe.function({"x": "encrypted", "y": "encrypted"}) def equal(x, y): + """Assert equality between two chars of the alphabet.""" return x == y @fhe.function( @@ -192,6 +210,8 @@ def equal(x, y): } ) def mix(is_equal, if_equal, case_1, case_2, case_3): + """Compute the min of (case_1, case_2, case_3), and then return `if_equal` if `is_equal` is + True, or the min in the other case.""" min_12 = numpy.minimum(case_1, case_2) min_123 = numpy.minimum(min_12, case_3)