From 96824932934014da06459ec1472c864fcc4c9a64 Mon Sep 17 00:00:00 2001 From: Benoit Chevallier-Mames Date: Wed, 19 Jun 2024 14:40:28 +0200 Subject: [PATCH] docs(frontend): adding a use-case for Levenshtein distance closes #https://github.com/zama-ai/concrete-internal/issues/750 --- docs/tutorials/see-all-tutorials.md | 1 + .../levenshtein_distance.md | 210 +++++++++ .../levenshtein_distance.py | 442 ++++++++++++++++++ 3 files changed, 653 insertions(+) create mode 100644 frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.md create mode 100644 frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.py diff --git a/docs/tutorials/see-all-tutorials.md b/docs/tutorials/see-all-tutorials.md index a492712fd6..b89bd8c163 100644 --- a/docs/tutorials/see-all-tutorials.md +++ b/docs/tutorials/see-all-tutorials.md @@ -14,6 +14,7 @@ * [Game of Life](../../frontends/concrete-python/examples/game_of_life/game_of_life.md) * [XOR distance](../../frontends/concrete-python/examples/xor_distance/xor_distance.md) * [SHA1 with Modules](../../frontends/concrete-python/examples/sha1/sha1.md) +* [Levenshtein distance with Modules](../../frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.md) #### Blog tutorials diff --git a/frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.md b/frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.md new file mode 100644 index 0000000000..77201f32f0 --- /dev/null +++ b/frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.md @@ -0,0 +1,210 @@ +# Computing the Levenshtein distance in FHE + +## Levenshtein distance + +Levenshtein distance is a classical distance to compare two strings. Let's write strings a and b as +vectors of characters, meaning a[0] is the first char of a and a[1:] is the rest of the string. 
+Levenshtein distance is defined as: + + Levenshtein(a, b) := + length(a) if length(b) == 0, or + length(b) if length(a) == 0, or + Levenshtein(a[1:], b[1:]) if a[0] == b[0], or + 1 + min(Levenshtein(a[1:], b), Levenshtein(a, b[1:]), Levenshtein(a[1:], b[1:])) + +More information can be found for example on the [Wikipedia page](https://en.wikipedia.org/wiki/Levenshtein_distance). + +## Computing the distance in FHE + +It can be interesting to compute this distance over encrypted data, for example in the banking sector. +We show in [our code](levenshtein_distance.py) how to do that simply, with our FHE modules. + +Available options are: + +``` +usage: levenshtein_distance.py [-h] [--show_mlir] [--show_optimizer] [--autotest] [--autoperf] [--distance DISTANCE DISTANCE] + [--alphabet {string,STRING,StRiNg,ACTG}] [--max_string_length MAX_STRING_LENGTH] + +Levenshtein distance in Concrete. + +optional arguments: + -h, --help show this help message and exit + --show_mlir Show the MLIR + --show_optimizer Show the optimizer outputs + --autotest Run random tests + --autoperf Run benchmarks + --distance DISTANCE DISTANCE + Compute a distance + --alphabet {string,STRING,StRiNg,ACTG} + Setting the alphabet + --max_string_length MAX_STRING_LENGTH + Setting the maximal size of strings +``` + +The different alphabets are: +- string: non capitalized letters, ie `[a-z]*` +- STRING: capitalized letters, ie `[A-Z]*` +- StRiNg: non capitalized letters and capitalized letters +- ACTG: `[ACTG]*`, for DNA analysis + +It is very easy to add a new alphabet in the code. 
+ +The most important usages are: + +- `python levenshtein_distance.py --distance Zama amazing --alphabet StRiNg --max_string_length 7`: Compute the distance between +strings "Zama" and "amazing", considering the chars of "StRiNg" alphabet + +``` + +Running distance between strings 'Zama' and 'amazing' for alphabet StRiNg: + + Computing Levenshtein between strings 'Zama' and 'amazing' - distance is 5, computed in 44.51 seconds + +Successful end +``` + +- `python levenshtein_distance.py --autotest`: Run random tests with the alphabet. + +``` +Making random tests with alphabet string +Letters are abcdefghijklmnopqrstuvwxyz + +Computations in simulation + + Computing Levenshtein between strings '' and '' - OK + Computing Levenshtein between strings '' and 'p' - OK + Computing Levenshtein between strings '' and 'vv' - OK + Computing Levenshtein between strings '' and 'mxg' - OK + Computing Levenshtein between strings '' and 'iuxf' - OK + Computing Levenshtein between strings 'k' and '' - OK + Computing Levenshtein between strings 'p' and 'g' - OK + Computing Levenshtein between strings 'v' and 'ky' - OK + Computing Levenshtein between strings 'f' and 'uoq' - OK + Computing Levenshtein between strings 'f' and 'kwfj' - OK + Computing Levenshtein between strings 'ut' and '' - OK + Computing Levenshtein between strings 'pa' and 'g' - OK + Computing Levenshtein between strings 'bu' and 'sx' - OK + Computing Levenshtein between strings 'is' and 'diy' - OK + Computing Levenshtein between strings 'fz' and 'unda' - OK + Computing Levenshtein between strings 'sem' and '' - OK + Computing Levenshtein between strings 'dbr' and 'o' - OK + Computing Levenshtein between strings 'dgj' and 'hk' - OK + Computing Levenshtein between strings 'ejb' and 'tfo' - OK + Computing Levenshtein between strings 'afa' and 'ygqo' - OK + Computing Levenshtein between strings 'lhcc' and '' - OK + Computing Levenshtein between strings 'uoiu' and 'u' - OK + Computing Levenshtein between strings 'tztt' 
and 'xo' - OK + Computing Levenshtein between strings 'ufsa' and 'mil' - OK + Computing Levenshtein between strings 'uuzl' and 'dzkr' - OK + +Computations in FHE + + Computing Levenshtein between strings '' and '' - OK in 1.29 seconds + Computing Levenshtein between strings '' and 'p' - OK in 0.26 seconds + Computing Levenshtein between strings '' and 'vv' - OK in 0.26 seconds + Computing Levenshtein between strings '' and 'mxg' - OK in 0.22 seconds + Computing Levenshtein between strings '' and 'iuxf' - OK in 0.22 seconds + Computing Levenshtein between strings 'k' and '' - OK in 0.22 seconds + Computing Levenshtein between strings 'p' and 'g' - OK in 1.09 seconds + Computing Levenshtein between strings 'v' and 'ky' - OK in 1.93 seconds + Computing Levenshtein between strings 'f' and 'uoq' - OK in 3.09 seconds + Computing Levenshtein between strings 'f' and 'kwfj' - OK in 3.98 seconds + Computing Levenshtein between strings 'ut' and '' - OK in 0.25 seconds + Computing Levenshtein between strings 'pa' and 'g' - OK in 1.90 seconds + Computing Levenshtein between strings 'bu' and 'sx' - OK in 3.52 seconds + Computing Levenshtein between strings 'is' and 'diy' - OK in 5.04 seconds + Computing Levenshtein between strings 'fz' and 'unda' - OK in 6.53 seconds + Computing Levenshtein between strings 'sem' and '' - OK in 0.22 seconds + Computing Levenshtein between strings 'dbr' and 'o' - OK in 2.78 seconds + Computing Levenshtein between strings 'dgj' and 'hk' - OK in 4.92 seconds + Computing Levenshtein between strings 'ejb' and 'tfo' - OK in 7.18 seconds + Computing Levenshtein between strings 'afa' and 'ygqo' - OK in 9.25 seconds + Computing Levenshtein between strings 'lhcc' and '' - OK in 0.22 seconds + Computing Levenshtein between strings 'uoiu' and 'u' - OK in 3.52 seconds + Computing Levenshtein between strings 'tztt' and 'xo' - OK in 6.45 seconds + Computing Levenshtein between strings 'ufsa' and 'mil' - OK in 9.11 seconds + Computing Levenshtein between strings 
'uuzl' and 'dzkr' - OK in 12.01 seconds + +Successful end +``` + +- `python levenshtein_distance.py --autoperf`: Benchmark with random strings, for the different alphabets. + +``` + +Typical performances for alphabet ACTG, with string of maximal length: + + Computing Levenshtein between strings 'CGGA' and 'GCTA' - OK in 4.77 seconds + Computing Levenshtein between strings 'TTCC' and 'CAAG' - OK in 4.45 seconds + Computing Levenshtein between strings 'TGAG' and 'CATC' - OK in 4.38 seconds + +Typical performances for alphabet string, with string of maximal length: + + Computing Levenshtein between strings 'tsyl' and 'slTz' - OK in 13.76 seconds + Computing Levenshtein between strings 'rdfu' and 'qbam' - OK in 12.89 seconds + Computing Levenshtein between strings 'ngoz' and 'fxGw' - OK in 12.88 seconds + +Typical performances for alphabet STRING, with string of maximal length: + + Computing Levenshtein between strings 'OjgB' and 'snQc' - OK in 23.94 seconds + Computing Levenshtein between strings 'UXWO' and 'rVgF' - OK in 23.69 seconds + Computing Levenshtein between strings 'NsBT' and 'IFuC' - OK in 23.40 seconds + +Typical performances for alphabet StRiNg, with string of maximal length: + + Computing Levenshtein between strings 'ImNJ' and 'zyUB' - OK in 23.71 seconds + Computing Levenshtein between strings 'upAT' and 'XfWs' - OK in 23.52 seconds + Computing Levenshtein between strings 'HVXJ' and 'dQvr' - OK in 23.73 seconds + +Successful end + +``` + +## Complexity analysis + +Let's analyze a bit the complexity of the function `levenshtein_fhe` in FHE. We can see that the +function cannot apply `if`'s as in the clear function `levenshtein_clear`: it has to compute the two +branches (the one for the True, and the one for the False), and finally compute an `fhe.if_then_else` +of the two possible values. This slowdown is not specific to Concrete, it is by nature of FHE, where +encrypted conditions imply such a trick. 
+ +Another interesting part is the impact of the choice of the alphabet: in `run`, we are going to +compare two chars of the alphabet, and return an encrypted boolean to code for the equality / inequality +of these two chars. This is basically done with a single programmable bootstrapping (PBS) of `w+1` +bits, where `w` is the log2 value of the number of chars in the alphabet, rounded up. For example, for +the 'string' alphabet, which has 26 letters, `w = 5` and so we use a signed 6-bit value as input of a +table lookup. For the larger 'StRiNg' alphabet, that's a signed 7-bit PBS. For small DNA alphabet 'ACTG', +it's only signed 3-bit PBS. + +## Benchmarks on hpc7a + +The benchmarks were done using Concrete 2.7 on `hpc7a` machine on AWS, and give: + +``` + +Typical performances for alphabet ACTG, with string of maximal length: + + Computing Levenshtein between strings 'CGGA' and 'GCTA' - OK in 4.77 seconds + Computing Levenshtein between strings 'TTCC' and 'CAAG' - OK in 4.45 seconds + Computing Levenshtein between strings 'TGAG' and 'CATC' - OK in 4.38 seconds + +Typical performances for alphabet string, with string of maximal length: + + Computing Levenshtein between strings 'tsyl' and 'slTz' - OK in 13.76 seconds + Computing Levenshtein between strings 'rdfu' and 'qbam' - OK in 12.89 seconds + Computing Levenshtein between strings 'ngoz' and 'fxGw' - OK in 12.88 seconds + +Typical performances for alphabet STRING, with string of maximal length: + + Computing Levenshtein between strings 'OjgB' and 'snQc' - OK in 23.94 seconds + Computing Levenshtein between strings 'UXWO' and 'rVgF' - OK in 23.69 seconds + Computing Levenshtein between strings 'NsBT' and 'IFuC' - OK in 23.40 seconds + +Typical performances for alphabet StRiNg, with string of maximal length: + + Computing Levenshtein between strings 'ImNJ' and 'zyUB' - OK in 23.71 seconds + Computing Levenshtein between strings 'upAT' and 'XfWs' - OK in 23.52 seconds + Computing Levenshtein between strings 'HVXJ' and 
class Alphabet:
    """Finite set of characters, each mapped to a small integer code.

    Attributes:
        letters: the characters of the alphabet, in encoding order.
        mapping_to_int: map from a character to its integer code. It is built
            per instance, so that two alphabets never share (and pollute) the
            same mapping.
    """

    letters: str
    mapping_to_int: dict

    @staticmethod
    def lowercase():
        """Return the lower-case alphabet."""
        return Alphabet("abcdefghijklmnopqrstuvwxyz")

    @staticmethod
    def uppercase():
        """Return the upper-case alphabet."""
        return Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

    @staticmethod
    def anycase():
        """Return the alphabet made of lower-case then upper-case letters."""
        return Alphabet.lowercase() + Alphabet.uppercase()

    @staticmethod
    def dna():
        """Return the DNA alphabet."""
        return Alphabet("ATGC")

    def __init__(self, letters: str):
        """Create an alphabet whose i-th letter is encoded as integer i."""
        self.letters = letters

        # A fresh dict per instance: a class-level dict would be shared by all
        # the alphabets and would keep the letters of previously built ones
        self.mapping_to_int = {c: i for i, c in enumerate(letters)}

    def __add__(self, other: "Alphabet") -> "Alphabet":
        """Return a new alphabet with the letters of `self` followed by those of `other`."""
        return Alphabet(self.letters + other.letters)

    @staticmethod
    def return_available_alphabets() -> list:
        """Return available alphabets."""
        return ["string", "STRING", "StRiNg", "ACTG"]

    @staticmethod
    def init_by_name(alphabet_name: str) -> "Alphabet":
        """Build the alphabet designated by one of the available names."""
        assert (
            alphabet_name in Alphabet.return_available_alphabets()
        ), f"Unknown alphabet {alphabet_name}"

        builders = {
            "string": Alphabet.lowercase,
            "STRING": Alphabet.uppercase,
            "StRiNg": Alphabet.anycase,
            "ACTG": Alphabet.dna,
        }
        return builders[alphabet_name]()

    def random_pick_in_values(self) -> int:
        """Pick the integer-encoding of a random char in an alphabet."""
        return numpy.random.randint(len(self.mapping_to_int))

    def _random_string(self, length: int) -> str:
        """Pick a random string of `length` chars in the alphabet."""
        # Materialize the candidate chars once instead of once per drawn char
        candidates = list(self.mapping_to_int)
        return "".join(random.choice(candidates) for _ in range(length))

    def prepare_random_patterns(self, len_min: int, len_max: int, nb_strings: int) -> list:
        """Prepare random pairs of strings, for every pair of lengths in [len_min, len_max].

        Returns a list of `nb_strings * (len_max - len_min + 1) ** 2` tuples
        `(string_1, string_2)`.
        """
        assert len(self.mapping_to_int) > 0, "Mapping not defined"

        list_patterns = []
        for _ in range(nb_strings):
            for length_1 in range(len_min, len_max + 1):
                for length_2 in range(len_min, len_max + 1):
                    list_patterns.append(
                        (
                            self._random_string(length_1),
                            self._random_string(length_2),
                        )
                    )

        return list_patterns

    def encode(self, string: str) -> tuple:
        """Encode a string, ie map it to integers using the alphabet.

        Raises:
            ValueError: if a char of `string` does not belong to the alphabet.
        """

        assert len(self.mapping_to_int) > 0, "Mapping not defined"

        for si in string:
            if si not in self.mapping_to_int:
                raise ValueError(
                    f"Char {si} of {string} is not in alphabet {list(self.mapping_to_int.keys())}, please choose the right --alphabet"
                )

        return tuple(self.mapping_to_int[si] for si in string)
str, b: str) -> tuple: + """Encode a string, ie map it to integers using the alphabet, and then encrypt the integers.""" + a_as_int = self.alphabet.encode(a) + b_as_int = self.alphabet.encode(b) + + a_enc = tuple(self.module.equal.encrypt(ai, None)[0] for ai in a_as_int) + b_enc = tuple(self.module.equal.encrypt(None, bi)[1] for bi in b_as_int) + + return a_enc, b_enc + + def _compile_module(self, args): + """Compile the FHE module.""" + assert len(self.alphabet.mapping_to_int) > 0, "Mapping not defined" + + inputset_equal = [ + ( + self.alphabet.random_pick_in_values(), + self.alphabet.random_pick_in_values(), + ) + for _ in range(1000) + ] + inputset_mix = [ + ( + numpy.random.randint(2), + numpy.random.randint(args.max_string_length), + numpy.random.randint(args.max_string_length), + numpy.random.randint(args.max_string_length), + numpy.random.randint(args.max_string_length), + ) + for _ in range(1000) + ] + + self.module = LevenshsteinModule.compile( + { + "equal": inputset_equal, + "mix": inputset_mix, + "constant": [i for i in range(len(self.alphabet.mapping_to_int))], + }, + show_mlir=args.show_mlir, + p_error=10**-20, + show_optimizer=args.show_optimizer, + comparison_strategy_preference=fhe.ComparisonStrategy.ONE_TLU_PROMOTED, + min_max_strategy_preference=fhe.MinMaxStrategy.ONE_TLU_PROMOTED, + ) + + def _compute_in_simulation(self, list_patterns: list): + """Check equality between distance in simulation and clear distance.""" + for a, b in list_patterns: + + print(f" Computing Levenshtein between strings '{a}' and '{b}'", end="") + + a_as_int = self.alphabet.encode(a) + b_as_int = self.alphabet.encode(b) + + l1_simulate = levenshtein_simulate(self.module, a_as_int, b_as_int) + l1_clear = levenshtein_clear(a_as_int, b_as_int) + + assert l1_simulate == l1_clear, f" {l1_simulate=} and {l1_clear=} are different" + print(" - OK") + + def _compute_in_fhe(self, list_patterns: list, show_distance: bool = False): + """Check equality between distance in FHE and 
clear distance.""" + self.module.keygen() + + # Checks in FHE + for a, b in list_patterns: + + print(f" Computing Levenshtein between strings '{a}' and '{b}'", end="") + + a_enc, b_enc = self._encode_and_encrypt_strings(a, b) + + time_begin = time.time() + l1_fhe_enc = levenshtein_fhe(self.module, a_enc, b_enc) + time_end = time.time() + + l1_fhe = self.module.mix.decrypt(l1_fhe_enc) + + l1_clear = levenshtein_clear(a, b) + + assert l1_fhe == l1_clear, f" {l1_fhe=} and {l1_clear=} are different" + + if not show_distance: + print(f" - OK in {time_end - time_begin:.2f} seconds") + else: + print(f" - distance is {l1_fhe}, computed in {time_end - time_begin:.2f} seconds") + + +# Module FHE +@fhe.module() +class LevenshsteinModule: + @fhe.function({"x": "encrypted", "y": "encrypted"}) + def equal(x, y): + """Assert equality between two chars of the alphabet.""" + return x == y + + @fhe.function({"x": "clear"}) + def constant(x): + return fhe.zero() + x + + @fhe.function( + { + "is_equal": "encrypted", + "if_equal": "encrypted", + "case_1": "encrypted", + "case_2": "encrypted", + "case_3": "encrypted", + } + ) + def mix(is_equal, if_equal, case_1, case_2, case_3): + """Compute the min of (case_1, case_2, case_3), and then return `if_equal` if `is_equal` is + True, or the min in the other case.""" + min_12 = numpy.minimum(case_1, case_2) + min_123 = numpy.minimum(min_12, case_3) + + return fhe.if_then_else(is_equal, if_equal, 1 + min_123) + + # There is a single output in mix: it can go to + # - input 1 of mix + # - input 2 of mix + # - input 3 of mix + # - input 4 of mix + # or just be the final output + # + # There is a single output of equal, it goes to input 0 of mix + composition = fhe.Wired( + [ + fhe.Wire(fhe.AllOutputs(equal), fhe.Input(mix, 0)), + fhe.Wire(fhe.AllOutputs(mix), fhe.Input(mix, 1)), + fhe.Wire(fhe.AllOutputs(mix), fhe.Input(mix, 2)), + fhe.Wire(fhe.AllOutputs(mix), fhe.Input(mix, 3)), + fhe.Wire(fhe.AllOutputs(mix), fhe.Input(mix, 4)), + 
@lru_cache
def levenshtein_clear(x: str, y: str):
    """Reference Levenshtein distance, computed in the clear by recursion on the heads."""
    # As soon as one operand is empty, the distance is the length of the other
    if len(x) == 0 or len(y) == 0:
        return max(len(x), len(y))

    tail_x = x[1:]
    tail_y = y[1:]

    # Same first char: nothing to pay, continue on the tails
    if x[0] == y[0]:
        return levenshtein_clear(tail_x, tail_y)

    # Otherwise pay one operation, and keep the cheapest of the three continuations
    candidates = (
        levenshtein_clear(tail_x, y),
        levenshtein_clear(x, tail_y),
        levenshtein_clear(tail_x, tail_y),
    )

    return min(candidates) + 1
def main():
    """Run what the user requested on the command line.

    Depending on the options, this runs random tests in simulation then in FHE
    (--autotest), benchmarks on every alphabet (--autoperf) and / or the
    computation of one distance in FHE (--distance).
    """
    print()

    # Options by the user
    args = manage_args()

    # Do what the user requested
    if args.autotest:

        alphabet = Alphabet.init_by_name(args.alphabet)
        levenshtein_distance = LevenshteinDistance(alphabet, args)

        print(f"Making random tests with alphabet {args.alphabet}")
        print(f"Letters are {alphabet.letters}\n")

        list_patterns = alphabet.prepare_random_patterns(0, args.max_string_length, 1)
        print("Computations in simulation\n")
        levenshtein_distance.calculate_list(list_patterns, mode="simulate")
        print("\nComputations in FHE\n")
        levenshtein_distance.calculate_list(list_patterns, mode="fhe")
        print("")

    if args.autoperf:
        for alphabet_name in ["ACTG", "string", "STRING", "StRiNg"]:
            print(
                f"Typical performances for alphabet {alphabet_name}, with string of maximal length:\n"
            )

            alphabet = Alphabet.init_by_name(alphabet_name)
            levenshtein_distance = LevenshteinDistance(alphabet, args)
            list_patterns = alphabet.prepare_random_patterns(
                args.max_string_length, args.max_string_length, 3
            )
            levenshtein_distance.calculate_list(list_patterns, mode="fhe")
            print("")

    # Identity test: None is a singleton
    if args.distance is not None:
        print(
            f"Running distance between strings '{args.distance[0]}' and '{args.distance[1]}' for alphabet {args.alphabet}:\n"
        )

        # The module is compiled for a maximal length: make sure the inputs fit in
        longest_input = max(len(args.distance[0]), len(args.distance[1]))
        if longest_input > args.max_string_length:
            args.max_string_length = longest_input
            print(
                "Warning, --max_string_length was smaller than lengths of the input strings, fixing it"
            )

        alphabet = Alphabet.init_by_name(args.alphabet)
        levenshtein_distance = LevenshteinDistance(alphabet, args)
        levenshtein_distance.calculate(
            args.distance[0], args.distance[1], mode="fhe", show_distance=True
        )
        print("")

    print("Successful end\n")