From b98ad394789219181709edf9f60716b8e3a185b1 Mon Sep 17 00:00:00 2001 From: Benoit Chevallier-Mames Date: Mon, 22 Jul 2024 15:58:02 +0200 Subject: [PATCH] docs(frontend): moving tutorials in the same directory --- docs/application-tutorial/sha256.ipynb | 763 ---------------- docs/tutorials/see-all-tutorials.md | 12 +- .../{game_of_life.md => README.md} | 0 .../dynamic-size.py | 0 .../key_value_database.ipynb | 105 +-- .../static-size.py | 0 .../{levenshtein_distance.md => README.md} | 0 .../examples/sha1/{sha1.md => README.md} | 0 .../examples/sha256/sha256.ipynb | 844 ++++++++++++++++++ .../{xor_distance.md => README.md} | 0 10 files changed, 897 insertions(+), 827 deletions(-) delete mode 100644 docs/application-tutorial/sha256.ipynb rename frontends/concrete-python/examples/game_of_life/{game_of_life.md => README.md} (100%) rename frontends/concrete-python/examples/{key-value-database => key_value_database}/dynamic-size.py (100%) rename {docs/application-tutorial => frontends/concrete-python/examples/key_value_database}/key_value_database.ipynb (93%) rename frontends/concrete-python/examples/{key-value-database => key_value_database}/static-size.py (100%) rename frontends/concrete-python/examples/levenshtein_distance/{levenshtein_distance.md => README.md} (100%) rename frontends/concrete-python/examples/sha1/{sha1.md => README.md} (100%) create mode 100644 frontends/concrete-python/examples/sha256/sha256.ipynb rename frontends/concrete-python/examples/xor_distance/{xor_distance.md => README.md} (100%) diff --git a/docs/application-tutorial/sha256.ipynb b/docs/application-tutorial/sha256.ipynb deleted file mode 100644 index c0b84aaead..0000000000 --- a/docs/application-tutorial/sha256.ipynb +++ /dev/null @@ -1,763 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "_FTzVxUkjQno" - }, - "source": [ - "# SHA-256 Implementation Using Concrete\n", - "\n", - "In this tutorial, we will explore the implementation of 
SHA-256, a widely used hashing algorithm, using concrete-python. Details about the algorithm can be found [here](https://en.wikipedia.org/wiki/SHA-2).\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "zXozpJvmcBH1", - "outputId": "79dfc00b-10cc-4ffd-d4b9-a10f18d8d01e" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Requirement already satisfied: concrete-python in /usr/local/lib/python3.10/dist-packages (1.0.0)\n", - "Requirement already satisfied: numpy>=1.23 in /usr/local/lib/python3.10/dist-packages (from concrete-python) (1.24.3)\n", - "Requirement already satisfied: scipy>=1.10 in /usr/local/lib/python3.10/dist-packages (from concrete-python) (1.10.1)\n", - "Requirement already satisfied: torch>=1.13 in /usr/local/lib/python3.10/dist-packages (from concrete-python) (2.0.0+cu118)\n", - "Requirement already satisfied: networkx>=2.6 in /usr/local/lib/python3.10/dist-packages (from concrete-python) (3.1)\n", - "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->concrete-python) (4.5.0)\n", - "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->concrete-python) (2.0.0)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->concrete-python) (1.11.1)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->concrete-python) (3.1.2)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->concrete-python) (3.12.0)\n", - "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13->concrete-python) (3.25.2)\n", - 
"Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13->concrete-python) (16.0.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13->concrete-python) (2.1.2)\n", - "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13->concrete-python) (1.3.0)\n" - ] - } - ], - "source": [ - "# Uncomment this line to install dependency\n", - "# ! pip install concrete-python\n", - "\n", - "# Required libraries\n", - "from concrete import fhe\n", - "import numpy as np" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "oCfjYazikbm_" - }, - "source": [ - "## Data Representation\n", - "As mentioned in the wiki page, all variables are $32$-bit unsigned integers. Additions should be calculated modulo $2^{32}$.\n", - "\n", - "While addition of 32-bit numbers are possible in the library, any other operations such modulizing, rotations, and bitwise operations are currently not possible. These operations require a lookup table with 32-bit inputs, but as of writing this tutorial, concrete-python supports up to 16-bit lookup tables. Higher precision lookup tables is still a research challenge in the homomorphic world and such a table would be dificult to compile and store at this moment.\n", - "\n", - "Thus, we need to break all the variables to **chunks** and work at the chunk level. Throughtout the code, *WIDTH* refers to the bitwidth of a chunk, and *NUM_CHUNKS* shows the number of chunks we need to represent a 32-bit data. These parameters are set at the begining of the code. We vary these parameters to see the impact of the *WIDTH* on the performance of the compiler and the circuit.\n", - "\n", - "![chunks.jpg]()\n", - "\n", - "Figure 1: Shows a break down of 32 bit of data into 4 chunks of 8 bit. This is not the only way to chunk the input." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "yaz8cNzjQ1UW" - }, - "outputs": [], - "source": [ - "# Bitwidth of each chunk and number of chunks in each 32-bit number.\n", - "WIDTH, NUM_CHUNKS= 4, 8\n", - "\n", - "## Some other valid parameter sets\n", - "# WIDTH, NUM_CHUNKS= 8, 4\n", - "# WIDTH, NUM_CHUNKS= 2, 16\n", - "\n", - "assert (WIDTH * NUM_CHUNKS == 32)\n", - "\n", - "def break_down_data(data, data_size):\n", - " all_chunks = [\n", - " [ (x >> i*WIDTH)%(2**WIDTH) for i in range(data_size//WIDTH)[::-1] ]\n", - " for x in data\n", - " ]\n", - " return all_chunks\n", - "\n", - "def reshape_data(data):\n", - " return np.array(data).reshape(-1, NUM_CHUNKS)\n", - "\n", - "def chunks_to_uint32(chunks):\n", - " return int(sum([2**((NUM_CHUNKS-1-i)*WIDTH)*x for i, x in enumerate(chunks)]))\n", - "\n", - "def chunks_to_hexarray(chunks):\n", - " hexes = [hex(chunks_to_uint32(word))[2:] for word in chunks]\n", - " hexes = ['0'*(8-len(y))+y for y in hexes] #Appending leadning zero to the ones that are less than 8 characters TODO: write better\n", - " result = \"\".join(hexes)\n", - " return result\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "u7pA-B3As9u4" - }, - "source": [ - "### Creating Chunks\n", - "There are two list of constants in the algorithm, K and H. Before executing the algorithm, we need to break them to chunks using `split_to_chunks` function.\n", - "\n", - "\n", - "The input of the algorithm is arbitrary bytes. We might need to break each byte to smaller chunks based on the value of *WIDTH* after padding the data as per instructed by the algorithm. `break_down_data` function returns a numpy array of shape (48,NUM_CHUNKS)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "b8rlvVf42CIa" - }, - "source": [ - "## Operations\n", - "Now that the data is stores as chunks, we must modify all operations we need to work at the level of chunks. 
In this section we explain how we implemented the required operations. The main three category of operations that we need to implement SHA-256 are:\n", - "\n", - "* Bitwise operations (AND, OR, XOR, NEGATE)\n", - "* Shifts and Rotations\n", - "* Modular Addition " - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "zlM1RN-NnjDn" - }, - "source": [ - "### Bitwise Operations\n", - "Bitwise operations are easily implemented in concrete-numpy. A bitwise operation over a 32-bit number is equivalent to the same operation over the chunks." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "CxCwJOao2KCt" - }, - "source": [ - "### Rotation and Shifts\n", - "To understand how rotations work, consider a small example with 4 chunks of width 4, representing a 16-bit number, as shown in Figure 1. Most significant bits are located at index 0. So a 16-bit number will be `[[chunk_0], [chunk_1], [chunk_2], [chunk_3]]` with WIDTH=4. There are two possible scenario for rotations:\n", - "\n", - "1. Any rotation by a multiple of WIDTH (in this case, 4) will result in rotating the array of chunks. For example, right rotate(4) will be `[[chunk_3], [chunk_0], [chunk_1], [chunk_2]]`.\n", - "\n", - "2. For rotations less than WIDTH, for example `y`, we break every chunk into two parts of bitlength, `WIDTH-y` and `y`. We need to add the low `y`-bits of each chunk with the high `WIDTH-y` bits of the next chunk. Figure 2 illustrated this process. We leverage two lookup tables to extract the two segments of each chunk.\n", - "\n", - "\n", - "3. Rotations by other amounts are broken into the two steps described above.\n", - "\n", - "![Rotation.jpg]()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "575ogsJhFDIo" - }, - "source": [ - "### Shift\n", - "The shift operation is the same as rotation, but we prepend the encrypted scalar zero when we move the bits to the right." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "TRAFRZime-Jv" - }, - "outputs": [], - "source": [ - "def right_rotate_list_of_chunks(list_to_rotate, amount):\n", - " return np.concatenate((\n", - " list_to_rotate[-amount:],\n", - " list_to_rotate[:-amount]\n", - " ))\n", - " \n", - "def right_shift_list_of_chunks(list_to_rotate, amount):\n", - " return np.concatenate((\n", - " [0] * list_to_rotate[-amount:].shape[0] ,\n", - " list_to_rotate[:- amount]\n", - " ))\n", - " \n", - "def left_shift_list_of_chunks(list_to_rotate, amount):\n", - " return np.concatenate((\n", - " list_to_rotate[amount:] ,\n", - " [0] * list_to_rotate[:amount].shape[0]\n", - " ))\n", - "\n", - "def rotate_less_than_width(chunks, shift):\n", - " raised_low_bits = fhe.univariate(lambda x: (x % 2**shift) << (WIDTH-shift))(chunks)\n", - " shifted_raised_low_bits = right_rotate_list_of_chunks(raised_low_bits, 1)\n", - "\n", - " high_bits = chunks >> shift\n", - " return shifted_raised_low_bits + high_bits\n", - "\n", - "def right_rotate(chunks, rotate_amount):\n", - " x = rotate_amount // WIDTH\n", - " y = rotate_amount % WIDTH\n", - " if x != 0: \n", - " rotated_chunks = right_rotate_list_of_chunks(chunks, x)\n", - " else:\n", - " rotated_chunks = chunks\n", - " if y != 0:\n", - " rotated = rotate_less_than_width(rotated_chunks, y)\n", - " else:\n", - " rotated = rotated_chunks\n", - "\n", - " return rotated\n", - "\n", - "def right_shift(chunks, shift_amount):\n", - " x = shift_amount // WIDTH\n", - " y = shift_amount % WIDTH\n", - " if x != 0:\n", - " shifted_chunks = right_shift_list_of_chunks(chunks, x)\n", - " else:\n", - " shifted_chunks = chunks\n", - " if y != 0:\n", - " # shift within chunks\n", - " raised_low_bits = fhe.univariate(lambda x: (x % 2**y) << (WIDTH-y))(shifted_chunks)\n", - " shifted_raised_low_bits = right_shift_list_of_chunks(raised_low_bits, 1)\n", - " high_bits = shifted_chunks >> y\n", - " result = shifted_raised_low_bits + 
high_bits\n", - " else:\n", - " result = shifted_chunks\n", - " return result" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "SKg8mKFOPXSV" - }, - "source": [ - "### Modular 32-bit Addition\n", - "Modular 32-bit addition is frequently used in SHA256. While Concrete supports additions of 32-bit numbers, modulizing the result requires a lookup table which is too large for Concrete. Hence, the addition must be done over chunks.\n", - "\n", - "Below is the function to add two 32-bit numbers mod $2^{32}$." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "EJEPvp2wQms9" - }, - "outputs": [], - "source": [ - "def add_two_32_bits(a,b):\n", - " added = np.sum([a,b], axis=0)\n", - "\n", - " for i in range(NUM_CHUNKS):\n", - " results = added % (2 ** WIDTH)\n", - " if i < NUM_CHUNKS-1:\n", - " carries = added >> WIDTH\n", - " added = left_shift_list_of_chunks(carries, 1) + results\n", - "\n", - " return results\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "Uo6o_QMQn_fw" - }, - "outputs": [], - "source": [ - "# Testing the addition function, adding four 32-bit numbers\n", - "test_inputs = np.random.randint(0,2**32, size=(2,))\n", - "input_chunks = break_down_data(test_inputs, 32)\n", - "\n", - "assert(chunks_to_uint32(add_two_32_bits(input_chunks[0], input_chunks[1]))== np.sum(test_inputs) % (2**32))" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "IOr8DTRJRYTl" - }, - "source": [ - "Adding two 4-bit numbers results in a 5-bit number. We then use two lookup tables:\n", - "\n", - "* `extract_carry` which extracts the carry of adding two chunks\n", - "* `extract_result` which extracts the 4-bit chunk which results from adding two chunks (without the carry)\n", - "\n", - "Each carry must now be added to the chunk next chunk and this process is repeated for as many chunks as there are. 
The figure below illustrates this process.\n", - "\n", - "![add-chunks.png]()\n", - "\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "4-dQk0wbtPOe" - }, - "source": [ - "The benefit of this addition algorithm is that it can be extended to the case where more two 32-bit numbers are added. The only difference is that the carry from the first iteration of the loop can be larger than 1. Specifically, by adding $k$ 4-bit numbers, the carry can be as big as $\\log_2 k$. For correctness, $\\log_2 k$ must be less than 4 or $k<16$.\n", - "\n", - "In our implementation of SHA-256, we only have two input and four input additions, so we only implement those.\n", - "\n", - "For four input addition, he first iteration of the loop, we use a different lookup table that extract a 2-bit carry and rest of the chunk. The rest of the algorithm does not change." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "id": "obO8wHRbXHfj" - }, - "outputs": [], - "source": [ - "def add_four_32_bits(a,b,c,d):\n", - " added = np.sum([a,b,c,d], axis=0)\n", - " \n", - " # First iteration of the loop is seperated\n", - " carries = added >> WIDTH\n", - " results = added % (2**WIDTH)\n", - " shifted_carries = left_shift_list_of_chunks(carries, 1)\n", - " added = shifted_carries + results\n", - "\n", - " for i in range(1,NUM_CHUNKS):\n", - " results = added % (2**WIDTH)\n", - " \n", - " # In the last iteration, carries need not be calculated\n", - " if i != NUM_CHUNKS-1: \n", - " carries = added >> WIDTH\n", - " shifted_carries = left_shift_list_of_chunks(carries, 1)\n", - " added = shifted_carries + results\n", - "\n", - " return results" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "zcwnDdPFdqE1" - }, - "outputs": [], - "source": [ - "# Testing the addition function, adding four 32-bit numbers\n", - "\n", - "for _ in range(1000):\n", - " test_inputs = np.random.randint(0,2**32, size=(4,))\n", - 
" input_chunks = break_down_data(test_inputs, 32)\n", - "\n", - " assert(chunks_to_uint32(add_four_32_bits(input_chunks[0], input_chunks[1], input_chunks[2], input_chunks[3]))== np.sum(test_inputs) % (2**32))" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "1g6eEdhGoJl9" - }, - "source": [ - "## Operations for SHA-256\n", - "\n", - "Using the basic operations from the previous section, we can now implement all the necessary functions for SHA256" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "uo7HfO1DpVFK" - }, - "outputs": [], - "source": [ - "# Used in the expansion\n", - "\n", - "def s0(w):\n", - " return right_rotate(w, 7) ^ right_rotate(w, 18) ^ right_shift(w, 3)\n", - "\n", - "def s1(w):\n", - " return right_rotate(w, 17) ^ right_rotate(w, 19) ^ right_shift(w, 10)\n", - "\n", - "# Used in main loop\n", - "\n", - "def S0(a_word):\n", - " return right_rotate(a_word, 2) ^ right_rotate(a_word, 13) ^ right_rotate(a_word, 22)\n", - "\n", - "def S1(e_word):\n", - " return right_rotate(e_word, 6) ^ right_rotate(e_word, 11) ^ right_rotate(e_word, 25)\n", - "\n", - "def Ch(e_word, f_word, g_word):\n", - " return (e_word & f_word) ^ ((2**WIDTH-1 - e_word) & g_word)\n", - "\n", - "def Maj(a_word, b_word, c_word):\n", - " return (a_word & b_word) ^ (a_word & c_word) ^ (b_word & c_word)\n", - "\n", - "def main_loop(args, w_i_plus_k_i):\n", - " a, b, c, d, e, f, g, h = args\n", - " temp1 = add_four_32_bits(h,S1(e),Ch(e, f, g), w_i_plus_k_i)\n", - " temp2 = add_two_32_bits(S0(a), Maj(a, b, c))\n", - " new_a = add_two_32_bits(temp1, temp2)\n", - " new_e = add_two_32_bits(d, temp1)\n", - " return np.array([new_a, a, b, c, new_e, e, f, g])" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "biM997KmvwUL" - }, - "source": [ - "We also need a function to pad the input as the first step of SHA256." 
- ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "fZ-3sEH5vopA" - }, - "source": [ - "Moreover, we need a function to parse the input given to the program. The input is given as bytes, but the chunks might be smaller. We extract smaller chunks from bytes using lookup tables." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "L4leg-z_skkU" - }, - "source": [ - "## Bringing it all together\n", - "Using all the components from the above, we can implement SHA256 as shown below." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "id": "tmSfvdpyrwUx" - }, - "outputs": [], - "source": [ - "K = [\n", - " 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,\n", - " 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,\n", - " 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,\n", - " 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,\n", - " 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,\n", - " 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,\n", - " 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,\n", - " 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2\n", - "]\n", - "H = [0x6a09e667,0xbb67ae85,0x3c6ef372,0xa54ff53a,0x510e527f,0x9b05688c,0x1f83d9ab,0x5be0cd19]" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "NHGCiC-Gk_tw" - }, - "outputs": [], - "source": [ - "k_in = reshape_data(break_down_data(K, 32))\n", - "h_in = reshape_data(break_down_data(H, 32))" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "id": "yTiMkmBsHmKy" - }, - "outputs": [], - 
"source": [ - "def uint64_to_bin(uint64 : int):\n", - " return (\"\".join([str(uint64 >> i & 1) for i in range(63, -1, -1)]))\n", - "\n", - "def sha256_preprocess(text):\n", - " \"\"\"\n", - " Takes a message of arbitrary length and returns a message\n", - " of length that is a multiple of 512 bits, with the original message padded\n", - " with a 1 bit, followed by 0 bits, followed by the original message length\n", - " in bits\n", - " \"\"\"\n", - " data = text\n", - " # convert to uint4 and group into 32 bit words (8 uint4s)\n", - " # #log (\"data is:\", data, data.shape)\n", - " message_len = data.shape[0] * 8 # denoted as 'l' in spec\n", - " # find padding length 'k'\n", - " k = (((448 - 1 - message_len) % 512) + 512) % 512 \n", - " # #log (\"k is:\", k)\n", - " zero_pad_width_in_bits = k\n", - " padstring = \"1\" + \"0\" * zero_pad_width_in_bits + str(uint64_to_bin(message_len))\n", - " #log (\"padstring size:\", len(padstring))\n", - " #log (\"padstring is:\", padstring)\n", - "\n", - " total_size = len(padstring) + message_len\n", - " #log (\"total size:\", total_size)\n", - " assert total_size % 512 == 0\n", - "\n", - " pad = np.array([int(padstring[i:i+8], 2) for i in range(0, len(padstring), 8)], dtype=np.uint8)\n", - " padded = np.concatenate((data, pad))\n", - " words = break_down_data(padded, 8)\n", - " chunks = reshape_data(words)\n", - " return chunks" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "id": "3ox6Zs-ysoLr" - }, - "outputs": [], - "source": [ - "# Number of rounds must be 64 to have correct SHA256\n", - "# If looking to get a faster run, reduce the number of rounds (but it will not be correct)\n", - "\n", - "def sha256(data, number_of_rounds=64):\n", - " h_chunks = fhe.zeros((len(h_in), NUM_CHUNKS))\n", - " k_chunks = fhe.zeros((len(k_in), NUM_CHUNKS))\n", - " h_chunks += h_in\n", - " k_chunks += k_in\n", - "\n", - " num_of_iters = data.shape[0]*32//512\n", - " for chunk_iter in range(0, num_of_iters):\n", 
- " \n", - " # Initializing the variables\n", - " chunk = data[chunk_iter*16:(chunk_iter+1)*16]\n", - " w = [None for _ in range(number_of_rounds)]\n", - " # Starting the main loop and expansion\n", - " working_vars = h_chunks\n", - " for j in range(0, number_of_rounds):\n", - " if j<16:\n", - " w[j] = chunk[j]\n", - " else:\n", - " w[j] = add_four_32_bits(w[j-16], s0(w[j-15]), w[j-7], s1(w[j-2]))\n", - " w_i_k_i = add_two_32_bits(w[j], k_chunks[j])\n", - " working_vars = main_loop(working_vars,w_i_k_i)\n", - " \n", - " # Accumulating the results\n", - " for j in range(8):\n", - " h_chunks[j] = add_two_32_bits(h_chunks[j], working_vars[j])\n", - " return h_chunks" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "w89rhSOh4In2" - }, - "source": [ - "We can test the correctness of this function as below (this is not in encrypted form yet)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "006LZp7c0yBA", - "outputId": "31588127-23e9-4b49-e481-d14842e336e7" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " SHA256: a412c46b0be134c593b0ad520d4a4c4e1d8aecca799be0be2c4d233ccf455cb7\n", - "Our SHA256: a412c46b0be134c593b0ad520d4a4c4e1d8aecca799be0be2c4d233ccf455cb7\n", - "Match: True\n" - ] - } - ], - "source": [ - "import hashlib\n", - "text = (\n", - " b\"Lorem ipsum dolor sit amet, consectetur adipiscing elit. \"\n", - " b\"Curabitur bibendum, urna eu bibendum egestas, neque augue eleifend odio, et sagittis viverra. 
and more than 150\"\n", - ")\n", - "\n", - "result = sha256(sha256_preprocess(np.frombuffer(text, dtype=np.uint8)))\n", - "\n", - "m = hashlib.sha256()\n", - "m.update(text)\n", - "\n", - "print(\" SHA256:\", m.hexdigest())\n", - "print(\"Our SHA256:\", chunks_to_hexarray(result))\n", - "print(\"Match:\", chunks_to_hexarray(result)==m.hexdigest())" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "1uHN9GXgla_z" - }, - "outputs": [], - "source": [ - "class HomomorphicSHA:\n", - " circuit: fhe.Circuit\n", - " def __init__(self, input_size_in_bytes=150, number_of_rounds=64) -> None:\n", - " self.input_size_in_bytes=input_size_in_bytes\n", - " assert 0 <= number_of_rounds <= 64, \"Number of rounds must be betweem zero and 64\"\n", - " self.number_of_rounds=number_of_rounds\n", - " inputset=[\n", - " sha256_preprocess(np.random.randint(0, 2**8, size=(input_size_in_bytes,)))\n", - " for _ in range(100)\n", - " ]\n", - " # Compilation of the circuit should take a few minutes\n", - " compiler = fhe.Compiler(lambda data: sha256(data, self.number_of_rounds), {\"data\": \"encrypted\"})\n", - " self.circuit = compiler.compile(\n", - " inputset=inputset,\n", - " configuration=fhe.Configuration(\n", - " enable_unsafe_features=True,\n", - " use_insecure_key_cache=True,\n", - " insecure_key_cache_location=\".keys\",\n", - " dataflow_parallelize=True,\n", - " ),\n", - " verbose=False,\n", - " )\n", - " \n", - " def getSHA(self, data):\n", - " assert len(data) == self.input_size_in_bytes, f\"Input size is not correct, should be {self.input_size_in_bytes} bytes/characters\"\n", - " return self.circuit.encrypt_run_decrypt(sha256_preprocess(data))\n", - "\n", - " def getPlainSHA(self, data):\n", - " return sha256(sha256_preprocess(data), self.number_of_rounds)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "SpxY6dScee-k" - }, - "source": [ - "Now we are ready to compile the circuit! 
Note that **the compilation will take a long time**, so if you are looking to get a test run, you can set the number of rounds to something smaller than 64." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "id": "P0cMOZUGee-k" - }, - "outputs": [], - "source": [ - "# Warning: This will compile the circuit and will take a few minutes\n", - "\n", - "input_size_in_bytes = 150\n", - "running_small_example=True\n", - "\n", - "if running_small_example:\n", - " number_of_rounds = 2\n", - " sha = HomomorphicSHA(input_size_in_bytes, number_of_rounds)\n", - "else:\n", - " sha = HomomorphicSHA(input_size_in_bytes)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "zz1rd7VWee-k" - }, - "source": [ - "And after compilation, we are ready to run the circuit. Remember that the input size has to match what you gave in the previous cell. Our function will check this first to make sure the input is of the correct size. " - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "EkF0UxTcv_cQ", - "outputId": "c4e2c710-02bc-40e2-a921-4a29ac88380b" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "This cell is disabled. It can takes hours. If you want to run this cell, set accept_a_very_long_run=True\n" - ] - } - ], - "source": [ - "# WARNING: This takes a LONG time\n", - "accept_a_very_long_run = False\n", - "if not accept_a_very_long_run:\n", - " print(\"This cell is disabled. It can takes hours. If you want to run this cell, set accept_a_very_long_run=True\")\n", - "else:\n", - " text = (\n", - " b\"Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
\"\n", - " b\"Curabitur bibendum, urna eu bibendum egestas, neque augue eleifend odio, et sagittis viverra.\"\n", - " )\n", - " input_bytes = np.frombuffer(text, dtype=np.uint8)\n", - " encrypted_evaluation = sha.getSHA(input_bytes)\n", - "\n", - " print(\"Encrypted Evaluation: \", chunks_to_hexarray(encrypted_evaluation))\n", - " print(\" Plain Evaluation: \", chunks_to_hexarray(sha.getPlainSHA(input_bytes)))" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3.10.7 64-bit", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.7" - }, - "vscode": { - "interpreter": { - "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" - } - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/docs/tutorials/see-all-tutorials.md b/docs/tutorials/see-all-tutorials.md index b89bd8c163..66dd00dafb 100644 --- a/docs/tutorials/see-all-tutorials.md +++ b/docs/tutorials/see-all-tutorials.md @@ -9,12 +9,12 @@ #### Code examples on GitHub -* [Key value database](../application-tutorial/key_value_database.ipynb) -* [SHA-256 ](../application-tutorial/sha256.ipynb) -* [Game of Life](../../frontends/concrete-python/examples/game_of_life/game_of_life.md) -* [XOR distance](../../frontends/concrete-python/examples/xor_distance/xor_distance.md) -* [SHA1 with Modules](../../frontends/concrete-python/examples/sha1/sha1.md) -* [Levenshtein distance with Modules](../../frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.md) +* [Key value database](../../frontends/concrete-python/examples/key_value_database/key_value_database.ipynb) +* [SHA-256 ](../../frontends/concrete-python/examples/sha256/sha256.ipynb) +* [Game of 
Life](../../frontends/concrete-python/examples/game_of_life/README.md) +* [XOR distance](../../frontends/concrete-python/examples/xor_distance/README.md) +* [SHA1 with Modules](../../frontends/concrete-python/examples/sha1/README.md) +* [Levenshtein distance with Modules](../../frontends/concrete-python/examples/levenshtein_distance/README.md) #### Blog tutorials diff --git a/frontends/concrete-python/examples/game_of_life/game_of_life.md b/frontends/concrete-python/examples/game_of_life/README.md similarity index 100% rename from frontends/concrete-python/examples/game_of_life/game_of_life.md rename to frontends/concrete-python/examples/game_of_life/README.md diff --git a/frontends/concrete-python/examples/key-value-database/dynamic-size.py b/frontends/concrete-python/examples/key_value_database/dynamic-size.py similarity index 100% rename from frontends/concrete-python/examples/key-value-database/dynamic-size.py rename to frontends/concrete-python/examples/key_value_database/dynamic-size.py diff --git a/docs/application-tutorial/key_value_database.ipynb b/frontends/concrete-python/examples/key_value_database/key_value_database.ipynb similarity index 93% rename from docs/application-tutorial/key_value_database.ipynb rename to frontends/concrete-python/examples/key_value_database/key_value_database.ipynb index 93cf418f51..7e569dd8af 100644 --- a/docs/application-tutorial/key_value_database.ipynb +++ b/frontends/concrete-python/examples/key_value_database/key_value_database.ipynb @@ -8,7 +8,7 @@ "\n", "This is an interactive tutorial of an Encrypted Key Value Database. The database allows for three operations, **Insert, Replace, and Query**. 
All the operations are implemented as fully-homomorphic encrypted circuits.\n", "\n", - "In `examples/key-value-database/`, you will find the following files:\n", + "In `frontends/concrete-python/examples/key_value_database/`, you will find the following files:\n", "\n", "- `static-size.py`: This file contains a static size database implementation, meaning that the number of entries is given as a parameter at the beginning.\n", "- `dynamic-size.py`: This file contains a dynamic size database implementation, meaning that the database starts as a zero entry database, and is grown as needed.\n", @@ -62,7 +62,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "# The number of entries in the database\n", "NUMBER_OF_ENTRIES = 5\n", "# The number of bits in each chunk\n", @@ -94,7 +93,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "# Key and Value size must be a multiple of chunk size\n", "assert KEY_SIZE % CHUNK_SIZE == 0\n", "assert VALUE_SIZE % CHUNK_SIZE == 0\n", @@ -123,7 +121,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "# Indexers for each part of the state\n", "FLAG = 0\n", "KEY = slice(1, 1 + NUMBER_OF_KEY_CHUNKS)\n", @@ -166,13 +163,15 @@ "source": [ "def encode(number: int, width: int) -> np.array:\n", " binary_repr = np.binary_repr(number, width=width)\n", - " blocks = [binary_repr[i:i+CHUNK_SIZE] for i in range(0, len(binary_repr), CHUNK_SIZE)]\n", + " blocks = [binary_repr[i : i + CHUNK_SIZE] for i in range(0, len(binary_repr), CHUNK_SIZE)]\n", " return np.array([int(block, 2) for block in blocks])\n", "\n", + "\n", "# Encode a number with the key size\n", "def encode_key(number: int) -> np.array:\n", " return encode(number, width=KEY_SIZE)\n", "\n", + "\n", "# Encode a number with the value size\n", "def encode_value(number: int) -> np.array:\n", " return encode(number, width=VALUE_SIZE)" @@ -202,7 +201,7 @@ "def decode(encoded_number: np.array) -> int:\n", " result = 0\n", " for i in range(len(encoded_number)):\n", - " result += 
2**(CHUNK_SIZE*i) * encoded_number[(len(encoded_number) - i) - 1]\n", + " result += 2 ** (CHUNK_SIZE * i) * encoded_number[(len(encoded_number) - i) - 1]\n", " return result" ] }, @@ -224,10 +223,10 @@ "outputs": [], "source": [ "def keep_selected(value, selected):\n", - " if selected:\n", - " return value\n", - " else:\n", - " return 0" + " if selected:\n", + " return value\n", + " else:\n", + " return 0" ] }, { @@ -269,8 +268,8 @@ "outputs": [], "source": [ "def keep_selected_using_lut(value, selected):\n", - " packed = (2 ** CHUNK_SIZE) * selected + value\n", - " return keep_selected_lut[packed]" + " packed = (2**CHUNK_SIZE) * selected + value\n", + " return keep_selected_lut[packed]" ] }, { @@ -308,7 +307,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "# Insert a key value pair into the database\n", "# - state: The state of the database\n", "# - key: The key to insert\n", @@ -334,11 +332,11 @@ " # | 1 | 1 | -> Used, skip\n", " packed_flag_and_found = (found * 2) + flags[i]\n", " # Use the packed flag and found bit to determine if the entry is unused\n", - " is_selected = (packed_flag_and_found == 0)\n", + " is_selected = packed_flag_and_found == 0\n", "\n", " # Update the selection array\n", " selection[i] = is_selected\n", - " # Update the found bit, so all entries will be \n", + " # Update the found bit, so all entries will be\n", " # skipped after the first unused entry is found\n", " found += is_selected\n", "\n", @@ -352,12 +350,12 @@ "\n", " # Create a packed selection and key array\n", " # This array is used to update the key of the selected entry\n", - " packed_selection_and_key = (selection * (2 ** CHUNK_SIZE)) + key\n", + " packed_selection_and_key = (selection * (2**CHUNK_SIZE)) + key\n", " key_update = keep_selected_lut[packed_selection_and_key]\n", "\n", " # Create a packed selection and value array\n", " # This array is used to update the value of the selected entry\n", - " packed_selection_and_value = selection * (2 ** CHUNK_SIZE) + 
value\n", + " packed_selection_and_value = selection * (2**CHUNK_SIZE) + value\n", " value_update = keep_selected_lut[packed_selection_and_value]\n", "\n", " # Update the state update array with the key and value update arrays\n", @@ -391,7 +389,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "# Replace the value of a key in the database\n", "# If the key is not in the database, nothing happens\n", "# If the key is in the database, the value is replaced\n", @@ -405,8 +402,6 @@ " keys = state[:, KEY]\n", " values = state[:, VALUE]\n", "\n", - " \n", - "\n", " # Create an equal_rows array\n", " # This array is used to select all entries with the given key\n", " # The equal_rows array is created by comparing the keys in the state\n", @@ -415,7 +410,7 @@ " # keys = [[1, 0, 1, 0], [0, 1, 0, 1, 1]]\n", " # key = [1, 0, 1, 0]\n", " # equal_rows = [1, 0]\n", - " equal_rows = (np.sum((keys - key) == 0, axis=1) == NUMBER_OF_KEY_CHUNKS)\n", + " equal_rows = np.sum((keys - key) == 0, axis=1) == NUMBER_OF_KEY_CHUNKS\n", "\n", " # Create a selection array\n", " # This array is used to select the entry to change the value of\n", @@ -424,10 +419,10 @@ " # The reason for combining the equal_rows array with the flags array\n", " # is to make sure that only used entries are selected\n", " selection = (flags * 2 + equal_rows == 3).reshape((-1, 1))\n", - " \n", + "\n", " # Create a packed selection and value array\n", " # This array is used to update the value of the selected entry\n", - " packed_selection_and_value = selection * (2 ** CHUNK_SIZE) + value\n", + " packed_selection_and_value = selection * (2**CHUNK_SIZE) + value\n", " set_value = keep_selected_lut[packed_selection_and_value]\n", "\n", " # Create an inverse selection array\n", @@ -439,7 +434,7 @@ "\n", " # Create a packed inverse selection and value array\n", " # This array is used to keep the value of the entries that are not selected\n", - " packed_inverse_selection_and_values = inverse_selection * (2 ** 
CHUNK_SIZE) + values\n", + " packed_inverse_selection_and_values = inverse_selection * (2**CHUNK_SIZE) + values\n", " kept_values = keep_selected_lut[packed_inverse_selection_and_values]\n", "\n", " # Update the values of the state with the new values\n", @@ -470,7 +465,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "# Query the database for a key and return the value\n", "# - state: The state of the database\n", "# - key: The key to query\n", @@ -500,7 +494,7 @@ "\n", " # Create a packed selection and value array\n", " # This array is used to get the value of the selected entry\n", - " packed_selection_and_values = selection * (2 ** CHUNK_SIZE) + values\n", + " packed_selection_and_values = selection * (2**CHUNK_SIZE) + values\n", " value_selection = keep_selected_lut[packed_selection_and_values]\n", "\n", " # Sum the value selection array to get the value\n", @@ -530,7 +524,7 @@ " A key-value database that uses fully homomorphic encryption circuits to store the data.\n", " \"\"\"\n", "\n", - " # The state of the database, it holds all the \n", + " # The state of the database, it holds all the\n", " # keys and values as a table of entries\n", " _state: np.ndarray\n", "\n", @@ -541,15 +535,15 @@ "\n", " # Below is the initialization of the database.\n", "\n", - " # First, we initialize the state, and provide the necessary input sets. \n", - " # In versions later than concrete-numpy.0.9.0, we can use the `direct circuit` \n", - " # functionality to define the bit-widths of encrypted values rather than using \n", - " # `input sets`. Input sets are used to determine the required bit-width of the \n", - " # encrypted values. Hence, we add the largest possible value in the database \n", + " # First, we initialize the state, and provide the necessary input sets.\n", + " # In versions later than concrete-numpy.0.9.0, we can use the `direct circuit`\n", + " # functionality to define the bit-widths of encrypted values rather than using\n", + " # `input sets`. 
Input sets are used to determine the required bit-width of the\n", + " # encrypted values. Hence, we add the largest possible value in the database\n", " # to the input sets.\n", "\n", - " # Within the initialization phase, we create the required configuration, \n", - " # compilers, circuits, and keys. Circuit and key generation phase is \n", + " # Within the initialization phase, we create the required configuration,\n", + " # compilers, circuits, and keys. Circuit and key generation phase is\n", " # timed and printed in the output.\n", "\n", " def __init__(self):\n", @@ -562,16 +556,16 @@ " # The input set for the query circuit\n", " inputset_binary = [\n", " (\n", - " np.zeros(STATE_SHAPE, dtype=np.int64), # state\n", - " np.ones(NUMBER_OF_KEY_CHUNKS, dtype=np.int64) * (2**CHUNK_SIZE - 1), # key\n", + " np.zeros(STATE_SHAPE, dtype=np.int64), # state\n", + " np.ones(NUMBER_OF_KEY_CHUNKS, dtype=np.int64) * (2**CHUNK_SIZE - 1), # key\n", " )\n", " ]\n", " # The input set for the insert and replace circuits\n", " inputset_ternary = [\n", " (\n", - " np.zeros(STATE_SHAPE, dtype=np.int64), # state\n", - " np.ones(NUMBER_OF_KEY_CHUNKS, dtype=np.int64) * (2**CHUNK_SIZE - 1), # key\n", - " np.ones(NUMBER_OF_VALUE_CHUNKS, dtype=np.int64) * (2**CHUNK_SIZE - 1), # value\n", + " np.zeros(STATE_SHAPE, dtype=np.int64), # state\n", + " np.ones(NUMBER_OF_KEY_CHUNKS, dtype=np.int64) * (2**CHUNK_SIZE - 1), # key\n", + " np.ones(NUMBER_OF_VALUE_CHUNKS, dtype=np.int64) * (2**CHUNK_SIZE - 1), # value\n", " )\n", " ]\n", "\n", @@ -591,18 +585,12 @@ " # - \"encrypted\": The input is encrypted\n", " # - \"plain\": The input is not encrypted\n", " insert_compiler = fhe.Compiler(\n", - " _insert_impl,\n", - " {\"state\": \"encrypted\", \"key\": \"encrypted\", \"value\": \"encrypted\"}\n", + " _insert_impl, {\"state\": \"encrypted\", \"key\": \"encrypted\", \"value\": \"encrypted\"}\n", " )\n", " replace_compiler = fhe.Compiler(\n", - " _replace_impl,\n", - " {\"state\": \"encrypted\", 
\"key\": \"encrypted\", \"value\": \"encrypted\"}\n", + " _replace_impl, {\"state\": \"encrypted\", \"key\": \"encrypted\", \"value\": \"encrypted\"}\n", " )\n", - " query_compiler = fhe.Compiler(\n", - " _query_impl,\n", - " {\"state\": \"encrypted\", \"key\": \"encrypted\"}\n", - " )\n", - "\n", + " query_compiler = fhe.Compiler(_query_impl, {\"state\": \"encrypted\", \"key\": \"encrypted\"})\n", "\n", " ## Compile the circuits\n", " # The circuits are compiled with the input set and the configuration\n", @@ -659,10 +647,10 @@ " print(f\"(took {end - start:.3f} seconds)\")\n", "\n", " ### The Interface Functions\n", - " \n", - " # The following methods are used to interact with the database. \n", - " # They are used to insert, replace and query the database. \n", - " # The methods are implemented by encrypting the inputs, \n", + "\n", + " # The following methods are used to interact with the database.\n", + " # They are used to insert, replace and query the database.\n", + " # The methods are implemented by encrypting the inputs,\n", " # running the circuit and decrypting the output.\n", "\n", " # Insert a key-value pair into the database\n", @@ -703,16 +691,14 @@ " print()\n", " print(f\"Querying...\")\n", " start = time.time()\n", - " result = self._query_circuit.encrypt_run_decrypt(\n", - " self._state, encode_key(key)\n", - " )\n", + " result = self._query_circuit.encrypt_run_decrypt(self._state, encode_key(key))\n", " end = time.time()\n", " print(f\"(took {end - start:.3f} seconds)\")\n", "\n", " if result[0] == 0:\n", " return None\n", "\n", - " return decode(result[1:])\n" + " return decode(result[1:])" ] }, { @@ -992,10 +978,10 @@ "source": [ "# Define lower/upper bounds for the key\n", "minimum_key = 1\n", - "maximum_key = 2 ** KEY_SIZE - 1\n", + "maximum_key = 2**KEY_SIZE - 1\n", "# Define lower/upper bounds for the value\n", "minimum_value = 1\n", - "maximum_value = 2 ** VALUE_SIZE - 1" + "maximum_value = 2**VALUE_SIZE - 1" ] }, { @@ -1076,6 +1062,9 
@@ "metadata": { "execution": { "timeout": 10800 + }, + "language_info": { + "name": "python" } }, "nbformat": 4, diff --git a/frontends/concrete-python/examples/key-value-database/static-size.py b/frontends/concrete-python/examples/key_value_database/static-size.py similarity index 100% rename from frontends/concrete-python/examples/key-value-database/static-size.py rename to frontends/concrete-python/examples/key_value_database/static-size.py diff --git a/frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.md b/frontends/concrete-python/examples/levenshtein_distance/README.md similarity index 100% rename from frontends/concrete-python/examples/levenshtein_distance/levenshtein_distance.md rename to frontends/concrete-python/examples/levenshtein_distance/README.md diff --git a/frontends/concrete-python/examples/sha1/sha1.md b/frontends/concrete-python/examples/sha1/README.md similarity index 100% rename from frontends/concrete-python/examples/sha1/sha1.md rename to frontends/concrete-python/examples/sha1/README.md diff --git a/frontends/concrete-python/examples/sha256/sha256.ipynb b/frontends/concrete-python/examples/sha256/sha256.ipynb new file mode 100644 index 0000000000..a873d787cb --- /dev/null +++ b/frontends/concrete-python/examples/sha256/sha256.ipynb @@ -0,0 +1,844 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "_FTzVxUkjQno" + }, + "source": [ + "# SHA-256 Implementation Using Concrete\n", + "\n", + "In this tutorial, we will explore the implementation of SHA-256, a widely used hashing algorithm, using concrete-python. 
Details about the algorithm can be found [here](https://en.wikipedia.org/wiki/SHA-2).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zXozpJvmcBH1", + "outputId": "79dfc00b-10cc-4ffd-d4b9-a10f18d8d01e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Requirement already satisfied: concrete-python in /usr/local/lib/python3.10/dist-packages (1.0.0)\n", + "Requirement already satisfied: numpy>=1.23 in /usr/local/lib/python3.10/dist-packages (from concrete-python) (1.24.3)\n", + "Requirement already satisfied: scipy>=1.10 in /usr/local/lib/python3.10/dist-packages (from concrete-python) (1.10.1)\n", + "Requirement already satisfied: torch>=1.13 in /usr/local/lib/python3.10/dist-packages (from concrete-python) (2.0.0+cu118)\n", + "Requirement already satisfied: networkx>=2.6 in /usr/local/lib/python3.10/dist-packages (from concrete-python) (3.1)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->concrete-python) (4.5.0)\n", + "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->concrete-python) (2.0.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->concrete-python) (1.11.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->concrete-python) (3.1.2)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->concrete-python) (3.12.0)\n", + "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13->concrete-python) (3.25.2)\n", + "Requirement already satisfied: lit in 
/usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13->concrete-python) (16.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13->concrete-python) (2.1.2)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13->concrete-python) (1.3.0)\n" + ] + } + ], + "source": [ + "# Uncomment this line to install dependency\n", + "# ! pip install concrete-python\n", + "\n", + "# Required libraries\n", + "from concrete import fhe\n", + "import numpy as np" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "oCfjYazikbm_" + }, + "source": [ + "## Data Representation\n", + "As mentioned in the wiki page, all variables are $32$-bit unsigned integers. Additions should be calculated modulo $2^{32}$.\n", + "\n", + "While addition of 32-bit numbers is possible in the library, other operations such as modular reduction, rotations, and bitwise operations are currently not possible. These operations require a lookup table with 32-bit inputs, but as of writing this tutorial, concrete-python supports up to 16-bit lookup tables. Higher precision lookup tables are still a research challenge in the homomorphic world, and such a table would be difficult to compile and store at this moment.\n", + "\n", + "Thus, we need to break all the variables into **chunks** and work at the chunk level. Throughout the code, *WIDTH* refers to the bitwidth of a chunk, and *NUM_CHUNKS* is the number of chunks we need to represent a 32-bit value. These parameters are set at the beginning of the code. We vary these parameters to see the impact of the *WIDTH* on the performance of the compiler and the circuit.\n", + "\n", + "![chunks.jpg]()\n", + "\n", + "Figure 1: A breakdown of 32 bits of data into 4 chunks of 8 bits. This is not the only way to chunk the input."
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "yaz8cNzjQ1UW" + }, + "outputs": [], + "source": [ + "# Bitwidth of each chunk and number of chunks in each 32-bit number.\n", + "WIDTH, NUM_CHUNKS = 4, 8\n", + "\n", + "## Some other valid parameter sets\n", + "# WIDTH, NUM_CHUNKS= 8, 4\n", + "# WIDTH, NUM_CHUNKS= 2, 16\n", + "\n", + "assert WIDTH * NUM_CHUNKS == 32\n", + "\n", + "\n", + "def break_down_data(data, data_size):\n", + " all_chunks = [\n", + " [(x >> i * WIDTH) % (2**WIDTH) for i in range(data_size // WIDTH)[::-1]] for x in data\n", + " ]\n", + " return all_chunks\n", + "\n", + "\n", + "def reshape_data(data):\n", + " return np.array(data).reshape(-1, NUM_CHUNKS)\n", + "\n", + "\n", + "def chunks_to_uint32(chunks):\n", + " return int(sum([2 ** ((NUM_CHUNKS - 1 - i) * WIDTH) * x for i, x in enumerate(chunks)]))\n", + "\n", + "\n", + "def chunks_to_hexarray(chunks):\n", + " hexes = [hex(chunks_to_uint32(word))[2:] for word in chunks]\n", + " hexes = [\n", + " \"0\" * (8 - len(y)) + y for y in hexes\n", + " ] # Appending leadning zero to the ones that are less than 8 characters TODO: write better\n", + " result = \"\".join(hexes)\n", + " return result" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "u7pA-B3As9u4" + }, + "source": [ + "### Creating Chunks\n", + "There are two lists of constants in the algorithm, K and H. Before executing the algorithm, we need to break them into chunks using the `break_down_data` function (followed by `reshape_data`).\n", + "\n", + "\n", + "The input of the algorithm is arbitrary bytes. We might need to break each byte into smaller chunks based on the value of *WIDTH* after padding the data as instructed by the algorithm.
`break_down_data` function returns a numpy array of shape (48,NUM_CHUNKS)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "b8rlvVf42CIa" + }, + "source": [ + "## Operations\n", + "Now that the data is stored as chunks, we must modify all the operations we need so that they work at the level of chunks. In this section we explain how we implemented the required operations. The three main categories of operations that we need to implement SHA-256 are:\n", + "\n", + "* Bitwise operations (AND, OR, XOR, NEGATE)\n", + "* Shifts and Rotations\n", + "* Modular Addition " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "zlM1RN-NnjDn" + }, + "source": [ + "### Bitwise Operations\n", + "Bitwise operations are easily implemented in concrete-python. A bitwise operation over a 32-bit number is equivalent to the same operation over the chunks." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "CxCwJOao2KCt" + }, + "source": [ + "### Rotation and Shifts\n", + "To understand how rotations work, consider a small example with 4 chunks of width 4, representing a 16-bit number, as shown in Figure 1. Most significant bits are located at index 0. So a 16-bit number will be `[[chunk_0], [chunk_1], [chunk_2], [chunk_3]]` with WIDTH=4. There are two possible scenarios for rotations:\n", + "\n", + "1. Any rotation by a multiple of WIDTH (in this case, 4) will result in rotating the array of chunks. For example, right rotate(4) will be `[[chunk_3], [chunk_0], [chunk_1], [chunk_2]]`.\n", + "\n", + "2. For rotations less than WIDTH, for example `y`, we break every chunk into two parts of bit length `WIDTH-y` and `y`. We need to add the low `y`-bits of each chunk with the high `WIDTH-y` bits of the next chunk. Figure 2 illustrates this process. We leverage two lookup tables to extract the two segments of each chunk.\n", + "\n", + "\n", + "3.
Rotations by other amounts are broken into the two steps described above.\n", + "\n", + "![Rotation.jpg]()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "575ogsJhFDIo" + }, + "source": [ + "### Shift\n", + "The shift operation is the same as rotation, but we prepend the encrypted scalar zero when we move the bits to the right." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "TRAFRZime-Jv" + }, + "outputs": [], + "source": [ + "def right_rotate_list_of_chunks(list_to_rotate, amount):\n", + " return np.concatenate((list_to_rotate[-amount:], list_to_rotate[:-amount]))\n", + "\n", + "\n", + "def right_shift_list_of_chunks(list_to_rotate, amount):\n", + " return np.concatenate(([0] * list_to_rotate[-amount:].shape[0], list_to_rotate[:-amount]))\n", + "\n", + "\n", + "def left_shift_list_of_chunks(list_to_rotate, amount):\n", + " return np.concatenate((list_to_rotate[amount:], [0] * list_to_rotate[:amount].shape[0]))\n", + "\n", + "\n", + "def rotate_less_than_width(chunks, shift):\n", + " raised_low_bits = fhe.univariate(lambda x: (x % 2**shift) << (WIDTH - shift))(chunks)\n", + " shifted_raised_low_bits = right_rotate_list_of_chunks(raised_low_bits, 1)\n", + "\n", + " high_bits = chunks >> shift\n", + " return shifted_raised_low_bits + high_bits\n", + "\n", + "\n", + "def right_rotate(chunks, rotate_amount):\n", + " x = rotate_amount // WIDTH\n", + " y = rotate_amount % WIDTH\n", + " if x != 0:\n", + " rotated_chunks = right_rotate_list_of_chunks(chunks, x)\n", + " else:\n", + " rotated_chunks = chunks\n", + " if y != 0:\n", + " rotated = rotate_less_than_width(rotated_chunks, y)\n", + " else:\n", + " rotated = rotated_chunks\n", + "\n", + " return rotated\n", + "\n", + "\n", + "def right_shift(chunks, shift_amount):\n", + " x = shift_amount // WIDTH\n", + " y = shift_amount % WIDTH\n", + " if x != 0:\n", + " shifted_chunks = right_shift_list_of_chunks(chunks, x)\n", + " else:\n", + " 
shifted_chunks = chunks\n", + " if y != 0:\n", + " # shift within chunks\n", + " raised_low_bits = fhe.univariate(lambda x: (x % 2**y) << (WIDTH - y))(shifted_chunks)\n", + " shifted_raised_low_bits = right_shift_list_of_chunks(raised_low_bits, 1)\n", + " high_bits = shifted_chunks >> y\n", + " result = shifted_raised_low_bits + high_bits\n", + " else:\n", + " result = shifted_chunks\n", + " return result" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "SKg8mKFOPXSV" + }, + "source": [ + "### Modular 32-bit Addition\n", + "Modular 32-bit addition is frequently used in SHA256. While Concrete supports additions of 32-bit numbers, modulizing the result requires a lookup table which is too large for Concrete. Hence, the addition must be done over chunks.\n", + "\n", + "Below is the function to add two 32-bit numbers mod $2^{32}$." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "EJEPvp2wQms9" + }, + "outputs": [], + "source": [ + "def add_two_32_bits(a, b):\n", + " added = np.sum([a, b], axis=0)\n", + "\n", + " for i in range(NUM_CHUNKS):\n", + " results = added % (2**WIDTH)\n", + " if i < NUM_CHUNKS - 1:\n", + " carries = added >> WIDTH\n", + " added = left_shift_list_of_chunks(carries, 1) + results\n", + "\n", + " return results" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "Uo6o_QMQn_fw" + }, + "outputs": [], + "source": [ + "# Testing the addition function, adding four 32-bit numbers\n", + "test_inputs = np.random.randint(0, 2**32, size=(2,))\n", + "input_chunks = break_down_data(test_inputs, 32)\n", + "\n", + "assert chunks_to_uint32(add_two_32_bits(input_chunks[0], input_chunks[1])) == np.sum(\n", + " test_inputs\n", + ") % (2**32)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "IOr8DTRJRYTl" + }, + "source": [ + "Adding two 4-bit numbers results in a 5-bit number. 
We then use two lookup tables:\n", + "\n", + "* `extract_carry` which extracts the carry of adding two chunks\n", + "* `extract_result` which extracts the 4-bit chunk which results from adding two chunks (without the carry)\n", + "\n", + "Each carry must now be added to the next chunk, and this process is repeated for as many chunks as there are. The figure below illustrates this process.\n", + "\n", + "![add-chunks.png]()\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "4-dQk0wbtPOe" + }, + "source": [ + "The benefit of this addition algorithm is that it can be extended to the case where more than two 32-bit numbers are added. The only difference is that the carry from the first iteration of the loop can be larger than 1. Specifically, when adding $k$ 4-bit numbers, the carry can be up to $\\log_2 k$ bits wide. For correctness, $\\log_2 k$ must be less than 4 or $k<16$.\n", + "\n", + "In our implementation of SHA-256, we only have two-input and four-input additions, so we only implement those.\n", + "\n", + "For four-input addition, in the first iteration of the loop, we use a different lookup table that extracts a 2-bit carry and the rest of the chunk. The rest of the algorithm does not change."
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "obO8wHRbXHfj" + }, + "outputs": [], + "source": [ + "def add_four_32_bits(a, b, c, d):\n", + " added = np.sum([a, b, c, d], axis=0)\n", + "\n", + " # First iteration of the loop is seperated\n", + " carries = added >> WIDTH\n", + " results = added % (2**WIDTH)\n", + " shifted_carries = left_shift_list_of_chunks(carries, 1)\n", + " added = shifted_carries + results\n", + "\n", + " for i in range(1, NUM_CHUNKS):\n", + " results = added % (2**WIDTH)\n", + "\n", + " # In the last iteration, carries need not be calculated\n", + " if i != NUM_CHUNKS - 1:\n", + " carries = added >> WIDTH\n", + " shifted_carries = left_shift_list_of_chunks(carries, 1)\n", + " added = shifted_carries + results\n", + "\n", + " return results" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "zcwnDdPFdqE1" + }, + "outputs": [], + "source": [ + "# Testing the addition function, adding four 32-bit numbers\n", + "\n", + "for _ in range(1000):\n", + " test_inputs = np.random.randint(0, 2**32, size=(4,))\n", + " input_chunks = break_down_data(test_inputs, 32)\n", + "\n", + " assert chunks_to_uint32(\n", + " add_four_32_bits(input_chunks[0], input_chunks[1], input_chunks[2], input_chunks[3])\n", + " ) == np.sum(test_inputs) % (2**32)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "1g6eEdhGoJl9" + }, + "source": [ + "## Operations for SHA-256\n", + "\n", + "Using the basic operations from the previous section, we can now implement all the necessary functions for SHA256" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "uo7HfO1DpVFK" + }, + "outputs": [], + "source": [ + "# Used in the expansion\n", + "\n", + "\n", + "def s0(w):\n", + " return right_rotate(w, 7) ^ right_rotate(w, 18) ^ right_shift(w, 3)\n", + "\n", + "\n", + "def s1(w):\n", + " return right_rotate(w, 17) ^ right_rotate(w, 19) ^ right_shift(w, 
10)\n", + "\n", + "\n", + "# Used in main loop\n", + "\n", + "\n", + "def S0(a_word):\n", + " return right_rotate(a_word, 2) ^ right_rotate(a_word, 13) ^ right_rotate(a_word, 22)\n", + "\n", + "\n", + "def S1(e_word):\n", + " return right_rotate(e_word, 6) ^ right_rotate(e_word, 11) ^ right_rotate(e_word, 25)\n", + "\n", + "\n", + "def Ch(e_word, f_word, g_word):\n", + " return (e_word & f_word) ^ ((2**WIDTH - 1 - e_word) & g_word)\n", + "\n", + "\n", + "def Maj(a_word, b_word, c_word):\n", + " return (a_word & b_word) ^ (a_word & c_word) ^ (b_word & c_word)\n", + "\n", + "\n", + "def main_loop(args, w_i_plus_k_i):\n", + " a, b, c, d, e, f, g, h = args\n", + " temp1 = add_four_32_bits(h, S1(e), Ch(e, f, g), w_i_plus_k_i)\n", + " temp2 = add_two_32_bits(S0(a), Maj(a, b, c))\n", + " new_a = add_two_32_bits(temp1, temp2)\n", + " new_e = add_two_32_bits(d, temp1)\n", + " return np.array([new_a, a, b, c, new_e, e, f, g])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "biM997KmvwUL" + }, + "source": [ + "We also need a function to pad the input as the first step of SHA256." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "fZ-3sEH5vopA" + }, + "source": [ + "Moreover, we need a function to parse the input given to the program. The input is given as bytes, but the chunks might be smaller. We extract smaller chunks from bytes using lookup tables." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "L4leg-z_skkU" + }, + "source": [ + "## Bringing it all together\n", + "Using all the components from the above, we can implement SHA256 as shown below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "tmSfvdpyrwUx" + }, + "outputs": [], + "source": [ + "K = [\n", + " 0x428A2F98,\n", + " 0x71374491,\n", + " 0xB5C0FBCF,\n", + " 0xE9B5DBA5,\n", + " 0x3956C25B,\n", + " 0x59F111F1,\n", + " 0x923F82A4,\n", + " 0xAB1C5ED5,\n", + " 0xD807AA98,\n", + " 0x12835B01,\n", + " 0x243185BE,\n", + " 0x550C7DC3,\n", + " 0x72BE5D74,\n", + " 0x80DEB1FE,\n", + " 0x9BDC06A7,\n", + " 0xC19BF174,\n", + " 0xE49B69C1,\n", + " 0xEFBE4786,\n", + " 0x0FC19DC6,\n", + " 0x240CA1CC,\n", + " 0x2DE92C6F,\n", + " 0x4A7484AA,\n", + " 0x5CB0A9DC,\n", + " 0x76F988DA,\n", + " 0x983E5152,\n", + " 0xA831C66D,\n", + " 0xB00327C8,\n", + " 0xBF597FC7,\n", + " 0xC6E00BF3,\n", + " 0xD5A79147,\n", + " 0x06CA6351,\n", + " 0x14292967,\n", + " 0x27B70A85,\n", + " 0x2E1B2138,\n", + " 0x4D2C6DFC,\n", + " 0x53380D13,\n", + " 0x650A7354,\n", + " 0x766A0ABB,\n", + " 0x81C2C92E,\n", + " 0x92722C85,\n", + " 0xA2BFE8A1,\n", + " 0xA81A664B,\n", + " 0xC24B8B70,\n", + " 0xC76C51A3,\n", + " 0xD192E819,\n", + " 0xD6990624,\n", + " 0xF40E3585,\n", + " 0x106AA070,\n", + " 0x19A4C116,\n", + " 0x1E376C08,\n", + " 0x2748774C,\n", + " 0x34B0BCB5,\n", + " 0x391C0CB3,\n", + " 0x4ED8AA4A,\n", + " 0x5B9CCA4F,\n", + " 0x682E6FF3,\n", + " 0x748F82EE,\n", + " 0x78A5636F,\n", + " 0x84C87814,\n", + " 0x8CC70208,\n", + " 0x90BEFFFA,\n", + " 0xA4506CEB,\n", + " 0xBEF9A3F7,\n", + " 0xC67178F2,\n", + "]\n", + "H = [0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "NHGCiC-Gk_tw" + }, + "outputs": [], + "source": [ + "k_in = reshape_data(break_down_data(K, 32))\n", + "h_in = reshape_data(break_down_data(H, 32))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "yTiMkmBsHmKy" + }, + "outputs": [], + "source": [ + "def uint64_to_bin(uint64: int):\n", + " return \"\".join([str(uint64 >> i & 1) for 
i in range(63, -1, -1)])\n", + "\n", + "\n", + "def sha256_preprocess(text):\n", + " \"\"\"\n", + " Takes a message of arbitrary length and returns a message\n", + " of length that is a multiple of 512 bits, with the original message padded\n", + " with a 1 bit, followed by 0 bits, followed by the original message length\n", + " in bits\n", + " \"\"\"\n", + " data = text\n", + " # convert to uint4 and group into 32 bit words (8 uint4s)\n", + " # #log (\"data is:\", data, data.shape)\n", + " message_len = data.shape[0] * 8 # denoted as 'l' in spec\n", + " # find padding length 'k'\n", + " k = (((448 - 1 - message_len) % 512) + 512) % 512\n", + " # #log (\"k is:\", k)\n", + " zero_pad_width_in_bits = k\n", + " padstring = \"1\" + \"0\" * zero_pad_width_in_bits + str(uint64_to_bin(message_len))\n", + " # log (\"padstring size:\", len(padstring))\n", + " # log (\"padstring is:\", padstring)\n", + "\n", + " total_size = len(padstring) + message_len\n", + " # log (\"total size:\", total_size)\n", + " assert total_size % 512 == 0\n", + "\n", + " pad = np.array(\n", + " [int(padstring[i : i + 8], 2) for i in range(0, len(padstring), 8)], dtype=np.uint8\n", + " )\n", + " padded = np.concatenate((data, pad))\n", + " words = break_down_data(padded, 8)\n", + " chunks = reshape_data(words)\n", + " return chunks" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "3ox6Zs-ysoLr" + }, + "outputs": [], + "source": [ + "# Number of rounds must be 64 to have correct SHA256\n", + "# If looking to get a faster run, reduce the number of rounds (but it will not be correct)\n", + "\n", + "\n", + "def sha256(data, number_of_rounds=64):\n", + " h_chunks = fhe.zeros((len(h_in), NUM_CHUNKS))\n", + " k_chunks = fhe.zeros((len(k_in), NUM_CHUNKS))\n", + " h_chunks += h_in\n", + " k_chunks += k_in\n", + "\n", + " num_of_iters = data.shape[0] * 32 // 512\n", + " for chunk_iter in range(0, num_of_iters):\n", + "\n", + " # Initializing the variables\n", + " chunk = 
data[chunk_iter * 16 : (chunk_iter + 1) * 16]\n", + " w = [None for _ in range(number_of_rounds)]\n", + " # Starting the main loop and expansion\n", + " working_vars = h_chunks\n", + " for j in range(0, number_of_rounds):\n", + " if j < 16:\n", + " w[j] = chunk[j]\n", + " else:\n", + " w[j] = add_four_32_bits(w[j - 16], s0(w[j - 15]), w[j - 7], s1(w[j - 2]))\n", + " w_i_k_i = add_two_32_bits(w[j], k_chunks[j])\n", + " working_vars = main_loop(working_vars, w_i_k_i)\n", + "\n", + " # Accumulating the results\n", + " for j in range(8):\n", + " h_chunks[j] = add_two_32_bits(h_chunks[j], working_vars[j])\n", + " return h_chunks" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "w89rhSOh4In2" + }, + "source": [ + "We can test the correctness of this function as below (this is not in encrypted form yet)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "006LZp7c0yBA", + "outputId": "31588127-23e9-4b49-e481-d14842e336e7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " SHA256: a412c46b0be134c593b0ad520d4a4c4e1d8aecca799be0be2c4d233ccf455cb7\n", + "Our SHA256: a412c46b0be134c593b0ad520d4a4c4e1d8aecca799be0be2c4d233ccf455cb7\n", + "Match: True\n" + ] + } + ], + "source": [ + "import hashlib\n", + "\n", + "text = (\n", + " b\"Lorem ipsum dolor sit amet, consectetur adipiscing elit. \"\n", + " b\"Curabitur bibendum, urna eu bibendum egestas, neque augue eleifend odio, et sagittis viverra. 
and more than 150\"\n", + ")\n", + "\n", + "result = sha256(sha256_preprocess(np.frombuffer(text, dtype=np.uint8)))\n", + "\n", + "m = hashlib.sha256()\n", + "m.update(text)\n", + "\n", + "print(\" SHA256:\", m.hexdigest())\n", + "print(\"Our SHA256:\", chunks_to_hexarray(result))\n", + "print(\"Match:\", chunks_to_hexarray(result) == m.hexdigest())" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "1uHN9GXgla_z" + }, + "outputs": [], + "source": [ + "class HomomorphicSHA:\n", + " circuit: fhe.Circuit\n", + "\n", + " def __init__(self, input_size_in_bytes=150, number_of_rounds=64) -> None:\n", + " self.input_size_in_bytes = input_size_in_bytes\n", + " assert 0 <= number_of_rounds <= 64, \"Number of rounds must be between zero and 64\"\n", + " self.number_of_rounds = number_of_rounds\n", + " inputset = [\n", + " sha256_preprocess(np.random.randint(0, 2**8, size=(input_size_in_bytes,)))\n", + " for _ in range(100)\n", + " ]\n", + " # Compilation of the circuit should take a few minutes\n", + " compiler = fhe.Compiler(\n", + " lambda data: sha256(data, self.number_of_rounds), {\"data\": \"encrypted\"}\n", + " )\n", + " self.circuit = compiler.compile(\n", + " inputset=inputset,\n", + " configuration=fhe.Configuration(\n", + " enable_unsafe_features=True,\n", + " use_insecure_key_cache=True,\n", + " insecure_key_cache_location=\".keys\",\n", + " dataflow_parallelize=True,\n", + " ),\n", + " verbose=False,\n", + " )\n", + "\n", + " def getSHA(self, data):\n", + " assert (\n", + " len(data) == self.input_size_in_bytes\n", + " ), f\"Input size is not correct, should be {self.input_size_in_bytes} bytes/characters\"\n", + " return self.circuit.encrypt_run_decrypt(sha256_preprocess(data))\n", + "\n", + " def getPlainSHA(self, data):\n", + " return sha256(sha256_preprocess(data), self.number_of_rounds)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "SpxY6dScee-k" + }, + "source": [ + "Now we are ready 
to compile the circuit! Note that **the compilation will take a long time**, so if you are looking to get a test run, you can set the number of rounds to something smaller than 64." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "P0cMOZUGee-k" + }, + "outputs": [], + "source": [ + "# Warning: This will compile the circuit and will take a few minutes\n", + "\n", + "input_size_in_bytes = 150\n", + "running_small_example = True\n", + "\n", + "if running_small_example:\n", + " number_of_rounds = 2\n", + " sha = HomomorphicSHA(input_size_in_bytes, number_of_rounds)\n", + "else:\n", + " sha = HomomorphicSHA(input_size_in_bytes)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "zz1rd7VWee-k" + }, + "source": [ + "And after compilation, we are ready to run the circuit. Remember that the input size has to match what you gave in the previous cell. Our function will check this first to make sure the input is of the correct size. " + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "EkF0UxTcv_cQ", + "outputId": "c4e2c710-02bc-40e2-a921-4a29ac88380b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "This cell is disabled. It can take hours. If you want to run this cell, set accept_a_very_long_run=True\n" + ] + } + ], + "source": [ + "# WARNING: This takes a LONG time\n", + "accept_a_very_long_run = False\n", + "if not accept_a_very_long_run:\n", + " print(\n", + " \"This cell is disabled. It can take hours. If you want to run this cell, set accept_a_very_long_run=True\"\n", + " )\n", + "else:\n", + " text = (\n", + " b\"Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
\"\n", + " b\"Curabitur bibendum, urna eu bibendum egestas, neque augue eleifend odio, et sagittis viverra.\"\n", + " )\n", + " input_bytes = np.frombuffer(text, dtype=np.uint8)\n", + " encrypted_evaluation = sha.getSHA(input_bytes)\n", + "\n", + " print(\"Encrypted Evaluation: \", chunks_to_hexarray(encrypted_evaluation))\n", + " print(\" Plain Evaluation: \", chunks_to_hexarray(sha.getPlainSHA(input_bytes)))" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3.10.7 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.7" + }, + "vscode": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/frontends/concrete-python/examples/xor_distance/xor_distance.md b/frontends/concrete-python/examples/xor_distance/README.md similarity index 100% rename from frontends/concrete-python/examples/xor_distance/xor_distance.md rename to frontends/concrete-python/examples/xor_distance/README.md