From c5b54c69b0972db7ef8f91107e6953e022885b02 Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Wed, 6 Nov 2024 09:11:48 -0800 Subject: [PATCH 01/23] Masking files - wip --- src/abr_libs/config/compile.yml | 7 +- .../rtl/abr_masked_N_bit_Arith_adder.sv | 71 +++++++ .../rtl/abr_masked_N_bit_mult_two_share.sv | 91 +++++++++ src/abr_libs/tb/abr_masked_N_bit_mult_tb.sv | 8 +- .../tb/abr_masked_N_bit_mult_two_share_tb.sv | 193 ++++++++++++++++++ src/ntt_top/Model/maksed_gadgets.py | 19 +- src/ntt_top/config/compile.yml | 5 +- src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv | 174 ++++++++++++++++ src/ntt_top/rtl/ntt_masked_BFU_mult.sv | 111 ++++++++++ src/ntt_top/rtl/ntt_masked_gs_butterfly.sv | 71 +++++++ src/ntt_top/tb/ntt_top_tb.sv | 88 ++++++-- 11 files changed, 804 insertions(+), 34 deletions(-) create mode 100644 src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv create mode 100644 src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv create mode 100644 src/abr_libs/tb/abr_masked_N_bit_mult_two_share_tb.sv create mode 100644 src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv create mode 100644 src/ntt_top/rtl/ntt_masked_BFU_mult.sv create mode 100644 src/ntt_top/rtl/ntt_masked_gs_butterfly.sv diff --git a/src/abr_libs/config/compile.yml b/src/abr_libs/config/compile.yml index f7a3b2c..75a65cf 100644 --- a/src/abr_libs/config/compile.yml +++ b/src/abr_libs/config/compile.yml @@ -26,6 +26,7 @@ targets: - $COMPILE_ROOT/rtl/abr_masked_N_bit_Boolean_adder.sv - $COMPILE_ROOT/rtl/abr_masked_B2A_conv.sv - $COMPILE_ROOT/rtl/abr_masked_N_bit_mult.sv + - $COMPILE_ROOT/rtl/abr_masked_N_bit_mult_two_share.sv - $COMPILE_ROOT/rtl/abr_adder.sv - $COMPILE_ROOT/rtl/abr_add_sub_mod.sv rtl: @@ -48,8 +49,10 @@ targets: - $COMPILE_ROOT/rtl/abr_masked_full_adder.sv - $COMPILE_ROOT/rtl/abr_masked_A2B_conv.sv - $COMPILE_ROOT/rtl/abr_masked_N_bit_Boolean_adder.sv + - $COMPILE_ROOT/rtl/abr_masked_N_bit_Arith_adder.sv - $COMPILE_ROOT/rtl/abr_masked_B2A_conv.sv - $COMPILE_ROOT/rtl/abr_masked_N_bit_mult.sv + - $COMPILE_ROOT/rtl/abr_masked_N_bit_mult_two_share.sv - $COMPILE_ROOT/rtl/abr_adder.sv - $COMPILE_ROOT/rtl/abr_add_sub_mod.sv --- @@ -65,6 +68,7 @@ targets: - $COMPILE_ROOT/rtl/abr_masked_N_bit_Boolean_adder.sv - $COMPILE_ROOT/rtl/abr_masked_B2A_conv.sv - $COMPILE_ROOT/rtl/abr_masked_N_bit_mult.sv + - $COMPILE_ROOT/rtl/abr_masked_N_bit_mult_two_share.sv tb: directories: [$COMPILE_ROOT/tb] files: @@ -72,8 +76,9 @@ targets: - $COMPILE_ROOT/tb/abr_masked_N_bit_Boolean_adder_tb.sv - $COMPILE_ROOT/tb/abr_masked_B2A_conv_tb.sv - $COMPILE_ROOT/tb/abr_masked_N_bit_mult_tb.sv + - $COMPILE_ROOT/tb/abr_masked_N_bit_mult_two_share_tb.sv - tops: [abr_masked_N_bit_mult_tb] + tops: [abr_masked_N_bit_mult_two_share_tb] --- provides: [mldsa_uvm_lib] schema_version: 2.4.0 diff --git a/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv b/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv new file mode 100644 index 0000000..15e1552 --- /dev/null +++ b/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//====================================================================== +// +// Module: abr_masked_N_bit_Arith_adder +// Description: This module performs addition of two arithmetic shared inputs, producing an arithmetic shared output. +// +// +// Functionality: +// - It takes 1 cycle latency (due to output flop) +// - It does not require fresh randomness for every execution since inputs are already split +// +//====================================================================== + + module abr_masked_N_bit_Arith_adder #( + parameter WIDTH = 8 // Default width is 8 bits +)( + input wire clk, // Clock signal + input wire rst_n, // Active low reset signal + input wire zeroize, // Zeroize signal + input wire [1:0][WIDTH-1:0] x, // WIDTH-bit input operand x + input wire [1:0][WIDTH-1:0] y, // WIDTH-bit input operand y + + output logic [1:0][WIDTH-1:0] s +); + + logic [1:0][WIDTH-1:0] add_res; + logic [1:0] s_reg [WIDTH-1:0]; + + always_comb begin + add_res[0] = x[0] + y[0]; + add_res[1] = x[1] + y[1]; + end + + // Final output assignment + always_ff @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + // for (int i = 0; i < WIDTH; i++) begin + // s[i] <= 2'b0; + // end + s <= 'h0; + end + else if (zeroize) begin + // for (int i = 0; i < WIDTH; i++) begin + // s[i] <= 2'b0; + // end + s <= 'h0; + end + else begin + // for (int i = 0; i < WIDTH; i++) begin + // s[i][0] <= add_res[0][i]; // Pass-through without masking + // s[i][1] <= add_res[1][i]; // Pass-through without masking + // end + s <= add_res; + end + end + + +endmodule : abr_masked_N_bit_Arith_adder diff --git a/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv b/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv new file mode 100644 index 0000000..02950fe --- /dev/null +++ b/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//====================================================================== +// +// Module: abr_masked_N_bit_mult_two_share +// Description: This module implements a masked N-bit multiplier with two-share masking. +// The masking technique enhances security by using a random bit to obscure +// intermediate values, reducing vulnerability to side-channel attacks. +// +// Functionality: +// - The module calculates the multiplication operation between WIDTH-bit inputs x and y. +// - Intermediate results are masked since two shares are masked. +// - Final output is obtained by combining the reshared and masked intermediate results. +// - It requires fresh randomness. +// - This design assumes that both x and y are secret, although y input from top level is usually public +// - It has one cycle latency and can accept a new input set at every clock. +// +//====================================================================== + + module abr_masked_N_bit_mult_two_share + #(parameter WIDTH = 8) // Parameter to define the width of the operands + ( + input wire clk, // Clock signal + input wire rst_n, // Active low reset signal + input wire zeroize, // Zeroize signal + input wire [WIDTH-1:0] random, // Intermediate randomness + input wire [1:0][WIDTH-1:0] x, // WIDTH-bit arithmetic shares operand x + input wire [1:0][WIDTH-1:0] y, // WIDTH-bit arithmetic shares operand y + output logic [1:0] z [WIDTH-1:0] // WIDTH-bit arithmetic shares output z + ); + + // Intermediate calculation logic for multiplication operations + logic [WIDTH-1:0] calculation [3:0]; + logic [WIDTH-1:0] calculation_rand [1:0]; + logic [WIDTH-1:0] final_res [1:0]; + logic [WIDTH-1:0] x0, x1, y0, y1; + + // Format organization stage + always_comb begin + // x0 = x[0]; + // x1 = x[1]; + // y0 = y[0]; + // y1 = y[1]; + for (int i = 0; i < WIDTH; i++) begin + x0[i] = x[i][0]; + x1[i] = x[i][1]; + y0[i] = y[i][0]; + y1[i] = y[i][1]; + end + end + + // Calculation stage + always_comb begin + calculation[0] = x0 * y0; // Multiplication of the first share x and first share y + calculation[1] = x1 * y0; // Multiplication of the second share x and first share y + calculation[2] = x0 * y1; // Multiplication of the first share x and second share y + calculation[3] = x1 * y1; // Multiplication of the second share x and second share y + + calculation_rand[0] = calculation[2] + random; + calculation_rand[1] = calculation[1] - random; + + final_res[0] = calculation[0] + calculation_rand[0]; + final_res[1] = calculation[3] + calculation_rand[1]; + end + + // Final output assignment + always_ff @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + z <= 'h0; + end + else if (zeroize) begin + z <= 'h0; + end + else begin + z <= final_res; + end + end + + endmodule: abr_masked_N_bit_mult_two_share diff --git a/src/abr_libs/tb/abr_masked_N_bit_mult_tb.sv b/src/abr_libs/tb/abr_masked_N_bit_mult_tb.sv index e486b3f..4334737 100644 --- a/src/abr_libs/tb/abr_masked_N_bit_mult_tb.sv +++ b/src/abr_libs/tb/abr_masked_N_bit_mult_tb.sv @@ -32,7 +32,7 @@ module abr_masked_N_bit_mult_tb; logic [1:0] z [WIDTH-1:0]; logic [WIDTH-1:0] expected_z; logic [WIDTH-1:0] actual_z; - logic [HALF_WIDTH-1:0] actuan_input; + logic [HALF_WIDTH-1:0] actual_input; logic [WIDTH-1:0] x0, z0, tmp0; logic [WIDTH-1:0] x1, z1, tmp1; @@ -95,9 +95,9 @@ module abr_masked_N_bit_mult_tb; #3; if (i < num_vectors) begin // Generate random inputs - actuan_input = $random; + actual_input = $random; tmp1 = $random; - tmp0 = (actuan_input - tmp1) % MOD; + tmp0 = (actual_input - tmp1) % MOD; for (int j = 0; j < WIDTH; j = j + 1) begin x[j] = {tmp1[j], tmp0[j]}; end @@ -109,7 +109,7 @@ module abr_masked_N_bit_mult_tb; end input_queue[0] = '{x: x, y: y}; - if (DEBUG) $display("[%0t] Input pushed: x = %d ({%d,%d}), y = %d", $time, actuan_input, tmp0, tmp1, y); + if (DEBUG) $display("[%0t] Input pushed: x = %d ({%d,%d}), y = %d", $time, actual_input, tmp0, tmp1, y); end else begin // Shift left and insert new inputs diff --git a/src/abr_libs/tb/abr_masked_N_bit_mult_two_share_tb.sv b/src/abr_libs/tb/abr_masked_N_bit_mult_two_share_tb.sv new file mode 100644 index 0000000..a819c73 --- /dev/null +++ b/src/abr_libs/tb/abr_masked_N_bit_mult_two_share_tb.sv @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//====================================================================== + +module abr_masked_N_bit_mult_two_share_tb; + + // Parameters + localparam WIDTH = 22; + localparam HALF_WIDTH = WIDTH/2; + localparam MOD = 2**WIDTH; + localparam NUM_OF_TEST_VECTOR = 32'd0_100_000; + localparam DEBUG = 0; + + // Testbench signals + logic clk; + logic rst_n; + logic zeroize; + logic [1:0] x [WIDTH-1:0]; + logic [1:0] y [WIDTH-1:0]; + logic [1:0] z [WIDTH-1:0]; + logic [WIDTH-1:0] expected_z; + logic [WIDTH-1:0] actual_z; + logic [WIDTH-1:0] random_tb; + logic [HALF_WIDTH-1:0] actual_input, actual_y; + logic [WIDTH-1:0] x0, y0, z0, tmp0, y_tmp0; + logic [WIDTH-1:0] x1, y1, z1, tmp1, y_tmp1; + + // Queue to store inputs + typedef struct { + logic [1:0] x [WIDTH-1:0]; + logic [1:0] y [WIDTH-1:0]; + } input_t; + input_t input_queue [(WIDTH + 2)]; + + input_t inputs; + + // Instantiate the DUT (Device Under Test) + abr_masked_N_bit_mult_two_share #( + .WIDTH(WIDTH) + ) dut ( + .clk(clk), + .rst_n(rst_n), + .zeroize(zeroize), + .random(random_tb), + .x(x), + .y(y), + .z(z) + ); + + // Clock generation + always #5 clk = ~clk; + + // Task to initialize the inputs + task initialize_inputs(); + integer i; + begin + rst_n = 1; + zeroize = 0; + + expected_z = 'h0; + actual_z = 'h0; + for (i = 0; i < WIDTH; i = i + 1) begin + x[i] = 2'b00; + y[i] = 2'h0; + end + end + endtask + + // Task to apply a reset + task apply_reset(); + begin + rst_n = 0; + #10; + rst_n = 1; + end + endtask + + // Task to perform test with specified number of vectors + task perform_test(input int num_vectors); + + fork + // Drive inputs and push into queue + begin + for (int i = 0; i < num_vectors + 2; i++) begin + @(posedge clk); + #3; + if (i < num_vectors) begin + // Generate random inputs + random_tb = $random; + actual_input = $random; + actual_y = $random; + tmp1 = $random; + tmp0 = (actual_input - tmp1) % MOD; + for (int j = 0; j < WIDTH; j = j + 1) begin + x[j] = {tmp1[j], tmp0[j]}; + end + y_tmp1 = $random; + y_tmp0 = (actual_y - y_tmp1) % MOD; + for (int j = 0; j < WIDTH; j = j+1) begin + y[j] = {y_tmp1[j], y_tmp0[j]}; + end + + // Shift left and insert new inputs + for (int j = WIDTH + 1; j > 0; j = j - 1) begin + input_queue[j] = input_queue[j - 1]; + end + input_queue[0] = '{x: x, y: y}; + + if (DEBUG) $display("[%0t] Input pushed: x = %d ({%d,%d}), y = %d ({%d,%d})", $time, actual_input, tmp0, tmp1, actual_y, y_tmp0, y_tmp1); + end + else begin + // Shift left and insert new inputs + for (int j = WIDTH + 1; j > 0; j = j - 1) begin + input_queue[j] = input_queue[j - 1]; + end + end + end + end + + // Collect results and compare + begin + repeat (1) @(negedge clk); + for (int i = 0; i < num_vectors; i++) begin + @(negedge clk); + + // Access inputs from the queue + inputs = input_queue[1]; + + for (int i = 0; i < WIDTH; i++) begin + x0[i] = inputs.x[i][0]; + x1[i] = inputs.x[i][1]; + + y0[i] = inputs.y[i][0]; + y1[i] = inputs.y[i][1]; + end + if (DEBUG) $display("[%0t] Input popped: x = %d, y = %d", $time, (x0+x1) % MOD, (y0+y1)%MOD); + + // Compare the results (you can define the expected results based on your logic) + #2; // Wait for outputs to stabilize + expected_z = ((x0 + x1) * (y0+y1)) % MOD; + for (int j = 0; j < WIDTH; j = j + 1) begin + z0[j] = z[j][0]; + z1[j] = z[j][1]; + end + actual_z = (z0 + z1) % MOD; + + // Placeholder for comparison logic + if (actual_z !== expected_z) begin + $display("[%0t] Error: Expected z = %p, got %p", $time, expected_z, actual_z); + end + else begin + if (DEBUG) $display("[%0t] Correct: z = %p", $time, actual_z); + end + end + end + join + endtask + + // Initial block to control the simulation + initial begin + // Initialize the clock + clk = 0; + + // Initialize inputs + initialize_inputs(); + + // Apply reset + apply_reset(); + #10; + + // Perform test with specified number of vectors + perform_test(NUM_OF_TEST_VECTOR); + + // Wait for some time to observe the outputs + #100; + + // Finish the simulation + $finish; + end + +endmodule diff --git a/src/ntt_top/Model/maksed_gadgets.py b/src/ntt_top/Model/maksed_gadgets.py index 6bd17ca..fe943d4 100644 --- a/src/ntt_top/Model/maksed_gadgets.py +++ b/src/ntt_top/Model/maksed_gadgets.py @@ -173,13 +173,17 @@ def maskedN_bitBooleanAdder(x0, x1, y0, y1, num_of_bits): sum_result1 = np.zeros(num_of_bits+1, dtype=np.uint8) # Perform bit-wise addition using full adder from LSB to MSB for i in range(num_of_bits - 1, -1, -1): - c0, c1, sum_result0[i+1], sum_result1[i+1] = maskedFullAdder(x0[i], x1[i], y0[i], y1[i], c0, c1) + c0, c1, sum_result0[i+1], sum_result1[i+1] = maskedFullAdder(x0[i], x1[i], y0[i], y1[i], c0, c1) #sum result is num_bits+1 sum_result0[0] = c0 sum_result1[0] = c1 return sum_result0, sum_result1 + # sum {0, 1,1, 1} 7+1 = {1, 0, 0, 0 } + # sum[4] + # sum[0] + def maskedN_bitBooleanAdder_for_normal_ops(x0, x1, y0, y1, num_of_bits, sub): if sub: c0 = 1 @@ -613,16 +617,25 @@ def B2A(x0, x1): a1 = x1 return a0, a1 +# Roller does the following: +# if a >= 2**23 - 2**13 + 1: +# if a + 2**13 - 1 >= 2**23 + +# 1000 >> 3 --> 1 +# input: u0 = 0, u1 = 1 +# output: red0 = 1010 red1 = 0111 --> combine = 'd17 % 16 = 1 +# def maskedAdderReduction(u0, u1): - uRolled0 = (u0 + Roller) % MultMod + uRolled0 = (u0 + Roller) % MultMod #TODO: forgot what it's for? uRolled1 = u1 # We need its only int(1+numOfBits/2)-bit so the adder size # can be reduced from 46 to 24 + # 2**23 is 1 in 24th bit uBoolean0, uBoolean1 = A2BConv(uRolled0, uRolled1) c0 = (uBoolean0 >> int(numOfBits/2)) & 1 c1 = (uBoolean1 >> int(numOfBits/2)) & 1 - red0, red1 = B2A(c0, c1) + red0, red1 = B2A(c0, c1) #converts to 46-bit arith domain not just 1 bit! (pad inputs on MSB part with 0s if needed) q0 = red0 * ((0-DILITHIUM_Q)% MultMod) q1 = red1 * ((0-DILITHIUM_Q)% MultMod) uReduced0 = (u0+q0) % MultMod diff --git a/src/ntt_top/config/compile.yml b/src/ntt_top/config/compile.yml index 44eafe9..dd5cc04 100755 --- a/src/ntt_top/config/compile.yml +++ b/src/ntt_top/config/compile.yml @@ -46,8 +46,6 @@ targets: - $COMPILE_ROOT/rtl/ntt_butterfly2x2.sv - $COMPILE_ROOT/rtl/ntt_butterfly.sv - $COMPILE_ROOT/rtl/ntt_mult_dsp.sv - # - $COMPILE_ROOT/rtl/ntt_add_sub_mod.sv - # - $COMPILE_ROOT/rtl/ntt_adder.sv - $COMPILE_ROOT/rtl/ntt_mult_reduction.sv - $COMPILE_ROOT/rtl/ntt_special_adder.sv - $COMPILE_ROOT/rtl/ntt_div2.sv @@ -55,6 +53,7 @@ targets: - $COMPILE_ROOT/rtl/ntt_twiddle_lookup.sv - $COMPILE_ROOT/rtl/ntt_ctrl.sv - $COMPILE_ROOT/rtl/ntt_top.sv + - $COMPILE_ROOT/rtl/ntt_masked_BFU_add_sub.sv tb: directories: [$COMPILE_ROOT/rtl] files: @@ -66,8 +65,6 @@ targets: directories: [] waiver_files: - $MSFT_REPO_ROOT/src/ntt/config/design_lint/ntt_top/sglint_waivers - # black_box: - # - ecc_reg --- provides: [ntt_top_tb] schema_version: 2.4.0 diff --git a/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv b/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv new file mode 100644 index 0000000..67daf97 --- /dev/null +++ b/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//====================================================================== +// +// ntt_masked_BFU_adder +//====================================================================== + +module ntt_masked_BFU_add_sub + import ntt_defines_pkg::*; + import mldsa_params_pkg::*; +#( + parameter WIDTH = 46, + parameter HALF_WIDTH = WIDTH/2, + parameter ROLLER = WIDTH'(2**13-1) +) +( + input wire clk, + input wire reset_n, + input wire zeroize, + input wire [1:0][WIDTH-1:0] u, + input wire [1:0][WIDTH-1:0] v, + input wire [WIDTH-1:0] rnd0, rnd1, rnd2, rnd3, + output logic [1:0] res [WIDTH-1:0] +); + + //Internal signals + logic [1:0][WIDTH-1:0] add_res; + logic [1:0][WIDTH-1:0] add_res_reg [2*WIDTH+1:0]; + logic [1:0] add_res_rolled [WIDTH-1:0]; + logic [1:0] add_res_bool [WIDTH-1:0]; + logic [1:0] add_res_arith [WIDTH-1:0]; + logic [WIDTH-1:0] prime0, prime1, add_res_rolled0, add_res_rolled1; + logic [1:0] add_res_reduced [WIDTH-1:0]; + logic [1:0] prime [WIDTH-1:0]; + + //Perform addition on input shares + abr_masked_N_bit_Arith_adder #( + .WIDTH(WIDTH) + ) masked_adder_inst ( + .clk(clk), + .rst_n(reset_n), + .zeroize(zeroize), + .x(u), + .y(v), + .s(add_res) + ); + + //Adder delay flops + always_ff @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + for (int i = 0; i < 2*WIDTH+1; i++) begin + add_res_reg[i] <= 'h0; + end + end + else if (zeroize) begin + for (int i = 0; i < 2*WIDTH+1; i++) begin + add_res_reg[i] <= 'h0; + end + end + else begin + add_res_reg <= {add_res, add_res_reg[2*WIDTH+1:1]}; + end + end + + //maskedAdder + reduction: + always_comb begin + add_res_rolled0 = add_res[0] + ROLLER; + add_res_rolled1 = add_res[1]; + for (int i = 0; i < WIDTH; i++) begin + add_res_rolled[i][0] = add_res_rolled0[i]; + add_res_rolled[i][1] = add_res_rolled1[i]; + end + end + + abr_masked_A2B_conv #( + .WIDTH(WIDTH) + ) a2b_inst ( + .clk(clk), + .rst_n(reset_n), + .zeroize(zeroize), + .x(add_res_rolled), + .rnd(rnd1), + .rnd_for_Boolean0(rnd2), + .rnd_for_Boolean1(rnd3), + .s(add_res_bool) + ); + + logic [1:0] temp0 [WIDTH-1:0]; + + // always_comb begin + // for(int i = 0; i< WIDTH; i++) begin + // if (i==0) begin + // temp0[i] = {add_res_bool[HALF_WIDTH][1], add_res_bool[HALF_WIDTH][0]}; + // end + // else begin + // temp0[i] = '0; + // end + // end + // end + + always_ff @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + for (int i = 0; i < WIDTH; i++) begin + temp0[i] <= 2'b0; + end + end + else if (zeroize) begin + for (int i = 0; i < WIDTH; i++) begin + temp0[i] <= 2'b0; + end + end + else begin + for (int i = 0; i < WIDTH; i++) begin + if (i==0) begin + temp0[i] <= {add_res_bool[HALF_WIDTH][1], add_res_bool[HALF_WIDTH][0]}; + end + else begin + temp0[i] <= '0; + end + end + end + end + + abr_masked_B2A_conv #( + .WIDTH(WIDTH) + ) b2a_inst ( + .clk(clk), + .rst_n(reset_n), + .zeroize(zeroize), + .rnd(rnd0), + .x_boolean(temp0), + .x_arith(add_res_arith) + ); + + always_comb begin + + // prime0 = add_res_arith[HALF_WIDTH][0] * ('0 - MLDSA_Q); + // prime1 = add_res_arith[HALF_WIDTH][1] * ('0 - MLDSA_Q); + for (int i = 0; i < WIDTH; i++) begin + prime[i][0] = add_res_arith[i][0] * ('0 - WIDTH'(MLDSA_Q)); + prime[i][1] = add_res_arith[i][1] * ('0 - WIDTH'(MLDSA_Q)); + add_res_reduced[i][0] = add_res_reg[0][0][i] + prime[i][0]; + add_res_reduced[i][1] = add_res_reg[0][1][i] + prime[i][1]; + end + end + + always_ff @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + for (int i = 0; i < WIDTH; i++) + res[i] <= 2'h0; + end + else if (zeroize) begin + for (int i = 0; i < WIDTH; i++) + res[i] <= 2'h0; + end + else begin + res <= add_res_reduced; //TODO: check with Emre - shares XORed together give actual result. Is this correct? Or should they be added instead? + end + end + + +endmodule \ No newline at end of file diff --git a/src/ntt_top/rtl/ntt_masked_BFU_mult.sv b/src/ntt_top/rtl/ntt_masked_BFU_mult.sv new file mode 100644 index 0000000..68083c8 --- /dev/null +++ b/src/ntt_top/rtl/ntt_masked_BFU_mult.sv @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//====================================================================== +// +// ntt_masked_BFU_mult +// Performs two share multiplication and reduction +//====================================================================== + +module ntt_masked_BFU_mult + import ntt_defines_pkg::*; + import mldsa_params_pkg::*; +#( + parameter WIDTH = 46, + parameter HALF_WIDTH = WIDTH/2, + parameter ROLLER = WIDTH'(2**13-1) +) +( + input wire clk, + input wire reset_n, + input wire zeroize, + input wire [1:0][WIDTH-1:0] u, + input wire [1:0][WIDTH-1:0] v, + input wire [WIDTH-1:0] rnd0, rnd1, rnd2, rnd3, rnd4, + output logic [1:0] res [HALF_WIDTH-1:0] +); + + //Internal signals + logic [1:0] mul_res [WIDTH-1:0]; + logic [1:0] mul_res_bool [WIDTH-1:0]; + logic [1:0] mul_res_bool_reduced [WIDTH-1:0]; + logic [1:0] mul_res_reduced [HALF_WIDTH-1:0]; + + //Perform mul on input shares + abr_masked_N_bit_mult_two_share #( + .WIDTH(WIDTH) + ) masked_mult_inst ( + .clk(clk), + .rst_n(reset_n), + .zeroize(zeroize), + .random(rnd0), + .x(u), + .y(v), + .s(mul_res) + ); + + abr_masked_A2B_conv #( + .WIDTH(WIDTH) + ) a2b_inst ( + .clk(clk), + .rst_n(reset_n), + .zeroize(zeroize), + .x(mul_res), + .rnd(rnd1), + .rnd_for_Boolean0(rnd2), + .rnd_for_Boolean1(rnd3), + .s(mul_res_bool) + ); + + //redux46 + abr_masked_N_bit_Boolean_adder #( + .WIDTH(10) //TODO: ask Emre - inputs are 10 bit, output should be 12 bits. Is it ok to put inputs at 12 too? + ) bool_adder_inst0 ( + .clk(clk), + .rst_n(reset_n), + .zeroize(zeroize), + .x({12'(mul_res_bool[22:13][1]), 12'(mul_res_bool[22:13][0])}), + .y({12'(mul_res_bool[32:23][1]), 12'(mul_res_bool[32:23][0])}), + .rnd(rnd4), + .s() + ); + + //B2A + abr_masked_B2A_conv #( + .WIDTH(WIDTH) + ) b2a_inst ( + .clk(clk), + .rst_n(reset_n), + .zeroize(zeroize), + .rnd(), + .x_boolean(mul_res_bool_reduced), + .x_arith(mul_res_reduced) + ); + + always_ff @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + for (int i = 0; i < WIDTH; i++) + res[i] <= 2'h0; + end + else if (zeroize) begin + for (int i = 0; i < WIDTH; i++) + res[i] <= 2'h0; + end + else begin + res <= mul_res_reduced; + end + end + + +endmodule \ No newline at end of file diff --git a/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv b/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv new file mode 100644 index 0000000..2adde19 --- /dev/null +++ b/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//====================================================================== +// +// ntt_masked_gs_butterfly.sv +// -------- +// Only performs gs (INTT) mode of operation. All blocks are masked + +module ntt_masked_gs_butterfly + import mldsa_params_pkg::*; + import ntt_defines_pkg::*; + #( + parameter WIDTH = 46 + ) + ( + input wire clk, + input wire reset_n, + input wire zeroize, + + input wire [1:0][WIDTH-1:0] opu_i, + input wire [1:0][WIDTH-1:0] opv_i, + input wire [1:0][WIDTH-1:0] opw_i, + input wire [4:0][WIDTH-1:0] rnd_i, + + output logic [1:0] u_o [WIDTH-1:0], //TODO: make packed? + output logic [1:0] v_o [WIDTH-1:0] + ); + + + ntt_masked_BFU_add_sub #( + .WIDTH(WIDTH) + ) add_inst_0 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .u(opu_i), + .v(opv_i), + .rnd0(rnd_i[0]), + .rnd1(rnd_i[1]), + .rnd2(rnd_i[2]), + .rnd3(rnd_i[3]), + .res() + ); + + ntt_masked_BFU_add_sub #( + .WIDTH(WIDTH) + ) sub_inst_0 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .u(opu_i), + .v(opv_i), + .rnd0(rnd_i[0]), + .rnd1(rnd_i[1]), + .rnd2(rnd_i[2]), + .rnd3(rnd_i[3]), + .res() + ); + +endmodule diff --git a/src/ntt_top/tb/ntt_top_tb.sv b/src/ntt_top/tb/ntt_top_tb.sv index 4f3f95f..7fd778c 100644 --- a/src/ntt_top/tb/ntt_top_tb.sv +++ b/src/ntt_top/tb/ntt_top_tb.sv @@ -74,6 +74,13 @@ pwo_mem_addr_t pwo_mem_base_addr_tb; string operation; +logic [45:0] actual_u, actual_v; +logic [1:0][45:0] u; +logic [1:0][45:0] v; +logic [1:0] actual_sum [45:0]; +logic [46:0] exp_sum; +logic [45:0] rnd0, rnd1, rnd2, rnd3; + //---------------------------------------------------------------- // Device Under Test. //---------------------------------------------------------------- @@ -131,28 +138,35 @@ string operation; // .sampler_valid(svalid_tb) // ); -ntt_wrapper dut ( +// ntt_wrapper dut ( +// .clk(clk_tb), +// .reset_n(reset_n_tb), +// .zeroize(zeroize_tb), +// .mode(mode_tb), +// .ntt_enable(enable_tb), +// .load_tb_values(load_tb_values), +// .load_tb_addr(load_tb_addr), +// .ntt_mem_base_addr(ntt_mem_base_addr_tb), +// .pwo_mem_base_addr(pwo_mem_base_addr_tb), +// .accumulate(acc_tb), +// .sampler_valid(svalid_tb), +// .sampler_mode(sampler_mode_tb), +// .sampler_data(96'hFFFFFF), +// .ntt_done(ntt_done_tb), +// .ntt_busy() +// ); + +ntt_masked_BFU_add_sub dut ( .clk(clk_tb), .reset_n(reset_n_tb), .zeroize(zeroize_tb), - .mode(mode_tb), - .ntt_enable(enable_tb), - .load_tb_values(load_tb_values), - .load_tb_addr(load_tb_addr), - // .src_base_addr(src_base_addr), - // .interim_base_addr(interim_base_addr), - // .dest_base_addr(dest_base_addr), - // .pw_base_addr_a(8'd0), - // .pw_base_addr_b(8'd0), - // .pw_base_addr_c(8'd0), - .ntt_mem_base_addr(ntt_mem_base_addr_tb), - .pwo_mem_base_addr(pwo_mem_base_addr_tb), - .accumulate(acc_tb), - .sampler_valid(svalid_tb), - .sampler_mode(sampler_mode_tb), - .sampler_data(96'hFFFFFF), - .ntt_done(ntt_done_tb), - .ntt_busy() + .u(u), + .v(v), + .rnd0(rnd0), + .rnd1(rnd1), + .rnd2(rnd2), + .rnd3(rnd3), + .res(actual_sum) ); //---------------------------------------------------------------- @@ -164,6 +178,10 @@ always begin : clk_gen #CLK_HALF_PERIOD; clk_tb = !clk_tb; + rnd0 = 'h1; //$random(); + rnd1 = 'h1; //$random(); + rnd2 = 'h1; //$random(); + rnd3 = 'h1; //$random(); end // clk_gen //---------------------------------------------------------------- @@ -238,6 +256,14 @@ task init_sim; svalid_tb = 1'b0; sampler_mode_tb = 1'b0; + //Masking + for (int i = 0; i < 46; i++) begin + u[i] = 2'h0; + v[i] = 2'h0; + end + actual_u = 'h0; + actual_v = 'h0; + $display("End of init\n"); end endtask @@ -488,7 +514,7 @@ task ntt_top_test(); $display("End of test\n"); endtask - +/* task pwm_opt_test(); $display("PWM operation 1\n"); $readmemh("pwm_iter1.hex", ntt_mem_tb); @@ -545,7 +571,7 @@ task pwm_opt_test(); @(posedge clk_tb); end endtask - +*/ task init_mem(); for (int i = 0; i < 32768; i++) begin load_tb_addr = i; @@ -556,6 +582,23 @@ task init_mem(); load_tb_addr = 'h0; endtask +task masked_BFU_adder_test(); + for (int i = 0; i < 10; i++) begin + @(posedge clk_tb); + actual_u = i+1; //$random; + actual_v = i+1; //$random; + $display("actual u = %h, actual v = %h\n", actual_u, actual_v); + // for (int j = 0; j < 46; j++) begin + // u[j] = {'h0, actual_u[j]}; + // v[j] = {'h0, actual_v[j]}; + // end + u[0] = actual_u; + u[1] = 'h0; //rand = 0; + v[0] = actual_v; + v[1] = 'h0; + end +endtask + initial begin init_sim(); reset_dut(); @@ -571,7 +614,8 @@ initial begin // twiddle_rom_test(); // ntt_ctrl_test(); $display("Starting ntt test\n"); - ntt_top_test(); + // ntt_top_test(); + masked_BFU_adder_test(); // pwm_opt_test(); repeat(1000) @(posedge clk_tb); $finish; From a9f51bd8d7134b10343751dc369a0c8b47850f69 Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Wed, 6 Nov 2024 09:53:04 -0800 Subject: [PATCH 02/23] Add redux46 file --- src/ntt_top/rtl/ntt_masked_BFU_mult.sv | 22 +++++++------- src/ntt_top/rtl/ntt_masked_mult_redux46.sv | 35 ++++++++++++++++++++++ 2 files changed, 46 insertions(+), 11 deletions(-) create mode 100644 src/ntt_top/rtl/ntt_masked_mult_redux46.sv diff --git a/src/ntt_top/rtl/ntt_masked_BFU_mult.sv b/src/ntt_top/rtl/ntt_masked_BFU_mult.sv index 68083c8..a2313ad 100644 --- a/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +++ b/src/ntt_top/rtl/ntt_masked_BFU_mult.sv @@ -69,17 +69,17 @@ module ntt_masked_BFU_mult ); //redux46 - abr_masked_N_bit_Boolean_adder #( - .WIDTH(10) //TODO: ask Emre - inputs are 10 bit, output should be 12 bits. Is it ok to put inputs at 12 too? - ) bool_adder_inst0 ( - .clk(clk), - .rst_n(reset_n), - .zeroize(zeroize), - .x({12'(mul_res_bool[22:13][1]), 12'(mul_res_bool[22:13][0])}), - .y({12'(mul_res_bool[32:23][1]), 12'(mul_res_bool[32:23][0])}), - .rnd(rnd4), - .s() - ); + // abr_masked_N_bit_Boolean_adder #( + // .WIDTH(10) //TODO: ask Emre - inputs are 10 bit, output should be 12 bits. Is it ok to put inputs at 12 too? + // ) bool_adder_inst0 ( + // .clk(clk), + // .rst_n(reset_n), + // .zeroize(zeroize), + // .x({12'(mul_res_bool[22:13][1]), 12'(mul_res_bool[22:13][0])}), + // .y({12'(mul_res_bool[32:23][1]), 12'(mul_res_bool[32:23][0])}), + // .rnd(rnd4), + // .s() + // ); //B2A abr_masked_B2A_conv #( diff --git a/src/ntt_top/rtl/ntt_masked_mult_redux46.sv b/src/ntt_top/rtl/ntt_masked_mult_redux46.sv new file mode 100644 index 0000000..12e723d --- /dev/null +++ b/src/ntt_top/rtl/ntt_masked_mult_redux46.sv @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//====================================================================== +// +// ntt_masked_mult_redux46 +// Performs masked multiplication reduction for MLDSA +//====================================================================== + +module ntt_masked_mult_redux46 + import ntt_defines_pkg::*; + import mldsa_params_pkg::*; +#( + parameter WIDTH = 46 +) +( + input wire clk, + input wire rst_n, + input wire zeroize, + input wire [1:0] x [WIDTH-1:0], + output logic [1:0] y [WIDTH-1:0] +); + +endmodule \ No newline at end of file From f1b35fc9abb923ba0554b53a4021060a87025662 Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Mon, 11 Nov 2024 08:59:02 -0800 Subject: [PATCH 03/23] Masked adder, sub, gs, placeholder for mult reduction --- src/abr_libs/rtl/abr_masked_B2A_conv.sv | 16 +- .../rtl/abr_masked_N_bit_mult_two_share.sv | 43 +++-- src/ntt_top/Model/maksed_gadgets.py | 14 ++ src/ntt_top/Model/testForMasking.py | 61 ++++-- src/ntt_top/config/compile.yml | 2 + src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv | 80 +++++--- src/ntt_top/rtl/ntt_masked_BFU_mult.sv | 52 ++++- src/ntt_top/rtl/ntt_masked_gs_butterfly.sv | 72 ++++++- src/ntt_top/tb/ntt_top_tb.sv | 177 +++++++++++++++--- 9 files changed, 411 insertions(+), 106 deletions(-) diff --git a/src/abr_libs/rtl/abr_masked_B2A_conv.sv b/src/abr_libs/rtl/abr_masked_B2A_conv.sv index 5adcc80..6112ddc 100644 --- a/src/abr_libs/rtl/abr_masked_B2A_conv.sv +++ b/src/abr_libs/rtl/abr_masked_B2A_conv.sv @@ -30,7 +30,7 @@ // outputs A and r. // //====================================================================== - +`define DEBUG_MASKING 1 module abr_masked_B2A_conv #( parameter WIDTH = 8 // Default width is 8 bits )( @@ -48,6 +48,20 @@ logic unsigned [1:0] x_arith_next [WIDTH-1:0]; wire [WIDTH-1:0] Gamma; assign Gamma = rnd; +`ifdef DEBUG_MASKING + logic [WIDTH-1:0] actual_input, actual_input0, actual_input1, exp_output, actual_output; + always_comb begin + for (int i = 0; i < WIDTH; i++) begin + actual_input[i] = x_boolean[i][0] ^ x_boolean[i][1]; + end + exp_output = actual_input1; + end + always_ff @(posedge clk) begin + actual_input0 <= actual_input; + actual_input1 <= actual_input0; + actual_output <= A2 + x1; + end +`endif // Register inputs always_ff @ (posedge clk or negedge rst_n) begin diff --git a/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv b/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv index 02950fe..b16663c 100644 --- a/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv +++ b/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv @@ -48,25 +48,25 @@ logic [WIDTH-1:0] x0, x1, y0, y1; // Format organization stage - always_comb begin - // x0 = x[0]; - // x1 = x[1]; - // y0 = y[0]; - // y1 = y[1]; - for (int i = 0; i < WIDTH; i++) begin - x0[i] = x[i][0]; - x1[i] = x[i][1]; - y0[i] = y[i][0]; - y1[i] = y[i][1]; - end - end + // always_comb begin + // // x0 = x[0]; + // // x1 = x[1]; + // // y0 = y[0]; + // // y1 = y[1]; + // for (int i = 0; i < WIDTH; i++) begin + // x0[i] = x[i][0]; + // x1[i] = x[i][1]; + // y0[i] = y[i][0]; + // y1[i] = y[i][1]; + // end + // end // Calculation stage always_comb begin - calculation[0] = x0 * y0; // Multiplication of the first share x and first share y - calculation[1] = x1 * y0; // Multiplication of the second share x and first share y - calculation[2] = x0 * y1; // Multiplication of the first share x and second share y - calculation[3] = x1 * y1; // Multiplication of the second share x and second share y + calculation[0] = x[0] * y[0]; // Multiplication of the first share x and first share y + calculation[1] = x[1] * y[0]; // Multiplication of the second share x and first share y + calculation[2] = x[0] * y[1]; // Multiplication of the first share x and second share y + calculation[3] = x[1] * y[1]; // Multiplication of the second share x and second share y calculation_rand[0] = calculation[2] + random; calculation_rand[1] = calculation[1] - random; @@ -78,13 +78,18 @@ // Final output assignment always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin - z <= 'h0; + for (int i = 0; i < WIDTH; i++) + z[i] <= 2'h0; end else if (zeroize) begin - z <= 'h0; + for (int i = 0; i < WIDTH; i++) + z[i] <= 2'h0; end else begin - z <= final_res; + for (int i = 0; i < WIDTH; i++) begin + z[i][0] <= final_res[0][i]; + z[i][1] <= final_res[1][i]; + end end end diff --git a/src/ntt_top/Model/maksed_gadgets.py b/src/ntt_top/Model/maksed_gadgets.py index fe943d4..7afa0bb 100644 --- a/src/ntt_top/Model/maksed_gadgets.py +++ b/src/ntt_top/Model/maksed_gadgets.py @@ -629,21 +629,33 @@ def B2A(x0, x1): def maskedAdderReduction(u0, u1): uRolled0 = (u0 + Roller) % MultMod #TODO: forgot what it's for? uRolled1 = u1 + print(f"rolled_shares = {uRolled0: X}, {uRolled1: X}") + print(f"rolled_combin = {uRolled0+uRolled1: X}") # We need its only int(1+numOfBits/2)-bit so the adder size # can be reduced from 46 to 24 # 2**23 is 1 in 24th bit uBoolean0, uBoolean1 = A2BConv(uRolled0, uRolled1) + print(f"boolean_shares = {uBoolean0: X}, {uBoolean1: X}") + print(f"boolean_combin = {uBoolean0^uBoolean1: X}") c0 = (uBoolean0 >> int(numOfBits/2)) & 1 c1 = (uBoolean1 >> int(numOfBits/2)) & 1 red0, red1 = B2A(c0, c1) #converts to 46-bit arith domain not just 1 bit! (pad inputs on MSB part with 0s if needed) + print(f"arith_shares = {red0: X}, {red1: X}") + print(f"arith_combin = {red0+red1: X}") q0 = red0 * ((0-DILITHIUM_Q)% MultMod) q1 = red1 * ((0-DILITHIUM_Q)% MultMod) + print(f"prime_shares = {q0: X}, {q1: X}") + print(f"prime_combin = {q0+q1: X}") uReduced0 = (u0+q0) % MultMod uReduced1 = (u1+q1) % MultMod + print(f"res_redux_shares = {uReduced0: X}, {uReduced1: X}") + print(f"res_redux_combin = {uReduced0+uReduced1: X}") return uReduced0, uReduced1 def maskedBFUAdder(x0, x1, y0, y1): u0, u1 = maskedAdder(x0, x1, y0, y1) + print(f"add_res_shares = {u0: X}, {u1: X}") + print(f"add_res_combin = {u0+u1: X}") uReduced0, uReduced1 = maskedAdderReduction(u0, u1) return uReduced0, uReduced1 @@ -651,6 +663,8 @@ def maskedBFUAdder(x0, x1, y0, y1): def maskedBFUSub(x0, x1, y0, y1): y_new0 = (DILITHIUM_Q-y0) % MultMod y_new1 = (0-y1) % MultMod + print(f"y_new_shares = {y_new0: X}, {y_new1: X}") + print(f"y_new_combin = {y_new0+y_new1: X}") v0, v1 = maskedAdder(x0, x1, y_new0, y_new1) vReduced0, vReduced1 = maskedAdderReduction(v0, v1) return vReduced0, vReduced1 diff --git a/src/ntt_top/Model/testForMasking.py b/src/ntt_top/Model/testForMasking.py index b5473b9..c0608d5 100644 --- a/src/ntt_top/Model/testForMasking.py +++ b/src/ntt_top/Model/testForMasking.py @@ -281,25 +281,48 @@ def test_maskedBFUAdder(numTest = 10): operands = CustomUnsignedInteger(0, 0, DILITHIUM_Q-1) for i in range(0, numTest): #get a random number ranging [0, DILITHIUM_Q-1] - operands.generate_random() - a = int(operands.value) - operands.generate_random() - b = int(operands.value) - expected = (a+b) % DILITHIUM_Q - randomness.generate_random() - r0 = int(randomness.value) - a0 = int(a-r0) % MultMod - a1 = r0 - randomness.generate_random() - r1 = int(randomness.value) - b0 = int(b-r1) % MultMod - b1 = r1 - a0, a1 = maskedBFUAdder(a0, a1, b0, b1) + # operands.generate_random() + # a = int(operands.value) + # operands.generate_random() + # b = int(operands.value) + # expected = (a+b) % DILITHIUM_Q + # randomness.generate_random() + # r0 = int(randomness.value) + # a0 = int(a-r0) % MultMod + # a1 = r0 + # randomness.generate_random() + # r1 = int(randomness.value) + # b0 = int(b-r1) % MultMod + # b1 = r1 + + # a = int("54bb00", 16) + # b = int("727900", 16) + # r0 = int("782839f0", 16) + # r1 = int("7923d7f2", 16) + + # print("actual inputs:") + # print(f"{a: X}, {b: X}") + # expected = (a+b) % DILITHIUM_Q + # a0 = int(a-r0) % MultMod + # a1 = r0 + # b0 = int(b-r1) % MultMod + # b1 = r1 + a0 = int("00007f0ef2fd", 16) + a1 = int("3fff81075002", 16) + b0 = int("3fffff142bfe", 16) + b1 = int("0000013a0302", 16) + expected = ((a0+a1)-(b0+b1)) % DILITHIUM_Q + print("a shares:") + print(f"{a0: X}, {a1: X}") + print("b shares:") + print(f"{b0: X}, {b1: X}") + a0, a1 = maskedBFUSub(a0, a1, b0, b1) #maskedBFUAdder(a0, a1, b0, b1) gotten = int(a0 + a1) % MultMod + print(f"Gotten = {gotten: X}, Expected = {expected: X}") if gotten != expected: - print(f"Multiplication gives an Error; gotten = {gotten}, while exp = {expected}") + print(f"Addition gives an Error; gotten = {gotten: X}, while exp = {expected: X}") -# test_maskedBFUAdder(numTest = 1000) +test_maskedBFUAdder(numTest = 1) def gs_bf(u, v, z): t = (u - v) % DILITHIUM_Q @@ -377,8 +400,8 @@ def test_maskedBFU_CT(numTest = 10): print(f"CT Lower branch gives an Error; gotten = {vNew}, while exp = {exp_v}") -test_maskedBFU_CT(numTest = 100000) -test_maskedBFU_GS(numTest = 100000) +# test_maskedBFU_CT(numTest = 100000) +# test_maskedBFU_GS(numTest = 100000) def test_masked_inv_NTT2x2_div2(numTest = 10): for test_i in range(numTest): @@ -450,6 +473,6 @@ def test_MaskedmodularOps(numTest = 10): # test_MaskedmodularOps(numTest = 100000) -test_maskedReduction46(numTest = 1000000) +# test_maskedReduction46(numTest = 1000000) diff --git a/src/ntt_top/config/compile.yml b/src/ntt_top/config/compile.yml index dd5cc04..b853ff6 100755 --- a/src/ntt_top/config/compile.yml +++ b/src/ntt_top/config/compile.yml @@ -54,6 +54,8 @@ targets: - $COMPILE_ROOT/rtl/ntt_ctrl.sv - $COMPILE_ROOT/rtl/ntt_top.sv - $COMPILE_ROOT/rtl/ntt_masked_BFU_add_sub.sv + - $COMPILE_ROOT/rtl/ntt_masked_BFU_mult.sv + - $COMPILE_ROOT/rtl/ntt_masked_gs_butterfly.sv tb: directories: [$COMPILE_ROOT/rtl] files: diff --git a/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv b/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv index 67daf97..da40be1 100644 --- a/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +++ b/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv @@ -14,7 +14,7 @@ // //====================================================================== // -// ntt_masked_BFU_adder +// ntt_masked_BFU_adder - total end-to-end latency = 53 clks //====================================================================== module ntt_masked_BFU_add_sub @@ -29,6 +29,7 @@ module ntt_masked_BFU_add_sub input wire clk, input wire reset_n, input wire zeroize, + input wire sub, input wire [1:0][WIDTH-1:0] u, input wire [1:0][WIDTH-1:0] v, input wire [WIDTH-1:0] rnd0, rnd1, rnd2, rnd3, @@ -36,16 +37,29 @@ module ntt_masked_BFU_add_sub ); //Internal signals - logic [1:0][WIDTH-1:0] add_res; - logic [1:0][WIDTH-1:0] add_res_reg [2*WIDTH+1:0]; + logic [1:0][WIDTH-1:0] v_int, add_res; + logic [1:0][WIDTH-1:0] add_res_reg [WIDTH+4:0]; //TODO parameterize logic [1:0] add_res_rolled [WIDTH-1:0]; logic [1:0] add_res_bool [WIDTH-1:0]; logic [1:0] add_res_arith [WIDTH-1:0]; logic [WIDTH-1:0] prime0, prime1, add_res_rolled0, add_res_rolled1; - logic [1:0] add_res_reduced [WIDTH-1:0]; + logic [1:0][WIDTH-1:0] add_res_reduced, prime_packed; logic [1:0] prime [WIDTH-1:0]; + logic [WIDTH-1:0] add_res_bool0, add_res_bool1, add_res_arith0, add_res_arith1, add_res_reduced0, add_res_reduced1; + // logic [WIDTH-1:0] prime0, prime1; - //Perform addition on input shares + always_comb begin + if (sub) begin + v_int[0] = MLDSA_Q - v[0]; + v_int[1] = (~v[1] + 'h1); + end + else begin + v_int[0] = v[0]; + v_int[1] = v[1]; + end + end + + //Perform addition on input shares - 1 clk latency abr_masked_N_bit_Arith_adder #( .WIDTH(WIDTH) ) masked_adder_inst ( @@ -53,24 +67,24 @@ module ntt_masked_BFU_add_sub .rst_n(reset_n), .zeroize(zeroize), .x(u), - .y(v), + .y(v_int), .s(add_res) ); //Adder delay flops always_ff @(posedge clk or negedge reset_n) begin if (!reset_n) begin - for (int i = 0; i < 2*WIDTH+1; i++) begin + for (int i = 0; i <= WIDTH+4; i++) begin add_res_reg[i] <= 'h0; end end else if (zeroize) begin - for (int i = 0; i < 2*WIDTH+1; i++) begin - add_res_reg[i] <= 'h0; + for (int i = 0; i <= WIDTH+4; i++) begin + add_res_reg[i] <= 'h0; end end else begin - add_res_reg <= {add_res, add_res_reg[2*WIDTH+1:1]}; + add_res_reg <= {add_res, add_res_reg[WIDTH+4:1]}; end end @@ -84,6 +98,7 @@ module ntt_masked_BFU_add_sub end end + //Takes 48 clks abr_masked_A2B_conv #( .WIDTH(WIDTH) ) a2b_inst ( @@ -101,15 +116,18 @@ module ntt_masked_BFU_add_sub // always_comb begin // for(int i = 0; i< WIDTH; i++) begin - // if (i==0) begin - // temp0[i] = {add_res_bool[HALF_WIDTH][1], add_res_bool[HALF_WIDTH][0]}; - // end - // else begin - // temp0[i] = '0; - // end + // add_res_bool0[i] = add_res_bool[i][0]; + // add_res_bool1[i] = add_res_bool[i][1]; + // // if (i==0) begin + // // temp0[i] = {add_res_bool[HALF_WIDTH][1], add_res_bool[HALF_WIDTH][0]}; + // // end + // // else begin + // // temp0[i] = '0; + // // end // end // end + //Convert 1 bit to 46 bit to pass to B2A converter - 1 clk always_ff @(posedge clk or negedge reset_n) begin if (!reset_n) begin for (int i = 0; i < WIDTH; i++) begin @@ -133,6 +151,7 @@ module ntt_masked_BFU_add_sub end end + //Convert 24th bit to arithmetic domain - 2 clks abr_masked_B2A_conv #( .WIDTH(WIDTH) ) b2a_inst ( @@ -144,18 +163,27 @@ module ntt_masked_BFU_add_sub .x_arith(add_res_arith) ); + //Organize wires for easier use and debug always_comb begin - - // prime0 = add_res_arith[HALF_WIDTH][0] * ('0 - MLDSA_Q); - // prime1 = add_res_arith[HALF_WIDTH][1] * ('0 - MLDSA_Q); - for (int i = 0; i < WIDTH; i++) begin - prime[i][0] = add_res_arith[i][0] * ('0 - WIDTH'(MLDSA_Q)); - prime[i][1] = add_res_arith[i][1] * ('0 - WIDTH'(MLDSA_Q)); - add_res_reduced[i][0] = add_res_reg[0][0][i] + prime[i][0]; - add_res_reduced[i][1] = add_res_reg[0][1][i] + prime[i][1]; + for(int i = 0; i< WIDTH; i++) begin + add_res_bool0[i] = add_res_bool[i][0]; + add_res_bool1[i] = add_res_bool[i][1]; + add_res_arith0[i] = add_res_arith[i][0]; + add_res_arith1[i] = add_res_arith[i][1]; + add_res_reduced0[i] = add_res_reduced[i][0]; + add_res_reduced1[i] = add_res_reduced[i][1]; end + + //If bit[23] = 1, subtract Q from adder result + prime0 = WIDTH'(add_res_arith0 * (~MLDSA_Q+'h1)); + prime1 = WIDTH'(add_res_arith1 * (~MLDSA_Q+'h1)); + prime_packed[0] = prime0; + prime_packed[1] = prime1; + add_res_reduced[0] = add_res_reg[0][0]+prime_packed[0]; + add_res_reduced[1] = add_res_reg[0][1]+prime_packed[1]; end + //Output flop - 1 clk always_ff @(posedge clk or negedge reset_n) begin if (!reset_n) begin for (int i = 0; i < WIDTH; i++) @@ -166,7 +194,9 @@ module ntt_masked_BFU_add_sub res[i] <= 2'h0; end else begin - res <= add_res_reduced; //TODO: check with Emre - shares XORed together give actual result. Is this correct? Or should they be added instead? + for (int i = 0; i < WIDTH; i++) begin + res[i] <= {add_res_reduced[1][i],add_res_reduced[0][i]}; //TODO: check with Emre - shares XORed together give actual result. Is this correct? Or should they be added instead? + end end end diff --git a/src/ntt_top/rtl/ntt_masked_BFU_mult.sv b/src/ntt_top/rtl/ntt_masked_BFU_mult.sv index a2313ad..861d242 100644 --- a/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +++ b/src/ntt_top/rtl/ntt_masked_BFU_mult.sv @@ -33,28 +33,47 @@ module ntt_masked_BFU_mult input wire [1:0][WIDTH-1:0] u, input wire [1:0][WIDTH-1:0] v, input wire [WIDTH-1:0] rnd0, rnd1, rnd2, rnd3, rnd4, - output logic [1:0] res [HALF_WIDTH-1:0] + output logic [1:0] res [WIDTH-1:0] ); //Internal signals logic [1:0] mul_res [WIDTH-1:0]; + logic [WIDTH-1:0] mul_res0, mul_res1, mul_res_combined, mul_res_combined_share0; + logic [1:0] mul_res_refresh [WIDTH-1:0]; logic [1:0] mul_res_bool [WIDTH-1:0]; + logic [WIDTH-1:0] mul_res_bool0, mul_res_bool1; + logic [1:0][WIDTH-1:0] temp, final_res; logic [1:0] mul_res_bool_reduced [WIDTH-1:0]; - logic [1:0] mul_res_reduced [HALF_WIDTH-1:0]; + logic [1:0] mul_res_reduced [WIDTH-1:0]; + logic [WIDTH-1:0] mul_res_bool_redux0, mul_res_bool_redux1, mul_res_redux0, mul_res_redux1; - //Perform mul on input shares + //Perform mul on input shares - 1 clk abr_masked_N_bit_mult_two_share #( .WIDTH(WIDTH) - ) masked_mult_inst ( + ) masked_two_share_mult_inst ( .clk(clk), .rst_n(reset_n), .zeroize(zeroize), .random(rnd0), .x(u), .y(v), - .s(mul_res) + .z(mul_res) ); + always_comb begin + for(int i = 0; i < WIDTH; i++) begin + mul_res0[i] = mul_res[i][0]; + mul_res1[i] = mul_res[i][1]; + end + mul_res_combined = (mul_res0 + mul_res1) % MLDSA_Q; + mul_res_combined_share0 = mul_res_combined - rnd0; + for (int i = 0; i < WIDTH; i++) begin + mul_res_refresh[i][0] = mul_res_combined_share0[i]; + mul_res_refresh[i][1] = rnd0[i]; + end + end +/* + //48 clks abr_masked_A2B_conv #( .WIDTH(WIDTH) ) a2b_inst ( @@ -80,18 +99,27 @@ module ntt_masked_BFU_mult // .rnd(rnd4), // .s() // ); + - //B2A + //B2A - 2 clks abr_masked_B2A_conv #( .WIDTH(WIDTH) ) b2a_inst ( .clk(clk), .rst_n(reset_n), .zeroize(zeroize), - .rnd(), + .rnd(rnd0), .x_boolean(mul_res_bool_reduced), .x_arith(mul_res_reduced) ); +*/ + always_comb begin + + for (int i = 0; i < WIDTH; i++) begin + mul_res_redux0[i] = mul_res_refresh[i][0]; //mul_res_reduced[i][0]; + mul_res_redux1[i] = mul_res_refresh[i][1]; //mul_res_reduced[i][1]; + end + end always_ff @(posedge clk or negedge reset_n) begin if (!reset_n) begin @@ -103,8 +131,16 @@ module ntt_masked_BFU_mult res[i] <= 2'h0; end else begin - res <= mul_res_reduced; + res <= mul_res_refresh; //mul_res_reduced; + end + end + + always_comb begin + for ( int i = 0; i < WIDTH; i++) begin + final_res[0][i] = res[i][0]; + final_res[1][i] = res[i][1]; end + end diff --git a/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv b/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv index 2adde19..a8861ca 100644 --- a/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +++ b/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv @@ -30,13 +30,20 @@ module ntt_masked_gs_butterfly input wire [1:0][WIDTH-1:0] opu_i, input wire [1:0][WIDTH-1:0] opv_i, - input wire [1:0][WIDTH-1:0] opw_i, + input wire [1:0][WIDTH-1:0] opw_i, //benefit from splitting? Or should we use one share mult? input wire [4:0][WIDTH-1:0] rnd_i, output logic [1:0] u_o [WIDTH-1:0], //TODO: make packed? output logic [1:0] v_o [WIDTH-1:0] ); + logic [1:0][WIDTH-1:0] w_reg [52:0]; //TODO parameterize + logic [1:0] add_res [WIDTH-1:0]; + logic [1:0] sub_res [WIDTH-1:0]; + logic [1:0] mul_res [WIDTH-1:0]; + logic [1:0][WIDTH-1:0] sub_res_packed; + + logic [WIDTH-1:0] add_res0, add_res1, mul_res0, mul_res1; ntt_masked_BFU_add_sub #( .WIDTH(WIDTH) @@ -44,13 +51,14 @@ module ntt_masked_gs_butterfly .clk(clk), .reset_n(reset_n), .zeroize(zeroize), + .sub('b0), .u(opu_i), .v(opv_i), .rnd0(rnd_i[0]), .rnd1(rnd_i[1]), .rnd2(rnd_i[2]), .rnd3(rnd_i[3]), - .res() + .res(add_res) //u+v ); ntt_masked_BFU_add_sub #( @@ -59,13 +67,63 @@ module ntt_masked_gs_butterfly .clk(clk), .reset_n(reset_n), .zeroize(zeroize), + .sub('b1), .u(opu_i), .v(opv_i), - .rnd0(rnd_i[0]), - .rnd1(rnd_i[1]), - .rnd2(rnd_i[2]), - .rnd3(rnd_i[3]), - .res() + .rnd0(rnd_i[1]), //Different rand order + .rnd1(rnd_i[2]), + .rnd2(rnd_i[3]), + .rnd3(rnd_i[0]), + .res(sub_res) //u-v + ); + + always_comb begin + for (int i = 0; i < WIDTH; i++) begin + add_res0[i] = add_res[i][0]; + add_res1[i] = add_res[i][1]; + sub_res_packed[0][i] = sub_res[i][0]; + sub_res_packed[1][i] = sub_res[i][1]; + end + end + + //w delay flops + always_ff @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + for (int i = 0; i < 53; i++) begin + w_reg[i] <= 'h0; + end + end + else if (zeroize) begin + for (int i = 0; i < 53; i++) begin + w_reg[i] <= 'h0; + end + end + else begin + w_reg <= {opw_i, w_reg[52:1]}; + end + end + + ntt_masked_BFU_mult #( + .WIDTH(WIDTH) + ) mult_inst_0 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .u(sub_res_packed), + .v(w_reg[0]), + .rnd0(rnd_i[2]), + .rnd1(rnd_i[3]), + .rnd2(rnd_i[0]), + .rnd3(rnd_i[1]), + .rnd4(rnd_i[2]+rnd_i[3]), + .res(mul_res) ); + always_comb begin + for (int i = 0; i < WIDTH; i++) begin + mul_res0[i] = mul_res[i][0]; + mul_res1[i] = mul_res[i][1]; + end + end + endmodule diff --git a/src/ntt_top/tb/ntt_top_tb.sv b/src/ntt_top/tb/ntt_top_tb.sv index 7fd778c..d181f26 100644 --- a/src/ntt_top/tb/ntt_top_tb.sv +++ b/src/ntt_top/tb/ntt_top_tb.sv @@ -74,11 +74,11 @@ pwo_mem_addr_t pwo_mem_base_addr_tb; string operation; -logic [45:0] actual_u, actual_v; +logic sub; +logic [45:0] actual_u, actual_v, actual_w; logic [1:0][45:0] u; logic [1:0][45:0] v; -logic [1:0] actual_sum [45:0]; -logic [46:0] exp_sum; +logic [1:0][45:0] w; logic [45:0] rnd0, rnd1, rnd2, rnd3; //---------------------------------------------------------------- @@ -156,17 +156,44 @@ logic [45:0] rnd0, rnd1, rnd2, rnd3; // .ntt_busy() // ); -ntt_masked_BFU_add_sub dut ( +// ntt_masked_BFU_add_sub dut ( +// .clk(clk_tb), +// .reset_n(reset_n_tb), +// .zeroize(zeroize_tb), +// .sub(sub), +// .u(u), +// .v(v), +// .rnd0(rnd0), +// .rnd1(rnd1), +// .rnd2(rnd2), +// .rnd3(rnd3), +// .res() +// ); + +// ntt_masked_BFU_mult dut ( +// .clk(clk_tb), +// .reset_n(reset_n_tb), +// .zeroize(zeroize_tb), +// .u(u), +// .v(v), +// .rnd0(rnd0), +// .rnd1(rnd1), +// .rnd2(rnd2), +// .rnd3(rnd3), +// .rnd4(rnd0+rnd1), +// .res() +// ); + +ntt_masked_gs_butterfly dut ( .clk(clk_tb), .reset_n(reset_n_tb), .zeroize(zeroize_tb), - .u(u), - .v(v), - .rnd0(rnd0), - .rnd1(rnd1), - .rnd2(rnd2), - .rnd3(rnd3), - .res(actual_sum) + .opu_i(u), + .opv_i(v), + .opw_i(w), + .rnd_i({rnd0+rnd1, rnd3, rnd2, rnd1, rnd0}), + .u_o(), + .v_o() ); //---------------------------------------------------------------- @@ -178,10 +205,10 @@ always begin : clk_gen #CLK_HALF_PERIOD; clk_tb = !clk_tb; - rnd0 = 'h1; //$random(); - rnd1 = 'h1; //$random(); - rnd2 = 'h1; //$random(); - rnd3 = 'h1; //$random(); + rnd0 = $random(); + rnd1 = $random(); + rnd2 = $random(); + rnd3 = $random(); end // clk_gen //---------------------------------------------------------------- @@ -263,6 +290,7 @@ task init_sim; end actual_u = 'h0; actual_v = 'h0; + sub = 'h0; $display("End of init\n"); end @@ -582,20 +610,113 @@ task init_mem(); load_tb_addr = 'h0; endtask +/* task masked_BFU_adder_test(); + logic [45:0] u_array, v_array; + logic [45:0] rand0, rand1; + sub = 1; + for (int i = 0; i < 1000; i++) begin + @(posedge clk_tb); + fork + begin + actual_u = $random()%PRIME; + actual_v = $random()%PRIME; + u_array = actual_u; + v_array = actual_v; + rand0 = $random(); + rand1 = $random(); + + u[0] = actual_u-rand0; + u[1] = rand0; + v[0] = actual_v-rand1; + v[1] = rand1; + // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); + end + begin + repeat(54) @(posedge clk_tb); + if (!sub) begin + if ((dut.add_res_reduced[1] + dut.add_res_reduced[0]) != ((u_array + v_array)%PRIME)) begin + $error("Addition Mismatch: exp_output = %h output shares = %h %h actual output = %h", (u_array + v_array)%PRIME, dut.add_res_reduced[0], dut.add_res_reduced[1], dut.add_res_reduced[0] + dut.add_res_reduced[1]); + end + end + else begin + if ((dut.add_res_reduced[1] + dut.add_res_reduced[0]) != ((u_array - v_array + PRIME)%PRIME)) begin + $error("Subtraction Mismatch: exp_output = %h output shares = %h %h actual output = %h", (u_array + PRIME + (~v_array+'h1))%PRIME, dut.add_res_reduced[0], dut.add_res_reduced[1], dut.add_res_reduced[0] + dut.add_res_reduced[1]); + end + end + end + join + end +endtask +*/ +/* +task masked_BFU_mult_test(); + logic [45:0] u_array, v_array; + logic [45:0] rand0, rand1; + + for (int i = 0; i < 10; i++) begin + @(posedge clk_tb); + fork + begin + actual_u = $random()%PRIME; + actual_v = $random()%PRIME; + u_array = actual_u; + v_array = actual_v; + rand0 = $random(); + rand1 = $random(); + + // $display("actual u = %h, actual v = %h", actual_u, actual_v); + + u[0] = actual_u-rand0; + u[1] = rand0; + v[0] = actual_v-rand1; + v[1] = rand1; + // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); + end + begin + repeat(3) @(posedge clk_tb); + if ((dut.final_res[1] + dut.final_res[0]) != ((u_array * v_array)%PRIME)) begin + $error("Multiplication Mismatch: exp_output = %h output shares = %h %h actual output = %h", (u_array * v_array)%PRIME, dut.final_res[0], dut.final_res[1], dut.final_res[0] + dut.final_res[1]); + end + end + join + end +endtask +*/ + +task masked_gs_butterfly_test(); + logic [45:0] rand0, rand1, rand2; for (int i = 0; i < 10; i++) begin @(posedge clk_tb); - actual_u = i+1; //$random; - actual_v = i+1; //$random; - $display("actual u = %h, actual v = %h\n", actual_u, actual_v); - // for (int j = 0; j < 46; j++) begin - // u[j] = {'h0, actual_u[j]}; - // v[j] = {'h0, actual_v[j]}; - // end - u[0] = actual_u; - u[1] = 'h0; //rand = 0; - v[0] = actual_v; - v[1] = 'h0; + fork + begin + actual_u = $random()%PRIME; + actual_v = $random()%PRIME; + actual_w = 'h2; + // u_array = actual_u; + // v_array = actual_v; + rand0 = $random(); + rand1 = $random(); + rand2 = $random(); + + // $display("actual u = %h, actual v = %h", actual_u, actual_v); + + u[0] = actual_u-rand0; + u[1] = rand0; + v[0] = actual_v-rand1; + v[1] = rand1; + w[0] = actual_w-rand2; + w[1] = rand2; + // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); + end + //TODO: check with Emre - when doing (u-v), should exp result be ((u-v)+Q) % Q to account for negative nums? FPV had issues with this, so do (if u < v), result + Q + // begin + // repeat(3) @(posedge clk_tb); + // if ((dut.final_res[1] + dut.final_res[0]) != ((u_array * v_array)%PRIME)) begin + // $error("Multiplication Mismatch: exp_output = %h output shares = %h %h actual output = %h", (u_array * v_array)%PRIME, dut.final_res[0], dut.final_res[1], dut.final_res[0] + dut.final_res[1]); + // end + // end + join end endtask @@ -615,7 +736,9 @@ initial begin // ntt_ctrl_test(); $display("Starting ntt test\n"); // ntt_top_test(); - masked_BFU_adder_test(); + // masked_BFU_adder_test(); + // masked_BFU_mult_test(); + masked_gs_butterfly_test(); // pwm_opt_test(); repeat(1000) @(posedge clk_tb); $finish; From bae564f28d4efda98b92810f4290ef12ec45227b Mon Sep 17 00:00:00 2001 From: Emre Karabulut Date: Mon, 11 Nov 2024 12:35:19 -0800 Subject: [PATCH 04/23] completed redux46 masking module --- src/abr_libs/config/compile.yml | 12 + src/abr_libs/rtl/abr_delay_masked_shares.sv | 77 +++ src/abr_libs/rtl/abr_masked_MUX.sv | 118 +++++ .../rtl/abr_masked_N_bit_Boolean_sub.sv | 141 ++++++ .../rtl/abr_masked_add_sub_mod_Boolean.sv | 141 ++++++ src/ntt_top/config/compile.yml | 21 +- src/ntt_top/rtl/ntt_masked_mult_redux46.sv | 478 ++++++++++++++++++ src/ntt_top/rtl/ntt_masked_special_adder.sv | 109 ++++ .../tb/ntt_masked_mult_redux46_auto_tb.sv | 123 +++++ src/ntt_top/tb/ntt_masked_mult_redux46_tb.sv | 121 +++++ 10 files changed, 1336 insertions(+), 5 deletions(-) create mode 100644 src/abr_libs/rtl/abr_delay_masked_shares.sv create mode 100644 src/abr_libs/rtl/abr_masked_MUX.sv create mode 100644 src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv create mode 100644 src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv create mode 100644 src/ntt_top/rtl/ntt_masked_mult_redux46.sv create mode 100644 src/ntt_top/rtl/ntt_masked_special_adder.sv create mode 100644 src/ntt_top/tb/ntt_masked_mult_redux46_auto_tb.sv create mode 100644 src/ntt_top/tb/ntt_masked_mult_redux46_tb.sv diff --git a/src/abr_libs/config/compile.yml b/src/abr_libs/config/compile.yml index f7a3b2c..0484923 100644 --- a/src/abr_libs/config/compile.yml +++ b/src/abr_libs/config/compile.yml @@ -21,6 +21,10 @@ targets: - $COMPILE_ROOT/rtl/abr_ahb_defines_pkg.sv - $COMPILE_ROOT/rtl/abr_ahb_slv_sif.sv - $COMPILE_ROOT/rtl/abr_masked_AND.sv + - $COMPILE_ROOT/rtl/abr_delay_masked_shares.sv + - $COMPILE_ROOT/rtl/abr_masked_add_sub_mod_Boolean.sv + - $COMPILE_ROOT/rtl/abr_masked_MUX.sv + - $COMPILE_ROOT/rtl/abr_masked_N_bit_Boolean_sub.sv - $COMPILE_ROOT/rtl/abr_masked_full_adder.sv - $COMPILE_ROOT/rtl/abr_masked_A2B_conv.sv - $COMPILE_ROOT/rtl/abr_masked_N_bit_Boolean_adder.sv @@ -45,6 +49,10 @@ targets: - $COMPILE_ROOT/rtl/abr_ahb_defines_pkg.sv - $COMPILE_ROOT/rtl/abr_ahb_slv_sif.sv - $COMPILE_ROOT/rtl/abr_masked_AND.sv + - $COMPILE_ROOT/rtl/abr_delay_masked_shares.sv + - $COMPILE_ROOT/rtl/abr_masked_add_sub_mod_Boolean.sv + - $COMPILE_ROOT/rtl/abr_masked_MUX.sv + - $COMPILE_ROOT/rtl/abr_masked_N_bit_Boolean_sub.sv - $COMPILE_ROOT/rtl/abr_masked_full_adder.sv - $COMPILE_ROOT/rtl/abr_masked_A2B_conv.sv - $COMPILE_ROOT/rtl/abr_masked_N_bit_Boolean_adder.sv @@ -60,6 +68,10 @@ targets: directories: [$COMPILE_ROOT/rtl] files: - $COMPILE_ROOT/rtl/abr_masked_AND.sv + - $COMPILE_ROOT/rtl/abr_delay_masked_shares.sv + - $COMPILE_ROOT/rtl/abr_masked_add_sub_mod_Boolean.sv + - $COMPILE_ROOT/rtl/abr_masked_MUX.sv + - $COMPILE_ROOT/rtl/abr_masked_N_bit_Boolean_sub.sv - $COMPILE_ROOT/rtl/abr_masked_full_adder.sv - $COMPILE_ROOT/rtl/abr_masked_A2B_conv.sv - $COMPILE_ROOT/rtl/abr_masked_N_bit_Boolean_adder.sv diff --git a/src/abr_libs/rtl/abr_delay_masked_shares.sv b/src/abr_libs/rtl/abr_delay_masked_shares.sv new file mode 100644 index 0000000..aacf829 --- /dev/null +++ b/src/abr_libs/rtl/abr_delay_masked_shares.sv @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//====================================================================== +// +// abr_delay_masked_shares +// Buffers the masked shares for the N cycle +//====================================================================== + +module abr_delay_masked_shares +#( + parameter WIDTH = 46, // Width of the input array + parameter N = 5 // Number of cycles to delay +) +( + input wire clk, + input wire rst_n, + input wire zeroize, + input wire [1:0] input_reg [WIDTH-1:0], // Input signal + output logic [1:0] delayed_reg [WIDTH-1:0] // Delayed output +); + + // Create an array of shift registers to store the delayed values + logic [1:0] shift_reg [N-1:0][WIDTH-1:0]; + + // Use an always_ff block to implement the shift register + always_ff @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + // Reset all shift register values to 0 + for (int j = 0; j < N; j = j + 1) begin + for (int i = 0; i < WIDTH; i = i + 1) begin + shift_reg[j][i] <= 2'b0; + end + end + end + else if (zeroize) begin + // Reset all shift register values to 0 + for (int j = 0; j < N; j = j + 1) begin + for (int i = 0; i < WIDTH; i = i + 1) begin + shift_reg[j][i] <= 2'b0; + end + end + end + else begin + // Shift the values through the registers + for (int j = 0; j < N-1; j = j + 1) begin + for (int i = 0; i < WIDTH; i = i + 1) begin + shift_reg[j+1][i] <= shift_reg[j][i]; + end + end + + // Load the input values into the first shift register stage + for (int i = 0; i < WIDTH; i = i + 1) begin + shift_reg[0][i] <= input_reg[i]; + end + end + end + + // Assign the output to the last stage of the shift register + always_comb begin + for (int i = 0; i < WIDTH; i = i + 1) begin + delayed_reg[i] = shift_reg[N-1][i]; + end + end + +endmodule: abr_delay_masked_shares \ No newline at end of file diff --git a/src/abr_libs/rtl/abr_masked_MUX.sv b/src/abr_libs/rtl/abr_masked_MUX.sv new file mode 100644 index 0000000..b9fb7f2 --- /dev/null +++ b/src/abr_libs/rtl/abr_masked_MUX.sv @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//====================================================================== +// +// abr_masked_MUX +// Selects either r0 or r1 based on the carry bit input. +// This selection mechanism is masked and performed without combining +// the masked shares. +//====================================================================== + +module abr_masked_MUX #( + parameter WIDTH = 23 +)( + input wire clk, // Clock signal + input wire rst_n, // Active low reset signal + input wire zeroize, // Zeroize signal + input wire sub_i, // Subtract signal (unmasked) + input wire [1:0] carry0, // Masked carry0 input + input wire [1:0] carry1, // Masked carry1 input + input wire [1:0] r0 [WIDTH-1:0], // Masked input r0 + input wire [1:0] r1 [WIDTH-1:0], // Masked input r1 + input wire [WIDTH-1:0] rnd_xor, // Randomness for XOR masking + input wire [WIDTH-1:0] rnd_and, // Randomness for AND gates + output logic [1:0] res_o_masked [WIDTH-1:0] // Masked output +); + + // Internal signals + logic [1:0] s; + logic [1:0] c0c1; + // Compute the masked differences x0y0 and x1y1 + logic [1:0] xy [WIDTH-1:0]; + logic [1:0] xyk [WIDTH-1:0]; + logic [1:0] xy_and_s [WIDTH-1:0]; + logic [1:0] r1_k [WIDTH-1:0]; + logic [1:0] r1_delayed [WIDTH-1:0]; + + // Compute the masked select bits s0 and s1 + always_comb begin + if (sub_i) begin + // When subtracting, select bits are based on carry0 + s = carry0; + end + else begin + // When adding, select bits are based on inverted (carry0 ^ carry1) + c0c1 = carry0 ^ carry1; + s[0] = ~c0c1[0]; + s[1] = c0c1[1]; + end + for (int i = 0; i < WIDTH; i++) begin + xy[i] = r0[i] ^ r1[i]; + xyk[i][0] = xy[i][0] ^ rnd_xor[i]; + xyk[i][1] = xy[i][1] ^ rnd_xor[i]; + r1_k[i][0] = r1[i][0] ^ rnd_xor[i]; + r1_k[i][1] = r1[i][1] ^ rnd_xor[i]; + end + end + + // Instantiate masked AND gates (DOM) for each bit + genvar i_AND; + generate + for (i_AND = 0; i_AND < WIDTH; i_AND = i_AND + 1) begin : gen_DOM_AND + abr_masked_AND and_gate_inst ( + .clk(clk), + .rst_n(rst_n), + .zeroize(zeroize), + .x(xyk[i_AND]), + .y(s), + .rnd(rnd_and[i_AND]), + .c(xy_and_s[i_AND]) + ); + end + endgenerate + + // Delay r1_k to align with masked AND gate outputs + abr_delay_masked_shares #( + .WIDTH(WIDTH), + .N(1) // Delays 1 cycle + ) delay_r1_k ( + .clk(clk), + .rst_n(rst_n), + .zeroize(zeroize), + .input_reg(r1_k), + .delayed_reg(r1_delayed) + ); + + // Compute the final masked result + // Resharing logic to apply masking + always_ff @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + for (int i = 0; i < WIDTH; i++) begin + res_o_masked[i] <= 2'b0; + end + end + else if (zeroize) begin + for (int i = 0; i < WIDTH; i++) begin + res_o_masked[i] <= 2'b0; + end + end + else begin + for (int i = 0; i < WIDTH; i++) begin + res_o_masked[i] <= xy_and_s[i] ^ r1_delayed[i]; + end + end + end + +endmodule: abr_masked_MUX \ No newline at end of file diff --git a/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv b/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv new file mode 100644 index 0000000..2ee4556 --- /dev/null +++ b/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//====================================================================== +// +// abr_masked_N_bit_Boolean_sub +// Performs Boolean subtraction and check abr_masked_N_bit_Boolean_adder +// for more information about the implementation details +//====================================================================== + +module abr_masked_N_bit_Boolean_sub #( + parameter WIDTH = 8 // Default width is 8 bits +)( + input wire clk, // Clock signal + input wire rst_n, // Active low reset signal + input wire zeroize, // Zeroize signal + input wire sub_i, + input wire [1:0] x [WIDTH-1:0], // WIDTH-bit input operand x + input wire [1:0] y [WIDTH-1:0], // WIDTH-bit input operand y + input wire [WIDTH-1:0] rnd, // Random bits for masking + + output logic [1:0] s [WIDTH-1:0] +); + + // Internal signals + logic [1:0] carry [WIDTH:0]; // Carry signals for each stage + logic [1:0] sum [WIDTH-1:0]; // Sum signals for each stage + logic [1:0] x_reg [WIDTH-1:0][WIDTH-1:0]; // Pipeline registers for x + logic [1:0] y_reg [WIDTH-1:0][WIDTH-1:0]; // Pipeline registers for y + logic [1:0] sum_reg [WIDTH-1:0][WIDTH-1:0]; // Pipeline registers for sum + logic [1:0] the_last_sum; + + // Initialize the first carry input to 0 + assign carry[0] = sub_i ? 2'b01 : 2'b00; + + // Generate the full adders for each bit + genvar i; + generate + for (i = 0; i < WIDTH; i = i + 1) begin : gen_full_adders + // Pipeline registers for x and y inputs + always_ff @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + for (int j = 0; j < WIDTH; j = j + 1) begin + x_reg[i][j] <= 2'b00; + y_reg[i][j] <= 2'b00; + end + end + else if (zeroize) begin + for (int j = 0; j < WIDTH; j = j + 1) begin + x_reg[i][j] <= 2'b00; + y_reg[i][j] <= 2'b00; + end + end + else begin + for (int j = 0; j < WIDTH; j = j + 1) begin + if (j == 0) begin + x_reg[i][j] <= x[i]; + y_reg[i][j] <= y[i]; + end + else begin + x_reg[i][j] <= x_reg[i][j-1]; + y_reg[i][j] <= y_reg[i][j-1]; + end + end + end + end + + // Pipeline registers for sum output + always_ff @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + for (int j = 0; j < WIDTH; j = j + 1) begin + sum_reg[i][j] <= 2'b00; + end + end + else if (zeroize) begin + for (int j = 0; j < WIDTH; j = j + 1) begin + sum_reg[i][j] <= 2'b00; + end + end + else begin + for (int j = i; j < WIDTH; j = j + 1) begin + if (j == i && i == WIDTH-1) begin + sum_reg[i][j] <= the_last_sum; + end + else if (j == i) begin + sum_reg[i][j] <= sum[i]; + end + else begin + sum_reg[i][j] <= sum_reg[i][j-1]; + end + end + end + end + if (i<(WIDTH-1)) begin + // Instance of abr_masked_full_adder + abr_masked_full_adder u_abr_masked_full_adder ( + .clk(clk), // Connect clk to clk + .rst_n(rst_n), // Connect rst_n to rst_n + .zeroize(zeroize), // Connect zeroize to zeroize + .x(x_reg[i][i]), // Connect x to the last stage of the x pipeline + .y(y_reg[i][i]), // Connect y to the last stage of the y pipeline + .c_in(carry[i]), // Connect c_in to carry[i] + .rnd(rnd[i]), // Connect rnd to corresponding random bit + .s(sum[i]), // Connect sum to sum[i] + .c_out(carry[i+1]) // Connect carry out to carry[i+1] + ); + end + end + endgenerate + + always_ff @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + the_last_sum <= 2'b00; + end + else if (zeroize) begin + the_last_sum <= 2'b00; + end + else begin + the_last_sum <= x_reg[WIDTH-1][WIDTH-1] ^ y_reg[WIDTH-1][WIDTH-1] ^ carry[WIDTH-1]; + end + end + + // Assign the outputs + always_comb begin + for (int i =0; i 0; i = i - 1) begin + expected_output_shift_reg[i] <= expected_output_shift_reg[i - 1]; + end + + // Store new expected output + expected_output_shift_reg[0] <= expected_output; + + // Provide random numbers + rnd <= {$urandom, $urandom, $urandom}; + rnd_width3 <= {$urandom, $urandom, $urandom}; + + // Compare output after latency + if (cycle_count >= LATENCY+2) begin + if (y !== expected_output_shift_reg[LATENCY-1]) begin + $display("Mismatch at cycle %0d: expected %0d, got %0d", cycle_count, expected_output_shift_reg[LATENCY-1], y); + end + // else begin + // $display("Match at cycle %0d: output %0d", cycle_count, y); + // end + end + + cycle_count <= cycle_count + 1; + + if (cycle_count >= NUM_OF_TEST_VECTOR) begin + $stop; + end + if ((cycle_count%10000) == 9999) begin + $display("Hit the interval %d", cycle_count); + end + end + end + + // Initialize clock and reset + initial begin + clk = 0; + rst_n = 0; + zeroize = 0; + cycle_count = 0; + rnd = '0; + rnd_width3 = '0; + #10 rst_n = 1; + end + +endmodule: ntt_masked_mult_redux46_auto_tb diff --git a/src/ntt_top/tb/ntt_masked_mult_redux46_tb.sv b/src/ntt_top/tb/ntt_masked_mult_redux46_tb.sv new file mode 100644 index 0000000..fac8bc5 --- /dev/null +++ b/src/ntt_top/tb/ntt_masked_mult_redux46_tb.sv @@ -0,0 +1,121 @@ +`timescale 1ns / 1ps + +module ntt_masked_mult_redux46_tb; + + // Parameters + parameter WIDTH = 46; + parameter WIDTH_RND = 63; + parameter PRIME = 23'd8380417; + + // Clock and reset + reg clk; + reg rst_n; + + // Inputs and outputs + reg zeroize; + logic [WIDTH_RND-1:0] rnd; + reg [WIDTH-1:0] x; + reg [WIDTH/2-1:0] y; + reg [WIDTH-1:0] random_mask; + reg [WIDTH-1:0] actual_input; + reg [10:0] d_10_0; + reg [22:0] d_22_0; + reg [22:0] z_45_23; + reg [23:0] d_22_0_r0c0; + reg [23:0] d_22_0_r1c1; + reg c0c1; + reg [11:0] c_11_0; + reg [13:0] f_14_0; + reg [22:0] e_22_0, res_22_0; + + // Masked input shares + reg [1:0] x_share [WIDTH-1:0]; + wire [1:0] y_share [WIDTH/2-1:0]; + + // Queue to store inputs + typedef struct { + logic [1:0] x_boolean [WIDTH-1:0]; + logic [WIDTH-1:0] rnd; + } input_t; + input_t input_queue [(WIDTH + 2)]; + + always_comb begin + for (int i = 0; i < WIDTH; i = i + 1) begin + x_share[i][0] = actual_input[i] ^ random_mask[i]; + x_share[i][1] = random_mask[i]; + end + for (int i = 0; i < WIDTH/2; i = i + 1) begin + y[i] = y_share[i][0] ^ y_share[i][1]; + end + c_11_0 = actual_input[45:43] + actual_input[42:33] + actual_input[32:23] + actual_input[22:13]; + d_10_0 = c_11_0[11:10] + c_11_0[9:0]; + f_14_0 = actual_input[45:43] + actual_input[45:33] + c_11_0[11:10]; + d_22_0_r0c0 = {d_10_0,actual_input[12:0]}; + d_22_0_r1c1 = d_22_0_r0c0 - 23'd8380417; + c0c1 = d_22_0_r0c0[23] ^ d_22_0_r1c1[23]; + d_22_0 = c0c1 ? d_22_0_r1c1[22:0] : d_22_0_r0c0[22:0]; + e_22_0 = (f_14_0+actual_input[45:23]) % PRIME; + res_22_0 = (d_22_0 - e_22_0) % PRIME; + z_45_23 = actual_input[45:23]; + end + always begin + @(negedge clk); + rnd = 0; + end + + // Instantiate the DUT + ntt_masked_mult_redux46 #( + .WIDTH(WIDTH) + ) dut ( + .clk(clk), + .rst_n(rst_n), + .zeroize(zeroize), + .rnd0_11(rnd[10:0]), + .rnd1_11(rnd[21:11]), + .rnd2_11(rnd[32:22]), + .rnd0_12(rnd[44:33]), + .rnd0_4(rnd[48:45]), + .rnd0_14(rnd[62:49]), + .rnd_3WIDTH(69'h0), + .x(x_share), + .y(y_share) + ); + + // Clock generation + always #5 clk = ~clk; + + // Test vectors + initial begin + // Initialize clock and reset + clk = 0; + rst_n = 0; + zeroize = 0; + + actual_input = 46'd0; + random_mask = 0; + #10 rst_n = 1; + repeat (3) @(negedge clk); + + @(negedge clk); + actual_input = 46'd39066633300384; + random_mask = 0; + $display("%d and mod(Q) = %d",actual_input, (actual_input % PRIME)); + @(negedge clk); + actual_input = 46'd36135737955137; + random_mask = 0; + $display("%d and mod(Q) = %d",actual_input, (actual_input % PRIME)); + @(negedge clk); + actual_input = 46'd39840167205202; + random_mask = 0; + $display("%d and mod(Q) = %d",actual_input, (actual_input % PRIME)); + + + repeat(10000) @(posedge clk); + #1; + $stop; + end + + + + +endmodule From 778b114d478c22a14596c9266ae58a8e07f047ca Mon Sep 17 00:00:00 2001 From: Emre Karabulut <“ekarabulut@microsoft.com”> Date: Mon, 11 Nov 2024 12:58:38 -0800 Subject: [PATCH 05/23] removed files from merged branch --- src/ntt_top/config/compile.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ntt_top/config/compile.yml b/src/ntt_top/config/compile.yml index 026aeb3..e7ce8a8 100755 --- a/src/ntt_top/config/compile.yml +++ b/src/ntt_top/config/compile.yml @@ -55,7 +55,6 @@ targets: - $COMPILE_ROOT/rtl/ntt_twiddle_lookup.sv - $COMPILE_ROOT/rtl/ntt_ctrl.sv - $COMPILE_ROOT/rtl/ntt_top.sv - - $COMPILE_ROOT/rtl/ntt_masked_BFU_add_sub.sv tb: directories: [$COMPILE_ROOT/rtl] files: From b1a7dab8307156add125df3b6926605e235a835a Mon Sep 17 00:00:00 2001 From: Emre Karabulut <“ekarabulut@microsoft.com”> Date: Mon, 11 Nov 2024 13:01:28 -0800 Subject: [PATCH 06/23] added a filed missing in compile order --- src/ntt_top/config/compile.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ntt_top/config/compile.yml b/src/ntt_top/config/compile.yml index e7ce8a8..81ae5ff 100755 --- a/src/ntt_top/config/compile.yml +++ b/src/ntt_top/config/compile.yml @@ -52,6 +52,7 @@ targets: - $COMPILE_ROOT/rtl/ntt_masked_mult_redux46.sv - $COMPILE_ROOT/rtl/ntt_div2.sv - $COMPILE_ROOT/rtl/ntt_buffer.sv + - $COMPILE_ROOT/rtl/ntt_shuffle_buffer.sv - $COMPILE_ROOT/rtl/ntt_twiddle_lookup.sv - $COMPILE_ROOT/rtl/ntt_ctrl.sv - $COMPILE_ROOT/rtl/ntt_top.sv From 009dfed477030ac495628efe2452956d9aef625f Mon Sep 17 00:00:00 2001 From: Emre Karabulut <“ekarabulut@microsoft.com”> Date: Mon, 11 Nov 2024 13:07:30 -0800 Subject: [PATCH 07/23] added license header --- .../tb/ntt_masked_mult_redux46_auto_tb.sv | 22 ++++++++++++++++++- src/ntt_top/tb/ntt_masked_mult_redux46_tb.sv | 22 ++++++++++++++++++- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/ntt_top/tb/ntt_masked_mult_redux46_auto_tb.sv b/src/ntt_top/tb/ntt_masked_mult_redux46_auto_tb.sv index 3c01fb4..2f5dfcc 100644 --- a/src/ntt_top/tb/ntt_masked_mult_redux46_auto_tb.sv +++ b/src/ntt_top/tb/ntt_masked_mult_redux46_auto_tb.sv @@ -1,4 +1,24 @@ -`timescale 1ns / 1ps +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//====================================================================== +// +// ntt_masked_mult_redux46_auto_tb.sv +// -------- +// +// +//====================================================================== module ntt_masked_mult_redux46_auto_tb; diff --git a/src/ntt_top/tb/ntt_masked_mult_redux46_tb.sv b/src/ntt_top/tb/ntt_masked_mult_redux46_tb.sv index fac8bc5..ec620e1 100644 --- a/src/ntt_top/tb/ntt_masked_mult_redux46_tb.sv +++ b/src/ntt_top/tb/ntt_masked_mult_redux46_tb.sv @@ -1,4 +1,24 @@ -`timescale 1ns / 1ps +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//====================================================================== +// +// ntt_masked_mult_redux46_tb.sv +// -------- +// +// +//====================================================================== module ntt_masked_mult_redux46_tb; From 06127fc7472eb6c91c82d3ea5e5fdc325f05c545 Mon Sep 17 00:00:00 2001 From: Emre Karabulut Date: Mon, 11 Nov 2024 21:27:30 +0000 Subject: [PATCH 08/23] MICROSOFT AUTOMATED PIPELINE: Stamp 'user/dev/ekarabulut/reduction46_0' with updated timestamp and hash after successful run --- .github/workflow_metadata/pr_hash | 2 +- .github/workflow_metadata/pr_timestamp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflow_metadata/pr_hash b/.github/workflow_metadata/pr_hash index 5ce3982..8ad5712 100644 --- a/.github/workflow_metadata/pr_hash +++ b/.github/workflow_metadata/pr_hash @@ -1 +1 @@ -0498b952cc0e6254259daf7a40977490ef109005599f452d7c850f0399d1d571c41792c8008c4147c6cbbe088b49c43c \ No newline at end of file +118850a491e532ac621ec73467e2d3cdaf25cd8b03a66199309c73c46dbd1efad2c4408e7cdd37355272723613956138 \ No newline at end of file diff --git a/.github/workflow_metadata/pr_timestamp b/.github/workflow_metadata/pr_timestamp index ed296b3..f66cbcd 100644 --- a/.github/workflow_metadata/pr_timestamp +++ b/.github/workflow_metadata/pr_timestamp @@ -1 +1 @@ -1730913274 \ No newline at end of file +1731360448 \ No newline at end of file From 98987333d42123765cc289014d8d9a4e7c5ca1af Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Thu, 14 Nov 2024 10:20:34 -0800 Subject: [PATCH 09/23] Masking WIP --- src/ntt_top/config/compile.yml | 3 + src/ntt_top/rtl/ntt_ctrl.sv | 2 +- src/ntt_top/rtl/ntt_defines_pkg.sv | 13 +- src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv | 367 ++++++++++++++++++++ src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv | 1 + src/ntt_top/rtl/ntt_masked_BFU_mult.sv | 57 +-- src/ntt_top/rtl/ntt_masked_butterfly1x2.sv | 157 +++++++++ src/ntt_top/rtl/ntt_masked_gs_butterfly.sv | 59 +++- src/ntt_top/rtl/ntt_masked_pwm.sv | 110 ++++++ src/ntt_top/rtl/ntt_top.sv | 26 +- src/ntt_top/tb/ntt_top_tb.sv | 301 ++++++++++++++-- src/ntt_top/tb/ntt_wrapper.sv | 4 + 12 files changed, 1034 insertions(+), 66 deletions(-) create mode 100644 src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv create mode 100644 src/ntt_top/rtl/ntt_masked_butterfly1x2.sv create mode 100644 src/ntt_top/rtl/ntt_masked_pwm.sv diff --git a/src/ntt_top/config/compile.yml b/src/ntt_top/config/compile.yml index 95ff017..5215523 100755 --- a/src/ntt_top/config/compile.yml +++ b/src/ntt_top/config/compile.yml @@ -59,6 +59,9 @@ targets: - $COMPILE_ROOT/rtl/ntt_masked_BFU_add_sub.sv - $COMPILE_ROOT/rtl/ntt_masked_BFU_mult.sv - $COMPILE_ROOT/rtl/ntt_masked_gs_butterfly.sv + - $COMPILE_ROOT/rtl/ntt_masked_pwm.sv + - $COMPILE_ROOT/rtl/ntt_masked_butterfly1x2.sv + - $COMPILE_ROOT/rtl/ntt_hybrid_butterfly_2x2.sv tb: directories: [$COMPILE_ROOT/rtl] files: diff --git a/src/ntt_top/rtl/ntt_ctrl.sv b/src/ntt_top/rtl/ntt_ctrl.sv index 51bb9d5..f8b6eb6 100644 --- a/src/ntt_top/rtl/ntt_ctrl.sv +++ b/src/ntt_top/rtl/ntt_ctrl.sv @@ -376,7 +376,7 @@ end //------------------------------------------ -//Twiddle addr logic +//Twiddle addr logic - TODO: shuffling+masking (adjust latency) //------------------------------------------ always_comb begin unique case(rounds_count) diff --git a/src/ntt_top/rtl/ntt_defines_pkg.sv b/src/ntt_top/rtl/ntt_defines_pkg.sv index 122b6aa..975b3cd 100644 --- a/src/ntt_top/rtl/ntt_defines_pkg.sv +++ b/src/ntt_top/rtl/ntt_defines_pkg.sv @@ -27,6 +27,7 @@ package ntt_defines_pkg; import mldsa_params_pkg::*; parameter NTT_REG_SIZE = REG_SIZE-1; +parameter MASKED_WIDTH = 46; // parameter MEM_DEPTH = 2**MLDSA_MEM_ADDR_WIDTH; @@ -37,7 +38,8 @@ localparam ct =3'd0, gs =3'd1, pwm=3'd2, pwa=3'd3, - pws=3'd4; + pws=3'd4, + pwm_intt = 3'd5; typedef logic [2:0] mode_t; @@ -60,6 +62,15 @@ typedef struct packed { logic [NTT_REG_SIZE-1:0] v21_o; } bf_uvo_t; +typedef struct packed { + logic [1:0][MASKED_WIDTH-1:0] u00_i; + logic [1:0][MASKED_WIDTH-1:0] u01_i; + logic [1:0][MASKED_WIDTH-1:0] v00_i; + logic [1:0][MASKED_WIDTH-1:0] v01_i; + logic [1:0][MASKED_WIDTH-1:0] w00_i; + logic [1:0][MASKED_WIDTH-1:0] w01_i; +} masked_bf_uvwi_t; //Only used in masked INTT stage 1 + typedef struct packed { logic [MLDSA_MEM_ADDR_WIDTH-1:0] src_base_addr; logic [MLDSA_MEM_ADDR_WIDTH-1:0] interim_base_addr; diff --git a/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv new file mode 100644 index 0000000..c3a216a --- /dev/null +++ b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv @@ -0,0 +1,367 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//====================================================================== +// +// ntt_hybrid_butterfly_2x2.sv +// -------- +// This module consists of masked PWMs, followed by 1st stage of masked and unmasked BFUs followed by +// 2nd stage of unmasked BFUs. In case of masking_en, PWMs are triggered and +// masked branch is taken for computing 1st stage outputs. In case of unmasked operation, +// both branches are enabled but unmasked outputs are passed to next stage. Final outputs are 23-bit values + +module ntt_hybrid_butterfly_2x2 + import mldsa_params_pkg::*; + import ntt_defines_pkg::*; +#( + parameter WIDTH = 46, + parameter HALF_WIDTH = WIDTH/2, + parameter UNMASKED_BF_LATENCY = 10, //5 cycles per butterfly * 2 instances in serial = 10 clks + parameter UNMASKED_PWM_LATENCY = 5, //latency of modular multiplier + modular addition to perform accumulation + parameter UNMASKED_PWA_LATENCY = 1, //latency of modular addition + parameter UNMASKED_PWS_LATENCY = 1, //latency of modular subtraction + parameter UNMASKED_BF_STAGE1_LATENCY = UNMASKED_BF_LATENCY/2, + parameter MASKED_BF_STAGE1_LATENCY = 260, //For 1 masked butterfly + parameter MASKED_PWM_LATENCY = 207, //For 1 masked pwm operation + parameter MASKED_INTT_LATENCY = MASKED_BF_STAGE1_LATENCY + UNMASKED_BF_STAGE1_LATENCY, + parameter MASKED_PWM_INTT_LATENCY = MASKED_PWM_LATENCY + MASKED_INTT_LATENCY +) +( + input wire clk, + input wire reset_n, + input wire zeroize, + + input mode_t mode, + input wire enable, + input wire masking_en, + input bf_uvwi_t uvw_i, //Inputs are original form + input pwo_uvwi_t pw_uvw_i, //PWO inputs are original form + input wire [4:0][WIDTH-1:0] rnd_i, + input wire accumulate, + + output bf_uvo_t uv_o, //Outputs are original form + output pwo_t pwo_uv_o, + output logic ready_o +); + +//---------------------- +//Unmasked wires +//---------------------- +//Inputs to 1st stage +logic [HALF_WIDTH-1:0] u00, u01, v00, v01; +logic [HALF_WIDTH-1:0] w00, w01, w10, w11; +//Outputs of 1st stage +logic [HALF_WIDTH-1:0] u10_int, u11_int, v10_int, v11_int; +//Inputs to 2nd stage +logic [HALF_WIDTH-1:0] u10, u11, v10, v11; +//Outputs of 2nd stage +logic [HALF_WIDTH-1:0] u20, u21, v20, v21; + +//Other internal wires +logic [UNMASKED_BF_STAGE1_LATENCY-1:0][WIDTH-1:0] w10_reg, w11_reg; //Shift w10 by 5 cycles to match 1st stage BF latency +logic [MASKED_BF_STAGE1_LATENCY-1:0][WIDTH-1:0] masked_w10_reg, masked_w11_reg; +logic pwo_mode, pwm_intt_mode; +logic [UNMASKED_BF_LATENCY-1:0] ready_reg; +logic [MASKED_PWM_INTT_LATENCY-1:0] masked_ready_reg; + +//Shares - TODO replace with struct? +logic [1:0][WIDTH-1:0] u00_share, u01_share, v00_share, v01_share, u10_share, v10_share, u11_share, v11_share; +logic [1:0][WIDTH-1:0] w00_share, w01_share, w10_share, w11_share, w10_reg_share, w11_reg_share; +logic [1:0][WIDTH-1:0] uv00_share, uv01_share, uv10_share, uv11_share; +bf_uvo_t masked_gs_stage1_uvo; + +//w delay flops +//Flop the twiddle factor 5x to correctly pass in values to the 2nd set of bf units +always_ff @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + w10_reg <= 'h0; + w11_reg <= 'h0; + end + else if (zeroize) begin + w10_reg <= 'h0; + w11_reg <= 'h0; + end + else begin + w10_reg <= {uvw_i.w10_i, w10_reg[UNMASKED_BF_STAGE1_LATENCY-1:1]}; + w11_reg <= {uvw_i.w11_i, w11_reg[UNMASKED_BF_STAGE1_LATENCY-1:1]}; + end +end + +always_ff @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + masked_w10_reg <= 'h0; + masked_w11_reg <= 'h0; + end + else if (zeroize) begin + masked_w10_reg <= 'h0; + masked_w11_reg <= 'h0; + end + else begin + masked_w10_reg <= {uvw_i.w10_i, masked_w10_reg[MASKED_BF_STAGE1_LATENCY-1:1]}; + masked_w11_reg <= {uvw_i.w11_i, masked_w11_reg[MASKED_BF_STAGE1_LATENCY-1:1]}; + end +end + +assign pwo_mode = (mode inside {pwm, pwa, pws}); +assign pwm_intt_mode = (mode == pwm_intt) & masking_en; + +//Input assignments - TODO: add input flops for u, v, w, and rnd? +always_comb begin + if (pwo_mode | pwm_intt_mode) begin //pwm_intt mode, inputs are driven on pw_uvw_i interface. TODO: check again + u00 = pw_uvw_i.u0_i; + v00 = pw_uvw_i.v0_i; + w00 = pw_uvw_i.w0_i; + + u01 = pw_uvw_i.u1_i; + v01 = pw_uvw_i.v1_i; + w01 = pw_uvw_i.w1_i; + + u10 = pw_uvw_i.u2_i; + v10 = pw_uvw_i.v2_i; + w10 = pw_uvw_i.w2_i; + + u11 = pw_uvw_i.u3_i; + v11 = pw_uvw_i.v3_i; + w11 = pw_uvw_i.w3_i; + + end + else begin //Only applies to unmasked ops since in masking, intt receives inputs from pwm and not from the API + u00 = uvw_i.u00_i; + v00 = uvw_i.v00_i; + w00 = uvw_i.w00_i; + + u01 = uvw_i.u01_i; + v01 = uvw_i.v01_i; + w01 = uvw_i.w01_i; + + u10 = u10_int; + v10 = v10_int; + w10 = w10_reg[0]; + + u11 = u11_int; + v11 = v11_int; + w11 = w11_reg[0]; + end +end + +//Split into shares +always_comb begin + //TODO: check randomness with Emre + //Split u inputs + u00_share[0] = /*uvw_i.u00_i*/u00 - rnd_i[0]; + u00_share[1] = rnd_i[0]; + + u01_share[0] = /*uvw_i.u01_i*/u01 - rnd_i[1]; + u01_share[1] = rnd_i[1]; + + u10_share[0] = u10 - rnd_i[0]; + u10_share[1] = rnd_i[0]; + + u11_share[0] = u11 - rnd_i[0]; + u11_share[1] = rnd_i[0]; + + //Split v inputs + v00_share[0] = /*uvw_i.v00_i*/v00 - rnd_i[2]; + v00_share[1] = rnd_i[2]; + + v01_share[0] = /*uvw_i.v01_i*/v01 - rnd_i[3]; + v01_share[1] = rnd_i[3]; + + v10_share[0] = v10 - rnd_i[2]; + v10_share[1] = rnd_i[2]; + + v11_share[0] = v11 - rnd_i[2]; + v11_share[1] = rnd_i[2]; + + //Split w inputs + w00_share[0] = /*uvw_i.w00_i*/w00 - rnd_i[4]; + w00_share[1] = rnd_i[4]; + + w01_share[0] = /*uvw_i.w01_i*/w01 - rnd_i[0]; + w01_share[1] = rnd_i[0]; + + w10_reg_share[0] = w10_reg[0] - rnd_i[1]; + w10_reg_share[1] = rnd_i[1]; + + w11_reg_share[0] = w11_reg[0] - rnd_i[2]; + w11_reg_share[1] = rnd_i[2]; +end + +//---------------------------------------------------- +//Masked PWMs - Used in masked PWM+INTT mode only +//---------------------------------------------------- +ntt_masked_pwm #( + .WIDTH(WIDTH) +) pwm_inst00 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .u(u00_share), + .v(v00_share), + .w(w00_share), + .rnd({rnd_i[4], rnd_i[3], rnd_i[2], rnd_i[1], rnd_i[0]}), + .res(uv00_share) +); + +ntt_masked_pwm #( + .WIDTH(WIDTH) +) pwm_inst01 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .u(u01_share), + .v(v01_share), + .w(w01_share), + .rnd({rnd_i[0], rnd_i[4], rnd_i[3], rnd_i[2], rnd_i[1]}), + .res(uv01_share) +); + +ntt_masked_pwm #( + .WIDTH(WIDTH) +) pwm_inst10 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .u(u10_share), + .v(v10_share), + .w(w10_share), + .rnd({rnd_i[1], rnd_i[0], rnd_i[4], rnd_i[3], rnd_i[2]}), + .res(uv10_share) +); + +ntt_masked_pwm #( + .WIDTH(WIDTH) +) pwm_inst11 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .u(u11_share), + .v(v11_share), + .w(w11_share), + .rnd({rnd_i[2], rnd_i[1], rnd_i[0], rnd_i[4], rnd_i[3]}), + .res(uv11_share) +); + +//---------------------------------------------------- +//Masked BFU stage 1 - Used in masked PWM+INTT mode only +//PWM outputs: uv00[1:0], uv01[1:0], uv10[1:0], uv11[1:0] +//---------------------------------------------------- +ntt_masked_butterfly1x2 #( + .WIDTH(WIDTH) +) masked_bf_1x2_inst0 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + // .enable() + .uvw_i({uv00_share, uv01_share, uv10_share, uv11_share}), //TODO check connection + .rnd_i({rnd_i[4], rnd_i[3], rnd_i[2], rnd_i[1], rnd_i[0]}), + .uv_o(masked_gs_stage1_uvo) +); + +//---------------------------------------------------- +//Unmasked BFU stage 1 - Used in all other modes +//---------------------------------------------------- +ntt_butterfly #( + .REG_SIZE(HALF_WIDTH) +) unmasked_bf_inst00 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .mode(mode), + .opu_i(masking_en ? 'h0 : u00), + .opv_i(masking_en ? 'h0 : v00), + .opw_i(masking_en ? 'h0 : w00), + .accumulate(accumulate), + .u_o(u10_int), + .v_o(u11_int), + .pwm_res_o(pwo_uv_o.uv0) +); + +ntt_butterfly #( + .REG_SIZE(HALF_WIDTH) +) unmasked_bf_inst01 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .mode(mode), + .opu_i(masking_en ? 'h0 : u01), + .opv_i(masking_en ? 'h0 : v01), + .opw_i(masking_en ? 'h0 : w01), + .accumulate(accumulate), + .u_o(v10_int), + .v_o(v11_int), + .pwm_res_o(pwo_uv_o.uv1) +); + +//---------------------------------------------------- +//Unmasked BFU stage 2 - Used in all modes (irrespective of masking_en) +//---------------------------------------------------- +ntt_butterfly #( + .REG_SIZE(HALF_WIDTH) +) unmasked_bf_inst10 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .mode(mode), + .opu_i(masking_en ? masked_gs_stage1_uvo.u20_o : u10), + .opv_i(masking_en ? masked_gs_stage1_uvo.v20_o : v10), + .opw_i(masking_en ? masked_w10_reg[0] : w10_reg[0]), //TODO: delayed w10 + .accumulate(accumulate), + .u_o(uv_o.u20_o), + .v_o(uv_o.v20_o), + .pwm_res_o(pwo_uv_o.uv2) +); + +ntt_butterfly #( + .REG_SIZE(HALF_WIDTH) +) unmasked_bf_inst11 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .mode(mode), + .opu_i(masking_en ? masked_gs_stage1_uvo.u21_o : u11), + .opv_i(masking_en ? masked_gs_stage1_uvo.v21_o : v11), + .opw_i(masking_en ? masked_w11_reg[0] : w11_reg[0]), //TODO: delayed w10 + .accumulate(accumulate), + .u_o(uv_o.u21_o), + .v_o(uv_o.v21_o), + .pwm_res_o(pwo_uv_o.uv3) +); + +//---------------------------------------------------- +//Determine when results are ready +//---------------------------------------------------- +//ready_o logic +always_ff @(posedge clk or negedge reset_n) begin + if (!reset_n) + masked_ready_reg <= 'b0; + else if (zeroize) + masked_ready_reg <= 'b0; + else begin + unique case(mode) //471:0 + ct: masked_ready_reg <= {462'h0, enable, masked_ready_reg[UNMASKED_BF_LATENCY-1:1]}; + gs: masked_ready_reg <= {462'h0, enable, masked_ready_reg[UNMASKED_BF_LATENCY-1:1]}; + pwm: masked_ready_reg <= accumulate ? {467'h0, enable, masked_ready_reg[UNMASKED_PWM_LATENCY-1:1]} : {6'h0, enable, masked_ready_reg[UNMASKED_PWM_LATENCY-2:1]}; + pwm_intt: masked_ready_reg <= accumulate ? {enable, masked_ready_reg[MASKED_PWM_INTT_LATENCY-1:1]} : {1'b0, enable, masked_ready_reg[MASKED_PWM_INTT_LATENCY-2:1]}; //TODO revisit + pwa: masked_ready_reg <= {471'h0, enable}; + pws: masked_ready_reg <= {471'h0, enable}; + default: masked_ready_reg <= 'h0; + endcase + end +end + +assign ready_o = masked_ready_reg[0]; + + +endmodule diff --git a/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv b/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv index da40be1..8dc9880 100644 --- a/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +++ b/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv @@ -48,6 +48,7 @@ module ntt_masked_BFU_add_sub logic [WIDTH-1:0] add_res_bool0, add_res_bool1, add_res_arith0, add_res_arith1, add_res_reduced0, add_res_reduced1; // logic [WIDTH-1:0] prime0, prime1; + //Add flops to inputs to avoid pruning TODO always_comb begin if (sub) begin v_int[0] = MLDSA_Q - v[0]; diff --git a/src/ntt_top/rtl/ntt_masked_BFU_mult.sv b/src/ntt_top/rtl/ntt_masked_BFU_mult.sv index 861d242..5214824 100644 --- a/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +++ b/src/ntt_top/rtl/ntt_masked_BFU_mult.sv @@ -15,7 +15,7 @@ //====================================================================== // // ntt_masked_BFU_mult -// Performs two share multiplication and reduction +// Performs two share multiplication and reduction - total latency = 207 clks //====================================================================== module ntt_masked_BFU_mult @@ -43,7 +43,8 @@ module ntt_masked_BFU_mult logic [1:0] mul_res_bool [WIDTH-1:0]; logic [WIDTH-1:0] mul_res_bool0, mul_res_bool1; logic [1:0][WIDTH-1:0] temp, final_res; - logic [1:0] mul_res_bool_reduced [WIDTH-1:0]; + logic [1:0] mul_res_bool_reduced [HALF_WIDTH-1:0]; + logic [1:0] mul_res_bool_reduced_padded [WIDTH-1:0]; logic [1:0] mul_res_reduced [WIDTH-1:0]; logic [WIDTH-1:0] mul_res_bool_redux0, mul_res_bool_redux1, mul_res_redux0, mul_res_redux1; @@ -72,7 +73,7 @@ module ntt_masked_BFU_mult mul_res_refresh[i][1] = rnd0[i]; end end -/* + //48 clks abr_masked_A2B_conv #( .WIDTH(WIDTH) @@ -87,19 +88,35 @@ module ntt_masked_BFU_mult .s(mul_res_bool) ); - //redux46 - // abr_masked_N_bit_Boolean_adder #( - // .WIDTH(10) //TODO: ask Emre - inputs are 10 bit, output should be 12 bits. Is it ok to put inputs at 12 too? - // ) bool_adder_inst0 ( - // .clk(clk), - // .rst_n(reset_n), - // .zeroize(zeroize), - // .x({12'(mul_res_bool[22:13][1]), 12'(mul_res_bool[22:13][0])}), - // .y({12'(mul_res_bool[32:23][1]), 12'(mul_res_bool[32:23][0])}), - // .rnd(rnd4), - // .s() - // ); + //Mult reduction46 - 156 clks + ntt_masked_mult_redux46 #( + .WIDTH(WIDTH) + ) mult_redux46_inst ( + .clk(clk), + .rst_n(reset_n), + .zeroize(zeroize), + .rnd0_11(rnd0[10:0]), + .rnd1_11(rnd0[21:11]), + .rnd2_11(rnd0[32:22]), + .rnd0_12(rnd0[44:33]), + .rnd0_4(rnd1[3:0]), + .rnd0_14(rnd1[17:4]), + .rnd_3WIDTH({rnd4[HALF_WIDTH-1:0], rnd3[HALF_WIDTH-1:0], rnd2[HALF_WIDTH-1:0]}), + .x(mul_res_bool), + .y(mul_res_bool_reduced) + ); + always_comb begin + for (int i = 0; i < WIDTH; i++) begin + if (i < HALF_WIDTH) begin + mul_res_bool_reduced_padded[i][0] = mul_res_bool_reduced[i][0]; + mul_res_bool_reduced_padded[i][1] = mul_res_bool_reduced[i][1]; + end + else begin + mul_res_bool_reduced_padded[i] = 2'b00; + end + end + end //B2A - 2 clks abr_masked_B2A_conv #( @@ -109,15 +126,15 @@ module ntt_masked_BFU_mult .rst_n(reset_n), .zeroize(zeroize), .rnd(rnd0), - .x_boolean(mul_res_bool_reduced), + .x_boolean(mul_res_bool_reduced_padded), .x_arith(mul_res_reduced) ); -*/ + always_comb begin for (int i = 0; i < WIDTH; i++) begin - mul_res_redux0[i] = mul_res_refresh[i][0]; //mul_res_reduced[i][0]; - mul_res_redux1[i] = mul_res_refresh[i][1]; //mul_res_reduced[i][1]; + mul_res_redux0[i] = mul_res_reduced[i][0]; //mul_res_refresh[i][0]; + mul_res_redux1[i] = mul_res_reduced[i][1]; //mul_res_refresh[i][1]; end end @@ -131,7 +148,7 @@ module ntt_masked_BFU_mult res[i] <= 2'h0; end else begin - res <= mul_res_refresh; //mul_res_reduced; + res <= mul_res_reduced; //mul_res_refresh; end end diff --git a/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv b/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv new file mode 100644 index 0000000..d9c767c --- /dev/null +++ b/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//====================================================================== +// +// ntt_masked_butterfly1x2.sv +// -------- +// 1. Performs 1st stage of masked INTT operation +// 2. Combines output shares +// 3. Performs div2 on combined outputs (unmasked) +// Total latency = 261 clks + +module ntt_masked_butterfly1x2 + import mldsa_params_pkg::*; + import ntt_defines_pkg::*; + #( + parameter WIDTH = 46, + parameter HALF_WIDTH = WIDTH/2, + parameter MASKED_BF_STAGE1_LATENCY = 260 + ) + ( + input wire clk, + input wire reset_n, + input wire zeroize, + // input wire enable, + input masked_bf_uvwi_t uvw_i, + input [4:0][WIDTH-1:0] rnd_i, + + output bf_uvo_t uv_o //, + // output logic ready_o + ); + + logic [1:0][WIDTH-1:0] u00, v00, w00; + logic [1:0][WIDTH-1:0] u01, v01, w01; + logic [1:0] u10_int [WIDTH-1:0]; + logic [1:0] v10_int [WIDTH-1:0]; + logic [1:0] u11_int [WIDTH-1:0]; + logic [1:0] v11_int [WIDTH-1:0]; + logic [1:0][WIDTH-1:0] u10_packed, v10_packed, u11_packed, v11_packed; + logic [HALF_WIDTH-1:0] u10_combined, v10_combined, u11_combined, v11_combined; //TODO: 46 bit or 23 bit? check with Emre + logic [HALF_WIDTH-1:0] u10_div2, v10_div2, u11_div2, v11_div2; //TODO: check width + + always_comb begin + u00 = uvw_i.u00_i; + v00 = uvw_i.v00_i; + w00 = uvw_i.w00_i; + + u01 = uvw_i.u01_i; + v01 = uvw_i.v01_i; + w01 = uvw_i.w01_i; + end + + ntt_masked_gs_butterfly #( + .WIDTH(WIDTH) + ) masked_bf_inst00 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .opu_i(u00), + .opv_i(v00), + .opw_i(w00), + .rnd_i({rnd_i[4], rnd_i[3], rnd_i[2], rnd_i[1], rnd_i[0]}), + .u_o(u10_int), + .v_o(v10_int) //TODO: swap outputs here or inputs to 2nd stage? + ); + + ntt_masked_gs_butterfly #( + .WIDTH(WIDTH) + ) masked_bf_inst01 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .opu_i(u01), + .opv_i(v01), + .opw_i(w01), + .rnd_i({rnd_i[0], rnd_i[4], rnd_i[3], rnd_i[2], rnd_i[1]}), + .u_o(u11_int), + .v_o(v11_int) //TODO: swap outputs here or inputs to 2nd stage? + ); + + always_comb begin + for (int i = 0; i < WIDTH; i++) begin + u10_packed[0][i] = u10_int[i][0]; + u10_packed[1][i] = u10_int[i][1]; + u11_packed[0][i] = u11_int[i][0]; + u11_packed[1][i] = u11_int[i][1]; + v10_packed[0][i] = v10_int[i][0]; + v10_packed[1][i] = v10_int[i][1]; + v11_packed[0][i] = v11_int[i][0]; + v11_packed[1][i] = v11_int[i][1]; + end + u10_combined = u10_packed[0] + u10_packed[1]; + v10_combined = v10_packed[0] + v10_packed[1]; + u11_combined = u11_packed[0] + u11_packed[1]; + v11_combined = v11_packed[0] + v11_packed[1]; + end + + //Perform div2 on combined outputs + ntt_div2 #( + .REG_SIZE(HALF_WIDTH), + .MLDSA_Q(mldsa_params_pkg::MLDSA_Q) + ) div2_inst0 ( + .op_i(u10_combined), + .res_o(u10_div2) + ); + + ntt_div2 #( + .REG_SIZE(HALF_WIDTH), + .MLDSA_Q(mldsa_params_pkg::MLDSA_Q) + ) div2_inst1 ( + .op_i(v10_combined), + .res_o(v10_div2) + ); + + ntt_div2 #( + .REG_SIZE(HALF_WIDTH), + .MLDSA_Q(mldsa_params_pkg::MLDSA_Q) + ) div2_inst2 ( + .op_i(u11_combined), + .res_o(u11_div2) + ); + + ntt_div2 #( + .REG_SIZE(HALF_WIDTH), + .MLDSA_Q(mldsa_params_pkg::MLDSA_Q) + ) div2_inst3 ( + .op_i(v11_combined), + .res_o(v11_div2) + ); + + always_ff @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + uv_o <= 'h0; + end + else if (zeroize) begin + uv_o <= 'h0; + end + else begin + uv_o.u20_o <= u10_div2; + uv_o.u21_o <= u11_div2; //Check connection TODO + uv_o.v20_o <= v10_div2; + uv_o.v21_o <= v11_div2; //Check connection TODO + end + end + + +endmodule \ No newline at end of file diff --git a/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv b/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv index a8861ca..49cef88 100644 --- a/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +++ b/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv @@ -16,6 +16,7 @@ // ntt_masked_gs_butterfly.sv // -------- // Only performs gs (INTT) mode of operation. All blocks are masked +// Latency = 260 clks module ntt_masked_gs_butterfly import mldsa_params_pkg::*; @@ -37,21 +38,26 @@ module ntt_masked_gs_butterfly output logic [1:0] v_o [WIDTH-1:0] ); + localparam MASKED_MULT_LATENCY = 207; logic [1:0][WIDTH-1:0] w_reg [52:0]; //TODO parameterize logic [1:0] add_res [WIDTH-1:0]; logic [1:0] sub_res [WIDTH-1:0]; logic [1:0] mul_res [WIDTH-1:0]; logic [1:0][WIDTH-1:0] sub_res_packed; - logic [WIDTH-1:0] add_res0, add_res1, mul_res0, mul_res1; + logic [1:0] add_res_reg [WIDTH-1:0]; + logic [WIDTH-1:0] add_res_reg0, add_res_reg1; + logic [WIDTH-1:0] add_res0, add_res1, mul_res0, mul_res1, u_o_0, u_o_1, v_o_0, v_o_1; + + //53 clks ntt_masked_BFU_add_sub #( .WIDTH(WIDTH) ) add_inst_0 ( .clk(clk), .reset_n(reset_n), .zeroize(zeroize), - .sub('b0), + .sub(1'b0), .u(opu_i), .v(opv_i), .rnd0(rnd_i[0]), @@ -61,13 +67,25 @@ module ntt_masked_gs_butterfly .res(add_res) //u+v ); + abr_delay_masked_shares #( + .WIDTH(WIDTH), + .N(MASKED_MULT_LATENCY) + ) add_res_delay_inst ( + .clk(clk), + .rst_n(reset_n), + .zeroize(zeroize), + .input_reg(add_res), + .delayed_reg(add_res_reg) + ); + + //53 clks ntt_masked_BFU_add_sub #( .WIDTH(WIDTH) ) sub_inst_0 ( .clk(clk), .reset_n(reset_n), .zeroize(zeroize), - .sub('b1), + .sub(1'b1), .u(opu_i), .v(opv_i), .rnd0(rnd_i[1]), //Different rand order @@ -83,6 +101,9 @@ module ntt_masked_gs_butterfly add_res1[i] = add_res[i][1]; sub_res_packed[0][i] = sub_res[i][0]; sub_res_packed[1][i] = sub_res[i][1]; + + add_res_reg0[i] = add_res_reg[i][0]; + add_res_reg1[i] = add_res_reg[i][1]; end end @@ -103,6 +124,7 @@ module ntt_masked_gs_butterfly end end + //207 clks ntt_masked_BFU_mult #( .WIDTH(WIDTH) ) mult_inst_0 ( @@ -116,7 +138,7 @@ module ntt_masked_gs_butterfly .rnd2(rnd_i[0]), .rnd3(rnd_i[1]), .rnd4(rnd_i[2]+rnd_i[3]), - .res(mul_res) + .res(mul_res) //(u-v)*w ); always_comb begin @@ -126,4 +148,33 @@ module ntt_masked_gs_butterfly end end + always_ff @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + for (int i = 0; i < WIDTH; i++) begin + u_o[i] <= 2'b0; + v_o[i] <= 2'b0; + end + end + else if (zeroize) begin + for (int i = 0; i < WIDTH; i++) begin + u_o[i] <= 2'b0; + v_o[i] <= 2'b0; + end + end + else begin + u_o <= add_res_reg; //div2 done outside 1st stage of butterfly (in 2x2) + v_o <= mul_res; //div2 done outside 1st stage of butterfly (in 2x2) + end + end + + always_comb begin + for (int i = 0; i < WIDTH; i++) begin + u_o_0[i] = u_o[i][0]; + u_o_1[i] = u_o[i][1]; + + v_o_0[i] = v_o[i][0]; + v_o_1[i] = v_o[i][1]; + end + end + endmodule diff --git a/src/ntt_top/rtl/ntt_masked_pwm.sv b/src/ntt_top/rtl/ntt_masked_pwm.sv new file mode 100644 index 0000000..edfe994 --- /dev/null +++ b/src/ntt_top/rtl/ntt_masked_pwm.sv @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//====================================================================== +// +// ntt_masked_pwm.sv +// -------- +// This module performs masked pwm operation with or without accumulate +// on input shares. Always performs (u*v)+w (top level needs to drive 0 +// to the w input if not in accumulate mode) + +module ntt_masked_pwm + import mldsa_params_pkg::*; + import ntt_defines_pkg::*; +#( + parameter WIDTH = 46, + parameter MASKED_MULT_LATENCY = 207 +) +( + input wire clk, + input wire reset_n, + input wire zeroize, + input wire [1:0][WIDTH-1:0] u, + input wire [1:0][WIDTH-1:0] v, + input wire [1:0][WIDTH-1:0] w, + input wire [4:0][WIDTH-1:0] rnd, + output logic [1:0][WIDTH-1:0] res +); + + logic [1:0] mul_res [WIDTH-1:0]; + logic [1:0] w_reg [WIDTH-1:0]; + logic [1:0] w_unpacked [WIDTH-1:0]; + logic [1:0][WIDTH-1:0] mul_res_packed, w_reg_packed; + logic [1:0] res_unpacked [WIDTH-1:0]; + + always_comb begin + for (int i = 0; i < WIDTH; i++) begin + w_unpacked[i][0] = w[0][i]; + w_unpacked[i][1] = w[1][i]; + + w_reg_packed[0][i] = w_reg[i][0]; + w_reg_packed[1][i] = w_reg[i][1]; + + mul_res_packed[0][i] = mul_res[i][0]; + mul_res_packed[1][i] = mul_res[i][1]; + end + end + + ntt_masked_BFU_mult #( + .WIDTH(WIDTH) + ) mult_inst0 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .u(u), + .v(v), + .rnd0(rnd[0]), + .rnd1(rnd[1]), + .rnd2(rnd[2]), + .rnd3(rnd[3]), + .rnd4(rnd[4]), + .res(mul_res) + ); + + abr_delay_masked_shares #( + .WIDTH(WIDTH), + .N(MASKED_MULT_LATENCY) + ) w_delay ( + .clk(clk), + .rst_n(reset_n), + .zeroize(zeroize), + .input_reg(w_unpacked), + .delayed_reg(w_reg) + ); + + ntt_masked_BFU_add_sub #( + .WIDTH(WIDTH) + ) add_inst0 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .sub(1'b0), + .u(mul_res_packed), + .v(w_reg_packed), + .rnd0(rnd[0]), + .rnd1(rnd[1]), + .rnd2(rnd[2]), + .rnd3(rnd[3]), + .res(res_unpacked) + ); + + always_comb begin + for (int i = 0; i < WIDTH; i++) begin + res[0][i] = res_unpacked[i][0]; + res[1][i] = res_unpacked[i][1]; + end + end + +endmodule \ No newline at end of file diff --git a/src/ntt_top/rtl/ntt_top.sv b/src/ntt_top/rtl/ntt_top.sv index fa0b97e..1cb9993 100644 --- a/src/ntt_top/rtl/ntt_top.sv +++ b/src/ntt_top/rtl/ntt_top.sv @@ -40,7 +40,8 @@ module ntt_top parameter MLDSA_N = 256, parameter MLDSA_LOGN = $clog2(MLDSA_N), parameter MEM_ADDR_WIDTH = 15, - parameter MEM_DATA_WIDTH = 4*REG_SIZE + parameter MEM_DATA_WIDTH = 4*REG_SIZE, + parameter WIDTH = 46 ) ( //Clock and reset @@ -65,7 +66,9 @@ module ntt_top input wire sampler_valid, input wire shuffle_en, + input wire masking_en, input wire [5:0] random, + input wire [4:0][WIDTH-1:0] rnd_i, //Memory if //Reuse between pwm c, ntt @@ -269,7 +272,7 @@ module ntt_top endcase end - + /* //Butterfly 2x2 ntt_butterfly2x2 #( .REG_SIZE(NTT_REG_SIZE), @@ -288,6 +291,25 @@ module ntt_top .pwo_uv_o(pwo_uv_o), .ready_o(bf_ready) ); + */ + + ntt_hybrid_butterfly_2x2 #( + .WIDTH(WIDTH) + ) + hybrid_bf2x2 ( + .clk(clk), + .reset_n(reset_n), + .zeroize(zeroize), + .mode(mode), + .enable(bf_enable_mux), + .uvw_i(uvw_i), + .pw_uvw_i(pw_uvw_i), + .rnd_i(rnd_i), + .accumulate(accumulate), + .uv_o(uv_o), + .pwo_uv_o(pwo_uv_o), + .ready_o(bf_ready) + ); always_ff @(posedge clk or negedge reset_n) begin if (!reset_n) begin diff --git a/src/ntt_top/tb/ntt_top_tb.sv b/src/ntt_top/tb/ntt_top_tb.sv index df50b3f..2bb8f1e 100644 --- a/src/ntt_top/tb/ntt_top_tb.sv +++ b/src/ntt_top/tb/ntt_top_tb.sv @@ -84,6 +84,8 @@ logic [45:0] rnd0, rnd1, rnd2, rnd3; logic wren_tb, rden_tb; logic [1:0] wrptr_tb, rdptr_tb; logic [5:0] random_tb; +bf_uvwi_t uvw_i_tb; +pwo_uvwi_t pw_uvw_i_tb; //---------------------------------------------------------------- // Device Under Test. @@ -142,23 +144,23 @@ logic [5:0] random_tb; // .sampler_valid(svalid_tb) // ); -// ntt_wrapper dut ( -// .clk(clk_tb), -// .reset_n(reset_n_tb), -// .zeroize(zeroize_tb), -// .mode(mode_tb), -// .ntt_enable(enable_tb), -// .load_tb_values(load_tb_values), -// .load_tb_addr(load_tb_addr), -// .ntt_mem_base_addr(ntt_mem_base_addr_tb), -// .pwo_mem_base_addr(pwo_mem_base_addr_tb), -// .accumulate(acc_tb), -// .sampler_valid(svalid_tb), -// .sampler_mode(sampler_mode_tb), -// .sampler_data(96'hFFFFFF), -// .ntt_done(ntt_done_tb), -// .ntt_busy() -// ); +ntt_wrapper dut ( + .clk(clk_tb), + .reset_n(reset_n_tb), + .zeroize(zeroize_tb), + .mode(mode_tb), + .ntt_enable(enable_tb), + .load_tb_values(load_tb_values), + .load_tb_addr(load_tb_addr), + .ntt_mem_base_addr(ntt_mem_base_addr_tb), + .pwo_mem_base_addr(pwo_mem_base_addr_tb), + .accumulate(acc_tb), + .sampler_valid(svalid_tb), + .sampler_mode(sampler_mode_tb), + .sampler_data(96'hFFFFFF), + .ntt_done(ntt_done_tb), + .ntt_busy() +); // ntt_masked_BFU_add_sub dut ( // .clk(clk_tb), @@ -202,17 +204,53 @@ logic [5:0] random_tb; // .data_o() // ); -ntt_masked_gs_butterfly dut ( - .clk(clk_tb), - .reset_n(reset_n_tb), - .zeroize(zeroize_tb), - .opu_i(u), - .opv_i(v), - .opw_i(w), - .rnd_i({rnd0+rnd1, rnd3, rnd2, rnd1, rnd0}), - .u_o(), - .v_o() -); +// ntt_masked_gs_butterfly dut ( +// .clk(clk_tb), +// .reset_n(reset_n_tb), +// .zeroize(zeroize_tb), +// .opu_i(u), +// .opv_i(v), +// .opw_i(w), +// .rnd_i({rnd0+rnd1, rnd3, rnd2, rnd1, rnd0}), +// .u_o(), +// .v_o() +// ); + +// ntt_masked_pwm dut ( +// .clk(clk_tb), +// .reset_n(reset_n_tb), +// .zeroize(zeroize_tb), +// .u(u), +// .v(v), +// .w(w), +// .rnd({rnd0+rnd1, rnd3, rnd2, rnd1, rnd0}), +// .res() +// ); + +// ntt_masked_butterfly1x2 dut ( +// .clk(clk_tb), +// .reset_n(reset_n_tb), +// .zeroize(zeroize_tb), +// .uvw_i(uvw_i_tb), +// .rnd_i({rnd0+rnd1, rnd3, rnd2, rnd1, rnd0}), +// .uv_o() +// ); + +// ntt_hybrid_butterfly_2x2 dut ( +// .clk(clk_tb), +// .reset_n(reset_n_tb), +// .zeroize(zeroize_tb), +// .mode(mode_tb), +// .enable(enable_tb), +// .masking_en(1'b0), +// .uvw_i(uvw_i_tb), +// .pw_uvw_i(pw_uvw_i_tb), +// .rnd_i({rnd0+rnd1, rnd3, rnd2, rnd1, rnd0}), +// .accumulate(1'b0), +// .uv_o(), +// .pwo_uv_o(), +// .ready_o() +// ); //---------------------------------------------------------------- // clk_gen @@ -311,8 +349,37 @@ task init_sim; end actual_u = 'h0; actual_v = 'h0; + actual_w = 'h0; sub = 'h0; + rnd0 = 'h0; + rnd1 = 'h0; + rnd2 = 'h0; + rnd3 = 'h0; + + uvw_i_tb.u00_i = 'h0; + uvw_i_tb.u01_i = 'h0; + uvw_i_tb.v00_i = 'h0; + uvw_i_tb.v01_i = 'h0; + uvw_i_tb.w00_i = 'h0; + uvw_i_tb.w01_i = 'h0; + + pw_uvw_i_tb.u0_i = 'h0; + pw_uvw_i_tb.v0_i = 'h0; + pw_uvw_i_tb.w0_i = 'h0; + + pw_uvw_i_tb.u1_i = 'h0; + pw_uvw_i_tb.v1_i = 'h0; + pw_uvw_i_tb.w1_i = 'h0; + + pw_uvw_i_tb.u2_i = 'h0; + pw_uvw_i_tb.v2_i = 'h0; + pw_uvw_i_tb.w2_i = 'h0; + + pw_uvw_i_tb.u3_i = 'h0; + pw_uvw_i_tb.v3_i = 'h0; + pw_uvw_i_tb.w3_i = 'h0; + $display("End of init\n"); end endtask @@ -709,8 +776,8 @@ task masked_BFU_adder_test(); join end endtask -*/ -/* + + task masked_BFU_mult_test(); logic [45:0] u_array, v_array; logic [45:0] rand0, rand1; @@ -735,7 +802,7 @@ task masked_BFU_mult_test(); // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); end begin - repeat(3) @(posedge clk_tb); + repeat(210) @(posedge clk_tb); if ((dut.final_res[1] + dut.final_res[0]) != ((u_array * v_array)%PRIME)) begin $error("Multiplication Mismatch: exp_output = %h output shares = %h %h actual output = %h", (u_array * v_array)%PRIME, dut.final_res[0], dut.final_res[1], dut.final_res[0] + dut.final_res[1]); end @@ -745,7 +812,52 @@ task masked_BFU_mult_test(); endtask */ -task masked_gs_butterfly_test(); + +// task masked_gs_butterfly_test(); +// logic [45:0] rand0, rand1, rand2; +// logic [45:0] actual_u_normalized; +// for (int i = 0; i < 10; i++) begin +// @(posedge clk_tb); +// fork +// begin +// actual_u = $random()%PRIME; +// actual_v = $random()%PRIME; +// actual_w = 'h2; +// if (actual_u < actual_v) +// actual_u_normalized = actual_u + PRIME; +// else +// actual_u_normalized = actual_u; +// // u_array = actual_u; +// // v_array = actual_v; +// rand0 = $random(); +// rand1 = $random(); +// rand2 = $random(); + +// // $display("actual u = %h, actual v = %h", actual_u, actual_v); + +// u[0] = actual_u-rand0; +// u[1] = rand0; +// v[0] = actual_v-rand1; +// v[1] = rand1; +// w[0] = actual_w-rand2; +// w[1] = rand2; +// // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); +// end +// begin +// repeat(264) @(posedge clk_tb); +// if ((dut.u_o_0 + dut.u_o_1) != ((actual_u_normalized + actual_v)%PRIME)) begin +// $error("U = u+v Mismatch: exp_output = %h output shares = %h %h actual output = %h", (actual_u_normalized + actual_v)%PRIME, dut.u_o_0, dut.u_o_1, dut.u_o_0 + dut.u_o_1); +// end +// if ((dut.v_o_0 + dut.v_o_1) != (((actual_u_normalized - actual_v)*actual_w)%PRIME)) begin +// $error("V = (u-v)w Mismatch: exp_output = %h output shares = %h %h actual output = %h", ((actual_u_normalized - actual_v)*actual_w)%PRIME, dut.v_o_0, dut.v_o_1, dut.v_o_0 + dut.v_o_1); +// end +// end +// join +// end +// endtask + +/* +task masked_pwm_test(); logic [45:0] rand0, rand1, rand2; for (int i = 0; i < 10; i++) begin @(posedge clk_tb); @@ -754,6 +866,7 @@ task masked_gs_butterfly_test(); actual_u = $random()%PRIME; actual_v = $random()%PRIME; actual_w = 'h2; + // u_array = actual_u; // v_array = actual_v; rand0 = $random(); @@ -770,15 +883,125 @@ task masked_gs_butterfly_test(); w[1] = rand2; // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); end - //TODO: check with Emre - when doing (u-v), should exp result be ((u-v)+Q) % Q to account for negative nums? FPV had issues with this, so do (if u < v), result + Q + begin + repeat(264) @(posedge clk_tb); + if ((dut.res[0] + dut.res[1]) != ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME)) begin + $error("U = u*v+w Mismatch: exp_output = %h output shares = %h %h actual output = %h", ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME), dut.res[0], dut.res[1], dut.res[0] + dut.res[1]); + end + end + join + end +endtask +*/ + +// task masked_bfu_1x2_test(); +// logic [45:0] rand0, rand1, rand2; +// for (int i = 0; i < 10; i++) begin +// @(posedge clk_tb); +// fork +// begin +// actual_u = $random()%PRIME; +// actual_v = $random()%PRIME; +// actual_w = 'h2; + +// // u_array = actual_u; +// // v_array = actual_v; +// rand0 = $random(); +// rand1 = $random(); +// rand2 = $random(); + +// // $display("actual u = %h, actual v = %h", actual_u, actual_v); + +// u[0] = actual_u-rand0; +// u[1] = rand0; +// v[0] = actual_v-rand1; +// v[1] = rand1; +// w[0] = actual_w-rand2; +// w[1] = rand2; + +// uvw_i_tb.u00_i = u; +// uvw_i_tb.u01_i = u; +// uvw_i_tb.v00_i = v; +// uvw_i_tb.v01_i = v; +// uvw_i_tb.w00_i = w; +// uvw_i_tb.w01_i = w; +// // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); +// end +// // begin +// // repeat(264) @(posedge clk_tb); +// // if ((dut.res[0] + dut.res[1]) != ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME)) begin +// // $error("U = u*v+w Mismatch: exp_output = %h output shares = %h %h actual output = %h", ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME), dut.res[0], dut.res[1], dut.res[0] + dut.res[1]); +// // end +// // end +// join +// end +// endtask + +task masked_hybrid_bf_2x2_test(); + logic [45:0] rand0, rand1, rand2; + for (int j = 0; j < 6; j++) begin + mode_tb = j; + for (int i = 0; i < 10; i++) begin + @(posedge clk_tb); + enable_tb = 1'b1; + fork + begin + actual_u = $random()%PRIME; + actual_v = $random()%PRIME; + actual_w = 'h2; + + // u_array = actual_u; + // v_array = actual_v; + rand0 = $random(); + rand1 = $random(); + rand2 = $random(); + + // $display("actual u = %h, actual v = %h", actual_u, actual_v); + + u[0] = actual_u-rand0; + u[1] = rand0; + v[0] = actual_v-rand1; + v[1] = rand1; + w[0] = actual_w-rand2; + w[1] = rand2; + + uvw_i_tb.u00_i = actual_u; + uvw_i_tb.u01_i = actual_u; + uvw_i_tb.v00_i = actual_v; + uvw_i_tb.v01_i = actual_v; + uvw_i_tb.w00_i = actual_w; + uvw_i_tb.w01_i = actual_w; + uvw_i_tb.w10_i = actual_w; + uvw_i_tb.w11_i = actual_w; + + pw_uvw_i_tb.u0_i = actual_u; + pw_uvw_i_tb.v0_i = actual_v; + pw_uvw_i_tb.w0_i = actual_w; + + pw_uvw_i_tb.u1_i = actual_u; + pw_uvw_i_tb.v1_i = actual_v; + pw_uvw_i_tb.w1_i = actual_w; + + pw_uvw_i_tb.u2_i = actual_u; + pw_uvw_i_tb.v2_i = actual_v; + pw_uvw_i_tb.w2_i = actual_w; + + pw_uvw_i_tb.u3_i = actual_u; + pw_uvw_i_tb.v3_i = actual_v; + pw_uvw_i_tb.w3_i = actual_w; + //$display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); + end // begin - // repeat(3) @(posedge clk_tb); - // if ((dut.final_res[1] + dut.final_res[0]) != ((u_array * v_array)%PRIME)) begin - // $error("Multiplication Mismatch: exp_output = %h output shares = %h %h actual output = %h", (u_array * v_array)%PRIME, dut.final_res[0], dut.final_res[1], dut.final_res[0] + dut.final_res[1]); + // repeat(264) @(posedge clk_tb); + // if ((dut.res[0] + dut.res[1]) != ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME)) begin + // $error("U = u*v+w Mismatch: exp_output = %h output shares = %h %h actual output = %h", ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME), dut.res[0], dut.res[1], dut.res[0] + dut.res[1]); // end // end join end + enable_tb = 1'b0; + @(posedge clk_tb); + end endtask initial begin @@ -789,7 +1012,7 @@ initial begin @(posedge clk_tb); $display("Starting init mem\n"); - init_mem(); + // init_mem(); // $readmemh("ntt_stage67.hex", ntt_mem_tb); @(posedge clk_tb); // buffer_test(); @@ -799,7 +1022,9 @@ initial begin // ntt_top_test(); // masked_BFU_adder_test(); // masked_BFU_mult_test(); - masked_gs_butterfly_test(); + // masked_gs_butterfly_test(); + // masked_pwm_test(); + masked_hybrid_bf_2x2_test(); // pwm_opt_test(); repeat(1000) @(posedge clk_tb); $finish; diff --git a/src/ntt_top/tb/ntt_wrapper.sv b/src/ntt_top/tb/ntt_wrapper.sv index dc511d4..cdb9a4d 100644 --- a/src/ntt_top/tb/ntt_wrapper.sv +++ b/src/ntt_top/tb/ntt_wrapper.sv @@ -38,7 +38,9 @@ module ntt_wrapper input mode_t mode, input wire ntt_enable, input wire shuffle_en, + input wire masking_en, input wire [5:0] random, + input wire [4:0][45:0] rnd_i, //TB purpose - remove later TODO input wire load_tb_values, @@ -181,7 +183,9 @@ module ntt_wrapper .accumulate(accumulate), .sampler_valid(sampler_valid), .shuffle_en(shuffle_en), + .masking_en(1'b0), .random(random), + .rnd_i('h0), //NTT mem IF .mem_wr_req(mem_wr_req), .mem_rd_req(mem_rd_req), From efb2f5e34a11c7cc2dae4c807d965e0fc697c2fb Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Thu, 14 Nov 2024 19:49:32 -0800 Subject: [PATCH 10/23] Integrate into mldsa_top, some temp changes to debug long sim times --- src/mldsa_top/rtl/mldsa_top.sv | 2 + src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv | 107 ++++++++++++++------ src/ntt_top/rtl/ntt_top.sv | 1 + src/ntt_top/tb/ntt_top_tb.sv | 16 +-- 4 files changed, 88 insertions(+), 38 deletions(-) diff --git a/src/mldsa_top/rtl/mldsa_top.sv b/src/mldsa_top/rtl/mldsa_top.sv index 4e3b438..b9c3fdd 100644 --- a/src/mldsa_top/rtl/mldsa_top.sv +++ b/src/mldsa_top/rtl/mldsa_top.sv @@ -516,6 +516,8 @@ generate .sampler_valid(sampler_valid[g_inst]), .shuffle_en(shuffle_en[g_inst]), .random(rand_bits[5:0]), + .masking_en(1'b0), + .rnd_i('h0), //NTT mem IF .mem_wr_req(ntt_mem_wr_req[g_inst]), .mem_rd_req(ntt_mem_rd_req[g_inst]), diff --git a/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv index c3a216a..8151ba8 100644 --- a/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv +++ b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv @@ -66,13 +66,13 @@ logic [HALF_WIDTH-1:0] u10_int, u11_int, v10_int, v11_int; //Inputs to 2nd stage logic [HALF_WIDTH-1:0] u10, u11, v10, v11; //Outputs of 2nd stage -logic [HALF_WIDTH-1:0] u20, u21, v20, v21; +// logic [HALF_WIDTH-1:0] u20, u21, v20, v21; //Other internal wires logic [UNMASKED_BF_STAGE1_LATENCY-1:0][WIDTH-1:0] w10_reg, w11_reg; //Shift w10 by 5 cycles to match 1st stage BF latency logic [MASKED_BF_STAGE1_LATENCY-1:0][WIDTH-1:0] masked_w10_reg, masked_w11_reg; logic pwo_mode, pwm_intt_mode; -logic [UNMASKED_BF_LATENCY-1:0] ready_reg; +// logic [UNMASKED_BF_LATENCY-1:0] ready_reg; logic [MASKED_PWM_INTT_LATENCY-1:0] masked_ready_reg; //Shares - TODO replace with struct? @@ -159,48 +159,91 @@ end always_comb begin //TODO: check randomness with Emre //Split u inputs - u00_share[0] = /*uvw_i.u00_i*/u00 - rnd_i[0]; - u00_share[1] = rnd_i[0]; + if (masking_en) begin + u00_share[0] = /*uvw_i.u00_i*/u00 - rnd_i[0]; + u00_share[1] = rnd_i[0]; - u01_share[0] = /*uvw_i.u01_i*/u01 - rnd_i[1]; - u01_share[1] = rnd_i[1]; + u01_share[0] = /*uvw_i.u01_i*/u01 - rnd_i[1]; + u01_share[1] = rnd_i[1]; - u10_share[0] = u10 - rnd_i[0]; - u10_share[1] = rnd_i[0]; + u10_share[0] = u10 - rnd_i[0]; + u10_share[1] = rnd_i[0]; - u11_share[0] = u11 - rnd_i[0]; - u11_share[1] = rnd_i[0]; + u11_share[0] = u11 - rnd_i[0]; + u11_share[1] = rnd_i[0]; - //Split v inputs - v00_share[0] = /*uvw_i.v00_i*/v00 - rnd_i[2]; - v00_share[1] = rnd_i[2]; + //Split v inputs + v00_share[0] = /*uvw_i.v00_i*/v00 - rnd_i[2]; + v00_share[1] = rnd_i[2]; - v01_share[0] = /*uvw_i.v01_i*/v01 - rnd_i[3]; - v01_share[1] = rnd_i[3]; + v01_share[0] = /*uvw_i.v01_i*/v01 - rnd_i[3]; + v01_share[1] = rnd_i[3]; - v10_share[0] = v10 - rnd_i[2]; - v10_share[1] = rnd_i[2]; + v10_share[0] = v10 - rnd_i[2]; + v10_share[1] = rnd_i[2]; - v11_share[0] = v11 - rnd_i[2]; - v11_share[1] = rnd_i[2]; + v11_share[0] = v11 - rnd_i[2]; + v11_share[1] = rnd_i[2]; - //Split w inputs - w00_share[0] = /*uvw_i.w00_i*/w00 - rnd_i[4]; - w00_share[1] = rnd_i[4]; + //Split w inputs + w00_share[0] = /*uvw_i.w00_i*/w00 - rnd_i[4]; + w00_share[1] = rnd_i[4]; - w01_share[0] = /*uvw_i.w01_i*/w01 - rnd_i[0]; - w01_share[1] = rnd_i[0]; + w01_share[0] = /*uvw_i.w01_i*/w01 - rnd_i[0]; + w01_share[1] = rnd_i[0]; - w10_reg_share[0] = w10_reg[0] - rnd_i[1]; - w10_reg_share[1] = rnd_i[1]; + w10_reg_share[0] = w10_reg[0] - rnd_i[1]; + w10_reg_share[1] = rnd_i[1]; - w11_reg_share[0] = w11_reg[0] - rnd_i[2]; - w11_reg_share[1] = rnd_i[2]; + w11_reg_share[0] = w11_reg[0] - rnd_i[2]; + w11_reg_share[1] = rnd_i[2]; + end + else begin + u00_share[0] = 'h0; + u00_share[1] = 'h0; + + u01_share[0] = 'h0; + u01_share[1] = 'h0; + + u10_share[0] = 'h0; + u10_share[1] = 'h0; + + u11_share[0] = 'h0; + u11_share[1] = 'h0; + + //Split v input + v00_share[0] = 'h0; + v00_share[1] = 'h0; + + v01_share[0] = 'h0; + v01_share[1] = 'h0; + + v10_share[0] = 'h0; + v10_share[1] = 'h0; + + v11_share[0] = 'h0; + v11_share[1] = 'h0; + + //Split w input + w00_share[0] = 'h0; + w00_share[1] = 'h0; + + w01_share[0] = 'h0; + w01_share[1] = 'h0; + + w10_reg_share[0] = 'h0; + w10_reg_share[1] = 'h0; + + w11_reg_share[0] = 'h0; + w11_reg_share[1] = 'h0; + + end end //---------------------------------------------------- //Masked PWMs - Used in masked PWM+INTT mode only //---------------------------------------------------- +// `ifdef MASKING ntt_masked_pwm #( .WIDTH(WIDTH) ) pwm_inst00 ( @@ -252,7 +295,7 @@ ntt_masked_pwm #( .rnd({rnd_i[2], rnd_i[1], rnd_i[0], rnd_i[4], rnd_i[3]}), .res(uv11_share) ); - +// `endif //---------------------------------------------------- //Masked BFU stage 1 - Used in masked PWM+INTT mode only //PWM outputs: uv00[1:0], uv01[1:0], uv10[1:0], uv11[1:0] @@ -268,7 +311,7 @@ ntt_masked_butterfly1x2 #( .rnd_i({rnd_i[4], rnd_i[3], rnd_i[2], rnd_i[1], rnd_i[0]}), .uv_o(masked_gs_stage1_uvo) ); - +// `endif //---------------------------------------------------- //Unmasked BFU stage 1 - Used in all other modes //---------------------------------------------------- @@ -316,7 +359,7 @@ ntt_butterfly #( .mode(mode), .opu_i(masking_en ? masked_gs_stage1_uvo.u20_o : u10), .opv_i(masking_en ? masked_gs_stage1_uvo.v20_o : v10), - .opw_i(masking_en ? masked_w10_reg[0] : w10_reg[0]), //TODO: delayed w10 + .opw_i(masking_en ? masked_w10_reg[0] : pwo_mode ? w10 : w10_reg[0]), //TODO: delayed w10 .accumulate(accumulate), .u_o(uv_o.u20_o), .v_o(uv_o.v20_o), @@ -332,7 +375,7 @@ ntt_butterfly #( .mode(mode), .opu_i(masking_en ? masked_gs_stage1_uvo.u21_o : u11), .opv_i(masking_en ? masked_gs_stage1_uvo.v21_o : v11), - .opw_i(masking_en ? masked_w11_reg[0] : w11_reg[0]), //TODO: delayed w10 + .opw_i(masking_en ? masked_w11_reg[0] : pwo_mode ? w11 : w11_reg[0]), //TODO: delayed w10 .accumulate(accumulate), .u_o(uv_o.u21_o), .v_o(uv_o.v21_o), diff --git a/src/ntt_top/rtl/ntt_top.sv b/src/ntt_top/rtl/ntt_top.sv index 1cb9993..d3afd27 100644 --- a/src/ntt_top/rtl/ntt_top.sv +++ b/src/ntt_top/rtl/ntt_top.sv @@ -302,6 +302,7 @@ module ntt_top .zeroize(zeroize), .mode(mode), .enable(bf_enable_mux), + .masking_en(masking_en), .uvw_i(uvw_i), .pw_uvw_i(pw_uvw_i), .rnd_i(rnd_i), diff --git a/src/ntt_top/tb/ntt_top_tb.sv b/src/ntt_top/tb/ntt_top_tb.sv index 2bb8f1e..20a2fdf 100644 --- a/src/ntt_top/tb/ntt_top_tb.sv +++ b/src/ntt_top/tb/ntt_top_tb.sv @@ -152,6 +152,10 @@ ntt_wrapper dut ( .ntt_enable(enable_tb), .load_tb_values(load_tb_values), .load_tb_addr(load_tb_addr), + .shuffle_en(1'b0), + .random(random_tb), + .masking_en(1'b0), + .rnd_i('h0), .ntt_mem_base_addr(ntt_mem_base_addr_tb), .pwo_mem_base_addr(pwo_mem_base_addr_tb), .accumulate(acc_tb), @@ -729,7 +733,7 @@ task pwm_opt_test(); endtask */ task init_mem(); - for (int i = 0; i < 32768; i++) begin + for (int i = 0; i < 512; i++) begin load_tb_addr = i; load_tb_values = 1'b1; @(posedge clk_tb); @@ -936,7 +940,7 @@ endtask // join // end // endtask - +/* task masked_hybrid_bf_2x2_test(); logic [45:0] rand0, rand1, rand2; for (int j = 0; j < 6; j++) begin @@ -1003,7 +1007,7 @@ task masked_hybrid_bf_2x2_test(); @(posedge clk_tb); end endtask - +*/ initial begin init_sim(); reset_dut(); @@ -1012,19 +1016,19 @@ initial begin @(posedge clk_tb); $display("Starting init mem\n"); - // init_mem(); + init_mem(); // $readmemh("ntt_stage67.hex", ntt_mem_tb); @(posedge clk_tb); // buffer_test(); // twiddle_rom_test(); // ntt_ctrl_test(); $display("Starting ntt test\n"); - // ntt_top_test(); + ntt_top_test(); // masked_BFU_adder_test(); // masked_BFU_mult_test(); // masked_gs_butterfly_test(); // masked_pwm_test(); - masked_hybrid_bf_2x2_test(); + // masked_hybrid_bf_2x2_test(); // pwm_opt_test(); repeat(1000) @(posedge clk_tb); $finish; From 7df740e3c994e7254547ff2c395b52417f6dd255 Mon Sep 17 00:00:00 2001 From: Nitsirks Date: Fri, 15 Nov 2024 12:26:53 -0800 Subject: [PATCH 11/23] optimized flip flops for masking logic --- src/abr_libs/rtl/abr_delay_masked_shares.sv | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/abr_libs/rtl/abr_delay_masked_shares.sv b/src/abr_libs/rtl/abr_delay_masked_shares.sv index aacf829..bb10e9e 100644 --- a/src/abr_libs/rtl/abr_delay_masked_shares.sv +++ b/src/abr_libs/rtl/abr_delay_masked_shares.sv @@ -32,32 +32,26 @@ module abr_delay_masked_shares ); // Create an array of shift registers to store the delayed values - logic [1:0] shift_reg [N-1:0][WIDTH-1:0]; + logic [N-1:0][WIDTH-1:0][1:0] shift_reg ; // Use an always_ff block to implement the shift register always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin // Reset all shift register values to 0 for (int j = 0; j < N; j = j + 1) begin - for (int i = 0; i < WIDTH; i = i + 1) begin - shift_reg[j][i] <= 2'b0; - end + shift_reg[j] <= '0; end end else if (zeroize) begin // Reset all shift register values to 0 for (int j = 0; j < N; j = j + 1) begin - for (int i = 0; i < WIDTH; i = i + 1) begin - shift_reg[j][i] <= 2'b0; - end + shift_reg[j] <= '0; end end else begin // Shift the values through the registers for (int j = 0; j < N-1; j = j + 1) begin - for (int i = 0; i < WIDTH; i = i + 1) begin - shift_reg[j+1][i] <= shift_reg[j][i]; - end + shift_reg[j+1] <= shift_reg[j]; end // Load the input values into the first shift register stage From b8b54207ebc23e59d544249c72d4208ac349bf2a Mon Sep 17 00:00:00 2001 From: Emre Karabulut Date: Fri, 15 Nov 2024 14:05:27 -0800 Subject: [PATCH 12/23] converted array decl format --- src/abr_libs/rtl/abr_masked_A2B_conv.sv | 44 +++++++++++-------- .../rtl/abr_masked_N_bit_Boolean_adder.sv | 36 +++++++-------- 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/src/abr_libs/rtl/abr_masked_A2B_conv.sv b/src/abr_libs/rtl/abr_masked_A2B_conv.sv index 04f6336..9b006d7 100644 --- a/src/abr_libs/rtl/abr_masked_A2B_conv.sv +++ b/src/abr_libs/rtl/abr_masked_A2B_conv.sv @@ -42,11 +42,11 @@ ); // Internal signals - logic [1:0] carry [WIDTH:0]; // Carry signals for each stage - logic [1:0] sum [WIDTH-1:0]; // Sum signals for each stage - logic [1:0] x_reg [WIDTH-1:0][WIDTH-1:0]; // Pipeline registers for x - logic [1:0] y_reg [WIDTH-1:0][WIDTH-1:0]; // Pipeline registers for y - logic [1:0] sum_reg [WIDTH-1:0][WIDTH-1:0]; // Pipeline registers for sum + logic [WIDTH:0] [1:0] carry; // Carry signals for each stage + logic [WIDTH-1:0] [1:0] sum; // Sum signals for each stage + logic [WIDTH-1:0][WIDTH-1:0][1:0] x_reg; // Pipeline registers for x + logic [WIDTH-1:0][WIDTH-1:0][1:0] y_reg; // Pipeline registers for y + logic [WIDTH-1:0][WIDTH-1:0][1:0] sum_reg; // Pipeline registers for sum logic [1:0] the_last_sum; // Initialize the first carry input to 0 @@ -59,16 +59,20 @@ // Pipeline registers for x and y inputs always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin - for (int j = 0; j < WIDTH; j = j + 1) begin - x_reg[i][j] <= 2'b00; - y_reg[i][j] <= 2'b00; - end + // for (int j = 0; j < WIDTH; j = j + 1) begin + // x_reg[i][j] <= 2'b00; + // y_reg[i][j] <= 2'b00; + // end + x_reg[i] <= '0; + y_reg[i] <= '0; end else if (zeroize) begin - for (int j = 0; j < WIDTH; j = j + 1) begin - x_reg[i][j] <= 2'b00; - y_reg[i][j] <= 2'b00; - end + // for (int j = 0; j < WIDTH; j = j + 1) begin + // x_reg[i][j] <= 2'b00; + // y_reg[i][j] <= 2'b00; + // end + x_reg[i] <= '0; + y_reg[i] <= '0; end else begin for (int j = 0; j < WIDTH; j = j + 1) begin @@ -87,14 +91,16 @@ // Pipeline registers for sum output always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin - for (int j = 0; j < WIDTH; j = j + 1) begin - sum_reg[i][j] <= 2'b00; - end + // for (int j = 0; j < WIDTH; j = j + 1) begin + // sum_reg[i][j] <= 2'b00; + // end + sum_reg[i] <= '0; end else if (zeroize) begin - for (int j = 0; j < WIDTH; j = j + 1) begin - sum_reg[i][j] <= 2'b00; - end + // for (int j = 0; j < WIDTH; j = j + 1) begin + // sum_reg[i][j] <= 2'b00; + // end + sum_reg[i] <= '0; end else begin for (int j = i; j < WIDTH; j = j + 1) begin diff --git a/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv b/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv index 1c4376f..9ff0aa5 100644 --- a/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv +++ b/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv @@ -40,11 +40,11 @@ ); // Internal signals - logic [1:0] carry [WIDTH:0]; // Carry signals for each stage - logic [1:0] sum [WIDTH-1:0]; // Sum signals for each stage - logic [1:0] x_reg [WIDTH-1:0][WIDTH-1:0]; // Pipeline registers for x - logic [1:0] y_reg [WIDTH-1:0][WIDTH-1:0]; // Pipeline registers for y - logic [1:0] sum_reg [WIDTH-1:0][WIDTH-1:0]; // Pipeline registers for sum + logic [WIDTH:0] [1:0] carry; // Carry signals for each stage + logic [WIDTH-1:0] [1:0] sum; // Sum signals for each stage + logic [WIDTH-1:0][WIDTH-1:0][1:0] x_reg; // Pipeline registers for x + logic [WIDTH-1:0][WIDTH-1:0][1:0] y_reg; // Pipeline registers for y + logic [WIDTH-1:0][WIDTH-1:0][1:0] sum_reg; // Pipeline registers for sum logic [1:0] the_last_sum; // Initialize the first carry input to 0 @@ -57,16 +57,12 @@ // Pipeline registers for x and y inputs always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin - for (int j = 0; j < WIDTH; j = j + 1) begin - x_reg[i][j] <= 2'b00; - y_reg[i][j] <= 2'b00; - end + x_reg[i] <= '0; + y_reg[i] <= '0; end else if (zeroize) begin - for (int j = 0; j < WIDTH; j = j + 1) begin - x_reg[i][j] <= 2'b00; - y_reg[i][j] <= 2'b00; - end + x_reg[i] <= '0; + y_reg[i] <= '0; end else begin for (int j = 0; j < WIDTH; j = j + 1) begin @@ -85,14 +81,16 @@ // Pipeline registers for sum output always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin - for (int j = 0; j < WIDTH; j = j + 1) begin - sum_reg[i][j] <= 2'b00; - end + // for (int j = 0; j < WIDTH; j = j + 1) begin + // sum_reg[i][j] <= 2'b00; + // end + sum_reg[i] <= '0; end else if (zeroize) begin - for (int j = 0; j < WIDTH; j = j + 1) begin - sum_reg[i][j] <= 2'b00; - end + // for (int j = 0; j < WIDTH; j = j + 1) begin + // sum_reg[i][j] <= 2'b00; + // end + sum_reg[i] <= '0; end else begin for (int j = i; j < WIDTH; j = j + 1) begin From e7128c6aba44fa84e0c4ad0212645103c5c19ebe Mon Sep 17 00:00:00 2001 From: Emre Karabulut Date: Fri, 15 Nov 2024 14:13:31 -0800 Subject: [PATCH 13/23] updated array format of B-sub --- .../rtl/abr_masked_N_bit_Boolean_sub.sv | 36 +++++++++---------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv b/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv index 2ee4556..ddc715f 100644 --- a/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv +++ b/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv @@ -34,11 +34,11 @@ module abr_masked_N_bit_Boolean_sub #( ); // Internal signals - logic [1:0] carry [WIDTH:0]; // Carry signals for each stage - logic [1:0] sum [WIDTH-1:0]; // Sum signals for each stage - logic [1:0] x_reg [WIDTH-1:0][WIDTH-1:0]; // Pipeline registers for x - logic [1:0] y_reg [WIDTH-1:0][WIDTH-1:0]; // Pipeline registers for y - logic [1:0] sum_reg [WIDTH-1:0][WIDTH-1:0]; // Pipeline registers for sum + logic [WIDTH:0] [1:0] carry; // Carry signals for each stage + logic [WIDTH-1:0] [1:0] sum; // Sum signals for each stage + logic [WIDTH-1:0][WIDTH-1:0][1:0] x_reg; // Pipeline registers for x + logic [WIDTH-1:0][WIDTH-1:0][1:0] y_reg; // Pipeline registers for y + logic [WIDTH-1:0][WIDTH-1:0][1:0] sum_reg; // Pipeline registers for sum logic [1:0] the_last_sum; // Initialize the first carry input to 0 @@ -51,16 +51,12 @@ module abr_masked_N_bit_Boolean_sub #( // Pipeline registers for x and y inputs always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin - for (int j = 0; j < WIDTH; j = j + 1) begin - x_reg[i][j] <= 2'b00; - y_reg[i][j] <= 2'b00; - end + x_reg[i] <= '0; + y_reg[i] <= '0; end else if (zeroize) begin - for (int j = 0; j < WIDTH; j = j + 1) begin - x_reg[i][j] <= 2'b00; - y_reg[i][j] <= 2'b00; - end + x_reg[i] <= '0; + y_reg[i] <= '0; end else begin for (int j = 0; j < WIDTH; j = j + 1) begin @@ -79,14 +75,16 @@ module abr_masked_N_bit_Boolean_sub #( // Pipeline registers for sum output always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin - for (int j = 0; j < WIDTH; j = j + 1) begin - sum_reg[i][j] <= 2'b00; - end + // for (int j = 0; j < WIDTH; j = j + 1) begin + // sum_reg[i][j] <= 2'b00; + // end + sum_reg[i] <= '0; end else if (zeroize) begin - for (int j = 0; j < WIDTH; j = j + 1) begin - sum_reg[i][j] <= 2'b00; - end + // for (int j = 0; j < WIDTH; j = j + 1) begin + // sum_reg[i][j] <= 2'b00; + // end + sum_reg[i] <= '0; end else begin for (int j = i; j < WIDTH; j = j + 1) begin From f1b5102d965a2ce2a4de22bbafed31069a1b5e8b Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Fri, 15 Nov 2024 21:59:27 -0800 Subject: [PATCH 14/23] Fix hybrid interface, update ctrl to incr twiddle --- src/mldsa_top/rtl/mldsa_top.sv | 2 +- src/ntt_top/rtl/ntt_ctrl.sv | 6 +- src/ntt_top/rtl/ntt_defines_pkg.sv | 23 ++++++ src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv | 86 ++++++++++++++++----- src/ntt_top/rtl/ntt_top.sv | 9 ++- 5 files changed, 101 insertions(+), 25 deletions(-) diff --git a/src/mldsa_top/rtl/mldsa_top.sv b/src/mldsa_top/rtl/mldsa_top.sv index b9c3fdd..1ba01d2 100644 --- a/src/mldsa_top/rtl/mldsa_top.sv +++ b/src/mldsa_top/rtl/mldsa_top.sv @@ -517,7 +517,7 @@ generate .shuffle_en(shuffle_en[g_inst]), .random(rand_bits[5:0]), .masking_en(1'b0), - .rnd_i('h0), + .rnd_i(rand_bits[RND_W-1:6]), //NTT mem IF .mem_wr_req(ntt_mem_wr_req[g_inst]), .mem_rd_req(ntt_mem_rd_req[g_inst]), diff --git a/src/ntt_top/rtl/ntt_ctrl.sv b/src/ntt_top/rtl/ntt_ctrl.sv index f8b6eb6..cd3d4a5 100644 --- a/src/ntt_top/rtl/ntt_ctrl.sv +++ b/src/ntt_top/rtl/ntt_ctrl.sv @@ -121,6 +121,7 @@ logic pw_wren_fsm, pw_wren_reg; //Mode flags logic ct_mode, gs_mode, pwo_mode; //point-wise operations mode logic pwm_mode, pwa_mode, pws_mode; +logic pwm_intt_mode; //Addr internal wires logic [MEM_ADDR_WIDTH-1:0] src_base_addr, interim_base_addr, dest_base_addr; @@ -221,6 +222,7 @@ always_comb begin pwm_mode = (ntt_mode == pwm); pwa_mode = (ntt_mode == pwa); pws_mode = (ntt_mode == pws); + pwm_intt_mode = (ntt_mode == pwm_intt); end //------------------------------------------ @@ -731,7 +733,7 @@ always_comb begin else mem_rd_en_fsm = (ntt_mode inside {ct, gs}) ? (mem_rd_addr <= MEM_LAST_ADDR + mem_rd_base_addr) : 1'b0; bf_enable_fsm = pwo_mode ? sampler_valid : 1'b1; - incr_twiddle_addr_fsm = ntt_mode inside {ct, gs}; + incr_twiddle_addr_fsm = ntt_mode inside {ct, gs, pwm_intt}; rd_addr_step = ct_mode ? NTT_READ_ADDR_STEP : INTT_READ_ADDR_STEP; incr_pw_rd_addr = sampler_valid & pwo_mode; pw_rden_fsm = sampler_valid & pwo_mode; @@ -743,7 +745,7 @@ always_comb begin buf_wr_rst_count_ntt = 1'b1; //There are no more mem reads, so buf writes need to halt buf_rd_rst_count_ntt = 1'b0; //There are still some entries in buf that BF2x2 needs to pick up bf_enable_fsm = pwo_mode ? sampler_valid : (buf_count <= 3); - incr_twiddle_addr_fsm = (ct_mode | gs_mode); + incr_twiddle_addr_fsm = (ct_mode | gs_mode | pwm_intt); rd_addr_step = NTT_READ_ADDR_STEP; incr_pw_rd_addr = (pwo_mode & sampler_valid); pw_rden_fsm = (pwo_mode & sampler_valid); diff --git a/src/ntt_top/rtl/ntt_defines_pkg.sv b/src/ntt_top/rtl/ntt_defines_pkg.sv index 975b3cd..606736b 100644 --- a/src/ntt_top/rtl/ntt_defines_pkg.sv +++ b/src/ntt_top/rtl/ntt_defines_pkg.sv @@ -55,6 +55,29 @@ typedef struct packed { logic [NTT_REG_SIZE-1:0] w11_i; } bf_uvwi_t; +typedef struct packed { + //input a + logic [NTT_REG_SIZE-1:0] u0_i; + logic [NTT_REG_SIZE-1:0] u1_i; + logic [NTT_REG_SIZE-1:0] u2_i; + logic [NTT_REG_SIZE-1:0] u3_i; + //input b + logic [NTT_REG_SIZE-1:0] v0_i; + logic [NTT_REG_SIZE-1:0] v1_i; + logic [NTT_REG_SIZE-1:0] v2_i; + logic [NTT_REG_SIZE-1:0] v3_i; + //accumulated input c (comes from dest mem) + logic [NTT_REG_SIZE-1:0] w0_i; + logic [NTT_REG_SIZE-1:0] w1_i; + logic [NTT_REG_SIZE-1:0] w2_i; + logic [NTT_REG_SIZE-1:0] w3_i; + //input w for INTT operation that follows pwm. TODO: for only PWM/PWMA ops, this needs to be 0 + logic [NTT_REG_SIZE-1:0] twiddle_w0_i; + logic [NTT_REG_SIZE-1:0] twiddle_w1_i; + logic [NTT_REG_SIZE-1:0] twiddle_w2_i; + logic [NTT_REG_SIZE-1:0] twiddle_w3_i; +} hybrid_bf_uvwi_t; + typedef struct packed { logic [NTT_REG_SIZE-1:0] u20_o; logic [NTT_REG_SIZE-1:0] u21_o; diff --git a/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv index 8151ba8..6034dc0 100644 --- a/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv +++ b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv @@ -47,6 +47,7 @@ module ntt_hybrid_butterfly_2x2 input wire masking_en, input bf_uvwi_t uvw_i, //Inputs are original form input pwo_uvwi_t pw_uvw_i, //PWO inputs are original form + input hybrid_bf_uvwi_t hybrid_pw_uvw_i, //PWM+INTT inputs. TODO: combine and mux with pwo inputs? input wire [4:0][WIDTH-1:0] rnd_i, input wire accumulate, @@ -69,16 +70,18 @@ logic [HALF_WIDTH-1:0] u10, u11, v10, v11; // logic [HALF_WIDTH-1:0] u20, u21, v20, v21; //Other internal wires -logic [UNMASKED_BF_STAGE1_LATENCY-1:0][WIDTH-1:0] w10_reg, w11_reg; //Shift w10 by 5 cycles to match 1st stage BF latency -logic [MASKED_BF_STAGE1_LATENCY-1:0][WIDTH-1:0] masked_w10_reg, masked_w11_reg; +logic [UNMASKED_BF_STAGE1_LATENCY-1:0][HALF_WIDTH-1:0] w10_reg, w11_reg; //Shift w10 by 5 cycles to match 1st stage BF latency +logic [MASKED_PWM_LATENCY-1:0][HALF_WIDTH-1:0] masked_w00_reg, masked_w01_reg; +logic [MASKED_BF_STAGE1_LATENCY-1:0][HALF_WIDTH-1:0] masked_w10_reg, masked_w11_reg; logic pwo_mode, pwm_intt_mode; // logic [UNMASKED_BF_LATENCY-1:0] ready_reg; logic [MASKED_PWM_INTT_LATENCY-1:0] masked_ready_reg; //Shares - TODO replace with struct? logic [1:0][WIDTH-1:0] u00_share, u01_share, v00_share, v01_share, u10_share, v10_share, u11_share, v11_share; -logic [1:0][WIDTH-1:0] w00_share, w01_share, w10_share, w11_share, w10_reg_share, w11_reg_share; +logic [1:0][WIDTH-1:0] w00_share, w01_share, w10_share, w11_share; //, w10_reg_share, w11_reg_share; logic [1:0][WIDTH-1:0] uv00_share, uv01_share, uv10_share, uv11_share; +logic [1:0][WIDTH-1:0] twiddle_w00_share, twiddle_w01_share, twiddle_w10_share, twiddle_w11_share; bf_uvo_t masked_gs_stage1_uvo; //w delay flops @@ -100,16 +103,22 @@ end always_ff @(posedge clk or negedge reset_n) begin if (!reset_n) begin + masked_w00_reg <= 'h0; + masked_w01_reg <= 'h0; masked_w10_reg <= 'h0; masked_w11_reg <= 'h0; end else if (zeroize) begin + masked_w00_reg <= 'h0; + masked_w01_reg <= 'h0; masked_w10_reg <= 'h0; masked_w11_reg <= 'h0; end else begin - masked_w10_reg <= {uvw_i.w10_i, masked_w10_reg[MASKED_BF_STAGE1_LATENCY-1:1]}; - masked_w11_reg <= {uvw_i.w11_i, masked_w11_reg[MASKED_BF_STAGE1_LATENCY-1:1]}; + masked_w00_reg <= {hybrid_pw_uvw_i.twiddle_w0_i, masked_w00_reg[MASKED_PWM_LATENCY-1:1]}; + masked_w01_reg <= {hybrid_pw_uvw_i.twiddle_w1_i, masked_w01_reg[MASKED_PWM_LATENCY-1:1]}; + masked_w10_reg <= {hybrid_pw_uvw_i.twiddle_w2_i, masked_w10_reg[MASKED_BF_STAGE1_LATENCY-1:1]}; + masked_w11_reg <= {hybrid_pw_uvw_i.twiddle_w3_i, masked_w11_reg[MASKED_BF_STAGE1_LATENCY-1:1]}; end end @@ -118,7 +127,7 @@ assign pwm_intt_mode = (mode == pwm_intt) & masking_en; //Input assignments - TODO: add input flops for u, v, w, and rnd? always_comb begin - if (pwo_mode | pwm_intt_mode) begin //pwm_intt mode, inputs are driven on pw_uvw_i interface. TODO: check again + if (pwo_mode) begin u00 = pw_uvw_i.u0_i; v00 = pw_uvw_i.v0_i; w00 = pw_uvw_i.w0_i; @@ -136,6 +145,28 @@ always_comb begin w11 = pw_uvw_i.w3_i; end + else if (pwm_intt_mode) begin //TODO: clean up + u00 = hybrid_pw_uvw_i.u0_i; + v00 = hybrid_pw_uvw_i.v0_i; + w00 = hybrid_pw_uvw_i.w0_i; + + u01 = hybrid_pw_uvw_i.u1_i; + v01 = hybrid_pw_uvw_i.v1_i; + w01 = hybrid_pw_uvw_i.w1_i; + + u10 = hybrid_pw_uvw_i.u2_i; + v10 = hybrid_pw_uvw_i.v2_i; + w10 = hybrid_pw_uvw_i.w2_i; + + u11 = hybrid_pw_uvw_i.u3_i; + v11 = hybrid_pw_uvw_i.v3_i; + w11 = hybrid_pw_uvw_i.w3_i; + + // twiddle_w00 = hybrid_pw_uvw_i.twiddle_w0_i; + // twiddle_w01 = hybrid_pw_uvw_i.twiddle_w1_i; + // twiddle_w10 = hybrid_pw_uvw_i.twiddle_w2_i; + // twiddle_w11 = hybrid_pw_uvw_i.twiddle_w3_i; + end else begin //Only applies to unmasked ops since in masking, intt receives inputs from pwm and not from the API u00 = uvw_i.u00_i; v00 = uvw_i.v00_i; @@ -192,11 +223,18 @@ always_comb begin w01_share[0] = /*uvw_i.w01_i*/w01 - rnd_i[0]; w01_share[1] = rnd_i[0]; - w10_reg_share[0] = w10_reg[0] - rnd_i[1]; - w10_reg_share[1] = rnd_i[1]; + // w10_reg_share[0] = w10_reg[0] - rnd_i[1]; + // w10_reg_share[1] = rnd_i[1]; + + // w11_reg_share[0] = w11_reg[0] - rnd_i[2]; + // w11_reg_share[1] = rnd_i[2]; + + twiddle_w00_share[0] = masked_w00_reg[0] - rnd_i[0]; + twiddle_w00_share[1] = rnd_i[0]; + + twiddle_w01_share[0] = masked_w01_reg[0] - rnd_i[1]; + twiddle_w01_share[1] = rnd_i[1]; - w11_reg_share[0] = w11_reg[0] - rnd_i[2]; - w11_reg_share[1] = rnd_i[2]; end else begin u00_share[0] = 'h0; @@ -231,11 +269,17 @@ always_comb begin w01_share[0] = 'h0; w01_share[1] = 'h0; - w10_reg_share[0] = 'h0; - w10_reg_share[1] = 'h0; + // w10_reg_share[0] = 'h0; + // w10_reg_share[1] = 'h0; + + // w11_reg_share[0] = 'h0; + // w11_reg_share[1] = 'h0; + + twiddle_w00_share[0] = 'h0; + twiddle_w00_share[1] = 'h0; - w11_reg_share[0] = 'h0; - w11_reg_share[1] = 'h0; + twiddle_w01_share[0] = 'h0; + twiddle_w01_share[1] = 'h0; end end @@ -307,7 +351,7 @@ ntt_masked_butterfly1x2 #( .reset_n(reset_n), .zeroize(zeroize), // .enable() - .uvw_i({uv00_share, uv01_share, uv10_share, uv11_share}), //TODO check connection + .uvw_i({uv00_share, uv01_share, uv10_share, uv11_share, twiddle_w00_share, twiddle_w01_share}), //TODO check connection .rnd_i({rnd_i[4], rnd_i[3], rnd_i[2], rnd_i[1], rnd_i[0]}), .uv_o(masked_gs_stage1_uvo) ); @@ -322,9 +366,9 @@ ntt_butterfly #( .reset_n(reset_n), .zeroize(zeroize), .mode(mode), - .opu_i(masking_en ? 'h0 : u00), - .opv_i(masking_en ? 'h0 : v00), - .opw_i(masking_en ? 'h0 : w00), + .opu_i(masking_en ? HALF_WIDTH'(0) : u00), + .opv_i(masking_en ? HALF_WIDTH'(0) : v00), + .opw_i(masking_en ? HALF_WIDTH'(0) : w00), .accumulate(accumulate), .u_o(u10_int), .v_o(u11_int), @@ -338,9 +382,9 @@ ntt_butterfly #( .reset_n(reset_n), .zeroize(zeroize), .mode(mode), - .opu_i(masking_en ? 'h0 : u01), - .opv_i(masking_en ? 'h0 : v01), - .opw_i(masking_en ? 'h0 : w01), + .opu_i(masking_en ? HALF_WIDTH'(0) : u01), + .opv_i(masking_en ? HALF_WIDTH'(0) : v01), + .opw_i(masking_en ? HALF_WIDTH'(0) : w01), .accumulate(accumulate), .u_o(v10_int), .v_o(v11_int), diff --git a/src/ntt_top/rtl/ntt_top.sv b/src/ntt_top/rtl/ntt_top.sv index d3afd27..1a5c848 100644 --- a/src/ntt_top/rtl/ntt_top.sv +++ b/src/ntt_top/rtl/ntt_top.sv @@ -124,6 +124,8 @@ module ntt_top logic pw_rden, pw_rden_dest_mem; logic sampler_valid_reg; logic [MEM_DATA_WIDTH-1:0] pwm_b_rd_data_reg; + //PWM+INTT IF - masking + hybrid_bf_uvwi_t hybrid_pw_uvw_i; //Flop ntt_ctrl pwm output wr addr to align with BFU output flop logic [MLDSA_MEM_ADDR_WIDTH-1:0] pwm_wr_addr_c_reg; @@ -144,6 +146,7 @@ module ntt_top logic gs_mode; logic pwo_mode; logic pwm_mode, pwa_mode, pws_mode; + logic pwm_intt_mode; assign ct_mode = (mode == ct); assign gs_mode = (mode == gs); @@ -151,6 +154,7 @@ module ntt_top assign pwm_mode = (mode == pwm); assign pwa_mode = (mode == pwa); assign pws_mode = (mode == pws); + assign pwm_intt_mode = (mode == pwm_intt); assign pw_rden_dest_mem = accumulate ? pw_rden : 1'b0; //Mem IF assignments: @@ -249,7 +253,7 @@ module ntt_top uvw_i.w10_i = twiddle_factor[(2*NTT_REG_SIZE)-1:NTT_REG_SIZE]; uvw_i.w11_i = twiddle_factor[(3*NTT_REG_SIZE)-1:(2*NTT_REG_SIZE)]; end - gs: begin + gs, pwm_intt: begin if (shuffle_en) begin uvw_i.w11_i = twiddle_factor[(3*NTT_REG_SIZE)-1:(2*NTT_REG_SIZE)]; uvw_i.w10_i = twiddle_factor[(3*NTT_REG_SIZE)-1:(2*NTT_REG_SIZE)]; @@ -305,6 +309,7 @@ module ntt_top .masking_en(masking_en), .uvw_i(uvw_i), .pw_uvw_i(pw_uvw_i), + .hybrid_pw_uvw_i(hybrid_pw_uvw_i), .rnd_i(rnd_i), .accumulate(accumulate), .uv_o(uv_o), @@ -489,6 +494,8 @@ module ntt_top pw_uvw_i.w3_i = 'h0; end endcase + + hybrid_pw_uvw_i = {pw_uvw_i, uvw_i.w00_i, uvw_i.w01_i, uvw_i.w10_i, uvw_i.w11_i}; end assign bf_enable_mux = ct_mode ? bf_enable : bf_enable_reg; assign mem_wren_mux = ~shuffle_en & ct_mode ? mem_wren_reg : mem_wren; From cbddbf50cb7a2d6efc0d6acbc30c70c6f0baf19e Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Tue, 19 Nov 2024 09:22:33 -0800 Subject: [PATCH 15/23] Lint fixes, rand bits --- src/abr_libs/rtl/abr_masked_MUX.sv | 3 ++ .../rtl/abr_masked_N_bit_mult_two_share.sv | 8 ++--- src/mldsa_top/rtl/mldsa_top.sv | 2 +- src/ntt_top/rtl/ntt_ctrl.sv | 2 +- src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv | 36 ++++++++++++------- src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv | 6 ++-- src/ntt_top/rtl/ntt_masked_BFU_mult.sv | 14 ++++---- src/ntt_top/rtl/ntt_masked_butterfly1x2.sv | 8 ++--- src/ntt_top/rtl/ntt_masked_gs_butterfly.sv | 4 +-- src/ntt_top/rtl/ntt_masked_mult_redux46.sv | 28 +++++++-------- 10 files changed, 61 insertions(+), 50 deletions(-) diff --git a/src/abr_libs/rtl/abr_masked_MUX.sv b/src/abr_libs/rtl/abr_masked_MUX.sv index b9fb7f2..ff16c78 100644 --- a/src/abr_libs/rtl/abr_masked_MUX.sv +++ b/src/abr_libs/rtl/abr_masked_MUX.sv @@ -51,6 +51,7 @@ module abr_masked_MUX #( if (sub_i) begin // When subtracting, select bits are based on carry0 s = carry0; + c0c1 = 2'h0; //verilator end else begin // When adding, select bits are based on inverted (carry0 ^ carry1) @@ -58,6 +59,8 @@ module abr_masked_MUX #( s[0] = ~c0c1[0]; s[1] = c0c1[1]; end + end + always_comb begin //verilator for (int i = 0; i < WIDTH; i++) begin xy[i] = r0[i] ^ r1[i]; xyk[i][0] = xy[i][0] ^ rnd_xor[i]; diff --git a/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv b/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv index b16663c..ef281a8 100644 --- a/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv +++ b/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv @@ -63,10 +63,10 @@ // Calculation stage always_comb begin - calculation[0] = x[0] * y[0]; // Multiplication of the first share x and first share y - calculation[1] = x[1] * y[0]; // Multiplication of the second share x and first share y - calculation[2] = x[0] * y[1]; // Multiplication of the first share x and second share y - calculation[3] = x[1] * y[1]; // Multiplication of the second share x and second share y + calculation[0] = WIDTH'(x[0] * y[0]); // Multiplication of the first share x and first share y + calculation[1] = WIDTH'(x[1] * y[0]); // Multiplication of the second share x and first share y + calculation[2] = WIDTH'(x[0] * y[1]); // Multiplication of the first share x and second share y + calculation[3] = WIDTH'(x[1] * y[1]); // Multiplication of the second share x and second share y calculation_rand[0] = calculation[2] + random; calculation_rand[1] = calculation[1] - random; diff --git a/src/mldsa_top/rtl/mldsa_top.sv b/src/mldsa_top/rtl/mldsa_top.sv index 1ba01d2..0ed7724 100644 --- a/src/mldsa_top/rtl/mldsa_top.sv +++ b/src/mldsa_top/rtl/mldsa_top.sv @@ -517,7 +517,7 @@ generate .shuffle_en(shuffle_en[g_inst]), .random(rand_bits[5:0]), .masking_en(1'b0), - .rnd_i(rand_bits[RND_W-1:6]), + .rnd_i(rand_bits[RND_W-1:6]), //('h0), //rand_bits[RND_W-1:6]), //NTT mem IF .mem_wr_req(ntt_mem_wr_req[g_inst]), .mem_rd_req(ntt_mem_rd_req[g_inst]), diff --git a/src/ntt_top/rtl/ntt_ctrl.sv b/src/ntt_top/rtl/ntt_ctrl.sv index cd3d4a5..a808233 100644 --- a/src/ntt_top/rtl/ntt_ctrl.sv +++ b/src/ntt_top/rtl/ntt_ctrl.sv @@ -745,7 +745,7 @@ always_comb begin buf_wr_rst_count_ntt = 1'b1; //There are no more mem reads, so buf writes need to halt buf_rd_rst_count_ntt = 1'b0; //There are still some entries in buf that BF2x2 needs to pick up bf_enable_fsm = pwo_mode ? sampler_valid : (buf_count <= 3); - incr_twiddle_addr_fsm = (ct_mode | gs_mode | pwm_intt); + incr_twiddle_addr_fsm = (ct_mode | gs_mode | pwm_intt_mode); rd_addr_step = NTT_READ_ADDR_STEP; incr_pw_rd_addr = (pwo_mode & sampler_valid); pw_rden_fsm = (pwo_mode & sampler_valid); diff --git a/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv index 6034dc0..11331dc 100644 --- a/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv +++ b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv @@ -191,48 +191,54 @@ always_comb begin //TODO: check randomness with Emre //Split u inputs if (masking_en) begin - u00_share[0] = /*uvw_i.u00_i*/u00 - rnd_i[0]; + u00_share[0] = /*uvw_i.u00_i*/WIDTH'(u00) - rnd_i[0]; u00_share[1] = rnd_i[0]; - u01_share[0] = /*uvw_i.u01_i*/u01 - rnd_i[1]; + u01_share[0] = /*uvw_i.u01_i*/WIDTH'(u01) - rnd_i[1]; u01_share[1] = rnd_i[1]; - u10_share[0] = u10 - rnd_i[0]; + u10_share[0] = WIDTH'(u10) - rnd_i[0]; u10_share[1] = rnd_i[0]; - u11_share[0] = u11 - rnd_i[0]; + u11_share[0] = WIDTH'(u11) - rnd_i[0]; u11_share[1] = rnd_i[0]; //Split v inputs - v00_share[0] = /*uvw_i.v00_i*/v00 - rnd_i[2]; + v00_share[0] = /*uvw_i.v00_i*/WIDTH'(v00) - rnd_i[2]; v00_share[1] = rnd_i[2]; - v01_share[0] = /*uvw_i.v01_i*/v01 - rnd_i[3]; + v01_share[0] = /*uvw_i.v01_i*/WIDTH'(v01) - rnd_i[3]; v01_share[1] = rnd_i[3]; - v10_share[0] = v10 - rnd_i[2]; + v10_share[0] = WIDTH'(v10) - rnd_i[2]; v10_share[1] = rnd_i[2]; - v11_share[0] = v11 - rnd_i[2]; + v11_share[0] = WIDTH'(v11) - rnd_i[2]; v11_share[1] = rnd_i[2]; //Split w inputs - w00_share[0] = /*uvw_i.w00_i*/w00 - rnd_i[4]; + w00_share[0] = /*uvw_i.w00_i*/WIDTH'(w00) - rnd_i[4]; w00_share[1] = rnd_i[4]; - w01_share[0] = /*uvw_i.w01_i*/w01 - rnd_i[0]; + w01_share[0] = /*uvw_i.w01_i*/WIDTH'(w01) - rnd_i[0]; w01_share[1] = rnd_i[0]; + w10_share[0] = WIDTH'(w10) - rnd_i[1]; + w10_share[1] = rnd_i[1]; + + w11_share[0] = WIDTH'(w11) - rnd_i[2]; + w11_share[1] = rnd_i[2]; + // w10_reg_share[0] = w10_reg[0] - rnd_i[1]; // w10_reg_share[1] = rnd_i[1]; // w11_reg_share[0] = w11_reg[0] - rnd_i[2]; // w11_reg_share[1] = rnd_i[2]; - twiddle_w00_share[0] = masked_w00_reg[0] - rnd_i[0]; + twiddle_w00_share[0] = WIDTH'(masked_w00_reg[0]) - rnd_i[0]; twiddle_w00_share[1] = rnd_i[0]; - twiddle_w01_share[0] = masked_w01_reg[0] - rnd_i[1]; + twiddle_w01_share[0] = WIDTH'(masked_w01_reg[0]) - rnd_i[1]; twiddle_w01_share[1] = rnd_i[1]; end @@ -269,6 +275,12 @@ always_comb begin w01_share[0] = 'h0; w01_share[1] = 'h0; + w10_share[0] = 'h0; + w10_share[1] = 'h0; + + w11_share[0] = 'h0; + w11_share[1] = 'h0; + // w10_reg_share[0] = 'h0; // w10_reg_share[1] = 'h0; diff --git a/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv b/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv index 8dc9880..6fd1119 100644 --- a/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +++ b/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv @@ -38,14 +38,14 @@ module ntt_masked_BFU_add_sub //Internal signals logic [1:0][WIDTH-1:0] v_int, add_res; - logic [1:0][WIDTH-1:0] add_res_reg [WIDTH+4:0]; //TODO parameterize + logic [WIDTH+4:0][1:0][WIDTH-1:0] add_res_reg; //TODO parameterize logic [1:0] add_res_rolled [WIDTH-1:0]; logic [1:0] add_res_bool [WIDTH-1:0]; logic [1:0] add_res_arith [WIDTH-1:0]; logic [WIDTH-1:0] prime0, prime1, add_res_rolled0, add_res_rolled1; logic [1:0][WIDTH-1:0] add_res_reduced, prime_packed; logic [1:0] prime [WIDTH-1:0]; - logic [WIDTH-1:0] add_res_bool0, add_res_bool1, add_res_arith0, add_res_arith1, add_res_reduced0, add_res_reduced1; + logic [WIDTH-1:0] add_res_bool0, add_res_bool1, add_res_arith0, add_res_arith1; // logic [WIDTH-1:0] prime0, prime1; //Add flops to inputs to avoid pruning TODO @@ -171,8 +171,6 @@ module ntt_masked_BFU_add_sub add_res_bool1[i] = add_res_bool[i][1]; add_res_arith0[i] = add_res_arith[i][0]; add_res_arith1[i] = add_res_arith[i][1]; - add_res_reduced0[i] = add_res_reduced[i][0]; - add_res_reduced1[i] = add_res_reduced[i][1]; end //If bit[23] = 1, subtract Q from adder result diff --git a/src/ntt_top/rtl/ntt_masked_BFU_mult.sv b/src/ntt_top/rtl/ntt_masked_BFU_mult.sv index 5214824..fd53c44 100644 --- a/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +++ b/src/ntt_top/rtl/ntt_masked_BFU_mult.sv @@ -107,14 +107,12 @@ module ntt_masked_BFU_mult ); always_comb begin - for (int i = 0; i < WIDTH; i++) begin - if (i < HALF_WIDTH) begin - mul_res_bool_reduced_padded[i][0] = mul_res_bool_reduced[i][0]; - mul_res_bool_reduced_padded[i][1] = mul_res_bool_reduced[i][1]; - end - else begin - mul_res_bool_reduced_padded[i] = 2'b00; - end + for (int i = 0; i < HALF_WIDTH; i++) begin + mul_res_bool_reduced_padded[i][0] = mul_res_bool_reduced[i][0]; + mul_res_bool_reduced_padded[i][1] = mul_res_bool_reduced[i][1]; + end + for (int i = HALF_WIDTH; i < WIDTH; i++) begin + mul_res_bool_reduced_padded[i] = 2'b00; end end diff --git a/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv b/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv index d9c767c..99a757d 100644 --- a/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +++ b/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv @@ -99,10 +99,10 @@ module ntt_masked_butterfly1x2 v11_packed[0][i] = v11_int[i][0]; v11_packed[1][i] = v11_int[i][1]; end - u10_combined = u10_packed[0] + u10_packed[1]; - v10_combined = v10_packed[0] + v10_packed[1]; - u11_combined = u11_packed[0] + u11_packed[1]; - v11_combined = v11_packed[0] + v11_packed[1]; + u10_combined = HALF_WIDTH'(u10_packed[0] + u10_packed[1]); + v10_combined = HALF_WIDTH'(v10_packed[0] + v10_packed[1]); + u11_combined = HALF_WIDTH'(u11_packed[0] + u11_packed[1]); + v11_combined = HALF_WIDTH'(v11_packed[0] + v11_packed[1]); end //Perform div2 on combined outputs diff --git a/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv b/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv index 49cef88..a6512b9 100644 --- a/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +++ b/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv @@ -39,7 +39,7 @@ module ntt_masked_gs_butterfly ); localparam MASKED_MULT_LATENCY = 207; - logic [1:0][WIDTH-1:0] w_reg [52:0]; //TODO parameterize + logic [52:0][1:0][WIDTH-1:0] w_reg; //TODO parameterize logic [1:0] add_res [WIDTH-1:0]; logic [1:0] sub_res [WIDTH-1:0]; logic [1:0] mul_res [WIDTH-1:0]; @@ -137,7 +137,7 @@ module ntt_masked_gs_butterfly .rnd1(rnd_i[3]), .rnd2(rnd_i[0]), .rnd3(rnd_i[1]), - .rnd4(rnd_i[2]+rnd_i[3]), + .rnd4(WIDTH'(rnd_i[2]+rnd_i[3])), .res(mul_res) //(u-v)*w ); diff --git a/src/ntt_top/rtl/ntt_masked_mult_redux46.sv b/src/ntt_top/rtl/ntt_masked_mult_redux46.sv index 3c58486..dcabec6 100644 --- a/src/ntt_top/rtl/ntt_masked_mult_redux46.sv +++ b/src/ntt_top/rtl/ntt_masked_mult_redux46.sv @@ -41,7 +41,7 @@ module ntt_masked_mult_redux46 ); // Intermediate wires for the sliced values - int cnt; + logic [4:0] cnt; logic [1:0] z_45_23 [22:0]; logic [1:0] z_45_23_delayed [22:0]; logic [1:0] z_45_33 [13:0]; @@ -91,7 +91,7 @@ module ntt_masked_mult_redux46 // Counter for z_45_33 cnt = 0; for (int i = 33; i <= 45; i = i + 1) begin - z_45_33[cnt] = x[i]; + z_45_33[cnt[3:0]] = x[i]; cnt = cnt + 1; end z_45_33[13] = 2'b00; @@ -99,26 +99,26 @@ module ntt_masked_mult_redux46 // Counter for z_45_43 cnt = 0; for (int i = 43; i <= 45; i = i + 1) begin - z_45_43[cnt] = x[i]; - z_45_43_padded_8[cnt] = x[i]; + z_45_43[cnt[1:0]] = x[i]; + z_45_43_padded_8[cnt[3:0]] = x[i]; cnt = cnt + 1; end z_45_43[3] = 2'b00; z_45_43_padded_8[3] = 2'b00; - for (int i = 0; i < 8; i = i + 1) + for (int i = 0; i < 7; i = i + 1) //i < 8 z_45_43_padded_8[i+4] = 2'b00; // Counter for z_12_0 cnt = 0; for (int i = 0; i <= 12; i = i + 1) begin - z_12_0[cnt] = x[i]; + z_12_0[cnt[3:0]] = x[i]; cnt = cnt + 1; end // Counter for z_22_13 cnt = 0; for (int i = 13; i <= 22; i = i + 1) begin - z_22_13[cnt] = x[i]; + z_22_13[cnt[3:0]] = x[i]; cnt = cnt + 1; end z_22_13[10] = 2'b00; @@ -126,7 +126,7 @@ module ntt_masked_mult_redux46 // Counter for z_32_23 cnt = 0; for (int i = 23; i <= 32; i = i + 1) begin - z_32_23[cnt] = x[i]; + z_32_23[cnt[3:0]] = x[i]; cnt = cnt + 1; end z_32_23[10] = 2'b00; @@ -134,16 +134,16 @@ module ntt_masked_mult_redux46 // Counter for z_42_33 cnt = 0; for (int i = 33; i <= 42; i = i + 1) begin - z_42_33[cnt] = x[i]; + z_42_33[cnt[3:0]] = x[i]; cnt = cnt + 1; end z_42_33[10] = 2'b00; cnt = 0; for (int i = 10; i <= 11; i = i + 1) begin - c11_10[cnt] = c0_11[i]; - c11_10_padded_9[cnt] = c0_11[i]; - c11_10_padded_2[cnt] = c0_11[i]; + c11_10[cnt[0]] = c0_11[i]; + c11_10_padded_9[cnt[3:0]] = c0_11[i]; + c11_10_padded_2[cnt[1:0]] = c0_11[i]; cnt = cnt + 1; end c11_10_padded_2[2] = 2'b00; @@ -153,8 +153,8 @@ module ntt_masked_mult_redux46 cnt = 0; for (int i = 0; i <= 9; i = i + 1) begin - c9_0[cnt] = c0_11[i]; - c9_0_padded[cnt] = c0_11[i]; + c9_0[cnt[3:0]] = c0_11[i]; + c9_0_padded[cnt[3:0]] = c0_11[i]; cnt = cnt + 1; end c9_0_padded[10] = 2'b00; From c0e67f30777ea422875fa38d5c79a83fcb728a3e Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Tue, 19 Nov 2024 10:41:12 -0800 Subject: [PATCH 16/23] Mask rand bits when NTT is not in use --- src/mldsa_top/rtl/mldsa_top.sv | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mldsa_top/rtl/mldsa_top.sv b/src/mldsa_top/rtl/mldsa_top.sv index 0ed7724..b863c03 100644 --- a/src/mldsa_top/rtl/mldsa_top.sv +++ b/src/mldsa_top/rtl/mldsa_top.sv @@ -494,7 +494,9 @@ generate end default: begin end - endcase + endcase + + end ntt_top #( @@ -517,7 +519,7 @@ generate .shuffle_en(shuffle_en[g_inst]), .random(rand_bits[5:0]), .masking_en(1'b0), - .rnd_i(rand_bits[RND_W-1:6]), //('h0), //rand_bits[RND_W-1:6]), + .rnd_i(rand_bits[RND_W-1:6] & {(RND_W-6){ntt_busy[g_inst]}}), //('h0), //rand_bits[RND_W-1:6]), //NTT mem IF .mem_wr_req(ntt_mem_wr_req[g_inst]), .mem_rd_req(ntt_mem_rd_req[g_inst]), From a37271ffbb5789b4cdaa136bde678df5bc9e0619 Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Tue, 19 Nov 2024 13:02:36 -0800 Subject: [PATCH 17/23] Gate the randomness further to be used in only PWM and INTT masked ops --- src/mldsa_top/rtl/mldsa_top.sv | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mldsa_top/rtl/mldsa_top.sv b/src/mldsa_top/rtl/mldsa_top.sv index b863c03..cff9cd5 100644 --- a/src/mldsa_top/rtl/mldsa_top.sv +++ b/src/mldsa_top/rtl/mldsa_top.sv @@ -118,6 +118,7 @@ module mldsa_top logic [1:0] ntt_done; logic [1:0] ntt_busy; logic [1:0] shuffle_en; + logic [1:0] ntt_random_en; mem_if_t w1_mem_wr_req; logic [3:0] w1_mem_wr_data; @@ -448,6 +449,7 @@ generate sampler_valid[g_inst] = 0; sampler_ntt_mode[g_inst] = 0; shuffle_en[g_inst] = 0; //TODO: temp change for testing, remove and add to opcodes + ntt_random_en[g_inst] = 0; unique case (ntt_mode[g_inst]) inside MLDSA_NTT_NONE: begin @@ -459,6 +461,7 @@ generate MLDSA_INTT: begin mode[g_inst] = gs; shuffle_en[g_inst] = 1; + ntt_random_en[g_inst] = 1; end MLDSA_PWM_SMPL: begin mode[g_inst] = pwm; @@ -475,12 +478,14 @@ generate mode[g_inst] = pwm; sampler_valid[g_inst] = 1; shuffle_en[g_inst] = 1; + ntt_random_en[g_inst] = 1; end MLDSA_PWM_ACCUM: begin mode[g_inst] = pwm; accumulate[g_inst] = 1; sampler_valid[g_inst] = 1; shuffle_en[g_inst] = 1; + ntt_random_en[g_inst] = 1; end MLDSA_PWA: begin mode[g_inst] = pwa; @@ -519,7 +524,7 @@ generate .shuffle_en(shuffle_en[g_inst]), .random(rand_bits[5:0]), .masking_en(1'b0), - .rnd_i(rand_bits[RND_W-1:6] & {(RND_W-6){ntt_busy[g_inst]}}), //('h0), //rand_bits[RND_W-1:6]), + .rnd_i(rand_bits[RND_W-1:6] /*& {(RND_W-6){ntt_busy[g_inst]}}*/ & {(RND_W-6){ntt_random_en[g_inst]}}), //('h0), //rand_bits[RND_W-1:6]), //NTT mem IF .mem_wr_req(ntt_mem_wr_req[g_inst]), .mem_rd_req(ntt_mem_rd_req[g_inst]), From f2418a5146187ac9bd3522e72047395a925e8eab Mon Sep 17 00:00:00 2001 From: Nitsirks Date: Tue, 19 Nov 2024 16:04:35 -0800 Subject: [PATCH 18/23] stashing clean up --- src/mldsa_top/rtl/mldsa_ctrl.sv | 344 ++++++++++++++-------------- src/mldsa_top/rtl/mldsa_ctrl_pkg.sv | 5 +- src/mldsa_top/rtl/mldsa_seq_prim.sv | 3 + src/mldsa_top/rtl/mldsa_seq_sec.sv | 3 + 4 files changed, 181 insertions(+), 174 deletions(-) diff --git a/src/mldsa_top/rtl/mldsa_ctrl.sv b/src/mldsa_top/rtl/mldsa_ctrl.sv index 55cc101..f782456 100644 --- a/src/mldsa_top/rtl/mldsa_ctrl.sv +++ b/src/mldsa_top/rtl/mldsa_ctrl.sv @@ -268,11 +268,12 @@ always_comb mldsa_privkey_lock = '0; logic [MsgWidth-1:0] msg_data, msg_data_nxt; - logic seq_en; - logic [MLDSA_PROG_ADDR_W-1 : 0] prog_cntr, prog_cntr_nxt; - mldsa_seq_instr_t instr; - logic [MLDSA_PROG_ADDR_W-1 : 0] sign_prog_cntr, sign_prog_cntr_nxt; - mldsa_seq_instr_t sign_instr; + logic prim_seq_en; + logic sec_seq_en; + logic [MLDSA_PROG_ADDR_W-1 : 0] prim_prog_cntr, prim_prog_cntr_nxt; + mldsa_seq_instr_t prim_instr; + logic [MLDSA_PROG_ADDR_W-1 : 0] sec_prog_cntr, sec_prog_cntr_nxt; + mldsa_seq_instr_t sec_instr; logic msg_done; logic [MsgStrbW-1:0] last_msg_strobe; @@ -313,7 +314,7 @@ always_comb mldsa_privkey_lock = '0; assign mldsa_reg_hwif_in_o = mldsa_reg_hwif_in; assign mldsa_reg_hwif_out = mldsa_reg_hwif_out_i; - always_comb mldsa_ready = (prog_cntr == MLDSA_RESET); + always_comb mldsa_ready = (prim_prog_cntr == MLDSA_RESET); always_ff @(posedge clk or negedge rst_b) begin if (!rst_b) @@ -375,7 +376,7 @@ always_comb mldsa_privkey_lock = '0; end for (int dword=0; dword < VERIFY_RES_NUM_DWORDS; dword++)begin - mldsa_reg_hwif_in.MLDSA_VERIFY_RES[dword].VERIFY_RES.we = verify_valid & sampler_state_dv_i & (instr.operand3 == MLDSA_DEST_VERIFY_RES_REG_ID); + mldsa_reg_hwif_in.MLDSA_VERIFY_RES[dword].VERIFY_RES.we = verify_valid & sampler_state_dv_i & (prim_instr.operand3 == MLDSA_DEST_VERIFY_RES_REG_ID); mldsa_reg_hwif_in.MLDSA_VERIFY_RES[VERIFY_RES_NUM_DWORDS-1-dword].VERIFY_RES.next = sampler_state_data_i[0][dword*32 +: 32]; mldsa_reg_hwif_in.MLDSA_VERIFY_RES[dword].VERIFY_RES.hwclr = zeroize | clear_verify_valid; end @@ -524,12 +525,12 @@ always_comb mldsa_privkey_lock = '0; privatekey_reg <= '0; end else begin if (sampler_state_dv_i) begin - if (instr.operand3 == MLDSA_DEST_K_RHO_REG_ID) begin + if (prim_instr.operand3 == MLDSA_DEST_K_RHO_REG_ID) begin //HW write rho privatekey_reg.enc.rho <= sampler_state_data_i[0][255:0]; //FIXME optimize this to be shared with pubkey? //HW write K privatekey_reg.enc.K <= sampler_state_data_i[0][1023:768]; - end else if (instr.operand3 == MLDSA_DEST_TR_REG_ID) begin + end else if (prim_instr.operand3 == MLDSA_DEST_TR_REG_ID) begin //HW write tr privatekey_reg.enc.tr <= sampler_state_data_i[0][511:0]; end @@ -675,7 +676,7 @@ always_comb mldsa_privkey_lock = '0; signature_reg <= '0; end else begin //HW write c - if (sampler_state_dv_i & (instr.operand3 == MLDSA_DEST_SIG_C_REG_ID)) begin + if (sampler_state_dv_i & (prim_instr.operand3 == MLDSA_DEST_SIG_C_REG_ID)) begin signature_reg.enc.c <= sampler_state_data_i[0][511:0]; end else if (mldsa_ready & api_sig_c_dec & mldsa_reg_hwif_out.MLDSA_SIGNATURE.req_is_wr) begin for (int dword = 0; dword < SIGNATURE_NUM_DWORDS; dword++) begin @@ -766,7 +767,7 @@ always_comb mldsa_privkey_lock = '0; assign pubkey_ram_rdata_t1 = pubkey_ram_rdata; - always_comb pkdecode_rd_en = instr.opcode.aux_en & (instr.opcode.mode.aux_mode == MLDSA_PKDECODE); + always_comb pkdecode_rd_en = prim_instr.opcode.aux_en & (prim_instr.opcode.mode.aux_mode == MLDSA_PKDECODE); always_comb sampler_pk_rd_en = (sampler_src == MLDSA_PK_REG_ID) & (sampler_src_offset inside {[4:342]}) & ~msg_hold; @@ -820,7 +821,7 @@ always_comb mldsa_privkey_lock = '0; publickey_reg <= '0; end else begin //HW write rho - if (sampler_state_dv_i & (instr.operand3 == MLDSA_DEST_K_RHO_REG_ID)) begin + if (sampler_state_dv_i & (prim_instr.operand3 == MLDSA_DEST_K_RHO_REG_ID)) begin publickey_reg.enc.rho <= sampler_state_data_i[0][255:0]; end else if (mldsa_ready & api_pubkey_rho_dec & mldsa_reg_hwif_out.MLDSA_PUBKEY.req_is_wr) begin publickey_reg.enc.rho[api_pk_rho_addr] <= mldsa_reg_hwif_out.MLDSA_PUBKEY.wr_data; @@ -853,7 +854,7 @@ always_comb mldsa_privkey_lock = '0; MLDSA_SIGN_RND_ID: msg_data <= {sign_rnd_reg[{sampler_src_offset[1:0],1'b1}],sign_rnd_reg[{sampler_src_offset[1:0],1'b0}]}; MLDSA_RHO_P_KAPPA_ID: msg_data <= msg_done ? {48'b0,(kappa_reg + sampler_imm[2:0])} : rho_p_reg[sampler_src_offset[2:0]]; MLDSA_SIG_C_REG_ID: msg_data <= {signature_reg.enc.c[{sampler_src_offset[2:0],1'b1}], signature_reg.enc.c[{sampler_src_offset[2:0],1'b0}]}; - MLDSA_PK_REG_ID: msg_data <= {publickey_reg.enc.rho[{sampler_src_offset[1:0],1'b1}],publickey_reg.raw[{sampler_src_offset[1:0],1'b0}]}; + MLDSA_PK_REG_ID: msg_data <= {publickey_reg.enc.rho[{sampler_src_offset[1:0],1'b1}],publickey_reg.enc.rho[{sampler_src_offset[1:0],1'b0}]}; MLDSA_ENTROPY_ID: msg_data <= lfsr_entropy_reg[sampler_src_offset[2:0]]; MLDSA_CNT_ID: msg_data <= counter_reg; default: msg_data <= '0; @@ -878,13 +879,13 @@ always_comb mldsa_privkey_lock = '0; mu_reg <= 0; end else if (sampler_state_dv_i) begin - if (instr.operand3 == MLDSA_DEST_K_RHO_REG_ID) begin + if (prim_instr.operand3 == MLDSA_DEST_K_RHO_REG_ID) begin rho_p_reg <= sampler_state_data_i[0][767:256]; end - else if (instr.operand3 == MLDSA_DEST_MU_REG_ID) begin + else if (prim_instr.operand3 == MLDSA_DEST_MU_REG_ID) begin mu_reg <= sampler_state_data_i[0][511:0]; end - else if (instr.operand3 == MLDSA_DEST_RHO_P_REG_ID) begin + else if (prim_instr.operand3 == MLDSA_DEST_RHO_P_REG_ID) begin rho_p_reg <= sampler_state_data_i[0][511:0]; end end @@ -900,7 +901,7 @@ always_comb mldsa_privkey_lock = '0; lfsr_entropy_reg <= lfsr_entropy_reg ^ entropy_reg; end else if (sampler_state_dv_i) begin - if (instr.operand3 == MLDSA_DEST_LFSR_SEED_REG_ID) begin + if (prim_instr.operand3 == MLDSA_DEST_LFSR_SEED_REG_ID) begin lfsr_seed_o <= sampler_state_data_i[0][2*LFSR_W-1:0]; lfsr_entropy_reg <= sampler_state_data_i[0][2*LFSR_W+511:2*LFSR_W]; end @@ -971,38 +972,33 @@ always_comb mldsa_privkey_lock = '0; end end - //FIXME check if aux mode is set appropriately? + //Clear signature if makehint or normcheck fail always_comb clear_signature_valid = signing_process & ((makehint_done_i & makehint_invalid_i) | (normcheck_done_i & normcheck_invalid_i)); //FIXME jump to done if this happens, could cause x reads (or fix sigdecode to not stop early) always_comb clear_verify_valid = verifying_process & ((normcheck_done_i & normcheck_invalid_i) | - (instr.opcode.aux_en & (instr.opcode.mode.aux_mode == MLDSA_SIGDEC_H) & sigdecode_h_invalid_i)); + (prim_instr.opcode.aux_en & (prim_instr.opcode.mode.aux_mode == MLDSA_SIGDEC_H) & sigdecode_h_invalid_i)); //Primary sequencer for keygen, signing, and verify - - //FIXME need to optimize sequencer - //Should be able to implement opcode to load next keccak data while sampler or ntt is running - //requires a granularity of busy that is context aware of the current and next steps or - //need a way to advance to the next step and understand when it can proceed and when it can't always_comb subcomponent_busy = !(ctrl_fsm_ns inside {MLDSA_CTRL_IDLE, MLDSA_CTRL_MSG_WAIT}) | sampler_busy_i | ntt_busy_i[0]; always_comb error_flag_edge = skdecode_error_i; - always_comb seq_en = 1; + //program counter always_ff @(posedge clk or negedge rst_b) begin if(!rst_b) begin - prog_cntr <= MLDSA_RESET; + prim_prog_cntr <= MLDSA_RESET; end else if(zeroize) begin - prog_cntr <= MLDSA_RESET; + prim_prog_cntr <= MLDSA_RESET; end else begin if (error_flag_edge) begin - prog_cntr <= MLDSA_ERROR; + prim_prog_cntr <= MLDSA_ERROR; end else begin - prog_cntr <= prog_cntr_nxt; + prim_prog_cntr <= prim_prog_cntr_nxt; end end end @@ -1020,127 +1016,129 @@ always_comb mldsa_privkey_lock = '0; update_kappa = 0; set_verify_valid = 0; set_entropy = 0; - prog_cntr_nxt = MLDSA_RESET; + prim_prog_cntr_nxt = MLDSA_RESET; + prim_seq_en = 1; - unique case (prog_cntr) inside + unique case (prim_prog_cntr) inside MLDSA_RESET : begin - // Waiting for new valid command + // Waiting for new valid command unique case (cmd_reg) inside MLDSA_KEYGEN : begin // keygen - prog_cntr_nxt = MLDSA_KG_S; + prim_prog_cntr_nxt = MLDSA_KG_S; keygen_process_nxt = 1; set_entropy = 1; end MLDSA_SIGN : begin // signing - prog_cntr_nxt = MLDSA_SIGN_RND_S; + prim_prog_cntr_nxt = MLDSA_SIGN_RND_S; signing_process_nxt = 1; set_entropy = 1; end MLDSA_VERIFY : begin // verifying - prog_cntr_nxt = MLDSA_VERIFY_S; + prim_prog_cntr_nxt = MLDSA_VERIFY_S; verifying_process_nxt = 1; set_verify_valid = 1; end MLDSA_KEYGEN_SIGN : begin // KEYGEN + SIGNING - prog_cntr_nxt = MLDSA_KG_S; + prim_prog_cntr_nxt = MLDSA_KG_S; keygen_signing_process_nxt = 1; set_entropy = 1; end default : begin - prog_cntr_nxt = MLDSA_RESET; + prim_prog_cntr_nxt = MLDSA_RESET; + prim_seq_en = 0; end endcase end MLDSA_KG_JUMP_SIGN : begin //Jump to signing process if (keygen_signing_process) begin - prog_cntr_nxt = MLDSA_SIGN_S; + prim_prog_cntr_nxt = MLDSA_SIGN_S; signing_process_nxt = 1; end else begin - prog_cntr_nxt = prog_cntr + 1; + prim_prog_cntr_nxt = prim_prog_cntr + 1; end end MLDSA_KG_E : begin // end of keygen - //prog_cntr_nxt = MLDSA_RESET; + //prim_prog_cntr_nxt = MLDSA_RESET; keygen_done = 1; end //START of Y access - check if Y is still valid MLDSA_SIGN_CHECK_Y_CLR : begin - if (y_valid | w0_valid) begin //Stalled until Y can be overwritten //FIXME reads are colliding - prog_cntr_nxt = prog_cntr; + if (y_valid | w0_valid) begin //Stalled until Y and w0 can be overwritten + prim_prog_cntr_nxt = prim_prog_cntr; end else begin - prog_cntr_nxt = prog_cntr + 1; + prim_prog_cntr_nxt = prim_prog_cntr + 1; end end //END of Y access - SET Y valid MLDSA_SIGN_SET_Y : begin set_y_valid = 1; - prog_cntr_nxt = prog_cntr + 1; + prim_prog_cntr_nxt = prim_prog_cntr + 1; end //START of W0 access - check if W0 is still valid MLDSA_SIGN_CHECK_W0_CLR : begin if (w0_valid) begin //Stalled until W0 can be overwritten - prog_cntr_nxt = prog_cntr; + prim_prog_cntr_nxt = prim_prog_cntr; end else begin - prog_cntr_nxt = prog_cntr + 1; + prim_prog_cntr_nxt = prim_prog_cntr + 1; end end //END of W0 access - SET W0 valid MLDSA_SIGN_SET_W0 : begin set_w0_valid = 1; - prog_cntr_nxt = prog_cntr + 1; + prim_prog_cntr_nxt = prim_prog_cntr + 1; end //START of C access - check if C is still valid MLDSA_SIGN_CHECK_C_CLR : begin if (c_valid) begin //Stalled until C can be overwritten - prog_cntr_nxt = prog_cntr; + prim_prog_cntr_nxt = prim_prog_cntr; end else begin - prog_cntr_nxt = prog_cntr + 1; + prim_prog_cntr_nxt = prim_prog_cntr + 1; end end //END of C access - SET C valid MLDSA_SIGN_SET_C : begin set_c_valid = 1; - prog_cntr_nxt = prog_cntr + 1; + prim_prog_cntr_nxt = prim_prog_cntr + 1; end MLDSA_SIGN_E : begin // end of challenge generation //increment kappa value update_kappa = 1; //restart challenge generation - prog_cntr_nxt = MLDSA_SIGN_CHECK_Y_CLR; + prim_prog_cntr_nxt = MLDSA_SIGN_CHECK_Y_CLR; end MLDSA_VERIFY_E : begin // end of verify flow verify_done = 1; end default : begin if (subcomponent_busy) begin //Stalled until sub-component is done - prog_cntr_nxt = prog_cntr; + prim_prog_cntr_nxt = prim_prog_cntr; end else begin - prog_cntr_nxt = prog_cntr + 1; + prim_prog_cntr_nxt = prim_prog_cntr + 1; end end endcase end -//Controller instruction decode - drives sampler and primary ntt +//Controller prim_instr decode - drives sampler and primary ntt //FIXME latch the sampler mode here always_comb begin sampler_mode_o = MLDSA_SAMPLER_NONE; - if (instr.opcode.sampler_en) begin - if (instr.opcode.ntt_en & instr.opcode.mode.ntt_mode inside {MLDSA_PWM_SMPL, MLDSA_PWM_ACCUM_SMPL}) begin + if (prim_instr.opcode.sampler_en) begin + if (prim_instr.opcode.ntt_en & prim_instr.opcode.mode.ntt_mode inside {MLDSA_PWM_SMPL, MLDSA_PWM_ACCUM_SMPL}) begin sampler_mode_o = MLDSA_REJ_SAMPLER; end else begin - sampler_mode_o = instr.opcode.mode.sampler_mode; + sampler_mode_o = prim_instr.opcode.mode.sampler_mode; end - end else if (instr.opcode.keccak_en) begin - sampler_mode_o = instr.opcode.mode.sampler_mode; - end else if (instr.opcode.aux_en) begin - if (instr.opcode.mode.aux_mode == MLDSA_DECOMP) begin + end else if (prim_instr.opcode.keccak_en) begin + sampler_mode_o = prim_instr.opcode.mode.sampler_mode; + end else if (prim_instr.opcode.aux_en) begin + if (prim_instr.opcode.mode.aux_mode == MLDSA_DECOMP) begin sampler_mode_o = MLDSA_SHAKE256; end end @@ -1148,37 +1146,37 @@ always_comb mldsa_privkey_lock = '0; always_comb begin ntt_mode_o[0] = MLDSA_NTT_NONE; - if (instr.opcode.ntt_en) begin - ntt_mode_o[0] = instr.opcode.mode.ntt_mode; + if (prim_instr.opcode.ntt_en) begin + ntt_mode_o[0] = prim_instr.opcode.mode.ntt_mode; end end always_comb sampler_src_offset = {4'b0, msg_cnt}; //fixme //FIXME one interface here? - always_comb ntt_mem_base_addr_o[0] = '{src_base_addr:instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0], - interim_base_addr:instr.operand2[MLDSA_MEM_ADDR_WIDTH-1:0], - dest_base_addr:instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]}; + always_comb ntt_mem_base_addr_o[0] = '{src_base_addr:prim_instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0], + interim_base_addr:prim_instr.operand2[MLDSA_MEM_ADDR_WIDTH-1:0], + dest_base_addr:prim_instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]}; - always_comb pwo_mem_base_addr_o[0] = '{pw_base_addr_b:instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0], //FIXME PWO src - pw_base_addr_a:instr.operand2[MLDSA_MEM_ADDR_WIDTH-1:0], //FIXME PWO src or sampler src - pw_base_addr_c:instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]}; + always_comb pwo_mem_base_addr_o[0] = '{pw_base_addr_b:prim_instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0], //FIXME PWO src + pw_base_addr_a:prim_instr.operand2[MLDSA_MEM_ADDR_WIDTH-1:0], //FIXME PWO src or sampler src + pw_base_addr_c:prim_instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]}; - always_comb dest_base_addr_o = instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]; - always_comb aux_src0_base_addr_o[0] = instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0]; - always_comb aux_src1_base_addr_o[0] = instr.operand2[MLDSA_MEM_ADDR_WIDTH-1:0]; - always_comb aux_dest_base_addr_o[0] = instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]; + always_comb dest_base_addr_o = prim_instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]; + always_comb aux_src0_base_addr_o[0] = prim_instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0]; + always_comb aux_src1_base_addr_o[0] = prim_instr.operand2[MLDSA_MEM_ADDR_WIDTH-1:0]; + always_comb aux_dest_base_addr_o[0] = prim_instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]; //determine the number of bytes in the last message //operand 2 contains the length of the message being fed to sha3 //shift a zero into the strobe for each byte, and invert to get the valid bytes -always_comb last_msg_strobe = ~(MsgStrbW'('1) << instr.length[$clog2(MsgStrbW)-1:0]); +always_comb last_msg_strobe = ~(MsgStrbW'('1) << prim_instr.length[$clog2(MsgStrbW)-1:0]); always_comb msg_hold = msg_valid_o & ~msg_rdy_i; //Done when msg count is equal to length //length is in bytes - compare against MSB from strobe width gets us the length in msg interface chunks -always_comb msg_done = msg_cnt >= instr.length[MLDSA_OPR_WIDTH-1:$clog2(MsgStrbW)]; +always_comb msg_done = msg_cnt >= prim_instr.length[MLDSA_OPR_WIDTH-1:$clog2(MsgStrbW)]; always_ff @(posedge clk or negedge rst_b) begin if (!rst_b) begin @@ -1201,7 +1199,7 @@ always_ff @(posedge clk or negedge rst_b) begin end end -always_comb decompose_mode_o = instr.opcode.aux_en & (instr.opcode.mode.aux_mode == MLDSA_USEHINT); +always_comb decompose_mode_o = prim_instr.opcode.aux_en & (prim_instr.opcode.mode.aux_mode == MLDSA_USEHINT); //State logic always_comb begin : primary_ctrl_fsm_out_combo @@ -1225,10 +1223,10 @@ always_comb begin : primary_ctrl_fsm_out_combo unique case (ctrl_fsm_ps) MLDSA_CTRL_IDLE: begin //load keccak data to SIPO - if (instr.opcode.keccak_en) + if (prim_instr.opcode.keccak_en) ctrl_fsm_ns = MLDSA_CTRL_SHA3_START; //start sampler flow, data already driven to SIPO - else if (instr.opcode.sampler_en | instr.opcode.ntt_en | instr.opcode.aux_en) + else if (prim_instr.opcode.sampler_en | prim_instr.opcode.ntt_en | prim_instr.opcode.aux_en) ctrl_fsm_ns = MLDSA_CTRL_FUNC_START; end MLDSA_CTRL_SHA3_START: begin @@ -1242,45 +1240,45 @@ always_comb begin : primary_ctrl_fsm_out_combo end MLDSA_CTRL_MSG_LOAD: begin msg_valid = 1; - sampler_src = instr.operand1; - sampler_imm = instr.imm; + sampler_src = prim_instr.operand1; + sampler_imm = prim_instr.imm; if (msg_done) begin - if (instr.opcode.sampler_en) ctrl_fsm_ns = MLDSA_CTRL_FUNC_START; + if (prim_instr.opcode.sampler_en) ctrl_fsm_ns = MLDSA_CTRL_FUNC_START; else ctrl_fsm_ns = MLDSA_CTRL_MSG_WAIT; end end MLDSA_CTRL_MSG_WAIT: begin //load another message - if (instr.opcode.keccak_en) ctrl_fsm_ns = MLDSA_CTRL_MSG_LOAD; + if (prim_instr.opcode.keccak_en) ctrl_fsm_ns = MLDSA_CTRL_MSG_LOAD; //kick off the sampler - else if (instr.opcode.sampler_en | instr.opcode.aux_en | instr.opcode.ntt_en) ctrl_fsm_ns = MLDSA_CTRL_FUNC_START; + else if (prim_instr.opcode.sampler_en | prim_instr.opcode.aux_en | prim_instr.opcode.ntt_en) ctrl_fsm_ns = MLDSA_CTRL_FUNC_START; else ctrl_fsm_ns = MLDSA_CTRL_MSG_LOAD; end MLDSA_CTRL_FUNC_START: begin ctrl_fsm_ns = MLDSA_CTRL_DONE; - sampler_start_o = instr.opcode.sampler_en; - ntt_enable_o[0] = instr.opcode.ntt_en; - if (instr.opcode.aux_en) begin - power2round_enable_o = (instr.opcode.mode.aux_mode == MLDSA_PWR2RND); - decompose_enable_o = (instr.opcode.mode.aux_mode inside {MLDSA_DECOMP,MLDSA_USEHINT}); - skencode_enable_o = (instr.opcode.mode.aux_mode == MLDSA_SKENCODE); - pkdecode_enable_o = (instr.opcode.mode.aux_mode == MLDSA_PKDECODE); - sigdecode_h_enable_o = (instr.opcode.mode.aux_mode == MLDSA_SIGDEC_H); - sigdecode_z_enable_o = (instr.opcode.mode.aux_mode == MLDSA_SIGDEC_Z); - normcheck_enable[0] = (instr.opcode.mode.aux_mode == MLDSA_NORMCHK); - lfsr_enable_o = (instr.opcode.mode.aux_mode == MLDSA_LFSR); + sampler_start_o = prim_instr.opcode.sampler_en; + ntt_enable_o[0] = prim_instr.opcode.ntt_en; + if (prim_instr.opcode.aux_en) begin + power2round_enable_o = (prim_instr.opcode.mode.aux_mode == MLDSA_PWR2RND); + decompose_enable_o = (prim_instr.opcode.mode.aux_mode inside {MLDSA_DECOMP,MLDSA_USEHINT}); + skencode_enable_o = (prim_instr.opcode.mode.aux_mode == MLDSA_SKENCODE); + pkdecode_enable_o = (prim_instr.opcode.mode.aux_mode == MLDSA_PKDECODE); + sigdecode_h_enable_o = (prim_instr.opcode.mode.aux_mode == MLDSA_SIGDEC_H); + sigdecode_z_enable_o = (prim_instr.opcode.mode.aux_mode == MLDSA_SIGDEC_Z); + normcheck_enable[0] = (prim_instr.opcode.mode.aux_mode == MLDSA_NORMCHK); + lfsr_enable_o = (prim_instr.opcode.mode.aux_mode == MLDSA_LFSR); end end MLDSA_CTRL_DONE: begin - if ((~sampler_busy_i & ~ntt_busy_i[0] & ~instr.opcode.aux_en) | - (instr.opcode.aux_en & (instr.opcode.mode.aux_mode inside {MLDSA_DECOMP,MLDSA_USEHINT}) & decompose_done_i) | - (instr.opcode.aux_en & (instr.opcode.mode.aux_mode == MLDSA_PWR2RND) & power2round_done_i) | - (instr.opcode.aux_en & (instr.opcode.mode.aux_mode == MLDSA_SKENCODE) & skencode_done_i) | - (instr.opcode.aux_en & (instr.opcode.mode.aux_mode == MLDSA_PKDECODE) & pkdecode_done_i) | - (instr.opcode.aux_en & (instr.opcode.mode.aux_mode == MLDSA_SIGDEC_H) & sigdecode_h_done_i) | - (instr.opcode.aux_en & (instr.opcode.mode.aux_mode == MLDSA_SIGDEC_Z) & sigdecode_z_done_i) | - (instr.opcode.aux_en & (instr.opcode.mode.aux_mode == MLDSA_NORMCHK) & normcheck_done_i) | - (instr.opcode.aux_en & (instr.opcode.mode.aux_mode == MLDSA_LFSR)) ) begin + if ((~sampler_busy_i & ~ntt_busy_i[0] & ~prim_instr.opcode.aux_en) | + (prim_instr.opcode.aux_en & (prim_instr.opcode.mode.aux_mode inside {MLDSA_DECOMP,MLDSA_USEHINT}) & decompose_done_i) | + (prim_instr.opcode.aux_en & (prim_instr.opcode.mode.aux_mode == MLDSA_PWR2RND) & power2round_done_i) | + (prim_instr.opcode.aux_en & (prim_instr.opcode.mode.aux_mode == MLDSA_SKENCODE) & skencode_done_i) | + (prim_instr.opcode.aux_en & (prim_instr.opcode.mode.aux_mode == MLDSA_PKDECODE) & pkdecode_done_i) | + (prim_instr.opcode.aux_en & (prim_instr.opcode.mode.aux_mode == MLDSA_SIGDEC_H) & sigdecode_h_done_i) | + (prim_instr.opcode.aux_en & (prim_instr.opcode.mode.aux_mode == MLDSA_SIGDEC_Z) & sigdecode_z_done_i) | + (prim_instr.opcode.aux_en & (prim_instr.opcode.mode.aux_mode == MLDSA_NORMCHK) & normcheck_done_i) | + (prim_instr.opcode.aux_en & (prim_instr.opcode.mode.aux_mode == MLDSA_LFSR)) ) begin ctrl_fsm_ns = MLDSA_CTRL_IDLE; @@ -1310,77 +1308,79 @@ mldsa_seq_prim mldsa_seq_prim_inst .rst_b(rst_b), .zeroize(zeroize), - .en_i(seq_en), - .addr_i(prog_cntr_nxt), - .data_o(instr) + .en_i(prim_seq_en), + .addr_i(prim_prog_cntr_nxt), + .data_o(prim_instr) ); //Second sequencer for simultaneous signing operations - - //FIXME need to optimize sequencer - //Should be able to implement opcode to load next keccak data while sampler or ntt is running - //requires a granularity of busy that is context aware of the current and next steps or - //need a way to advance to the next step and understand when it can proceed and when it can't always_comb sign_subcomponent_busy = !(sign_ctrl_fsm_ns inside {MLDSA_CTRL_IDLE}) | ntt_busy_i[1]; //program counter always_ff @(posedge clk or negedge rst_b) begin if(!rst_b) begin - sign_prog_cntr <= MLDSA_RESET; + sec_prog_cntr <= MLDSA_RESET; end else if(zeroize) begin - sign_prog_cntr <= MLDSA_RESET; + sec_prog_cntr <= MLDSA_RESET; end else begin if (error_flag_edge) begin - sign_prog_cntr <= MLDSA_ERROR; + sec_prog_cntr <= MLDSA_ERROR; end else begin - sign_prog_cntr <= sign_prog_cntr_nxt; + sec_prog_cntr <= sec_prog_cntr_nxt; end end end //subroutine decode always_comb begin - sign_prog_cntr_nxt = MLDSA_RESET; + sec_prog_cntr_nxt = MLDSA_RESET; clear_c_valid = 0; clear_y_valid = 0; clear_w0_valid = 0; set_signature_valid = 0; signature_done = 0; - unique case (sign_prog_cntr) inside + sec_seq_en = 1; + + unique case (sec_prog_cntr) inside MLDSA_RESET : begin // Waiting for new valid command unique case (cmd_reg) inside MLDSA_KEYGEN : begin // keygen - sign_prog_cntr_nxt = MLDSA_RESET; + sec_prog_cntr_nxt = MLDSA_RESET; + sec_seq_en = 0; end MLDSA_SIGN : begin // signing - sign_prog_cntr_nxt = MLDSA_SIGN_INIT_S; + sec_prog_cntr_nxt = MLDSA_SIGN_INIT_S; + sec_seq_en = 1; end MLDSA_VERIFY : begin // verifying - sign_prog_cntr_nxt = MLDSA_RESET; + sec_prog_cntr_nxt = MLDSA_RESET; + sec_seq_en = 0; end MLDSA_KEYGEN_SIGN : begin // KEYGEN + SIGNING - sign_prog_cntr_nxt = MLDSA_RESET; + sec_prog_cntr_nxt = MLDSA_RESET; + sec_seq_en = 0; end default : begin - sign_prog_cntr_nxt = MLDSA_RESET; + sec_prog_cntr_nxt = MLDSA_RESET; + sec_seq_en = 0; end endcase - if (keygen_signing_process & (prog_cntr == MLDSA_KG_JUMP_SIGN)) begin - sign_prog_cntr_nxt = MLDSA_SIGN_INIT_S; + if (keygen_signing_process & (prim_prog_cntr == MLDSA_KG_JUMP_SIGN)) begin + sec_prog_cntr_nxt = MLDSA_SIGN_INIT_S; end end //START of C access - check if C is valid MLDSA_SIGN_CHECK_C_VLD : begin if (c_valid) begin set_signature_valid = 1; - sign_prog_cntr_nxt = sign_prog_cntr + 1; + sec_prog_cntr_nxt = sec_prog_cntr + 1; end else begin - sign_prog_cntr_nxt = sign_prog_cntr; + sec_prog_cntr_nxt = sec_prog_cntr; end end //END of C access - SET C valid @@ -1388,42 +1388,42 @@ mldsa_seq_prim mldsa_seq_prim_inst //FIXME another place to consider odd/even counter on challenge/validity MLDSA_SIGN_CLEAR_C : begin clear_c_valid = 1; - sign_prog_cntr_nxt = MLDSA_SIGN_CHECK_C_VLD; + sec_prog_cntr_nxt = MLDSA_SIGN_CHECK_C_VLD; end //START of Y access - check if Y is valid MLDSA_SIGN_CHECK_Y_VLD : begin if (y_valid & w0_valid) begin - sign_prog_cntr_nxt = sign_prog_cntr + 1; + sec_prog_cntr_nxt = sec_prog_cntr + 1; end else begin - sign_prog_cntr_nxt = sign_prog_cntr; + sec_prog_cntr_nxt = sec_prog_cntr; end end //END of Y access - SET Y valid MLDSA_SIGN_CLEAR_Y : begin clear_y_valid = 1; - sign_prog_cntr_nxt = sign_prog_cntr + 1; + sec_prog_cntr_nxt = sec_prog_cntr + 1; end //START of W0 access - check if W0 is valid MLDSA_SIGN_CHECK_W0_VLD : begin if (w0_valid) begin - sign_prog_cntr_nxt = sign_prog_cntr + 1; + sec_prog_cntr_nxt = sec_prog_cntr + 1; end else begin - sign_prog_cntr_nxt = sign_prog_cntr ; + sec_prog_cntr_nxt = sec_prog_cntr ; end end //END of W0 access - SET W0 valid MLDSA_SIGN_CLEAR_W0 : begin clear_w0_valid = 1; - sign_prog_cntr_nxt = sign_prog_cntr + 1; + sec_prog_cntr_nxt = sec_prog_cntr + 1; end MLDSA_SIGN_GEN_S : begin // end of validity checks if (signature_valid) begin - sign_prog_cntr_nxt = sign_prog_cntr + 2; //Jump to encode + sec_prog_cntr_nxt = MLDSA_SIGN_GEN_E; //Jump to end end else begin //restart - sign_prog_cntr_nxt = sign_prog_cntr + 1; + sec_prog_cntr_nxt = MLDSA_SIGN_CLEAR_C; end end MLDSA_SIGN_GEN_E : begin // Successful signature generation @@ -1431,10 +1431,10 @@ mldsa_seq_prim mldsa_seq_prim_inst end default : begin if (sign_subcomponent_busy) begin //Stalled until sub-component is done - sign_prog_cntr_nxt = sign_prog_cntr; + sec_prog_cntr_nxt = sec_prog_cntr; end else begin - sign_prog_cntr_nxt = sign_prog_cntr + 1; + sec_prog_cntr_nxt = sec_prog_cntr + 1; end end endcase @@ -1443,28 +1443,28 @@ mldsa_seq_prim mldsa_seq_prim_inst //instruciton decode - drives secondary ntt always_comb begin ntt_mode_o[1] = MLDSA_NTT_NONE; - if (sign_instr.opcode.ntt_en) begin - ntt_mode_o[1] = sign_instr.opcode.mode.ntt_mode; + if (sec_instr.opcode.ntt_en) begin + ntt_mode_o[1] = sec_instr.opcode.mode.ntt_mode; end end //FIXME one interface here? - always_comb ntt_mem_base_addr_o[1] = '{src_base_addr:sign_instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0], - interim_base_addr:sign_instr.operand2[MLDSA_MEM_ADDR_WIDTH-1:0], - dest_base_addr:sign_instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]}; - - always_comb pwo_mem_base_addr_o[1] = '{pw_base_addr_b:sign_instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0], //FIXME PWO src - pw_base_addr_a:sign_instr.operand2[MLDSA_MEM_ADDR_WIDTH-1:0], //FIXME PWO src or sampler src - pw_base_addr_c:sign_instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]}; - - always_comb aux_src0_base_addr_o[1] = sign_instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0]; - always_comb aux_src1_base_addr_o[1] = sign_instr.operand2[MLDSA_MEM_ADDR_WIDTH-1:0]; - always_comb aux_dest_base_addr_o[1] = sign_instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]; - - always_comb normcheck_mode_o = (instr.opcode.aux_en & (instr.opcode.mode.aux_mode == MLDSA_NORMCHK)) ? instr.imm[1:0] : - (sign_instr.opcode.aux_en & (sign_instr.opcode.mode.aux_mode == MLDSA_NORMCHK)) ? sign_instr.imm[1:0] : '0; - always_comb normcheck_src_addr_o = (instr.opcode.aux_en & (instr.opcode.mode.aux_mode == MLDSA_NORMCHK)) ? aux_src0_base_addr_o[0] : - (sign_instr.opcode.aux_en & (sign_instr.opcode.mode.aux_mode == MLDSA_NORMCHK)) ? aux_src0_base_addr_o[1] : '0; + always_comb ntt_mem_base_addr_o[1] = '{src_base_addr:sec_instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0], + interim_base_addr:sec_instr.operand2[MLDSA_MEM_ADDR_WIDTH-1:0], + dest_base_addr:sec_instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]}; + + always_comb pwo_mem_base_addr_o[1] = '{pw_base_addr_b:sec_instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0], //FIXME PWO src + pw_base_addr_a:sec_instr.operand2[MLDSA_MEM_ADDR_WIDTH-1:0], //FIXME PWO src or sampler src + pw_base_addr_c:sec_instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]}; + + always_comb aux_src0_base_addr_o[1] = sec_instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0]; + always_comb aux_src1_base_addr_o[1] = sec_instr.operand2[MLDSA_MEM_ADDR_WIDTH-1:0]; + always_comb aux_dest_base_addr_o[1] = sec_instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]; + + always_comb normcheck_mode_o = (prim_instr.opcode.aux_en & (prim_instr.opcode.mode.aux_mode == MLDSA_NORMCHK)) ? prim_instr.imm[1:0] : + (sec_instr.opcode.aux_en & (sec_instr.opcode.mode.aux_mode == MLDSA_NORMCHK)) ? sec_instr.imm[1:0] : '0; + always_comb normcheck_src_addr_o = (prim_instr.opcode.aux_en & (prim_instr.opcode.mode.aux_mode == MLDSA_NORMCHK)) ? aux_src0_base_addr_o[0] : + (sec_instr.opcode.aux_en & (sec_instr.opcode.mode.aux_mode == MLDSA_NORMCHK)) ? aux_src0_base_addr_o[1] : '0; always_comb normcheck_enable_o = |normcheck_enable; //State logic @@ -1479,27 +1479,27 @@ always_comb begin : secpondary_ctrl_fsm_out_combo unique case (sign_ctrl_fsm_ps) MLDSA_CTRL_IDLE: begin //Start function - if (sign_instr.opcode.sampler_en) + if (sec_instr.opcode.sampler_en) sign_ctrl_fsm_ns = MLDSA_CTRL_ERROR; - if (sign_instr.opcode.ntt_en | sign_instr.opcode.aux_en) + if (sec_instr.opcode.ntt_en | sec_instr.opcode.aux_en) sign_ctrl_fsm_ns = MLDSA_CTRL_FUNC_START; end MLDSA_CTRL_FUNC_START: begin sign_ctrl_fsm_ns = MLDSA_CTRL_DONE; - ntt_enable_o[1] = sign_instr.opcode.ntt_en; - if (sign_instr.opcode.aux_en) begin - skdecode_enable_o = (sign_instr.opcode.mode.aux_mode == MLDSA_SKDECODE); - makehint_enable_o = (sign_instr.opcode.mode.aux_mode == MLDSA_MAKEHINT); - normcheck_enable[1] = (sign_instr.opcode.mode.aux_mode == MLDSA_NORMCHK); - sigencode_enable_o = (sign_instr.opcode.mode.aux_mode == MLDSA_SIGENC); + ntt_enable_o[1] = sec_instr.opcode.ntt_en; + if (sec_instr.opcode.aux_en) begin + skdecode_enable_o = (sec_instr.opcode.mode.aux_mode == MLDSA_SKDECODE); + makehint_enable_o = (sec_instr.opcode.mode.aux_mode == MLDSA_MAKEHINT); + normcheck_enable[1] = (sec_instr.opcode.mode.aux_mode == MLDSA_NORMCHK); + sigencode_enable_o = (sec_instr.opcode.mode.aux_mode == MLDSA_SIGENC); end end MLDSA_CTRL_DONE: begin - if ((sign_instr.opcode.ntt_en & ~ntt_busy_i[1]) | - (sign_instr.opcode.aux_en & (sign_instr.opcode.mode.aux_mode == MLDSA_SKDECODE) & skdecode_done_i) | - (sign_instr.opcode.aux_en & (sign_instr.opcode.mode.aux_mode == MLDSA_MAKEHINT) & makehint_done_i) | - (sign_instr.opcode.aux_en & (sign_instr.opcode.mode.aux_mode == MLDSA_NORMCHK) & normcheck_done_i) | - (sign_instr.opcode.aux_en & (sign_instr.opcode.mode.aux_mode == MLDSA_SIGENC) & sigencode_done_i) ) begin + if ((sec_instr.opcode.ntt_en & ~ntt_busy_i[1]) | + (sec_instr.opcode.aux_en & (sec_instr.opcode.mode.aux_mode == MLDSA_SKDECODE) & skdecode_done_i) | + (sec_instr.opcode.aux_en & (sec_instr.opcode.mode.aux_mode == MLDSA_MAKEHINT) & makehint_done_i) | + (sec_instr.opcode.aux_en & (sec_instr.opcode.mode.aux_mode == MLDSA_NORMCHK) & normcheck_done_i) | + (sec_instr.opcode.aux_en & (sec_instr.opcode.mode.aux_mode == MLDSA_SIGENC) & sigencode_done_i) ) begin sign_ctrl_fsm_ns = MLDSA_CTRL_IDLE; end end @@ -1527,9 +1527,9 @@ mldsa_seq_sec mldsa_seq_sec_inst .rst_b(rst_b), .zeroize(zeroize), - .en_i(seq_en), - .addr_i(sign_prog_cntr_nxt), - .data_o(sign_instr) + .en_i(sec_seq_en), + .addr_i(sec_prog_cntr_nxt), + .data_o(sec_instr) ); @@ -1588,7 +1588,7 @@ mldsa_seq_sec mldsa_seq_sec_inst end else begin if (seq_en) begin - unique case(sign_prog_cntr_nxt) + unique case(sec_prog_cntr_nxt) MLDSA_SIGN_VALID_S : begin //NTT(C) NTT_raw_signal <= 'h1; PWM_raw_signal <= 'h0; diff --git a/src/mldsa_top/rtl/mldsa_ctrl_pkg.sv b/src/mldsa_top/rtl/mldsa_ctrl_pkg.sv index d97e7d7..cbbe1e0 100644 --- a/src/mldsa_top/rtl/mldsa_ctrl_pkg.sv +++ b/src/mldsa_top/rtl/mldsa_ctrl_pkg.sv @@ -36,6 +36,7 @@ package mldsa_ctrl_pkg; localparam MSG_NUM_DWORDS = 16; localparam PRIVKEY_NUM_DWORDS = 1224; localparam PRIVKEY_REG_NUM_DWORDS = 32; + localparam PRIVKEY_REG_RHO_NUM_DWORDS = 8; localparam PRIVKEY_MEM_NUM_DWORDS = PRIVKEY_NUM_DWORDS - PRIVKEY_REG_NUM_DWORDS; localparam SIGN_RND_NUM_DWORDS = 8; localparam PUBKEY_NUM_DWORDS = 648; @@ -76,7 +77,7 @@ package mldsa_ctrl_pkg; localparam PK_ADDR_W = $clog2(PUBKEY_NUM_DWORDS); localparam PK_MEM_ADDR_W = $clog2(PK_MEM_DEPTH); localparam PK_MEM_OFFSET_W = $clog2(PK_MEM_DATA_W/32); - localparam PK_RHO_REG_ADDR_W = $clog2(8); //fixme + localparam PK_RHO_REG_ADDR_W = $clog2(PRIVKEY_REG_RHO_NUM_DWORDS); typedef struct packed { @@ -437,7 +438,7 @@ package mldsa_ctrl_pkg; localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_CLEAR_W0 = MLDSA_SIGN_VALID_S + 103; localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_GEN_S = MLDSA_SIGN_VALID_S + 105; localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_CLEAR_C = MLDSA_SIGN_GEN_S + 1; - localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_GEN_E = MLDSA_SIGN_GEN_S + 9; + localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_GEN_E = MLDSA_SIGN_GEN_S + 2; endpackage diff --git a/src/mldsa_top/rtl/mldsa_seq_prim.sv b/src/mldsa_top/rtl/mldsa_seq_prim.sv index 6fb4a5a..63ba99d 100644 --- a/src/mldsa_top/rtl/mldsa_seq_prim.sv +++ b/src/mldsa_top/rtl/mldsa_seq_prim.sv @@ -460,6 +460,9 @@ module mldsa_seq_prim default : data_o_rom <= '{opcode: MLDSA_UOP_NOP, imm:'h0000, length:'d00, operand1:MLDSA_NOP, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; endcase end + else begin + data_o_rom <= '{opcode: MLDSA_UOP_NOP, imm:'h0000, length:'d00, operand1:MLDSA_NOP, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + end end endmodule diff --git a/src/mldsa_top/rtl/mldsa_seq_sec.sv b/src/mldsa_top/rtl/mldsa_seq_sec.sv index ac9bae9..f38d8ee 100644 --- a/src/mldsa_top/rtl/mldsa_seq_sec.sv +++ b/src/mldsa_top/rtl/mldsa_seq_sec.sv @@ -216,6 +216,9 @@ module mldsa_seq_sec default : data_o_rom <= '{opcode: MLDSA_UOP_NOP, imm:'h0000, length:'d00, operand1:MLDSA_NOP, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; endcase end + else begin + data_o_rom <= '{opcode: MLDSA_UOP_NOP, imm:'h0000, length:'d00, operand1:MLDSA_NOP, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + end end endmodule \ No newline at end of file From aaeb31ac8ab8bd315134cecf1260c9a3fe0d7dac Mon Sep 17 00:00:00 2001 From: Nitsirks Date: Wed, 20 Nov 2024 09:27:01 -0800 Subject: [PATCH 19/23] adding mask/shuff enable to sequencer opcodes fixed issue where signing end took a few extra clocks renamed secondary sequencer logic --- src/mldsa_top/rtl/mldsa_ctrl.sv | 13 +++++- src/mldsa_top/rtl/mldsa_ctrl_pkg.sv | 63 +++++++++++++++-------------- src/mldsa_top/rtl/mldsa_top.sv | 18 ++++----- 3 files changed, 50 insertions(+), 44 deletions(-) diff --git a/src/mldsa_top/rtl/mldsa_ctrl.sv b/src/mldsa_top/rtl/mldsa_ctrl.sv index f782456..a85d1d2 100644 --- a/src/mldsa_top/rtl/mldsa_ctrl.sv +++ b/src/mldsa_top/rtl/mldsa_ctrl.sv @@ -67,6 +67,8 @@ module mldsa_ctrl output mldsa_ntt_mode_e [1:0] ntt_mode_o, output ntt_mem_addr_t [1:0] ntt_mem_base_addr_o, output pwo_mem_addr_t [1:0] pwo_mem_base_addr_o, + output logic [1:0] ntt_masking_en_o, + output logic [1:0] ntt_shuffling_en_o, input logic [1:0] ntt_busy_i, //aux interfaces @@ -1146,8 +1148,12 @@ always_comb mldsa_privkey_lock = '0; always_comb begin ntt_mode_o[0] = MLDSA_NTT_NONE; + ntt_masking_en_o[0] = 0; + ntt_shuffling_en_o[0] = 0; if (prim_instr.opcode.ntt_en) begin ntt_mode_o[0] = prim_instr.opcode.mode.ntt_mode; + ntt_masking_en_o[0] = prim_instr.opcode.masking_en; + ntt_shuffling_en_o[0] = prim_instr.opcode.shuffling_en; end end @@ -1354,7 +1360,6 @@ mldsa_seq_prim mldsa_seq_prim_inst end MLDSA_SIGN : begin // signing sec_prog_cntr_nxt = MLDSA_SIGN_INIT_S; - sec_seq_en = 1; end MLDSA_VERIFY : begin // verifying sec_prog_cntr_nxt = MLDSA_RESET; @@ -1362,7 +1367,6 @@ mldsa_seq_prim mldsa_seq_prim_inst end MLDSA_KEYGEN_SIGN : begin // KEYGEN + SIGNING sec_prog_cntr_nxt = MLDSA_RESET; - sec_seq_en = 0; end default : begin sec_prog_cntr_nxt = MLDSA_RESET; @@ -1371,6 +1375,7 @@ mldsa_seq_prim mldsa_seq_prim_inst endcase if (keygen_signing_process & (prim_prog_cntr == MLDSA_KG_JUMP_SIGN)) begin sec_prog_cntr_nxt = MLDSA_SIGN_INIT_S; + sec_seq_en = 1; end end //START of C access - check if C is valid @@ -1443,8 +1448,12 @@ mldsa_seq_prim mldsa_seq_prim_inst //instruciton decode - drives secondary ntt always_comb begin ntt_mode_o[1] = MLDSA_NTT_NONE; + ntt_masking_en_o[1] = 0; + ntt_shuffling_en_o[1] = 0; if (sec_instr.opcode.ntt_en) begin ntt_mode_o[1] = sec_instr.opcode.mode.ntt_mode; + ntt_masking_en_o[1] = sec_instr.opcode.masking_en; + ntt_shuffling_en_o[1] = sec_instr.opcode.shuffling_en; end end diff --git a/src/mldsa_top/rtl/mldsa_ctrl_pkg.sv b/src/mldsa_top/rtl/mldsa_ctrl_pkg.sv index cbbe1e0..6eaa399 100644 --- a/src/mldsa_top/rtl/mldsa_ctrl_pkg.sv +++ b/src/mldsa_top/rtl/mldsa_ctrl_pkg.sv @@ -174,7 +174,8 @@ package mldsa_ctrl_pkg; logic ntt_en; logic aux_en; mldsa_opcode_mode_u mode; - logic sca_en; + logic masking_en; + logic shuffling_en; } mldsa_opcode_t; typedef struct packed { @@ -187,39 +188,39 @@ package mldsa_ctrl_pkg; } mldsa_seq_instr_t; // MLDSA ISA - localparam mldsa_opcode_t MLDSA_UOP_NOP = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SAMPLER_NONE, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_SHAKE256 = '{keccak_en: 1'b1, sampler_en:1'b1, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SHAKE256, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_SHAKE128 = '{keccak_en: 1'b1, sampler_en:1'b1, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SHAKE128, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_REJB = '{keccak_en: 1'b1, sampler_en:1'b1, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_REJ_BOUNDED, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_REJS_PWM = '{keccak_en: 1'b1, sampler_en:1'b1, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWM_SMPL, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_REJS_PWMA = '{keccak_en: 1'b1, sampler_en:1'b1, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWM_ACCUM_SMPL, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_SIB = '{keccak_en: 1'b1, sampler_en:1'b1, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SAMPLE_IN_BALL, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_EXP_MASK = '{keccak_en: 1'b1, sampler_en:1'b1, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_EXP_MASK, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_NTT = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_NTT, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_INTT = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_INTT, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_PWM = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWM, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_PWA = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWA, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_PWS = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWS, sca_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_NOP = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SAMPLER_NONE, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_SHAKE256 = '{keccak_en: 1'b1, sampler_en:1'b1, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SHAKE256, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_SHAKE128 = '{keccak_en: 1'b1, sampler_en:1'b1, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SHAKE128, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_REJB = '{keccak_en: 1'b1, sampler_en:1'b1, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_REJ_BOUNDED, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_REJS_PWM = '{keccak_en: 1'b1, sampler_en:1'b1, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWM_SMPL, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_REJS_PWMA = '{keccak_en: 1'b1, sampler_en:1'b1, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWM_ACCUM_SMPL, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_SIB = '{keccak_en: 1'b1, sampler_en:1'b1, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SAMPLE_IN_BALL, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_EXP_MASK = '{keccak_en: 1'b1, sampler_en:1'b1, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_EXP_MASK, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_NTT = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_NTT, masking_en:1'b0, shuffling_en:1'b1}; + localparam mldsa_opcode_t MLDSA_UOP_INTT = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_INTT, masking_en:1'b0, shuffling_en:1'b1}; + localparam mldsa_opcode_t MLDSA_UOP_PWM = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWM, masking_en:1'b0, shuffling_en:1'b1}; + localparam mldsa_opcode_t MLDSA_UOP_PWA = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWA, masking_en:1'b0, shuffling_en:1'b1}; + localparam mldsa_opcode_t MLDSA_UOP_PWS = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWS, masking_en:1'b0, shuffling_en:1'b1}; //Load Keccak with data but don't run it yet - localparam mldsa_opcode_t MLDSA_UOP_LD_SHAKE256 = '{keccak_en: 1'b1, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SHAKE256, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_LD_SHAKE128 = '{keccak_en: 1'b1, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SHAKE128, sca_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_LD_SHAKE256 = '{keccak_en: 1'b1, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SHAKE256, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_LD_SHAKE128 = '{keccak_en: 1'b1, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SHAKE128, masking_en:1'b0, shuffling_en:1'b0}; //Run Keccak but don't load it - localparam mldsa_opcode_t MLDSA_UOP_RUN_SHAKE256 = '{keccak_en: 1'b0, sampler_en:1'b1, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SHAKE256, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_RUN_SHAKE128 = '{keccak_en: 1'b0, sampler_en:1'b1, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SHAKE128, sca_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_RUN_SHAKE256 = '{keccak_en: 1'b0, sampler_en:1'b1, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SHAKE256, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_RUN_SHAKE128 = '{keccak_en: 1'b0, sampler_en:1'b1, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SHAKE128, masking_en:1'b0, shuffling_en:1'b0}; // Aux functions - localparam mldsa_opcode_t MLDSA_UOP_DECOMP = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_DECOMP, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_SKDECODE = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_SKDECODE, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_SKENCODE = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_SKENCODE, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_MAKEHINT = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_MAKEHINT, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_NORMCHK = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_NORMCHK, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_SIGENCODE = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_SIGENC, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_PKDECODE = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_PKDECODE, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_SIGDEC_H = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_SIGDEC_H, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_SIGDEC_Z = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_SIGDEC_Z, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_HINTSUM = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_HINTSUM, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_USEHINT = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_USEHINT, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_PWR2RND = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_PWR2RND, sca_en:1'b0}; - localparam mldsa_opcode_t MLDSA_UOP_LFSR = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_LFSR, sca_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_DECOMP = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_DECOMP, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_SKDECODE = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_SKDECODE, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_SKENCODE = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_SKENCODE, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_MAKEHINT = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_MAKEHINT, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_NORMCHK = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_NORMCHK, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_SIGENCODE = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_SIGENC, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_PKDECODE = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_PKDECODE, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_SIGDEC_H = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_SIGDEC_H, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_SIGDEC_Z = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_SIGDEC_Z, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_HINTSUM = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_HINTSUM, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_USEHINT = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_USEHINT, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_PWR2RND = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_PWR2RND, masking_en:1'b0, shuffling_en:1'b0}; + localparam mldsa_opcode_t MLDSA_UOP_LFSR = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b1, mode:MLDSA_LFSR, masking_en:1'b0, shuffling_en:1'b0}; //Immediate encodings localparam [MLDSA_IMM_WIDTH-1:0] MLDSA_NORMCHK_Z = 'h0000; diff --git a/src/mldsa_top/rtl/mldsa_top.sv b/src/mldsa_top/rtl/mldsa_top.sv index 4e3b438..1022214 100644 --- a/src/mldsa_top/rtl/mldsa_top.sv +++ b/src/mldsa_top/rtl/mldsa_top.sv @@ -117,7 +117,8 @@ module mldsa_top logic [1:0][MLDSA_MEM_DATA_WIDTH-1:0] pwm_b_rd_data; logic [1:0] ntt_done; logic [1:0] ntt_busy; - logic [1:0] shuffle_en; + logic [1:0] ntt_shuffling_en; + logic [1:0] ntt_masking_en; mem_if_t w1_mem_wr_req; logic [3:0] w1_mem_wr_data; @@ -317,6 +318,8 @@ mldsa_ctrl mldsa_ctrl_inst .ntt_mode_o(ntt_mode), .ntt_mem_base_addr_o(ntt_mem_base_addr), .pwo_mem_base_addr_o(pwo_mem_base_addr), + .ntt_masking_en_o(ntt_masking_en), + .ntt_shuffling_en_o(ntt_shuffling_en), .ntt_busy_i(ntt_busy), //aux interface @@ -447,18 +450,15 @@ generate accumulate[g_inst] = '0; sampler_valid[g_inst] = 0; sampler_ntt_mode[g_inst] = 0; - shuffle_en[g_inst] = 0; //TODO: temp change for testing, remove and add to opcodes unique case (ntt_mode[g_inst]) inside MLDSA_NTT_NONE: begin end MLDSA_NTT: begin mode[g_inst] = ct; - shuffle_en[g_inst] = 1; end MLDSA_INTT: begin mode[g_inst] = gs; - shuffle_en[g_inst] = 1; end MLDSA_PWM_SMPL: begin mode[g_inst] = pwm; @@ -474,23 +474,19 @@ generate MLDSA_PWM: begin mode[g_inst] = pwm; sampler_valid[g_inst] = 1; - shuffle_en[g_inst] = 1; end MLDSA_PWM_ACCUM: begin mode[g_inst] = pwm; accumulate[g_inst] = 1; sampler_valid[g_inst] = 1; - shuffle_en[g_inst] = 1; end MLDSA_PWA: begin mode[g_inst] = pwa; sampler_valid[g_inst] = 1; - shuffle_en[g_inst] = 1; end MLDSA_PWS: begin mode[g_inst] = pws; sampler_valid[g_inst] = 1; - shuffle_en[g_inst] = 1; end default: begin end @@ -514,7 +510,7 @@ generate .pwo_mem_base_addr(pwo_mem_base_addr[g_inst]), .accumulate(accumulate[g_inst]), .sampler_valid(sampler_valid[g_inst]), - .shuffle_en(shuffle_en[g_inst]), + .shuffle_en(ntt_shuffling_en[g_inst]), .random(rand_bits[5:0]), //NTT mem IF .mem_wr_req(ntt_mem_wr_req[g_inst]), @@ -953,7 +949,7 @@ always_comb begin for (int bank = 0; bank < 2; bank++) begin ntt_mem_re0_bank[0][bank] = (ntt_mem_rd_req[0].rd_wr_en == RW_READ) & (ntt_mem_rd_req[0].addr[MLDSA_MEM_ADDR_WIDTH-1:MLDSA_MEM_ADDR_WIDTH-3] == i[2:0]) & (ntt_mem_rd_req[0].addr[0] == bank); pwo_a_mem_re0_bank[0][bank] = (pwm_a_rd_req[0].rd_wr_en == RW_READ) & (pwm_a_rd_req[0].addr[MLDSA_MEM_ADDR_WIDTH-1:MLDSA_MEM_ADDR_WIDTH-3] == i[2:0]) & (pwm_a_rd_req[0].addr[0] == bank); - pwo_b_mem_re0_bank[0][bank] = (shuffle_en[0] ? ~sampler_ntt_dv_f : ~sampler_ntt_dv) & (pwm_b_rd_req[0].rd_wr_en == RW_READ) & (pwm_b_rd_req[0].addr[MLDSA_MEM_ADDR_WIDTH-1:MLDSA_MEM_ADDR_WIDTH-3] == i[2:0]) & (pwm_b_rd_req[0].addr[0] == bank); + pwo_b_mem_re0_bank[0][bank] = (ntt_shuffling_en[0] ? ~sampler_ntt_dv_f : ~sampler_ntt_dv) & (pwm_b_rd_req[0].rd_wr_en == RW_READ) & (pwm_b_rd_req[0].addr[MLDSA_MEM_ADDR_WIDTH-1:MLDSA_MEM_ADDR_WIDTH-3] == i[2:0]) & (pwm_b_rd_req[0].addr[0] == bank); ntt_mem_re0_bank[1][bank] = (ntt_mem_rd_req[1].rd_wr_en == RW_READ) & (ntt_mem_rd_req[1].addr[MLDSA_MEM_ADDR_WIDTH-1:MLDSA_MEM_ADDR_WIDTH-3] == i[2:0]) & (ntt_mem_rd_req[1].addr[0] == bank); pwo_a_mem_re0_bank[1][bank] = (pwm_a_rd_req[1].rd_wr_en == RW_READ) & (pwm_a_rd_req[1].addr[MLDSA_MEM_ADDR_WIDTH-1:MLDSA_MEM_ADDR_WIDTH-3] == i[2:0]) & (pwm_a_rd_req[1].addr[0] == bank); @@ -987,7 +983,7 @@ always_comb begin end else begin ntt_mem_re[0][i] = (ntt_mem_rd_req[0].rd_wr_en == RW_READ) & (ntt_mem_rd_req[0].addr[MLDSA_MEM_ADDR_WIDTH-1:MLDSA_MEM_ADDR_WIDTH-3] == i[2:0]); pwo_a_mem_re[0][i] = (pwm_a_rd_req[0].rd_wr_en == RW_READ) & (pwm_a_rd_req[0].addr[MLDSA_MEM_ADDR_WIDTH-1:MLDSA_MEM_ADDR_WIDTH-3] == i[2:0]); - pwo_b_mem_re[0][i] = (shuffle_en[0] ? ~sampler_ntt_dv_f : ~sampler_ntt_dv) & (pwm_b_rd_req[0].rd_wr_en == RW_READ) & (pwm_b_rd_req[0].addr[MLDSA_MEM_ADDR_WIDTH-1:MLDSA_MEM_ADDR_WIDTH-3] == i[2:0]); + pwo_b_mem_re[0][i] = (ntt_shuffling_en[0] ? ~sampler_ntt_dv_f : ~sampler_ntt_dv) & (pwm_b_rd_req[0].rd_wr_en == RW_READ) & (pwm_b_rd_req[0].addr[MLDSA_MEM_ADDR_WIDTH-1:MLDSA_MEM_ADDR_WIDTH-3] == i[2:0]); ntt_mem_re[1][i] = (ntt_mem_rd_req[1].rd_wr_en == RW_READ) & (ntt_mem_rd_req[1].addr[MLDSA_MEM_ADDR_WIDTH-1:MLDSA_MEM_ADDR_WIDTH-3] == i[2:0]); pwo_a_mem_re[1][i] = (pwm_a_rd_req[1].rd_wr_en == RW_READ) & (pwm_a_rd_req[1].addr[MLDSA_MEM_ADDR_WIDTH-1:MLDSA_MEM_ADDR_WIDTH-3] == i[2:0]); From 451c26111336e615c8c7b60f04a576e172f93230 Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Wed, 20 Nov 2024 09:55:30 -0800 Subject: [PATCH 20/23] Clean up some commented out code --- .../rtl/abr_masked_N_bit_mult_two_share.sv | 18 +- src/mldsa_top/rtl/mldsa_config_defines.svh | 1 + src/mldsa_top/rtl/mldsa_top.sv | 11 +- .../{maksed_gadgets.py => masked_gadgets.py} | 0 src/ntt_top/Model/testForMasking.py | 2 +- src/ntt_top/config/compile.yml | 12 + src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv | 102 +++-- src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv | 16 +- src/ntt_top/rtl/ntt_masked_BFU_mult.sv | 6 +- src/ntt_top/rtl/ntt_masked_butterfly1x2.sv | 7 +- src/ntt_top/rtl/ntt_masked_gs_butterfly.sv | 2 +- src/ntt_top/tb/ntt_top_tb.sv | 347 ------------------ 12 files changed, 75 insertions(+), 449 deletions(-) rename src/ntt_top/Model/{maksed_gadgets.py => masked_gadgets.py} (100%) diff --git a/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv b/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv index ef281a8..d91478a 100644 --- a/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv +++ b/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv @@ -36,8 +36,8 @@ input wire rst_n, // Active low reset signal input wire zeroize, // Zeroize signal input wire [WIDTH-1:0] random, // Intermediate randomness - input wire [1:0][WIDTH-1:0] x, // WIDTH-bit arithmetic shares operand x - input wire [1:0][WIDTH-1:0] y, // WIDTH-bit arithmetic shares operand y + input wire [1:0][WIDTH-1:0] x, // WIDTH-bit arithmetic shares operand x + input wire [1:0][WIDTH-1:0] y, // WIDTH-bit arithmetic shares operand y output logic [1:0] z [WIDTH-1:0] // WIDTH-bit arithmetic shares output z ); @@ -47,20 +47,6 @@ logic [WIDTH-1:0] final_res [1:0]; logic [WIDTH-1:0] x0, x1, y0, y1; - // Format organization stage - // always_comb begin - // // x0 = x[0]; - // // x1 = x[1]; - // // y0 = y[0]; - // // y1 = y[1]; - // for (int i = 0; i < WIDTH; i++) begin - // x0[i] = x[i][0]; - // x1[i] = x[i][1]; - // y0[i] = y[i][0]; - // y1[i] = y[i][1]; - // end - // end - // Calculation stage always_comb begin calculation[0] = WIDTH'(x[0] * y[0]); // Multiplication of the first share x and first share y diff --git a/src/mldsa_top/rtl/mldsa_config_defines.svh b/src/mldsa_top/rtl/mldsa_config_defines.svh index 8d9932f..80e5393 100644 --- a/src/mldsa_top/rtl/mldsa_config_defines.svh +++ b/src/mldsa_top/rtl/mldsa_config_defines.svh @@ -18,6 +18,7 @@ `include "abr_sva.svh" // `define RV_FPGA_OPTIMIZE // `define RV_FPGA_SCA + `define MLDSA_MASKING `define ABR_ICG abr_clk_gate diff --git a/src/mldsa_top/rtl/mldsa_top.sv b/src/mldsa_top/rtl/mldsa_top.sv index cff9cd5..a32467e 100644 --- a/src/mldsa_top/rtl/mldsa_top.sv +++ b/src/mldsa_top/rtl/mldsa_top.sv @@ -207,6 +207,7 @@ module mldsa_top logic lfsr_enable; logic [1:0][LFSR_W-1:0] lfsr_seed; logic [RND_W-1:0] rand_bits; + logic [RND_W-7:0] ntt_rand_bits; //gasket to assemble reg requests logic mldsa_reg_dv; @@ -222,6 +223,12 @@ module mldsa_top mldsa_reg__in_t mldsa_reg_hwif_in; mldsa_reg__out_t mldsa_reg_hwif_out; + `ifdef MLDSA_MASKING + assign ntt_rand_bits = rand_bits[RND_W-1:6]; + `else + assign ntt_rand_bits = (RND_W-6)'(0); + `endif + abr_ahb_slv_sif #( .AHB_ADDR_WIDTH(AHB_ADDR_WIDTH), .AHB_DATA_WIDTH(AHB_DATA_WIDTH), @@ -449,7 +456,7 @@ generate sampler_valid[g_inst] = 0; sampler_ntt_mode[g_inst] = 0; shuffle_en[g_inst] = 0; //TODO: temp change for testing, remove and add to opcodes - ntt_random_en[g_inst] = 0; + ntt_random_en[g_inst] = 0; //Turn off random in NTT for all ops except PWM, INTT unique case (ntt_mode[g_inst]) inside MLDSA_NTT_NONE: begin @@ -524,7 +531,7 @@ generate .shuffle_en(shuffle_en[g_inst]), .random(rand_bits[5:0]), .masking_en(1'b0), - .rnd_i(rand_bits[RND_W-1:6] /*& {(RND_W-6){ntt_busy[g_inst]}}*/ & {(RND_W-6){ntt_random_en[g_inst]}}), //('h0), //rand_bits[RND_W-1:6]), + .rnd_i(ntt_rand_bits & {(RND_W-6){ntt_random_en[g_inst]}}), //NTT mem IF .mem_wr_req(ntt_mem_wr_req[g_inst]), .mem_rd_req(ntt_mem_rd_req[g_inst]), diff --git a/src/ntt_top/Model/maksed_gadgets.py b/src/ntt_top/Model/masked_gadgets.py similarity index 100% rename from src/ntt_top/Model/maksed_gadgets.py rename to src/ntt_top/Model/masked_gadgets.py diff --git a/src/ntt_top/Model/testForMasking.py b/src/ntt_top/Model/testForMasking.py index 225eec7..393277e 100644 --- a/src/ntt_top/Model/testForMasking.py +++ b/src/ntt_top/Model/testForMasking.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from maksed_gadgets import * +from masked_gadgets import * def test_one_share_mult(numTest = 10): diff --git a/src/ntt_top/config/compile.yml b/src/ntt_top/config/compile.yml index 5215523..b3e1140 100755 --- a/src/ntt_top/config/compile.yml +++ b/src/ntt_top/config/compile.yml @@ -113,6 +113,18 @@ targets: - $COMPILE_ROOT/tb/ntt_masked_mult_redux46_auto_tb.sv tops: [ntt_masked_mult_redux46_auto_tb] +--- +provides: [ntt_top_masking_tb] +schema_version: 2.4.0 +requires: + - ntt_top +targets: + rtl: + directories: [$COMPILE_ROOT/tb] + files: + - $COMPILE_ROOT/tb/ntt_top_masking_tb.sv + tops: [ntt_top_masking_tb] + --- provides: [ntt_utb] schema_version: 2.4.0 diff --git a/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv index 11331dc..25fbe33 100644 --- a/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv +++ b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv @@ -161,11 +161,6 @@ always_comb begin u11 = hybrid_pw_uvw_i.u3_i; v11 = hybrid_pw_uvw_i.v3_i; w11 = hybrid_pw_uvw_i.w3_i; - - // twiddle_w00 = hybrid_pw_uvw_i.twiddle_w0_i; - // twiddle_w01 = hybrid_pw_uvw_i.twiddle_w1_i; - // twiddle_w10 = hybrid_pw_uvw_i.twiddle_w2_i; - // twiddle_w11 = hybrid_pw_uvw_i.twiddle_w3_i; end else begin //Only applies to unmasked ops since in masking, intt receives inputs from pwm and not from the API u00 = uvw_i.u00_i; @@ -189,12 +184,13 @@ end //Split into shares always_comb begin //TODO: check randomness with Emre + //TODO: add flops here (input side) //Split u inputs if (masking_en) begin - u00_share[0] = /*uvw_i.u00_i*/WIDTH'(u00) - rnd_i[0]; + u00_share[0] = WIDTH'(u00) - rnd_i[0]; u00_share[1] = rnd_i[0]; - u01_share[0] = /*uvw_i.u01_i*/WIDTH'(u01) - rnd_i[1]; + u01_share[0] = WIDTH'(u01) - rnd_i[1]; u01_share[1] = rnd_i[1]; u10_share[0] = WIDTH'(u10) - rnd_i[0]; @@ -204,10 +200,10 @@ always_comb begin u11_share[1] = rnd_i[0]; //Split v inputs - v00_share[0] = /*uvw_i.v00_i*/WIDTH'(v00) - rnd_i[2]; + v00_share[0] = WIDTH'(v00) - rnd_i[2]; v00_share[1] = rnd_i[2]; - v01_share[0] = /*uvw_i.v01_i*/WIDTH'(v01) - rnd_i[3]; + v01_share[0] = WIDTH'(v01) - rnd_i[3]; v01_share[1] = rnd_i[3]; v10_share[0] = WIDTH'(v10) - rnd_i[2]; @@ -217,10 +213,10 @@ always_comb begin v11_share[1] = rnd_i[2]; //Split w inputs - w00_share[0] = /*uvw_i.w00_i*/WIDTH'(w00) - rnd_i[4]; + w00_share[0] = WIDTH'(w00) - rnd_i[4]; w00_share[1] = rnd_i[4]; - w01_share[0] = /*uvw_i.w01_i*/WIDTH'(w01) - rnd_i[0]; + w01_share[0] = WIDTH'(w01) - rnd_i[0]; w01_share[1] = rnd_i[0]; w10_share[0] = WIDTH'(w10) - rnd_i[1]; @@ -229,12 +225,6 @@ always_comb begin w11_share[0] = WIDTH'(w11) - rnd_i[2]; w11_share[1] = rnd_i[2]; - // w10_reg_share[0] = w10_reg[0] - rnd_i[1]; - // w10_reg_share[1] = rnd_i[1]; - - // w11_reg_share[0] = w11_reg[0] - rnd_i[2]; - // w11_reg_share[1] = rnd_i[2]; - twiddle_w00_share[0] = WIDTH'(masked_w00_reg[0]) - rnd_i[0]; twiddle_w00_share[1] = rnd_i[0]; @@ -243,61 +233,54 @@ always_comb begin end else begin - u00_share[0] = 'h0; - u00_share[1] = 'h0; - - u01_share[0] = 'h0; - u01_share[1] = 'h0; - - u10_share[0] = 'h0; - u10_share[1] = 'h0; - - u11_share[0] = 'h0; - u11_share[1] = 'h0; + u00_share[0] = 'h0; + u00_share[1] = 'h0; - //Split v input - v00_share[0] = 'h0; - v00_share[1] = 'h0; + u01_share[0] = 'h0; + u01_share[1] = 'h0; - v01_share[0] = 'h0; - v01_share[1] = 'h0; + u10_share[0] = 'h0; + u10_share[1] = 'h0; - v10_share[0] = 'h0; - v10_share[1] = 'h0; + u11_share[0] = 'h0; + u11_share[1] = 'h0; - v11_share[0] = 'h0; - v11_share[1] = 'h0; + //Split v input + v00_share[0] = 'h0; + v00_share[1] = 'h0; - //Split w input - w00_share[0] = 'h0; - w00_share[1] = 'h0; + v01_share[0] = 'h0; + v01_share[1] = 'h0; - w01_share[0] = 'h0; - w01_share[1] = 'h0; + v10_share[0] = 'h0; + v10_share[1] = 'h0; - w10_share[0] = 'h0; - w10_share[1] = 'h0; + v11_share[0] = 'h0; + v11_share[1] = 'h0; - w11_share[0] = 'h0; - w11_share[1] = 'h0; + //Split w input + w00_share[0] = 'h0; + w00_share[1] = 'h0; - // w10_reg_share[0] = 'h0; - // w10_reg_share[1] = 'h0; + w01_share[0] = 'h0; + w01_share[1] = 'h0; - // w11_reg_share[0] = 'h0; - // w11_reg_share[1] = 'h0; + w10_share[0] = 'h0; + w10_share[1] = 'h0; - twiddle_w00_share[0] = 'h0; - twiddle_w00_share[1] = 'h0; + w11_share[0] = 'h0; + w11_share[1] = 'h0; - twiddle_w01_share[0] = 'h0; - twiddle_w01_share[1] = 'h0; + twiddle_w00_share[0] = 'h0; + twiddle_w00_share[1] = 'h0; + twiddle_w01_share[0] = 'h0; + twiddle_w01_share[1] = 'h0; end end //---------------------------------------------------- -//Masked PWMs - Used in masked PWM+INTT mode only +//Masked PWMs - Used in masked PWM+INTT mode only - 207 clks //---------------------------------------------------- // `ifdef MASKING ntt_masked_pwm #( @@ -353,7 +336,7 @@ ntt_masked_pwm #( ); // `endif //---------------------------------------------------- -//Masked BFU stage 1 - Used in masked PWM+INTT mode only +//Masked BFU stage 1 - Used in masked PWM+INTT mode only - 260 clks //PWM outputs: uv00[1:0], uv01[1:0], uv10[1:0], uv11[1:0] //---------------------------------------------------- ntt_masked_butterfly1x2 #( @@ -362,7 +345,6 @@ ntt_masked_butterfly1x2 #( .clk(clk), .reset_n(reset_n), .zeroize(zeroize), - // .enable() .uvw_i({uv00_share, uv01_share, uv10_share, uv11_share, twiddle_w00_share, twiddle_w01_share}), //TODO check connection .rnd_i({rnd_i[4], rnd_i[3], rnd_i[2], rnd_i[1], rnd_i[0]}), .uv_o(masked_gs_stage1_uvo) @@ -415,7 +397,7 @@ ntt_butterfly #( .mode(mode), .opu_i(masking_en ? masked_gs_stage1_uvo.u20_o : u10), .opv_i(masking_en ? masked_gs_stage1_uvo.v20_o : v10), - .opw_i(masking_en ? masked_w10_reg[0] : pwo_mode ? w10 : w10_reg[0]), //TODO: delayed w10 + .opw_i(masking_en ? masked_w10_reg[0] : pwo_mode ? w10 : w10_reg[0]), .accumulate(accumulate), .u_o(uv_o.u20_o), .v_o(uv_o.v20_o), @@ -431,7 +413,7 @@ ntt_butterfly #( .mode(mode), .opu_i(masking_en ? masked_gs_stage1_uvo.u21_o : u11), .opv_i(masking_en ? masked_gs_stage1_uvo.v21_o : v11), - .opw_i(masking_en ? masked_w11_reg[0] : pwo_mode ? w11 : w11_reg[0]), //TODO: delayed w10 + .opw_i(masking_en ? masked_w11_reg[0] : pwo_mode ? w11 : w11_reg[0]), .accumulate(accumulate), .u_o(uv_o.u21_o), .v_o(uv_o.v21_o), @@ -448,7 +430,7 @@ always_ff @(posedge clk or negedge reset_n) begin else if (zeroize) masked_ready_reg <= 'b0; else begin - unique case(mode) //471:0 + unique case(mode) //471:0 delay flop for enable - TODO: optimize ct: masked_ready_reg <= {462'h0, enable, masked_ready_reg[UNMASKED_BF_LATENCY-1:1]}; gs: masked_ready_reg <= {462'h0, enable, masked_ready_reg[UNMASKED_BF_LATENCY-1:1]}; pwm: masked_ready_reg <= accumulate ? {467'h0, enable, masked_ready_reg[UNMASKED_PWM_LATENCY-1:1]} : {6'h0, enable, masked_ready_reg[UNMASKED_PWM_LATENCY-2:1]}; diff --git a/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv b/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv index 6fd1119..91b7b4e 100644 --- a/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +++ b/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv @@ -46,7 +46,6 @@ module ntt_masked_BFU_add_sub logic [1:0][WIDTH-1:0] add_res_reduced, prime_packed; logic [1:0] prime [WIDTH-1:0]; logic [WIDTH-1:0] add_res_bool0, add_res_bool1, add_res_arith0, add_res_arith1; - // logic [WIDTH-1:0] prime0, prime1; //Add flops to inputs to avoid pruning TODO always_comb begin @@ -115,19 +114,6 @@ module ntt_masked_BFU_add_sub logic [1:0] temp0 [WIDTH-1:0]; - // always_comb begin - // for(int i = 0; i< WIDTH; i++) begin - // add_res_bool0[i] = add_res_bool[i][0]; - // add_res_bool1[i] = add_res_bool[i][1]; - // // if (i==0) begin - // // temp0[i] = {add_res_bool[HALF_WIDTH][1], add_res_bool[HALF_WIDTH][0]}; - // // end - // // else begin - // // temp0[i] = '0; - // // end - // end - // end - //Convert 1 bit to 46 bit to pass to B2A converter - 1 clk always_ff @(posedge clk or negedge reset_n) begin if (!reset_n) begin @@ -194,7 +180,7 @@ module ntt_masked_BFU_add_sub end else begin for (int i = 0; i < WIDTH; i++) begin - res[i] <= {add_res_reduced[1][i],add_res_reduced[0][i]}; //TODO: check with Emre - shares XORed together give actual result. Is this correct? Or should they be added instead? + res[i] <= {add_res_reduced[1][i],add_res_reduced[0][i]}; end end end diff --git a/src/ntt_top/rtl/ntt_masked_BFU_mult.sv b/src/ntt_top/rtl/ntt_masked_BFU_mult.sv index fd53c44..db84050 100644 --- a/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +++ b/src/ntt_top/rtl/ntt_masked_BFU_mult.sv @@ -131,8 +131,8 @@ module ntt_masked_BFU_mult always_comb begin for (int i = 0; i < WIDTH; i++) begin - mul_res_redux0[i] = mul_res_reduced[i][0]; //mul_res_refresh[i][0]; - mul_res_redux1[i] = mul_res_reduced[i][1]; //mul_res_refresh[i][1]; + mul_res_redux0[i] = mul_res_reduced[i][0]; + mul_res_redux1[i] = mul_res_reduced[i][1]; end end @@ -146,7 +146,7 @@ module ntt_masked_BFU_mult res[i] <= 2'h0; end else begin - res <= mul_res_reduced; //mul_res_refresh; + res <= mul_res_reduced; end end diff --git a/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv b/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv index 99a757d..d04c926 100644 --- a/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +++ b/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv @@ -36,8 +36,7 @@ module ntt_masked_butterfly1x2 input masked_bf_uvwi_t uvw_i, input [4:0][WIDTH-1:0] rnd_i, - output bf_uvo_t uv_o //, - // output logic ready_o + output bf_uvo_t uv_o ); logic [1:0][WIDTH-1:0] u00, v00, w00; @@ -47,8 +46,8 @@ module ntt_masked_butterfly1x2 logic [1:0] u11_int [WIDTH-1:0]; logic [1:0] v11_int [WIDTH-1:0]; logic [1:0][WIDTH-1:0] u10_packed, v10_packed, u11_packed, v11_packed; - logic [HALF_WIDTH-1:0] u10_combined, v10_combined, u11_combined, v11_combined; //TODO: 46 bit or 23 bit? check with Emre - logic [HALF_WIDTH-1:0] u10_div2, v10_div2, u11_div2, v11_div2; //TODO: check width + logic [HALF_WIDTH-1:0] u10_combined, v10_combined, u11_combined, v11_combined; + logic [HALF_WIDTH-1:0] u10_div2, v10_div2, u11_div2, v11_div2; always_comb begin u00 = uvw_i.u00_i; diff --git a/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv b/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv index a6512b9..a9aa629 100644 --- a/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +++ b/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv @@ -31,7 +31,7 @@ module ntt_masked_gs_butterfly input wire [1:0][WIDTH-1:0] opu_i, input wire [1:0][WIDTH-1:0] opv_i, - input wire [1:0][WIDTH-1:0] opw_i, //benefit from splitting? Or should we use one share mult? + input wire [1:0][WIDTH-1:0] opw_i, input wire [4:0][WIDTH-1:0] rnd_i, output logic [1:0] u_o [WIDTH-1:0], //TODO: make packed? diff --git a/src/ntt_top/tb/ntt_top_tb.sv b/src/ntt_top/tb/ntt_top_tb.sv index 20a2fdf..529aee7 100644 --- a/src/ntt_top/tb/ntt_top_tb.sv +++ b/src/ntt_top/tb/ntt_top_tb.sv @@ -166,34 +166,6 @@ ntt_wrapper dut ( .ntt_busy() ); -// ntt_masked_BFU_add_sub dut ( -// .clk(clk_tb), -// .reset_n(reset_n_tb), -// .zeroize(zeroize_tb), -// .sub(sub), -// .u(u), -// .v(v), -// .rnd0(rnd0), -// .rnd1(rnd1), -// .rnd2(rnd2), -// .rnd3(rnd3), -// .res() -// ); - -// ntt_masked_BFU_mult dut ( -// .clk(clk_tb), -// .reset_n(reset_n_tb), -// .zeroize(zeroize_tb), -// .u(u), -// .v(v), -// .rnd0(rnd0), -// .rnd1(rnd1), -// .rnd2(rnd2), -// .rnd3(rnd3), -// .rnd4(rnd0+rnd1), -// .res() -// ); - // ntt_shuffle_buffer dut ( // .clk(clk_tb), // .reset_n(reset_n_tb), @@ -208,54 +180,6 @@ ntt_wrapper dut ( // .data_o() // ); -// ntt_masked_gs_butterfly dut ( -// .clk(clk_tb), -// .reset_n(reset_n_tb), -// .zeroize(zeroize_tb), -// .opu_i(u), -// .opv_i(v), -// .opw_i(w), -// .rnd_i({rnd0+rnd1, rnd3, rnd2, rnd1, rnd0}), -// .u_o(), -// .v_o() -// ); - -// ntt_masked_pwm dut ( -// .clk(clk_tb), -// .reset_n(reset_n_tb), -// .zeroize(zeroize_tb), -// .u(u), -// .v(v), -// .w(w), -// .rnd({rnd0+rnd1, rnd3, rnd2, rnd1, rnd0}), -// .res() -// ); - -// ntt_masked_butterfly1x2 dut ( -// .clk(clk_tb), -// .reset_n(reset_n_tb), -// .zeroize(zeroize_tb), -// .uvw_i(uvw_i_tb), -// .rnd_i({rnd0+rnd1, rnd3, rnd2, rnd1, rnd0}), -// .uv_o() -// ); - -// ntt_hybrid_butterfly_2x2 dut ( -// .clk(clk_tb), -// .reset_n(reset_n_tb), -// .zeroize(zeroize_tb), -// .mode(mode_tb), -// .enable(enable_tb), -// .masking_en(1'b0), -// .uvw_i(uvw_i_tb), -// .pw_uvw_i(pw_uvw_i_tb), -// .rnd_i({rnd0+rnd1, rnd3, rnd2, rnd1, rnd0}), -// .accumulate(1'b0), -// .uv_o(), -// .pwo_uv_o(), -// .ready_o() -// ); - //---------------------------------------------------------------- // clk_gen // @@ -742,272 +666,6 @@ task init_mem(); load_tb_addr = 'h0; endtask -/* -task masked_BFU_adder_test(); - logic [45:0] u_array, v_array; - logic [45:0] rand0, rand1; - sub = 1; - for (int i = 0; i < 1000; i++) begin - @(posedge clk_tb); - fork - begin - actual_u = $random()%PRIME; - actual_v = $random()%PRIME; - u_array = actual_u; - v_array = actual_v; - rand0 = $random(); - rand1 = $random(); - - u[0] = actual_u-rand0; - u[1] = rand0; - v[0] = actual_v-rand1; - v[1] = rand1; - // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); - end - begin - repeat(54) @(posedge clk_tb); - if (!sub) begin - if ((dut.add_res_reduced[1] + dut.add_res_reduced[0]) != ((u_array + v_array)%PRIME)) begin - $error("Addition Mismatch: exp_output = %h output shares = %h %h actual output = %h", (u_array + v_array)%PRIME, dut.add_res_reduced[0], dut.add_res_reduced[1], dut.add_res_reduced[0] + dut.add_res_reduced[1]); - end - end - else begin - if ((dut.add_res_reduced[1] + dut.add_res_reduced[0]) != ((u_array - v_array + PRIME)%PRIME)) begin - $error("Subtraction Mismatch: exp_output = %h output shares = %h %h actual output = %h", (u_array + PRIME + (~v_array+'h1))%PRIME, dut.add_res_reduced[0], dut.add_res_reduced[1], dut.add_res_reduced[0] + dut.add_res_reduced[1]); - end - end - end - join - end -endtask - - -task masked_BFU_mult_test(); - logic [45:0] u_array, v_array; - logic [45:0] rand0, rand1; - - for (int i = 0; i < 10; i++) begin - @(posedge clk_tb); - fork - begin - actual_u = $random()%PRIME; - actual_v = $random()%PRIME; - u_array = actual_u; - v_array = actual_v; - rand0 = $random(); - rand1 = $random(); - - // $display("actual u = %h, actual v = %h", actual_u, actual_v); - - u[0] = actual_u-rand0; - u[1] = rand0; - v[0] = actual_v-rand1; - v[1] = rand1; - // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); - end - begin - repeat(210) @(posedge clk_tb); - if ((dut.final_res[1] + dut.final_res[0]) != ((u_array * v_array)%PRIME)) begin - $error("Multiplication Mismatch: exp_output = %h output shares = %h %h actual output = %h", (u_array * v_array)%PRIME, dut.final_res[0], dut.final_res[1], dut.final_res[0] + dut.final_res[1]); - end - end - join - end -endtask -*/ - - -// task masked_gs_butterfly_test(); -// logic [45:0] rand0, rand1, rand2; -// logic [45:0] actual_u_normalized; -// for (int i = 0; i < 10; i++) begin -// @(posedge clk_tb); -// fork -// begin -// actual_u = $random()%PRIME; -// actual_v = $random()%PRIME; -// actual_w = 'h2; -// if (actual_u < actual_v) -// actual_u_normalized = actual_u + PRIME; -// else -// actual_u_normalized = actual_u; -// // u_array = actual_u; -// // v_array = actual_v; -// rand0 = $random(); -// rand1 = $random(); -// rand2 = $random(); - -// // $display("actual u = %h, actual v = %h", actual_u, actual_v); - -// u[0] = actual_u-rand0; -// u[1] = rand0; -// v[0] = actual_v-rand1; -// v[1] = rand1; -// w[0] = actual_w-rand2; -// w[1] = rand2; -// // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); -// end -// begin -// repeat(264) @(posedge clk_tb); -// if ((dut.u_o_0 + dut.u_o_1) != ((actual_u_normalized + actual_v)%PRIME)) begin -// $error("U = u+v Mismatch: exp_output = %h output shares = %h %h actual output = %h", (actual_u_normalized + actual_v)%PRIME, dut.u_o_0, dut.u_o_1, dut.u_o_0 + dut.u_o_1); -// end -// if ((dut.v_o_0 + dut.v_o_1) != (((actual_u_normalized - actual_v)*actual_w)%PRIME)) begin -// $error("V = (u-v)w Mismatch: exp_output = %h output shares = %h %h actual output = %h", ((actual_u_normalized - actual_v)*actual_w)%PRIME, dut.v_o_0, dut.v_o_1, dut.v_o_0 + dut.v_o_1); -// end -// end -// join -// end -// endtask - -/* -task masked_pwm_test(); - logic [45:0] rand0, rand1, rand2; - for (int i = 0; i < 10; i++) begin - @(posedge clk_tb); - fork - begin - actual_u = $random()%PRIME; - actual_v = $random()%PRIME; - actual_w = 'h2; - - // u_array = actual_u; - // v_array = actual_v; - rand0 = $random(); - rand1 = $random(); - rand2 = $random(); - - // $display("actual u = %h, actual v = %h", actual_u, actual_v); - - u[0] = actual_u-rand0; - u[1] = rand0; - v[0] = actual_v-rand1; - v[1] = rand1; - w[0] = actual_w-rand2; - w[1] = rand2; - // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); - end - begin - repeat(264) @(posedge clk_tb); - if ((dut.res[0] + dut.res[1]) != ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME)) begin - $error("U = u*v+w Mismatch: exp_output = %h output shares = %h %h actual output = %h", ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME), dut.res[0], dut.res[1], dut.res[0] + dut.res[1]); - end - end - join - end -endtask -*/ - -// task masked_bfu_1x2_test(); -// logic [45:0] rand0, rand1, rand2; -// for (int i = 0; i < 10; i++) begin -// @(posedge clk_tb); -// fork -// begin -// actual_u = $random()%PRIME; -// actual_v = $random()%PRIME; -// actual_w = 'h2; - -// // u_array = actual_u; -// // v_array = actual_v; -// rand0 = $random(); -// rand1 = $random(); -// rand2 = $random(); - -// // $display("actual u = %h, actual v = %h", actual_u, actual_v); - -// u[0] = actual_u-rand0; -// u[1] = rand0; -// v[0] = actual_v-rand1; -// v[1] = rand1; -// w[0] = actual_w-rand2; -// w[1] = rand2; - -// uvw_i_tb.u00_i = u; -// uvw_i_tb.u01_i = u; -// uvw_i_tb.v00_i = v; -// uvw_i_tb.v01_i = v; -// uvw_i_tb.w00_i = w; -// uvw_i_tb.w01_i = w; -// // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); -// end -// // begin -// // repeat(264) @(posedge clk_tb); -// // if ((dut.res[0] + dut.res[1]) != ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME)) begin -// // $error("U = u*v+w Mismatch: exp_output = %h output shares = %h %h actual output = %h", ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME), dut.res[0], dut.res[1], dut.res[0] + dut.res[1]); -// // end -// // end -// join -// end -// endtask -/* -task masked_hybrid_bf_2x2_test(); - logic [45:0] rand0, rand1, rand2; - for (int j = 0; j < 6; j++) begin - mode_tb = j; - for (int i = 0; i < 10; i++) begin - @(posedge clk_tb); - enable_tb = 1'b1; - fork - begin - actual_u = $random()%PRIME; - actual_v = $random()%PRIME; - actual_w = 'h2; - - // u_array = actual_u; - // v_array = actual_v; - rand0 = $random(); - rand1 = $random(); - rand2 = $random(); - - // $display("actual u = %h, actual v = %h", actual_u, actual_v); - - u[0] = actual_u-rand0; - u[1] = rand0; - v[0] = actual_v-rand1; - v[1] = rand1; - w[0] = actual_w-rand2; - w[1] = rand2; - - uvw_i_tb.u00_i = actual_u; - uvw_i_tb.u01_i = actual_u; - uvw_i_tb.v00_i = actual_v; - uvw_i_tb.v01_i = actual_v; - uvw_i_tb.w00_i = actual_w; - uvw_i_tb.w01_i = actual_w; - uvw_i_tb.w10_i = actual_w; - uvw_i_tb.w11_i = actual_w; - - pw_uvw_i_tb.u0_i = actual_u; - pw_uvw_i_tb.v0_i = actual_v; - pw_uvw_i_tb.w0_i = actual_w; - - pw_uvw_i_tb.u1_i = actual_u; - pw_uvw_i_tb.v1_i = actual_v; - pw_uvw_i_tb.w1_i = actual_w; - - pw_uvw_i_tb.u2_i = actual_u; - pw_uvw_i_tb.v2_i = actual_v; - pw_uvw_i_tb.w2_i = actual_w; - - pw_uvw_i_tb.u3_i = actual_u; - pw_uvw_i_tb.v3_i = actual_v; - pw_uvw_i_tb.w3_i = actual_w; - //$display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); - end - // begin - // repeat(264) @(posedge clk_tb); - // if ((dut.res[0] + dut.res[1]) != ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME)) begin - // $error("U = u*v+w Mismatch: exp_output = %h output shares = %h %h actual output = %h", ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME), dut.res[0], dut.res[1], dut.res[0] + dut.res[1]); - // end - // end - join - end - enable_tb = 1'b0; - @(posedge clk_tb); - end -endtask -*/ initial begin init_sim(); reset_dut(); @@ -1024,11 +682,6 @@ initial begin // ntt_ctrl_test(); $display("Starting ntt test\n"); ntt_top_test(); - // masked_BFU_adder_test(); - // masked_BFU_mult_test(); - // masked_gs_butterfly_test(); - // masked_pwm_test(); - // masked_hybrid_bf_2x2_test(); // pwm_opt_test(); repeat(1000) @(posedge clk_tb); $finish; From 6e820d3031cd0e96df85e2941c809e42be1e817d Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Wed, 20 Nov 2024 19:17:51 +0000 Subject: [PATCH 21/23] MICROSOFT AUTOMATED PIPELINE: Stamp 'user/dev/kupadhyayula/ntt_masking' with updated timestamp and hash after successful run --- .github/workflow_metadata/pr_hash | 2 +- .github/workflow_metadata/pr_timestamp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflow_metadata/pr_hash b/.github/workflow_metadata/pr_hash index 0b48f48..76469fb 100644 --- a/.github/workflow_metadata/pr_hash +++ b/.github/workflow_metadata/pr_hash @@ -1 +1 @@ -214c0b1444725dc142a4361e20d2d37423221cc10eda63949e445a2a06f3f8f61abeb3d4d18dd6fc0a2618cd920e4a51 +61ff2cd5f5fc580fa4d885c8026ab0a4e3bb62ffd6a16b562da228e85b6af89fc13267658403e2388a14cad520c74d34 \ No newline at end of file diff --git a/.github/workflow_metadata/pr_timestamp b/.github/workflow_metadata/pr_timestamp index e5a8f94..8d4e1ec 100644 --- a/.github/workflow_metadata/pr_timestamp +++ b/.github/workflow_metadata/pr_timestamp @@ -1 +1 @@ -1731643612 +1732130263 \ No newline at end of file From a7571ea28a321752463a10d1b5a1e61d439dda3f Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Wed, 20 Nov 2024 11:22:33 -0800 Subject: [PATCH 22/23] Update filelist, clean up --- src/abr_libs/config/abr_libs.vf | 6 ++ src/abr_libs/config/masking_tb.vf | 8 ++- src/abr_libs/rtl/abr_masked_A2B_conv.sv | 18 +----- src/abr_libs/rtl/abr_masked_B2A_conv.sv | 18 +++--- .../rtl/abr_masked_N_bit_Arith_adder.sv | 10 ---- .../rtl/abr_masked_N_bit_Boolean_adder.sv | 10 +--- .../rtl/abr_masked_N_bit_Boolean_sub.sv | 10 +--- src/abr_sha3/config/abr_sha3.vf | 6 ++ src/abr_sha3/config/abr_sha3_tb.vf | 6 ++ src/decompose/config/decompose.vf | 15 +++++ src/decompose/config/decompose_tb.vf | 15 +++++ src/exp_mask/config/exp_mask.vf | 6 ++ src/exp_mask/config/exp_mask_tb.vf | 6 ++ src/makehint/config/makehint.vf | 8 ++- src/makehint/config/makehint_tb.vf | 8 ++- .../config/mldsa_sampler_top.vf | 6 ++ src/mldsa_top/config/mldsa_top.vf | 17 +++++- src/mldsa_top/config/mldsa_top_tb.vf | 17 +++++- src/mldsa_top/uvmf/config/uvmf_mldsa.vf | 17 +++++- src/ntt_top/Model/masked_gadgets.py | 2 +- src/ntt_top/Model/testForMasking.py | 57 ++++++------------- src/ntt_top/config/butterfly_top_tb.vf | 17 +++++- src/ntt_top/config/ntt_mult_reduction_tb.vf | 15 +++++ src/ntt_top/config/ntt_pkg.vf | 6 ++ src/ntt_top/config/ntt_top.vf | 17 +++++- src/ntt_top/config/ntt_top_tb.vf | 15 +++++ src/ntt_top/config/ntt_utb.vf | 17 +++++- src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv | 5 +- src/pk_decode/config/pkdecode.vf | 6 ++ src/pk_decode/config/pkdecode_tb.vf | 6 ++ src/power2round/config/power2round.vf | 6 ++ src/power2round/config/power2round_tb.vf | 6 ++ src/rej_bounded/config/rej_bounded.vf | 6 ++ src/rej_bounded/config/rej_bounded_tb.vf | 6 ++ src/rej_sampler/config/rej_sampler.vf | 6 ++ src/rej_sampler/config/rej_sampler_tb.vf | 6 ++ src/sample_in_ball/config/sample_in_ball.vf | 6 ++ .../config/sample_in_ball_tb.vf | 6 ++ src/sig_decode_z/config/sigdecode_z.vf | 6 ++ src/sig_decode_z/config/sigdecode_z_tb.vf | 6 ++ src/sig_encode_z/config/sigencode_z.vf | 15 +++++ src/sig_encode_z/config/sigencode_z_tb.vf | 15 +++++ src/sigdecode_h/config/sigdecode_h.vf | 6 ++ src/sigdecode_h/config/sigdecode_h_tb.vf | 6 ++ src/sk_decode/config/skdecode.vf | 15 +++++ src/sk_decode/config/skdecode_tb.vf | 15 +++++ src/sk_encode/config/skencode.vf | 6 ++ src/sk_encode/config/skencode_tb.vf | 6 ++ 48 files changed, 411 insertions(+), 103 deletions(-) diff --git a/src/abr_libs/config/abr_libs.vf b/src/abr_libs/config/abr_libs.vf index 62a80cc..91e9713 100644 --- a/src/abr_libs/config/abr_libs.vf +++ b/src/abr_libs/config/abr_libs.vf @@ -17,11 +17,17 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv \ No newline at end of file diff --git a/src/abr_libs/config/masking_tb.vf b/src/abr_libs/config/masking_tb.vf index e43decb..cbd79ec 100644 --- a/src/abr_libs/config/masking_tb.vf +++ b/src/abr_libs/config/masking_tb.vf @@ -4,9 +4,15 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/tb/abr_masked_A2B_conv_tb.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/tb/abr_masked_N_bit_Boolean_adder_tb.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/tb/abr_masked_B2A_conv_tb.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/tb/abr_masked_N_bit_mult_tb.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/tb/abr_masked_N_bit_mult_two_share_tb.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv -${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv \ No newline at end of file +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv \ No newline at end of file diff --git a/src/abr_libs/rtl/abr_masked_A2B_conv.sv b/src/abr_libs/rtl/abr_masked_A2B_conv.sv index 9b006d7..62ded58 100644 --- a/src/abr_libs/rtl/abr_masked_A2B_conv.sv +++ b/src/abr_libs/rtl/abr_masked_A2B_conv.sv @@ -59,18 +59,10 @@ // Pipeline registers for x and y inputs always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin - // for (int j = 0; j < WIDTH; j = j + 1) begin - // x_reg[i][j] <= 2'b00; - // y_reg[i][j] <= 2'b00; - // end x_reg[i] <= '0; y_reg[i] <= '0; end else if (zeroize) begin - // for (int j = 0; j < WIDTH; j = j + 1) begin - // x_reg[i][j] <= 2'b00; - // y_reg[i][j] <= 2'b00; - // end x_reg[i] <= '0; y_reg[i] <= '0; end @@ -91,15 +83,9 @@ // Pipeline registers for sum output always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin - // for (int j = 0; j < WIDTH; j = j + 1) begin - // sum_reg[i][j] <= 2'b00; - // end sum_reg[i] <= '0; end else if (zeroize) begin - // for (int j = 0; j < WIDTH; j = j + 1) begin - // sum_reg[i][j] <= 2'b00; - // end sum_reg[i] <= '0; end else begin @@ -122,8 +108,8 @@ .clk(clk), // Connect clk to clk .rst_n(rst_n), // Connect rst_n to rst_n .zeroize(zeroize), // Connect zeroize to zeroize - .x(x_reg[i][i]), // Connect x to the last stage of the x pipeline - .y(y_reg[i][i]), // Connect y to the last stage of the y pipeline + .x(x_reg[i][i]), // Connect x to the last stage of the x pipeline + .y(y_reg[i][i]), // Connect y to the last stage of the y pipeline .c_in(carry[i]), // Connect c_in to carry[i] .rnd(rnd[i]), // Connect rnd to corresponding random bit .s(sum[i]), // Connect sum to sum[i] diff --git a/src/abr_libs/rtl/abr_masked_B2A_conv.sv b/src/abr_libs/rtl/abr_masked_B2A_conv.sv index 6112ddc..933b35e 100644 --- a/src/abr_libs/rtl/abr_masked_B2A_conv.sv +++ b/src/abr_libs/rtl/abr_masked_B2A_conv.sv @@ -94,16 +94,16 @@ // Combinational logic always_comb begin - T0 = x0 ^ Gamma_reg; // T = x' ⊕ Γ - T1 = T0 - Gamma_reg; // T = T - Γ - T2 = T1 ^ x0; // T = T ⊕ x' - Gamma_reg2 = Gamma_reg ^ x1; // Γ = Γ ⊕ r - A0 = x0 ^ Gamma_reg2; // A = x' ⊕ Γ - A1 = A0 - Gamma_reg2; // A = A - Γ - A2 = A1 ^ T2; // A = A ⊕ T + T0 = x0 ^ Gamma_reg; // T = x' ⊕ Γ + T1 = T0 - Gamma_reg; // T = T - Γ + T2 = T1 ^ x0; // T = T ⊕ x' + Gamma_reg2 = Gamma_reg ^ x1; // Γ = Γ ⊕ r + A0 = x0 ^ Gamma_reg2; // A = x' ⊕ Γ + A1 = A0 - Gamma_reg2; // A = A - Γ + A2 = A1 ^ T2; // A = A ⊕ T for (int i = 0; i < WIDTH; i++) begin - x_arith_next[i][0] = A2[i]; // Assign A to the output - x_arith_next[i][1] = x1[i]; // Assign r to the output + x_arith_next[i][0] = A2[i]; // Assign A to the output + x_arith_next[i][1] = x1[i]; // Assign r to the output end end diff --git a/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv b/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv index 15e1552..f8567d8 100644 --- a/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv +++ b/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv @@ -47,22 +47,12 @@ // Final output assignment always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin - // for (int i = 0; i < WIDTH; i++) begin - // s[i] <= 2'b0; - // end s <= 'h0; end else if (zeroize) begin - // for (int i = 0; i < WIDTH; i++) begin - // s[i] <= 2'b0; - // end s <= 'h0; end else begin - // for (int i = 0; i < WIDTH; i++) begin - // s[i][0] <= add_res[0][i]; // Pass-through without masking - // s[i][1] <= add_res[1][i]; // Pass-through without masking - // end s <= add_res; end end diff --git a/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv b/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv index 9ff0aa5..d945e1c 100644 --- a/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv +++ b/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv @@ -81,15 +81,9 @@ // Pipeline registers for sum output always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin - // for (int j = 0; j < WIDTH; j = j + 1) begin - // sum_reg[i][j] <= 2'b00; - // end sum_reg[i] <= '0; end else if (zeroize) begin - // for (int j = 0; j < WIDTH; j = j + 1) begin - // sum_reg[i][j] <= 2'b00; - // end sum_reg[i] <= '0; end else begin @@ -112,8 +106,8 @@ .clk(clk), // Connect clk to clk .rst_n(rst_n), // Connect rst_n to rst_n .zeroize(zeroize), // Connect zeroize to zeroize - .x(x_reg[i][i]), // Connect x to the last stage of the x pipeline - .y(y_reg[i][i]), // Connect y to the last stage of the y pipeline + .x(x_reg[i][i]), // Connect x to the last stage of the x pipeline + .y(y_reg[i][i]), // Connect y to the last stage of the y pipeline .c_in(carry[i]), // Connect c_in to carry[i] .rnd(rnd[i]), // Connect rnd to corresponding random bit .s(sum[i]), // Connect sum to sum[i] diff --git a/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv b/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv index ddc715f..07f7371 100644 --- a/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv +++ b/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv @@ -75,15 +75,9 @@ module abr_masked_N_bit_Boolean_sub #( // Pipeline registers for sum output always_ff @(posedge clk or negedge rst_n) begin if (!rst_n) begin - // for (int j = 0; j < WIDTH; j = j + 1) begin - // sum_reg[i][j] <= 2'b00; - // end sum_reg[i] <= '0; end else if (zeroize) begin - // for (int j = 0; j < WIDTH; j = j + 1) begin - // sum_reg[i][j] <= 2'b00; - // end sum_reg[i] <= '0; end else begin @@ -106,8 +100,8 @@ module abr_masked_N_bit_Boolean_sub #( .clk(clk), // Connect clk to clk .rst_n(rst_n), // Connect rst_n to rst_n .zeroize(zeroize), // Connect zeroize to zeroize - .x(x_reg[i][i]), // Connect x to the last stage of the x pipeline - .y(y_reg[i][i]), // Connect y to the last stage of the y pipeline + .x(x_reg[i][i]), // Connect x to the last stage of the x pipeline + .y(y_reg[i][i]), // Connect y to the last stage of the y pipeline .c_in(carry[i]), // Connect c_in to carry[i] .rnd(rnd[i]), // Connect rnd to corresponding random bit .s(sum[i]), // Connect sum to sum[i] diff --git a/src/abr_sha3/config/abr_sha3.vf b/src/abr_sha3/config/abr_sha3.vf index 2d1f800..d1472dc 100644 --- a/src/abr_sha3/config/abr_sha3.vf +++ b/src/abr_sha3/config/abr_sha3.vf @@ -20,11 +20,16 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/abr_prim/rtl/abr_prim_util_pkg.sv @@ -35,6 +40,7 @@ ${ADAMSBRIDGE_ROOT}/src/abr_prim/rtl/abr_prim_cipher_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_prim/rtl/abr_prim_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_prim/rtl/abr_prim_sparse_fsm_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_sha3/rtl/abr_sha3_pkg.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_prim_generic/rtl/abr_prim_generic_flop_en.sv ${ADAMSBRIDGE_ROOT}/src/abr_prim_generic/rtl/abr_prim_generic_flop.sv ${ADAMSBRIDGE_ROOT}/src/abr_prim_generic/rtl/abr_prim_generic_buf.sv diff --git a/src/abr_sha3/config/abr_sha3_tb.vf b/src/abr_sha3/config/abr_sha3_tb.vf index 8c0d033..2a514a8 100644 --- a/src/abr_sha3/config/abr_sha3_tb.vf +++ b/src/abr_sha3/config/abr_sha3_tb.vf @@ -23,11 +23,16 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/abr_prim/rtl/abr_prim_util_pkg.sv @@ -38,6 +43,7 @@ ${ADAMSBRIDGE_ROOT}/src/abr_prim/rtl/abr_prim_cipher_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_prim/rtl/abr_prim_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_prim/rtl/abr_prim_sparse_fsm_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_sha3/rtl/abr_sha3_pkg.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_prim_generic/rtl/abr_prim_generic_flop_en.sv ${ADAMSBRIDGE_ROOT}/src/abr_prim_generic/rtl/abr_prim_generic_flop.sv ${ADAMSBRIDGE_ROOT}/src/abr_prim_generic/rtl/abr_prim_generic_buf.sv diff --git a/src/decompose/config/decompose.vf b/src/decompose/config/decompose.vf index a31928a..ea0e268 100644 --- a/src/decompose/config/decompose.vf +++ b/src/decompose/config/decompose.vf @@ -19,27 +19,42 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_ram_tdp_file.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_wrapper.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly2x2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_dsp.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_reduction.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_mult_redux46.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_div2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_buffer.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_shuffle_buffer.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_twiddle_lookup.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose_r1_lut.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose_w1_mem.sv diff --git a/src/decompose/config/decompose_tb.vf b/src/decompose/config/decompose_tb.vf index 29e113b..026256c 100644 --- a/src/decompose/config/decompose_tb.vf +++ b/src/decompose/config/decompose_tb.vf @@ -20,27 +20,42 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_ram_tdp_file.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_wrapper.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly2x2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_dsp.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_reduction.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_mult_redux46.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_div2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_buffer.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_shuffle_buffer.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_twiddle_lookup.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose_r1_lut.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose_w1_mem.sv diff --git a/src/exp_mask/config/exp_mask.vf b/src/exp_mask/config/exp_mask.vf index 712de2b..14bf8cd 100644 --- a/src/exp_mask/config/exp_mask.vf +++ b/src/exp_mask/config/exp_mask.vf @@ -18,12 +18,18 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/exp_mask/rtl/exp_mask_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/exp_mask/rtl/exp_mask.sv \ No newline at end of file diff --git a/src/exp_mask/config/exp_mask_tb.vf b/src/exp_mask/config/exp_mask_tb.vf index 1a83188..b9cbed9 100644 --- a/src/exp_mask/config/exp_mask_tb.vf +++ b/src/exp_mask/config/exp_mask_tb.vf @@ -20,15 +20,21 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_sampler_top/rtl/mldsa_sampler_pkg.sv ${ADAMSBRIDGE_ROOT}/src/exp_mask/tb/exp_mask_tb.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/exp_mask/rtl/exp_mask_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/exp_mask/rtl/exp_mask.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_sampler_top/rtl/mldsa_sampler_pkg.sv \ No newline at end of file diff --git a/src/makehint/config/makehint.vf b/src/makehint/config/makehint.vf index 475692a..f5d79f0 100644 --- a/src/makehint/config/makehint.vf +++ b/src/makehint/config/makehint.vf @@ -19,14 +19,20 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/makehint/rtl/makehint_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/makehint/rtl/hintgen.sv -${ADAMSBRIDGE_ROOT}/src/makehint/rtl/makehint.sv +${ADAMSBRIDGE_ROOT}/src/makehint/rtl/makehint.sv \ No newline at end of file diff --git a/src/makehint/config/makehint_tb.vf b/src/makehint/config/makehint_tb.vf index f3ab58e..c6a0d62 100644 --- a/src/makehint/config/makehint_tb.vf +++ b/src/makehint/config/makehint_tb.vf @@ -20,15 +20,21 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/makehint/rtl/makehint_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/makehint/rtl/hintgen.sv ${ADAMSBRIDGE_ROOT}/src/makehint/rtl/makehint.sv -${ADAMSBRIDGE_ROOT}/src/makehint/tb/makehint_tb.sv +${ADAMSBRIDGE_ROOT}/src/makehint/tb/makehint_tb.sv \ No newline at end of file diff --git a/src/mldsa_sampler_top/config/mldsa_sampler_top.vf b/src/mldsa_sampler_top/config/mldsa_sampler_top.vf index 2200d05..ffe264f 100644 --- a/src/mldsa_sampler_top/config/mldsa_sampler_top.vf +++ b/src/mldsa_sampler_top/config/mldsa_sampler_top.vf @@ -26,11 +26,16 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/sample_in_ball/rtl/sample_in_ball_pkg.sv @@ -42,6 +47,7 @@ ${ADAMSBRIDGE_ROOT}/src/abr_prim/rtl/abr_prim_mubi_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_prim/rtl/abr_prim_cipher_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_prim/rtl/abr_prim_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_prim/rtl/abr_prim_sparse_fsm_pkg.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/rej_bounded/rtl/rej_bounded_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/rej_bounded/rtl/rej_bounded2.sv ${ADAMSBRIDGE_ROOT}/src/rej_sampler/rtl/rej_sampler_ctrl.sv diff --git a/src/mldsa_top/config/mldsa_top.vf b/src/mldsa_top/config/mldsa_top.vf index 8d6fab2..f48350b 100644 --- a/src/mldsa_top/config/mldsa_top.vf +++ b/src/mldsa_top/config/mldsa_top.vf @@ -36,11 +36,16 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_sampler_top/rtl/mldsa_sampler_pkg.sv @@ -57,6 +62,7 @@ ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_ram_tdp_file.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_wrapper.sv ${ADAMSBRIDGE_ROOT}/src/norm_check/rtl/norm_check_defines_pkg.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/rej_bounded/rtl/rej_bounded_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/rej_bounded/rtl/rej_bounded2.sv ${ADAMSBRIDGE_ROOT}/src/rej_sampler/rtl/rej_sampler_ctrl.sv @@ -110,11 +116,20 @@ ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_dsp.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_reduction.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_mult_redux46.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_div2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_buffer.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_shuffle_buffer.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_twiddle_lookup.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose_r1_lut.sv @@ -155,4 +170,4 @@ ${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_seq_prim.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_seq_sec.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_top.sv -${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_reg.sv +${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_reg.sv \ No newline at end of file diff --git a/src/mldsa_top/config/mldsa_top_tb.vf b/src/mldsa_top/config/mldsa_top_tb.vf index abc5b33..fe89d66 100644 --- a/src/mldsa_top/config/mldsa_top_tb.vf +++ b/src/mldsa_top/config/mldsa_top_tb.vf @@ -37,11 +37,16 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_sampler_top/rtl/mldsa_sampler_pkg.sv @@ -59,6 +64,7 @@ ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_ram_tdp_file.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_wrapper.sv ${ADAMSBRIDGE_ROOT}/src/norm_check/rtl/norm_check_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_top/tb/mldsa_top_tb.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/rej_bounded/rtl/rej_bounded_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/rej_bounded/rtl/rej_bounded2.sv ${ADAMSBRIDGE_ROOT}/src/rej_sampler/rtl/rej_sampler_ctrl.sv @@ -112,11 +118,20 @@ ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_dsp.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_reduction.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_mult_redux46.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_div2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_buffer.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_shuffle_buffer.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_twiddle_lookup.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose_r1_lut.sv @@ -157,4 +172,4 @@ ${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_seq_prim.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_seq_sec.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_top.sv -${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_reg.sv +${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_reg.sv \ No newline at end of file diff --git a/src/mldsa_top/uvmf/config/uvmf_mldsa.vf b/src/mldsa_top/uvmf/config/uvmf_mldsa.vf index e5040fc..8543bab 100644 --- a/src/mldsa_top/uvmf/config/uvmf_mldsa.vf +++ b/src/mldsa_top/uvmf/config/uvmf_mldsa.vf @@ -102,11 +102,16 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_sampler_top/rtl/mldsa_sampler_pkg.sv @@ -131,6 +136,7 @@ ${ADAMSBRIDGE_ROOT}/src/mldsa_top/uvmf/uvmf_template_output/project_benches/mlds ${ADAMSBRIDGE_ROOT}/src/mldsa_top/uvmf/uvmf_template_output/project_benches/mldsa/tb/tests/mldsa_tests_pkg.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_top/uvmf/uvmf_template_output/project_benches/mldsa/tb/testbench/hdl_top.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_top/uvmf/uvmf_template_output/project_benches/mldsa/tb/testbench/hvl_top.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/rej_bounded/rtl/rej_bounded_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/rej_bounded/rtl/rej_bounded2.sv ${ADAMSBRIDGE_ROOT}/src/rej_sampler/rtl/rej_sampler_ctrl.sv @@ -184,11 +190,20 @@ ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_dsp.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_reduction.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_mult_redux46.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_div2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_buffer.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_shuffle_buffer.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_twiddle_lookup.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose.sv ${ADAMSBRIDGE_ROOT}/src/decompose/rtl/decompose_r1_lut.sv @@ -229,4 +244,4 @@ ${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_seq_prim.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_seq_sec.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_top.sv -${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_reg.sv +${ADAMSBRIDGE_ROOT}/src/mldsa_top/rtl/mldsa_reg.sv \ No newline at end of file diff --git a/src/ntt_top/Model/masked_gadgets.py b/src/ntt_top/Model/masked_gadgets.py index e782dc0..d737495 100644 --- a/src/ntt_top/Model/masked_gadgets.py +++ b/src/ntt_top/Model/masked_gadgets.py @@ -648,7 +648,7 @@ def B2A(x0, x1): # output: red0 = 1010 red1 = 0111 --> combine = 'd17 % 16 = 1 # def maskedAdderReduction(u0, u1): - uRolled0 = (u0 + Roller) % MultMod #TODO: forgot what it's for? + uRolled0 = (u0 + Roller) % MultMod uRolled1 = u1 print(f"rolled_shares = {uRolled0: X}, {uRolled1: X}") print(f"rolled_combin = {uRolled0+uRolled1: X}") diff --git a/src/ntt_top/Model/testForMasking.py b/src/ntt_top/Model/testForMasking.py index 393277e..e59f432 100644 --- a/src/ntt_top/Model/testForMasking.py +++ b/src/ntt_top/Model/testForMasking.py @@ -281,48 +281,27 @@ def test_maskedBFUAdder(numTest = 10): operands = CustomUnsignedInteger(0, 0, DILITHIUM_Q-1) for i in range(0, numTest): #get a random number ranging [0, DILITHIUM_Q-1] - # operands.generate_random() - # a = int(operands.value) - # operands.generate_random() - # b = int(operands.value) - # expected = (a+b) % DILITHIUM_Q - # randomness.generate_random() - # r0 = int(randomness.value) - # a0 = int(a-r0) % MultMod - # a1 = r0 - # randomness.generate_random() - # r1 = int(randomness.value) - # b0 = int(b-r1) % MultMod - # b1 = r1 - - # a = int("54bb00", 16) - # b = int("727900", 16) - # r0 = int("782839f0", 16) - # r1 = int("7923d7f2", 16) - - # print("actual inputs:") - # print(f"{a: X}, {b: X}") - # expected = (a+b) % DILITHIUM_Q - # a0 = int(a-r0) % MultMod - # a1 = r0 - # b0 = int(b-r1) % MultMod - # b1 = r1 - a0 = int("00007f0ef2fd", 16) - a1 = int("3fff81075002", 16) - b0 = int("3fffff142bfe", 16) - b1 = int("0000013a0302", 16) - expected = ((a0+a1)-(b0+b1)) % DILITHIUM_Q - print("a shares:") - print(f"{a0: X}, {a1: X}") - print("b shares:") - print(f"{b0: X}, {b1: X}") - a0, a1 = maskedBFUSub(a0, a1, b0, b1) #maskedBFUAdder(a0, a1, b0, b1) + operands.generate_random() + a = int(operands.value) + operands.generate_random() + b = int(operands.value) + expected = (a+b) % DILITHIUM_Q + randomness.generate_random() + r0 = int(randomness.value) + a0 = int(a-r0) % MultMod + a1 = r0 + randomness.generate_random() + r1 = int(randomness.value) + b0 = int(b-r1) % MultMod + b1 = r1 + + a0, a1 = maskedBFUAdder(a0, a1, b0, b1) gotten = int(a0 + a1) % MultMod - print(f"Gotten = {gotten: X}, Expected = {expected: X}") + # print(f"Gotten = {gotten: X}, Expected = {expected: X}") if gotten != expected: print(f"Addition gives an Error; gotten = {gotten: X}, while exp = {expected: X}") -test_maskedBFUAdder(numTest = 1) +#test_maskedBFUAdder(numTest = 1000) def gs_bf(u, v, z): t = (u - v) % DILITHIUM_Q @@ -504,6 +483,6 @@ def test_MaskedmodularOps(numTest = 10): # test_MaskedmodularOps(numTest = 100000) -# test_maskedReduction46(numTest = 1000000) +test_maskedReduction46(numTest = 1000000) diff --git a/src/ntt_top/config/butterfly_top_tb.vf b/src/ntt_top/config/butterfly_top_tb.vf index cb94e37..01f1df5 100644 --- a/src/ntt_top/config/butterfly_top_tb.vf +++ b/src/ntt_top/config/butterfly_top_tb.vf @@ -19,24 +19,39 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_ram_tdp_file.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_wrapper.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/butterfly_top_tb.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly2x2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_dsp.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_reduction.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_mult_redux46.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_div2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_buffer.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_shuffle_buffer.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_twiddle_lookup.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_ctrl.sv -${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv \ No newline at end of file +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv \ No newline at end of file diff --git a/src/ntt_top/config/ntt_mult_reduction_tb.vf b/src/ntt_top/config/ntt_mult_reduction_tb.vf index 60b4036..c89a999 100644 --- a/src/ntt_top/config/ntt_mult_reduction_tb.vf +++ b/src/ntt_top/config/ntt_mult_reduction_tb.vf @@ -19,24 +19,39 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_ram_tdp_file.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_wrapper.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly2x2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_dsp.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_reduction.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_mult_redux46.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_div2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_buffer.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_shuffle_buffer.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_twiddle_lookup.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_mult_reduction_tb.sv \ No newline at end of file diff --git a/src/ntt_top/config/ntt_pkg.vf b/src/ntt_top/config/ntt_pkg.vf index a1d3acd..12d50b2 100644 --- a/src/ntt_top/config/ntt_pkg.vf +++ b/src/ntt_top/config/ntt_pkg.vf @@ -18,12 +18,18 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv \ No newline at end of file diff --git a/src/ntt_top/config/ntt_top.vf b/src/ntt_top/config/ntt_top.vf index 4dde389..9650b48 100644 --- a/src/ntt_top/config/ntt_top.vf +++ b/src/ntt_top/config/ntt_top.vf @@ -18,23 +18,38 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_ram_tdp_file.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_wrapper.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly2x2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_dsp.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_reduction.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_mult_redux46.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_div2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_buffer.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_shuffle_buffer.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_twiddle_lookup.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_ctrl.sv -${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv \ No newline at end of file +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv \ No newline at end of file diff --git a/src/ntt_top/config/ntt_top_tb.vf b/src/ntt_top/config/ntt_top_tb.vf index bad17a2..03a4f80 100644 --- a/src/ntt_top/config/ntt_top_tb.vf +++ b/src/ntt_top/config/ntt_top_tb.vf @@ -19,24 +19,39 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_ram_tdp_file.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_wrapper.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly2x2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_dsp.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_reduction.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_mult_redux46.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_div2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_buffer.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_shuffle_buffer.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_twiddle_lookup.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_top_tb.sv \ No newline at end of file diff --git a/src/ntt_top/config/ntt_utb.vf b/src/ntt_top/config/ntt_utb.vf index 60ef46b..7add7da 100644 --- a/src/ntt_top/config/ntt_utb.vf +++ b/src/ntt_top/config/ntt_utb.vf @@ -22,11 +22,16 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv @@ -57,13 +62,23 @@ ${ADAMSBRIDGE_ROOT}/src/ntt_top/utb/sequences/ntt_virtual_seq.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/utb/tests/ntt_base_test.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/utb/tests/ntt_combined_test.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/utb/ntt_utb_top/ntt_utb_top.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly2x2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_dsp.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_reduction.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_mult_redux46.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_div2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_buffer.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_shuffle_buffer.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_twiddle_lookup.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_ctrl.sv -${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv \ No newline at end of file +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv \ No newline at end of file diff --git a/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv index 25fbe33..e7cc4db 100644 --- a/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv +++ b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv @@ -282,7 +282,6 @@ end //---------------------------------------------------- //Masked PWMs - Used in masked PWM+INTT mode only - 207 clks //---------------------------------------------------- -// `ifdef MASKING ntt_masked_pwm #( .WIDTH(WIDTH) ) pwm_inst00 ( @@ -334,7 +333,7 @@ ntt_masked_pwm #( .rnd({rnd_i[2], rnd_i[1], rnd_i[0], rnd_i[4], rnd_i[3]}), .res(uv11_share) ); -// `endif + //---------------------------------------------------- //Masked BFU stage 1 - Used in masked PWM+INTT mode only - 260 clks //PWM outputs: uv00[1:0], uv01[1:0], uv10[1:0], uv11[1:0] @@ -349,7 +348,7 @@ ntt_masked_butterfly1x2 #( .rnd_i({rnd_i[4], rnd_i[3], rnd_i[2], rnd_i[1], rnd_i[0]}), .uv_o(masked_gs_stage1_uvo) ); -// `endif + //---------------------------------------------------- //Unmasked BFU stage 1 - Used in all other modes //---------------------------------------------------- diff --git a/src/pk_decode/config/pkdecode.vf b/src/pk_decode/config/pkdecode.vf index fe365fa..9fd8c7b 100644 --- a/src/pk_decode/config/pkdecode.vf +++ b/src/pk_decode/config/pkdecode.vf @@ -18,11 +18,17 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/pk_decode/rtl/pkdecode.sv \ No newline at end of file diff --git a/src/pk_decode/config/pkdecode_tb.vf b/src/pk_decode/config/pkdecode_tb.vf index 09a3129..8331a2d 100644 --- a/src/pk_decode/config/pkdecode_tb.vf +++ b/src/pk_decode/config/pkdecode_tb.vf @@ -19,12 +19,18 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/pk_decode/rtl/pkdecode.sv ${ADAMSBRIDGE_ROOT}/src/pk_decode/tb/pkdecode_tb.sv \ No newline at end of file diff --git a/src/power2round/config/power2round.vf b/src/power2round/config/power2round.vf index 81683de..9d82c23 100644 --- a/src/power2round/config/power2round.vf +++ b/src/power2round/config/power2round.vf @@ -18,13 +18,19 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/power2round/rtl/power2round_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/power2round/rtl/power2round_top.sv ${ADAMSBRIDGE_ROOT}/src/power2round/rtl/power2round_ctrl.sv diff --git a/src/power2round/config/power2round_tb.vf b/src/power2round/config/power2round_tb.vf index 1edb622..a5f21df 100644 --- a/src/power2round/config/power2round_tb.vf +++ b/src/power2round/config/power2round_tb.vf @@ -19,13 +19,19 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/power2round/rtl/power2round_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/power2round/rtl/power2round_top.sv ${ADAMSBRIDGE_ROOT}/src/power2round/rtl/power2round_ctrl.sv diff --git a/src/rej_bounded/config/rej_bounded.vf b/src/rej_bounded/config/rej_bounded.vf index a1dbbd8..2c25399 100644 --- a/src/rej_bounded/config/rej_bounded.vf +++ b/src/rej_bounded/config/rej_bounded.vf @@ -18,12 +18,18 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/rej_bounded/rtl/rej_bounded_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/rej_bounded/rtl/rej_bounded2.sv \ No newline at end of file diff --git a/src/rej_bounded/config/rej_bounded_tb.vf b/src/rej_bounded/config/rej_bounded_tb.vf index 8fe004e..467f292 100644 --- a/src/rej_bounded/config/rej_bounded_tb.vf +++ b/src/rej_bounded/config/rej_bounded_tb.vf @@ -19,13 +19,19 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/rej_bounded/tb/rej_bounded_tb.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/rej_bounded/rtl/rej_bounded_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/rej_bounded/rtl/rej_bounded2.sv \ No newline at end of file diff --git a/src/rej_sampler/config/rej_sampler.vf b/src/rej_sampler/config/rej_sampler.vf index a2bd53e..50f320e 100644 --- a/src/rej_sampler/config/rej_sampler.vf +++ b/src/rej_sampler/config/rej_sampler.vf @@ -18,12 +18,18 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/rej_sampler/rtl/rej_sampler_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/rej_sampler/rtl/rej_sampler.sv \ No newline at end of file diff --git a/src/rej_sampler/config/rej_sampler_tb.vf b/src/rej_sampler/config/rej_sampler_tb.vf index 4775bf9..8c44774 100644 --- a/src/rej_sampler/config/rej_sampler_tb.vf +++ b/src/rej_sampler/config/rej_sampler_tb.vf @@ -20,15 +20,21 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_sampler_top/rtl/mldsa_sampler_pkg.sv ${ADAMSBRIDGE_ROOT}/src/rej_sampler/tb/rej_sampler_tb.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/rej_sampler/rtl/rej_sampler_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/rej_sampler/rtl/rej_sampler.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_sampler_top/rtl/mldsa_sampler_pkg.sv \ No newline at end of file diff --git a/src/sample_in_ball/config/sample_in_ball.vf b/src/sample_in_ball/config/sample_in_ball.vf index ca353c5..1335f3d 100644 --- a/src/sample_in_ball/config/sample_in_ball.vf +++ b/src/sample_in_ball/config/sample_in_ball.vf @@ -18,15 +18,21 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/sample_in_ball/rtl/sample_in_ball_pkg.sv ${ADAMSBRIDGE_ROOT}/src/sample_in_ball/rtl/sib_mem.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/sample_in_ball/rtl/sample_in_ball_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/sample_in_ball/rtl/sample_in_ball_shuffler.sv ${ADAMSBRIDGE_ROOT}/src/sample_in_ball/rtl/sample_in_ball.sv \ No newline at end of file diff --git a/src/sample_in_ball/config/sample_in_ball_tb.vf b/src/sample_in_ball/config/sample_in_ball_tb.vf index c65f5fc..d587627 100644 --- a/src/sample_in_ball/config/sample_in_ball_tb.vf +++ b/src/sample_in_ball/config/sample_in_ball_tb.vf @@ -20,17 +20,23 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/sample_in_ball/rtl/sample_in_ball_pkg.sv ${ADAMSBRIDGE_ROOT}/src/sample_in_ball/rtl/sib_mem.sv ${ADAMSBRIDGE_ROOT}/src/mldsa_sampler_top/rtl/mldsa_sampler_pkg.sv ${ADAMSBRIDGE_ROOT}/src/sample_in_ball/tb/sample_in_ball_tb.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/sample_in_ball/rtl/sample_in_ball_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/sample_in_ball/rtl/sample_in_ball_shuffler.sv ${ADAMSBRIDGE_ROOT}/src/sample_in_ball/rtl/sample_in_ball.sv diff --git a/src/sig_decode_z/config/sigdecode_z.vf b/src/sig_decode_z/config/sigdecode_z.vf index 2069b72..b860012 100644 --- a/src/sig_decode_z/config/sigdecode_z.vf +++ b/src/sig_decode_z/config/sigdecode_z.vf @@ -18,14 +18,20 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/sig_decode_z/rtl/sigdecode_z_defines_pkg.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/sig_decode_z/rtl/sigdecode_z_top.sv ${ADAMSBRIDGE_ROOT}/src/sig_decode_z/rtl/sigdecode_z_unit.sv ${ADAMSBRIDGE_ROOT}/src/sig_decode_z/rtl/sigdecode_z_defines_pkg.sv \ No newline at end of file diff --git a/src/sig_decode_z/config/sigdecode_z_tb.vf b/src/sig_decode_z/config/sigdecode_z_tb.vf index 4b78cce..ba1a127 100644 --- a/src/sig_decode_z/config/sigdecode_z_tb.vf +++ b/src/sig_decode_z/config/sigdecode_z_tb.vf @@ -19,14 +19,20 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/sig_decode_z/rtl/sigdecode_z_defines_pkg.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/sig_decode_z/rtl/sigdecode_z_top.sv ${ADAMSBRIDGE_ROOT}/src/sig_decode_z/rtl/sigdecode_z_unit.sv ${ADAMSBRIDGE_ROOT}/src/sig_decode_z/tb/sigdecode_z_tb.sv \ No newline at end of file diff --git a/src/sig_encode_z/config/sigencode_z.vf b/src/sig_encode_z/config/sigencode_z.vf index 0d9eecc..9e91502 100644 --- a/src/sig_encode_z/config/sigencode_z.vf +++ b/src/sig_encode_z/config/sigencode_z.vf @@ -19,27 +19,42 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_ram_tdp_file.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_wrapper.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/sig_encode_z/rtl/sigencode_z_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly2x2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_dsp.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_reduction.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_mult_redux46.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_div2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_buffer.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_shuffle_buffer.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_twiddle_lookup.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv ${ADAMSBRIDGE_ROOT}/src/sig_encode_z/rtl/sigencode_z_top.sv ${ADAMSBRIDGE_ROOT}/src/sig_encode_z/rtl/sigencode_z_unit.sv ${ADAMSBRIDGE_ROOT}/src/sig_encode_z/rtl/sigencode_z_defines_pkg.sv \ No newline at end of file diff --git a/src/sig_encode_z/config/sigencode_z_tb.vf b/src/sig_encode_z/config/sigencode_z_tb.vf index c19f878..b861745 100644 --- a/src/sig_encode_z/config/sigencode_z_tb.vf +++ b/src/sig_encode_z/config/sigencode_z_tb.vf @@ -20,27 +20,42 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_ram_tdp_file.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_wrapper.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/sig_encode_z/rtl/sigencode_z_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly2x2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_dsp.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_reduction.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_mult_redux46.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_div2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_buffer.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_shuffle_buffer.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_twiddle_lookup.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv ${ADAMSBRIDGE_ROOT}/src/sig_encode_z/rtl/sigencode_z_top.sv ${ADAMSBRIDGE_ROOT}/src/sig_encode_z/rtl/sigencode_z_unit.sv ${ADAMSBRIDGE_ROOT}/src/sig_encode_z/tb/sigencode_z_tb.sv \ No newline at end of file diff --git a/src/sigdecode_h/config/sigdecode_h.vf b/src/sigdecode_h/config/sigdecode_h.vf index b82d677..ee077df 100644 --- a/src/sigdecode_h/config/sigdecode_h.vf +++ b/src/sigdecode_h/config/sigdecode_h.vf @@ -18,13 +18,19 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/sigdecode_h/rtl/sigdecode_h_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/sigdecode_h/rtl/sigdecode_h.sv ${ADAMSBRIDGE_ROOT}/src/sigdecode_h/rtl/sigdecode_h_ctrl.sv \ No newline at end of file diff --git a/src/sigdecode_h/config/sigdecode_h_tb.vf b/src/sigdecode_h/config/sigdecode_h_tb.vf index 6ca963c..d8b8637 100644 --- a/src/sigdecode_h/config/sigdecode_h_tb.vf +++ b/src/sigdecode_h/config/sigdecode_h_tb.vf @@ -19,13 +19,19 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/sigdecode_h/rtl/sigdecode_h_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/sigdecode_h/rtl/sigdecode_h.sv ${ADAMSBRIDGE_ROOT}/src/sigdecode_h/rtl/sigdecode_h_ctrl.sv diff --git a/src/sk_decode/config/skdecode.vf b/src/sk_decode/config/skdecode.vf index b0cb8f2..4c75998 100644 --- a/src/sk_decode/config/skdecode.vf +++ b/src/sk_decode/config/skdecode.vf @@ -19,26 +19,41 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_ram_tdp_file.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_wrapper.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly2x2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_dsp.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_reduction.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_mult_redux46.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_div2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_buffer.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_shuffle_buffer.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_twiddle_lookup.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv ${ADAMSBRIDGE_ROOT}/src/sk_decode/rtl/skdecode_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/sk_decode/rtl/skdecode_top.sv ${ADAMSBRIDGE_ROOT}/src/sk_decode/rtl/skdecode_ctrl.sv diff --git a/src/sk_decode/config/skdecode_tb.vf b/src/sk_decode/config/skdecode_tb.vf index 0fa50ca..20d1df5 100644 --- a/src/sk_decode/config/skdecode_tb.vf +++ b/src/sk_decode/config/skdecode_tb.vf @@ -20,26 +20,41 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_ram_tdp_file.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_wrapper.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly2x2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_dsp.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_mult_reduction.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_special_adder.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_mult_redux46.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_div2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_buffer.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_shuffle_buffer.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_twiddle_lookup.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_ctrl.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_top.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_add_sub.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv ${ADAMSBRIDGE_ROOT}/src/sk_decode/rtl/skdecode_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/sk_decode/rtl/skdecode_top.sv ${ADAMSBRIDGE_ROOT}/src/sk_decode/rtl/skdecode_ctrl.sv diff --git a/src/sk_encode/config/skencode.vf b/src/sk_encode/config/skencode.vf index 66793f8..06599f9 100644 --- a/src/sk_encode/config/skencode.vf +++ b/src/sk_encode/config/skencode.vf @@ -19,12 +19,18 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/sk_decode/rtl/skdecode_defines_pkg.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/sk_encode/rtl/skencode.sv \ No newline at end of file diff --git a/src/sk_encode/config/skencode_tb.vf b/src/sk_encode/config/skencode_tb.vf index 118c18c..2c705bd 100644 --- a/src/sk_encode/config/skencode_tb.vf +++ b/src/sk_encode/config/skencode_tb.vf @@ -20,13 +20,19 @@ ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_sample_buffer.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_defines_pkg.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_ahb_slv_sif.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_AND.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_delay_masked_shares.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_add_sub_mod_Boolean.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_MUX.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_sub.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_full_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_A2B_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Boolean_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_B2A_conv.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_mult_two_share.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_adder.sv ${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_add_sub_mod.sv ${ADAMSBRIDGE_ROOT}/src/sk_decode/rtl/skdecode_defines_pkg.sv +${ADAMSBRIDGE_ROOT}/src/abr_libs/rtl/abr_masked_N_bit_Arith_adder.sv ${ADAMSBRIDGE_ROOT}/src/sk_encode/rtl/skencode.sv ${ADAMSBRIDGE_ROOT}/src/sk_encode/tb/skencode_tb.sv \ No newline at end of file From 42248d4ac7ee2b14fdb1f9ad3adaab5657e5e8a0 Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Wed, 20 Nov 2024 20:01:54 +0000 Subject: [PATCH 23/23] MICROSOFT AUTOMATED PIPELINE: Stamp 'user/dev/kupadhyayula/ntt_masking' with updated timestamp and hash after successful run --- .github/workflow_metadata/pr_hash | 2 +- .github/workflow_metadata/pr_timestamp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflow_metadata/pr_hash b/.github/workflow_metadata/pr_hash index 76469fb..55c2c5c 100644 --- a/.github/workflow_metadata/pr_hash +++ b/.github/workflow_metadata/pr_hash @@ -1 +1 @@ -61ff2cd5f5fc580fa4d885c8026ab0a4e3bb62ffd6a16b562da228e85b6af89fc13267658403e2388a14cad520c74d34 \ No newline at end of file +17b0590f7883394cd4248a86a3ecb1bc6562091deb33ea19c59735438033e8c65d8971a69f04ceab1bc480f1dfe4623f \ No newline at end of file diff --git a/.github/workflow_metadata/pr_timestamp b/.github/workflow_metadata/pr_timestamp index 8d4e1ec..5baac5c 100644 --- a/.github/workflow_metadata/pr_timestamp +++ b/.github/workflow_metadata/pr_timestamp @@ -1 +1 @@ -1732130263 \ No newline at end of file +1732132891 \ No newline at end of file