From ab4e0b34637095116ac139f6a16f24543ade96ed Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Wed, 17 Apr 2024 11:02:33 +0200 Subject: [PATCH 01/27] automate stuff --- Makefile | 2 +- hw/core-v-mini-mcu/core_v_mini_mcu.sv | 344 ++++++++++----------- hw/core-v-mini-mcu/peripheral_subsystem.sv | 16 +- riscv-gnu-toolchain | 1 + run.sh | 4 + sw/applications/app/main.c | 28 ++ verilator | 1 + 7 files changed, 214 insertions(+), 182 deletions(-) create mode 160000 riscv-gnu-toolchain create mode 100755 run.sh create mode 100644 sw/applications/app/main.c create mode 160000 verilator diff --git a/Makefile b/Makefile index cef2e5c06..824e69a45 100644 --- a/Makefile +++ b/Makefile @@ -128,7 +128,7 @@ mcu-gen-help: ## Runs verible formating verible: - util/format-verible; +# util/format-verible; ## @section APP FW Build diff --git a/hw/core-v-mini-mcu/core_v_mini_mcu.sv b/hw/core-v-mini-mcu/core_v_mini_mcu.sv index 45f21d2c7..d5f696e30 100644 --- a/hw/core-v-mini-mcu/core_v_mini_mcu.sv +++ b/hw/core-v-mini-mcu/core_v_mini_mcu.sv @@ -43,229 +43,229 @@ module core_v_mini_mcu output logic exit_valid_o, output logic gpio_0_o, - input logic gpio_0_i, + input logic gpio_0_i, output logic gpio_0_oe_o, output logic gpio_1_o, - input logic gpio_1_i, + input logic gpio_1_i, output logic gpio_1_oe_o, output logic gpio_2_o, - input logic gpio_2_i, + input logic gpio_2_i, output logic gpio_2_oe_o, output logic gpio_3_o, - input logic gpio_3_i, + input logic gpio_3_i, output logic gpio_3_oe_o, output logic gpio_4_o, - input logic gpio_4_i, + input logic gpio_4_i, output logic gpio_4_oe_o, output logic gpio_5_o, - input logic gpio_5_i, + input logic gpio_5_i, output logic gpio_5_oe_o, output logic gpio_6_o, - input logic gpio_6_i, + input logic gpio_6_i, output logic gpio_6_oe_o, output logic gpio_7_o, - input logic gpio_7_i, + input logic gpio_7_i, output logic gpio_7_oe_o, output logic gpio_8_o, - input logic gpio_8_i, + input logic gpio_8_i, output logic gpio_8_oe_o, output logic 
gpio_9_o, - input logic gpio_9_i, + input logic gpio_9_i, output logic gpio_9_oe_o, output logic gpio_10_o, - input logic gpio_10_i, + input logic gpio_10_i, output logic gpio_10_oe_o, output logic gpio_11_o, - input logic gpio_11_i, + input logic gpio_11_i, output logic gpio_11_oe_o, output logic gpio_12_o, - input logic gpio_12_i, + input logic gpio_12_i, output logic gpio_12_oe_o, output logic gpio_13_o, - input logic gpio_13_i, + input logic gpio_13_i, output logic gpio_13_oe_o, output logic gpio_14_o, - input logic gpio_14_i, + input logic gpio_14_i, output logic gpio_14_oe_o, output logic gpio_15_o, - input logic gpio_15_i, + input logic gpio_15_i, output logic gpio_15_oe_o, output logic gpio_16_o, - input logic gpio_16_i, + input logic gpio_16_i, output logic gpio_16_oe_o, output logic gpio_17_o, - input logic gpio_17_i, + input logic gpio_17_i, output logic gpio_17_oe_o, output logic spi_flash_sck_o, - input logic spi_flash_sck_i, + input logic spi_flash_sck_i, output logic spi_flash_sck_oe_o, output logic spi_flash_cs_0_o, - input logic spi_flash_cs_0_i, + input logic spi_flash_cs_0_i, output logic spi_flash_cs_0_oe_o, output logic spi_flash_cs_1_o, - input logic spi_flash_cs_1_i, + input logic spi_flash_cs_1_i, output logic spi_flash_cs_1_oe_o, output logic spi_flash_sd_0_o, - input logic spi_flash_sd_0_i, + input logic spi_flash_sd_0_i, output logic spi_flash_sd_0_oe_o, output logic spi_flash_sd_1_o, - input logic spi_flash_sd_1_i, + input logic spi_flash_sd_1_i, output logic spi_flash_sd_1_oe_o, output logic spi_flash_sd_2_o, - input logic spi_flash_sd_2_i, + input logic spi_flash_sd_2_i, output logic spi_flash_sd_2_oe_o, output logic spi_flash_sd_3_o, - input logic spi_flash_sd_3_i, + input logic spi_flash_sd_3_i, output logic spi_flash_sd_3_oe_o, output logic spi_sck_o, - input logic spi_sck_i, + input logic spi_sck_i, output logic spi_sck_oe_o, output logic spi_cs_0_o, - input logic spi_cs_0_i, + input logic spi_cs_0_i, output logic spi_cs_0_oe_o, 
output logic spi_cs_1_o, - input logic spi_cs_1_i, + input logic spi_cs_1_i, output logic spi_cs_1_oe_o, output logic spi_sd_0_o, - input logic spi_sd_0_i, + input logic spi_sd_0_i, output logic spi_sd_0_oe_o, output logic spi_sd_1_o, - input logic spi_sd_1_i, + input logic spi_sd_1_i, output logic spi_sd_1_oe_o, output logic spi_sd_2_o, - input logic spi_sd_2_i, + input logic spi_sd_2_i, output logic spi_sd_2_oe_o, output logic spi_sd_3_o, - input logic spi_sd_3_i, + input logic spi_sd_3_i, output logic spi_sd_3_oe_o, output logic pdm2pcm_pdm_o, - input logic pdm2pcm_pdm_i, + input logic pdm2pcm_pdm_i, output logic pdm2pcm_pdm_oe_o, output logic gpio_18_o, - input logic gpio_18_i, + input logic gpio_18_i, output logic gpio_18_oe_o, output logic pdm2pcm_clk_o, - input logic pdm2pcm_clk_i, + input logic pdm2pcm_clk_i, output logic pdm2pcm_clk_oe_o, output logic gpio_19_o, - input logic gpio_19_i, + input logic gpio_19_i, output logic gpio_19_oe_o, output logic i2s_sck_o, - input logic i2s_sck_i, + input logic i2s_sck_i, output logic i2s_sck_oe_o, output logic gpio_20_o, - input logic gpio_20_i, + input logic gpio_20_i, output logic gpio_20_oe_o, output logic i2s_ws_o, - input logic i2s_ws_i, + input logic i2s_ws_i, output logic i2s_ws_oe_o, output logic gpio_21_o, - input logic gpio_21_i, + input logic gpio_21_i, output logic gpio_21_oe_o, output logic i2s_sd_o, - input logic i2s_sd_i, + input logic i2s_sd_i, output logic i2s_sd_oe_o, output logic gpio_22_o, - input logic gpio_22_i, + input logic gpio_22_i, output logic gpio_22_oe_o, output logic spi2_cs_0_o, - input logic spi2_cs_0_i, + input logic spi2_cs_0_i, output logic spi2_cs_0_oe_o, output logic gpio_23_o, - input logic gpio_23_i, + input logic gpio_23_i, output logic gpio_23_oe_o, output logic spi2_cs_1_o, - input logic spi2_cs_1_i, + input logic spi2_cs_1_i, output logic spi2_cs_1_oe_o, output logic gpio_24_o, - input logic gpio_24_i, + input logic gpio_24_i, output logic gpio_24_oe_o, output logic 
spi2_sck_o, - input logic spi2_sck_i, + input logic spi2_sck_i, output logic spi2_sck_oe_o, output logic gpio_25_o, - input logic gpio_25_i, + input logic gpio_25_i, output logic gpio_25_oe_o, output logic spi2_sd_0_o, - input logic spi2_sd_0_i, + input logic spi2_sd_0_i, output logic spi2_sd_0_oe_o, output logic gpio_26_o, - input logic gpio_26_i, + input logic gpio_26_i, output logic gpio_26_oe_o, output logic spi2_sd_1_o, - input logic spi2_sd_1_i, + input logic spi2_sd_1_i, output logic spi2_sd_1_oe_o, output logic gpio_27_o, - input logic gpio_27_i, + input logic gpio_27_i, output logic gpio_27_oe_o, output logic spi2_sd_2_o, - input logic spi2_sd_2_i, + input logic spi2_sd_2_i, output logic spi2_sd_2_oe_o, output logic gpio_28_o, - input logic gpio_28_i, + input logic gpio_28_i, output logic gpio_28_oe_o, output logic spi2_sd_3_o, - input logic spi2_sd_3_i, + input logic spi2_sd_3_i, output logic spi2_sd_3_oe_o, output logic gpio_29_o, - input logic gpio_29_i, + input logic gpio_29_i, output logic gpio_29_oe_o, output logic i2c_scl_o, - input logic i2c_scl_i, + input logic i2c_scl_i, output logic i2c_scl_oe_o, output logic gpio_31_o, - input logic gpio_31_i, + input logic gpio_31_i, output logic gpio_31_oe_o, output logic i2c_sda_o, - input logic i2c_sda_i, + input logic i2c_sda_i, output logic i2c_sda_oe_o, output logic gpio_30_o, - input logic gpio_30_i, + input logic gpio_30_i, output logic gpio_30_oe_o, @@ -303,13 +303,13 @@ module core_v_mini_mcu input logic [NEXT_INT_RND-1:0] intr_vector_ext_i, output logic cpu_subsystem_powergate_switch_no, - input logic cpu_subsystem_powergate_switch_ack_ni, + input logic cpu_subsystem_powergate_switch_ack_ni, output logic peripheral_subsystem_powergate_switch_no, - input logic peripheral_subsystem_powergate_switch_ack_ni, + input logic peripheral_subsystem_powergate_switch_ack_ni, output logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_powergate_switch_no, input logic 
[core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_powergate_switch_ack_ni, output logic [EXT_DOMAINS_RND-1:0] external_subsystem_powergate_switch_no, - input logic [EXT_DOMAINS_RND-1:0] external_subsystem_powergate_switch_ack_ni, + input logic [EXT_DOMAINS_RND-1:0] external_subsystem_powergate_switch_ack_ni, output logic [EXT_DOMAINS_RND-1:0] external_subsystem_powergate_iso_no, output logic [EXT_DOMAINS_RND-1:0] external_subsystem_rst_no, output logic [EXT_DOMAINS_RND-1:0] external_ram_banks_set_retentive_no, @@ -399,9 +399,9 @@ module core_v_mini_mcu logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_powergate_iso_n; logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_set_retentive_n; - // Clock gating signals - logic peripheral_subsystem_clkgate_en_n; - logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_clkgate_en_n; + // Clock gating signals + logic peripheral_subsystem_clkgate_en_n; + logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0]memory_subsystem_clkgate_en_n; // DMA logic dma_done_intr; @@ -564,20 +564,18 @@ module core_v_mini_mcu .spimemio_resp_o(flash_mem_slave_resp), .spi_flash_sck_o, .spi_flash_sck_en_o(spi_flash_sck_oe_o), - .spi_flash_csb_o({spi_flash_cs_1_o, spi_flash_cs_0_o}), + .spi_flash_csb_o({spi_flash_cs_1_o,spi_flash_cs_0_o}), .spi_flash_csb_en_o({spi_flash_cs_1_oe_o, spi_flash_cs_0_oe_o}), - .spi_flash_sd_o({spi_flash_sd_3_o, spi_flash_sd_2_o, spi_flash_sd_1_o, spi_flash_sd_0_o}), - .spi_flash_sd_en_o({ - spi_flash_sd_3_oe_o, spi_flash_sd_2_oe_o, spi_flash_sd_1_oe_o, spi_flash_sd_0_oe_o - }), - .spi_flash_sd_i({spi_flash_sd_3_i, spi_flash_sd_2_i, spi_flash_sd_1_i, spi_flash_sd_0_i}), + .spi_flash_sd_o({spi_flash_sd_3_o,spi_flash_sd_2_o, spi_flash_sd_1_o, spi_flash_sd_0_o}), + .spi_flash_sd_en_o({spi_flash_sd_3_oe_o,spi_flash_sd_2_oe_o, spi_flash_sd_1_oe_o, spi_flash_sd_0_oe_o}), + .spi_flash_sd_i({spi_flash_sd_3_i,spi_flash_sd_2_i, spi_flash_sd_1_i, spi_flash_sd_0_i}), .spi_sck_o, 
.spi_sck_en_o(spi_sck_oe_o), - .spi_csb_o({spi_cs_1_o, spi_cs_0_o}), + .spi_csb_o({spi_cs_1_o,spi_cs_0_o}), .spi_csb_en_o({spi_cs_1_oe_o, spi_cs_0_oe_o}), - .spi_sd_o({spi_sd_3_o, spi_sd_2_o, spi_sd_1_o, spi_sd_0_o}), - .spi_sd_en_o({spi_sd_3_oe_o, spi_sd_2_oe_o, spi_sd_1_oe_o, spi_sd_0_oe_o}), - .spi_sd_i({spi_sd_3_i, spi_sd_2_i, spi_sd_1_i, spi_sd_0_i}), + .spi_sd_o({spi_sd_3_o,spi_sd_2_o, spi_sd_1_o, spi_sd_0_o}), + .spi_sd_en_o({spi_sd_3_oe_o,spi_sd_2_oe_o, spi_sd_1_oe_o, spi_sd_0_oe_o}), + .spi_sd_i({spi_sd_3_i,spi_sd_2_i, spi_sd_1_i, spi_sd_0_i}), .intr_i(intr), .intr_vector_ext_i, .core_sleep_i(core_sleep), @@ -689,104 +687,104 @@ module core_v_mini_mcu .i2s_rx_valid_o(i2s_rx_valid) ); - assign pdm2pcm_pdm_o = 0; + assign pdm2pcm_pdm_o = 0; assign pdm2pcm_pdm_oe_o = 0; - assign gpio_ao_in[0] = gpio_0_i; - assign gpio_0_o = gpio_ao_out[0]; - assign gpio_0_oe_o = gpio_ao_oe[0]; - assign gpio_ao_in[1] = gpio_1_i; - assign gpio_1_o = gpio_ao_out[1]; - assign gpio_1_oe_o = gpio_ao_oe[1]; - assign gpio_ao_in[2] = gpio_2_i; - assign gpio_2_o = gpio_ao_out[2]; - assign gpio_2_oe_o = gpio_ao_oe[2]; - assign gpio_ao_in[3] = gpio_3_i; - assign gpio_3_o = gpio_ao_out[3]; - assign gpio_3_oe_o = gpio_ao_oe[3]; - assign gpio_ao_in[4] = gpio_4_i; - assign gpio_4_o = gpio_ao_out[4]; - assign gpio_4_oe_o = gpio_ao_oe[4]; - assign gpio_ao_in[5] = gpio_5_i; - assign gpio_5_o = gpio_ao_out[5]; - assign gpio_5_oe_o = gpio_ao_oe[5]; - assign gpio_ao_in[6] = gpio_6_i; - assign gpio_6_o = gpio_ao_out[6]; - assign gpio_6_oe_o = gpio_ao_oe[6]; - assign gpio_ao_in[7] = gpio_7_i; - assign gpio_7_o = gpio_ao_out[7]; - assign gpio_7_oe_o = gpio_ao_oe[7]; - assign gpio_in[8] = gpio_8_i; - assign gpio_8_o = gpio_out[8]; - assign gpio_8_oe_o = gpio_oe[8]; - assign gpio_in[9] = gpio_9_i; - assign gpio_9_o = gpio_out[9]; - assign gpio_9_oe_o = gpio_oe[9]; - assign gpio_in[10] = gpio_10_i; - assign gpio_10_o = gpio_out[10]; - assign gpio_10_oe_o = gpio_oe[10]; - assign gpio_in[11] = gpio_11_i; 
- assign gpio_11_o = gpio_out[11]; - assign gpio_11_oe_o = gpio_oe[11]; - assign gpio_in[12] = gpio_12_i; - assign gpio_12_o = gpio_out[12]; - assign gpio_12_oe_o = gpio_oe[12]; - assign gpio_in[13] = gpio_13_i; - assign gpio_13_o = gpio_out[13]; - assign gpio_13_oe_o = gpio_oe[13]; - assign gpio_in[14] = gpio_14_i; - assign gpio_14_o = gpio_out[14]; - assign gpio_14_oe_o = gpio_oe[14]; - assign gpio_in[15] = gpio_15_i; - assign gpio_15_o = gpio_out[15]; - assign gpio_15_oe_o = gpio_oe[15]; - assign gpio_in[16] = gpio_16_i; - assign gpio_16_o = gpio_out[16]; - assign gpio_16_oe_o = gpio_oe[16]; - assign gpio_in[17] = gpio_17_i; - assign gpio_17_o = gpio_out[17]; - assign gpio_17_oe_o = gpio_oe[17]; - assign gpio_in[18] = gpio_18_i; - assign gpio_18_o = gpio_out[18]; - assign gpio_18_oe_o = gpio_oe[18]; - assign gpio_in[19] = gpio_19_i; - assign gpio_19_o = gpio_out[19]; - assign gpio_19_oe_o = gpio_oe[19]; - assign gpio_in[20] = gpio_20_i; - assign gpio_20_o = gpio_out[20]; - assign gpio_20_oe_o = gpio_oe[20]; - assign gpio_in[21] = gpio_21_i; - assign gpio_21_o = gpio_out[21]; - assign gpio_21_oe_o = gpio_oe[21]; - assign gpio_in[22] = gpio_22_i; - assign gpio_22_o = gpio_out[22]; - assign gpio_22_oe_o = gpio_oe[22]; - assign gpio_in[23] = gpio_23_i; - assign gpio_23_o = gpio_out[23]; - assign gpio_23_oe_o = gpio_oe[23]; - assign gpio_in[24] = gpio_24_i; - assign gpio_24_o = gpio_out[24]; - assign gpio_24_oe_o = gpio_oe[24]; - assign gpio_in[25] = gpio_25_i; - assign gpio_25_o = gpio_out[25]; - assign gpio_25_oe_o = gpio_oe[25]; - assign gpio_in[26] = gpio_26_i; - assign gpio_26_o = gpio_out[26]; - assign gpio_26_oe_o = gpio_oe[26]; - assign gpio_in[27] = gpio_27_i; - assign gpio_27_o = gpio_out[27]; - assign gpio_27_oe_o = gpio_oe[27]; - assign gpio_in[28] = gpio_28_i; - assign gpio_28_o = gpio_out[28]; - assign gpio_28_oe_o = gpio_oe[28]; - assign gpio_in[29] = gpio_29_i; - assign gpio_29_o = gpio_out[29]; - assign gpio_29_oe_o = gpio_oe[29]; - assign 
gpio_in[30] = gpio_30_i; - assign gpio_30_o = gpio_out[30]; - assign gpio_30_oe_o = gpio_oe[30]; - assign gpio_in[31] = gpio_31_i; - assign gpio_31_o = gpio_out[31]; - assign gpio_31_oe_o = gpio_oe[31]; + assign gpio_ao_in[0] = gpio_0_i; + assign gpio_0_o = gpio_ao_out[0]; + assign gpio_0_oe_o = gpio_ao_oe[0]; + assign gpio_ao_in[1] = gpio_1_i; + assign gpio_1_o = gpio_ao_out[1]; + assign gpio_1_oe_o = gpio_ao_oe[1]; + assign gpio_ao_in[2] = gpio_2_i; + assign gpio_2_o = gpio_ao_out[2]; + assign gpio_2_oe_o = gpio_ao_oe[2]; + assign gpio_ao_in[3] = gpio_3_i; + assign gpio_3_o = gpio_ao_out[3]; + assign gpio_3_oe_o = gpio_ao_oe[3]; + assign gpio_ao_in[4] = gpio_4_i; + assign gpio_4_o = gpio_ao_out[4]; + assign gpio_4_oe_o = gpio_ao_oe[4]; + assign gpio_ao_in[5] = gpio_5_i; + assign gpio_5_o = gpio_ao_out[5]; + assign gpio_5_oe_o = gpio_ao_oe[5]; + assign gpio_ao_in[6] = gpio_6_i; + assign gpio_6_o = gpio_ao_out[6]; + assign gpio_6_oe_o = gpio_ao_oe[6]; + assign gpio_ao_in[7] = gpio_7_i; + assign gpio_7_o = gpio_ao_out[7]; + assign gpio_7_oe_o = gpio_ao_oe[7]; + assign gpio_in[8] = gpio_8_i; + assign gpio_8_o = gpio_out[8]; + assign gpio_8_oe_o = gpio_oe[8]; + assign gpio_in[9] = gpio_9_i; + assign gpio_9_o = gpio_out[9]; + assign gpio_9_oe_o = gpio_oe[9]; + assign gpio_in[10] = gpio_10_i; + assign gpio_10_o = gpio_out[10]; + assign gpio_10_oe_o = gpio_oe[10]; + assign gpio_in[11] = gpio_11_i; + assign gpio_11_o = gpio_out[11]; + assign gpio_11_oe_o = gpio_oe[11]; + assign gpio_in[12] = gpio_12_i; + assign gpio_12_o = gpio_out[12]; + assign gpio_12_oe_o = gpio_oe[12]; + assign gpio_in[13] = gpio_13_i; + assign gpio_13_o = gpio_out[13]; + assign gpio_13_oe_o = gpio_oe[13]; + assign gpio_in[14] = gpio_14_i; + assign gpio_14_o = gpio_out[14]; + assign gpio_14_oe_o = gpio_oe[14]; + assign gpio_in[15] = gpio_15_i; + assign gpio_15_o = gpio_out[15]; + assign gpio_15_oe_o = gpio_oe[15]; + assign gpio_in[16] = gpio_16_i; + assign gpio_16_o = gpio_out[16]; + assign 
gpio_16_oe_o = gpio_oe[16]; + assign gpio_in[17] = gpio_17_i; + assign gpio_17_o = gpio_out[17]; + assign gpio_17_oe_o = gpio_oe[17]; + assign gpio_in[18] = gpio_18_i; + assign gpio_18_o = gpio_out[18]; + assign gpio_18_oe_o = gpio_oe[18]; + assign gpio_in[19] = gpio_19_i; + assign gpio_19_o = gpio_out[19]; + assign gpio_19_oe_o = gpio_oe[19]; + assign gpio_in[20] = gpio_20_i; + assign gpio_20_o = gpio_out[20]; + assign gpio_20_oe_o = gpio_oe[20]; + assign gpio_in[21] = gpio_21_i; + assign gpio_21_o = gpio_out[21]; + assign gpio_21_oe_o = gpio_oe[21]; + assign gpio_in[22] = gpio_22_i; + assign gpio_22_o = gpio_out[22]; + assign gpio_22_oe_o = gpio_oe[22]; + assign gpio_in[23] = gpio_23_i; + assign gpio_23_o = gpio_out[23]; + assign gpio_23_oe_o = gpio_oe[23]; + assign gpio_in[24] = gpio_24_i; + assign gpio_24_o = gpio_out[24]; + assign gpio_24_oe_o = gpio_oe[24]; + assign gpio_in[25] = gpio_25_i; + assign gpio_25_o = gpio_out[25]; + assign gpio_25_oe_o = gpio_oe[25]; + assign gpio_in[26] = gpio_26_i; + assign gpio_26_o = gpio_out[26]; + assign gpio_26_oe_o = gpio_oe[26]; + assign gpio_in[27] = gpio_27_i; + assign gpio_27_o = gpio_out[27]; + assign gpio_27_oe_o = gpio_oe[27]; + assign gpio_in[28] = gpio_28_i; + assign gpio_28_o = gpio_out[28]; + assign gpio_28_oe_o = gpio_oe[28]; + assign gpio_in[29] = gpio_29_i; + assign gpio_29_o = gpio_out[29]; + assign gpio_29_oe_o = gpio_oe[29]; + assign gpio_in[30] = gpio_30_i; + assign gpio_30_o = gpio_out[30]; + assign gpio_30_oe_o = gpio_oe[30]; + assign gpio_in[31] = gpio_31_i; + assign gpio_31_o = gpio_out[31]; + assign gpio_31_oe_o = gpio_oe[31]; endmodule // core_v_mini_mcu diff --git a/hw/core-v-mini-mcu/peripheral_subsystem.sv b/hw/core-v-mini-mcu/peripheral_subsystem.sv index 5c28e195d..625c91045 100644 --- a/hw/core-v-mini-mcu/peripheral_subsystem.sv +++ b/hw/core-v-mini-mcu/peripheral_subsystem.sv @@ -7,7 +7,7 @@ module peripheral_subsystem import reg_pkg::*; #( //do not touch these parameters - parameter 
NEXT_INT_RND = core_v_mini_mcu_pkg::NEXT_INT == 0 ? 1 : core_v_mini_mcu_pkg::NEXT_INT + parameter NEXT_INT_RND = core_v_mini_mcu_pkg::NEXT_INT == 0 ? 1 : core_v_mini_mcu_pkg::NEXT_INT ) ( input logic clk_i, input logic rst_ni, @@ -20,8 +20,8 @@ module peripheral_subsystem //PLIC input logic [NEXT_INT_RND-1:0] intr_vector_ext_i, - output logic irq_plic_o, - output logic msip_o, + output logic irq_plic_o, + output logic msip_o, //UART PLIC interrupts input logic uart_intr_tx_watermark_i, @@ -160,7 +160,7 @@ module peripheral_subsystem assign intr_vector[48] = i2c_intr_host_timeout; assign intr_vector[49] = spi2_intr_event; assign intr_vector[50] = i2s_intr_event; - assign intr_vector[51] = dma_window_intr_i; + assign intr_vector[51] = dma_window_intr_i; // External interrupts assignement for (genvar i = 0; i < NEXT_INT; i++) begin @@ -190,18 +190,18 @@ module peripheral_subsystem `else - obi_pkg::obi_req_t slave_fifoin_req; + obi_pkg::obi_req_t slave_fifoin_req; obi_pkg::obi_resp_t slave_fifoin_resp; - obi_pkg::obi_req_t slave_fifoout_req; + obi_pkg::obi_req_t slave_fifoout_req; obi_pkg::obi_resp_t slave_fifoout_resp; obi_fifo obi_fifo_i ( .clk_i(clk_cg), .rst_ni, - .producer_req_i(slave_fifoin_req), + .producer_req_i (slave_fifoin_req), .producer_resp_o(slave_fifoin_resp), - .consumer_req_o(slave_fifoout_req), + .consumer_req_o (slave_fifoout_req), .consumer_resp_i(slave_fifoout_resp) ); diff --git a/riscv-gnu-toolchain b/riscv-gnu-toolchain new file mode 160000 index 000000000..f640044a9 --- /dev/null +++ b/riscv-gnu-toolchain @@ -0,0 +1 @@ +Subproject commit f640044a947afb39c78b96fa1ba1db8aa31b1d89 diff --git a/run.sh b/run.sh new file mode 100755 index 000000000..05784b4a2 --- /dev/null +++ b/run.sh @@ -0,0 +1,4 @@ +conda activate core-v-mini-mcu +export RISCV=/home/linus/tools/riscv +make app PROJECT=app +make run-helloworld \ No newline at end of file diff --git a/sw/applications/app/main.c b/sw/applications/app/main.c new file mode 100644 index 
000000000..d4c58adf0 --- /dev/null +++ b/sw/applications/app/main.c @@ -0,0 +1,28 @@ +/* + * Copyright 2020 ETH Zurich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Author: Robert Balas + */ + +#include +#include + +int main(int argc, char *argv[]) +{ + /* write something to stdout */ + printf("Linus custom app\n"); + return EXIT_SUCCESS; +} + diff --git a/verilator b/verilator new file mode 160000 index 000000000..8e2ba6a00 --- /dev/null +++ b/verilator @@ -0,0 +1 @@ +Subproject commit 8e2ba6a00382075387b32fbbf8f5f85fec482d9a From 8fc07c48b9a8df5d296ef81b1314874e793fa383 Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Wed, 17 Apr 2024 11:14:10 +0200 Subject: [PATCH 02/27] add custom assertion and run first test --- sw/applications/app/fxp32.c | 48 ++++++++++++++++++++++ sw/applications/app/fxp32.h | 24 +++++++++++ sw/applications/app/main.c | 79 ++++++++++++++++++++++++++----------- 3 files changed, 127 insertions(+), 24 deletions(-) create mode 100644 sw/applications/app/fxp32.c create mode 100644 sw/applications/app/fxp32.h diff --git a/sw/applications/app/fxp32.c b/sw/applications/app/fxp32.c new file mode 100644 index 000000000..20f899928 --- /dev/null +++ b/sw/applications/app/fxp32.c @@ -0,0 +1,48 @@ +#include "fxp32.h" + +fxp32 fxp32_fromFloat(float f) { + return (fxp32)(f * (1 << FRACTIONAL_BITS)); +} + +float fxp32_toFloat(fxp32 x) { + return (float)x / (1 << FRACTIONAL_BITS); +} + +fxp32 fxp32_fromInt(int i) { + return i << 
FRACTIONAL_BITS; +} + +fxp32 fxp32_mul(fxp32 a, fxp32 b) { + return (fxp32)(((fxpMul)a * (fxpMul)b) >> FRACTIONAL_BITS); +} + +fxpMul fxp32_mul64(fxp32 a, fxp32 b) { + return ((fxpMul)a * (fxpMul)b) >> FRACTIONAL_BITS; +} + +fxp32 fxp32_div(fxp32 a, fxp32 b) { + return (fxp32)(((fxpMul)a << FRACTIONAL_BITS) / b); +} + +bool fxp32_closefxp(fxp32 a, fxp32 b, fxp32 prec) { + fxp32 diff = a - b; + if (diff < 0) + diff = -diff; + return diff <= prec; +} + +fxp32 fxp32_sqrt(fxp32 a) { + fxp32 guess = a >> 1; + fxp32 eps = fxp32_fromFloat(0.0001); + while (fxp32_closefxp(guess, fxp32_div(a, guess), eps) == false) { + guess = (guess + fxp32_div(a, guess)) >> 1; + } + return guess; +} + +bool fxp32_close(fxp32 a, fxp32 b, float prec) { + fxp32 diff = a - b; + if (diff < 0) + diff = -diff; + return diff <= fxp32_fromFloat(prec); +} diff --git a/sw/applications/app/fxp32.h b/sw/applications/app/fxp32.h new file mode 100644 index 000000000..169a287b8 --- /dev/null +++ b/sw/applications/app/fxp32.h @@ -0,0 +1,24 @@ +#ifndef FXP_H +#define FXP_H + +#include +#include + +#define FRACTIONAL_BITS 23 +typedef int32_t fxp32; +typedef int64_t fxpMul; + +fxp32 fxp32_fromFloat(float f); +float fxp32_toFloat(fxp32 x); +fxp32 fxp32_fromInt(int i); + +fxp32 fxp32_mul(fxp32 a, fxp32 b); +fxpMul fxp32_mul64(fxp32 a, fxp32 b); + +fxp32 fxp32_div(fxp32 a, fxp32 b); + +fxp32 fxp32_sqrt(fxp32 a); + +bool fxp32_close(fxp32 a, fxp32 b, float prec); + +#endif // FXP_H \ No newline at end of file diff --git a/sw/applications/app/main.c b/sw/applications/app/main.c index d4c58adf0..e38fa9923 100644 --- a/sw/applications/app/main.c +++ b/sw/applications/app/main.c @@ -1,28 +1,59 @@ -/* - * Copyright 2020 ETH Zurich - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Author: Robert Balas - */ - -#include +#include "fxp32.h" +#include #include +#include +#include +#include -int main(int argc, char *argv[]) -{ - /* write something to stdout */ - printf("Linus custom app\n"); - return EXIT_SUCCESS; +#define COMP_PREC 0.000001f + +#define my_assert(condition) \ + do { \ + if (!(condition)) { \ + fprintf(stderr, "Assertion failed: %s\n"); \ + fprintf(stderr, "File: %s\n", __FILE__); \ + fprintf(stderr, "Line: %d\n", __LINE__); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +void test_fxp_basic() { + float f1 = 3.14159286537; + float f2 = 2.71828723519; + + // Convert floats to fxp32 + fxp32 x = fxp32_fromFloat(f1); + fxp32 y = fxp32_fromFloat(f2); + + // Perform addition and multiplication + fxp32 result_add = x + y; + fxp32 result_multiply = fxp32_mul(x, y); + fxp32 result_divide = fxp32_div(x, y); + + // Convert fxp32 back to floats + float result_add_float = fxp32_toFloat(result_add); + float result_multiply_float = fxp32_toFloat(result_multiply); + float result_divide_float = fxp32_toFloat(result_divide); + + // Print results + my_assert(fxp32_close(result_add_float, f1 + f2, COMP_PREC)); + my_assert(fxp32_close(result_multiply_float, f1 * f2, COMP_PREC)); + my_assert(fxp32_close(result_divide_float, f1 / f2, COMP_PREC)); } +void test_fxp_sqrt() { + fxp32 x = fxp32_fromFloat(2.0); + fxp32 result = fxp32_sqrt(x); + float result_float = fxp32_toFloat(result); + my_assert(fxp32_close(result_float, 1.4142135f, COMP_PREC)); +} + +int main() { + printf("\033[1;93m====== Test FXP =========\n"); + 
printf("\033[0m====== Test Basic ========\n"); + test_fxp_basic(); + test_fxp_sqrt(); + printf("\033[1;32m====== Test passed =======\n"); + printf("\033[0m====== Test Basic end ====\n\n"); + return EXIT_SUCCESS; +} From aff32755f2cd385ad1c8e3360febb11d89a5961d Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Wed, 17 Apr 2024 11:17:38 +0200 Subject: [PATCH 03/27] typo --- sw/applications/app/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sw/applications/app/main.c b/sw/applications/app/main.c index e38fa9923..43dcd8728 100644 --- a/sw/applications/app/main.c +++ b/sw/applications/app/main.c @@ -49,7 +49,7 @@ void test_fxp_sqrt() { } int main() { - printf("\033[1;93m====== Test FXP =========\n"); + printf("\033[1;93m====== Test FXP ==========\n"); printf("\033[0m====== Test Basic ========\n"); test_fxp_basic(); test_fxp_sqrt(); From 79756d0ed31c6d338850c35974183a85fce1147a Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Mon, 22 Apr 2024 15:10:16 +0200 Subject: [PATCH 04/27] add tests --- run.sh | 8 +- sw/applications/app/main.c | 59 --------- sw/applications/l_cnn/cnn.c | 36 ++++++ sw/applications/l_cnn/cnn.h | 27 ++++ sw/applications/l_cnn/conv2dlayer.c | 158 ++++++++++++++++++++++++ sw/applications/l_cnn/conv2dlayer.h | 42 +++++++ sw/applications/{app => l_cnn}/fxp32.c | 0 sw/applications/{app => l_cnn}/fxp32.h | 0 sw/applications/l_cnn/main.c | 131 ++++++++++++++++++++ sw/applications/l_cnn/test_cnnWeights.h | 92 ++++++++++++++ sw/applications/l_cnn/utils.h | 57 +++++++++ sw/applications/l_data/datahelper.c | 142 +++++++++++++++++++++ sw/applications/l_data/datahelper.h | 36 ++++++ sw/applications/l_data/fxp32.c | 48 +++++++ sw/applications/l_data/fxp32.h | 24 ++++ sw/applications/l_data/main.c | 71 +++++++++++ sw/applications/l_data/utils.h | 37 ++++++ sw/applications/l_fxp/fxp32.c | 57 +++++++++ sw/applications/l_fxp/fxp32.h | 26 ++++ sw/applications/l_fxp/main.c | 65 ++++++++++ sw/applications/l_fxp/utils.h | 37 ++++++ 21 files 
changed, 1092 insertions(+), 61 deletions(-) delete mode 100644 sw/applications/app/main.c create mode 100644 sw/applications/l_cnn/cnn.c create mode 100644 sw/applications/l_cnn/cnn.h create mode 100644 sw/applications/l_cnn/conv2dlayer.c create mode 100644 sw/applications/l_cnn/conv2dlayer.h rename sw/applications/{app => l_cnn}/fxp32.c (100%) rename sw/applications/{app => l_cnn}/fxp32.h (100%) create mode 100644 sw/applications/l_cnn/main.c create mode 100644 sw/applications/l_cnn/test_cnnWeights.h create mode 100644 sw/applications/l_cnn/utils.h create mode 100644 sw/applications/l_data/datahelper.c create mode 100644 sw/applications/l_data/datahelper.h create mode 100644 sw/applications/l_data/fxp32.c create mode 100644 sw/applications/l_data/fxp32.h create mode 100644 sw/applications/l_data/main.c create mode 100644 sw/applications/l_data/utils.h create mode 100644 sw/applications/l_fxp/fxp32.c create mode 100644 sw/applications/l_fxp/fxp32.h create mode 100644 sw/applications/l_fxp/main.c create mode 100644 sw/applications/l_fxp/utils.h diff --git a/run.sh b/run.sh index 05784b4a2..85138f214 100755 --- a/run.sh +++ b/run.sh @@ -1,4 +1,8 @@ +#!/bin/zsh + conda activate core-v-mini-mcu -export RISCV=/home/linus/tools/riscv -make app PROJECT=app +export RISCV=/home/linus/tools/riscv +export VERILATOR_VERSION=4.210 +export PATH=/home/$USER/tools/verilator/$VERILATOR_VERSION/bin:$PATH +make app PROJECT=l_cnn make run-helloworld \ No newline at end of file diff --git a/sw/applications/app/main.c b/sw/applications/app/main.c deleted file mode 100644 index 43dcd8728..000000000 --- a/sw/applications/app/main.c +++ /dev/null @@ -1,59 +0,0 @@ -#include "fxp32.h" -#include -#include -#include -#include -#include - -#define COMP_PREC 0.000001f - -#define my_assert(condition) \ - do { \ - if (!(condition)) { \ - fprintf(stderr, "Assertion failed: %s\n"); \ - fprintf(stderr, "File: %s\n", __FILE__); \ - fprintf(stderr, "Line: %d\n", __LINE__); \ - exit(EXIT_FAILURE); \ - 
} \ - } while (0) - -void test_fxp_basic() { - float f1 = 3.14159286537; - float f2 = 2.71828723519; - - // Convert floats to fxp32 - fxp32 x = fxp32_fromFloat(f1); - fxp32 y = fxp32_fromFloat(f2); - - // Perform addition and multiplication - fxp32 result_add = x + y; - fxp32 result_multiply = fxp32_mul(x, y); - fxp32 result_divide = fxp32_div(x, y); - - // Convert fxp32 back to floats - float result_add_float = fxp32_toFloat(result_add); - float result_multiply_float = fxp32_toFloat(result_multiply); - float result_divide_float = fxp32_toFloat(result_divide); - - // Print results - my_assert(fxp32_close(result_add_float, f1 + f2, COMP_PREC)); - my_assert(fxp32_close(result_multiply_float, f1 * f2, COMP_PREC)); - my_assert(fxp32_close(result_divide_float, f1 / f2, COMP_PREC)); -} - -void test_fxp_sqrt() { - fxp32 x = fxp32_fromFloat(2.0); - fxp32 result = fxp32_sqrt(x); - float result_float = fxp32_toFloat(result); - my_assert(fxp32_close(result_float, 1.4142135f, COMP_PREC)); -} - -int main() { - printf("\033[1;93m====== Test FXP ==========\n"); - printf("\033[0m====== Test Basic ========\n"); - test_fxp_basic(); - test_fxp_sqrt(); - printf("\033[1;32m====== Test passed =======\n"); - printf("\033[0m====== Test Basic end ====\n\n"); - return EXIT_SUCCESS; -} diff --git a/sw/applications/l_cnn/cnn.c b/sw/applications/l_cnn/cnn.c new file mode 100644 index 000000000..70654b819 --- /dev/null +++ b/sw/applications/l_cnn/cnn.c @@ -0,0 +1,36 @@ +#include "cnn.h" + +#include + +CnnHandle Cnn_create(Dim2D inputDim, Dim2D layer1Dim, Dim2D layer2Dim, Conv2DPadding layer1Pad, + Conv2DPadding layer2Pad) { + CnnHandle self = (CnnHandle)malloc(sizeof(Cnn)); + // Create layers + self->layer1 = Conv2DLayer_create(layer1Dim, layer1Pad); + self->layer2 = Conv2DLayer_create(layer2Dim, layer2Pad); + + self->inputDim = inputDim; + // TODO: calculate + self->outputDim = (Dim2D){1u, 256u}; + return self; +} + +void Cnn_destroy(CnnHandle self) { + Conv2DLayer_destroy(self->layer1); + 
Conv2DLayer_destroy(self->layer2); + free(self); +} + +void Cnn_forward(CnnHandle self, fxp32* input, fxp32* output) { + fxp32* layer1Output = (fxp32*)calloc(self->inputDim.x * self->inputDim.y, sizeof(fxp32)); + Conv2DLayer_forwardFxp(self->layer1, self->inputDim, input, layer1Output); + Conv2DLayer_forwardFxp(self->layer2, self->inputDim, layer1Output, output); + free(layer1Output); +} + +void Cnn_predict(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output) { + Cnn_forward(self, acc, output); + for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { + output[i] = ppg[i] - output[i]; + } +} \ No newline at end of file diff --git a/sw/applications/l_cnn/cnn.h b/sw/applications/l_cnn/cnn.h new file mode 100644 index 000000000..b127f42e7 --- /dev/null +++ b/sw/applications/l_cnn/cnn.h @@ -0,0 +1,27 @@ +#ifndef CNN_H +#define CNN_H + +#include "conv2dlayer.h" +#include "fxp32.h" + +/** + * @brief a two layer cnn model + */ +typedef struct __Cnn { + Conv2DLayerHandle layer1; + Conv2DLayerHandle layer2; + Dim2D inputDim; + Dim2D outputDim; +} Cnn; + +typedef struct __Cnn* CnnHandle; + +CnnHandle Cnn_create(Dim2D inputDim, Dim2D layer1Dim, Dim2D layer2Dim, Conv2DPadding layer1Pad, + Conv2DPadding layer2Pad); +void Cnn_destroy(CnnHandle self); + +void Cnn_forward(CnnHandle self, fxp32* input, fxp32* output); + +void Cnn_predict(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output); + +#endif // CNN_H \ No newline at end of file diff --git a/sw/applications/l_cnn/conv2dlayer.c b/sw/applications/l_cnn/conv2dlayer.c new file mode 100644 index 000000000..48b37b31a --- /dev/null +++ b/sw/applications/l_cnn/conv2dlayer.c @@ -0,0 +1,158 @@ +#include "conv2dlayer.h" + +#include +#include +#include + +#define my_assert(condition) \ + do { \ + if (!(condition)) { \ + printf("Assertion failed: \n"); \ + printf("File: %s\n", __FILE__); \ + printf("Line: %d\n", __LINE__); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, 
Conv2DPadding padding) { + Conv2DLayerHandle self = (Conv2DLayerHandle)malloc(sizeof(Conv2DLayer)); + self->dim = dim; + self->padding = padding; + self->weights = (fxp32*)calloc(dim.x * dim.y, sizeof(fxp32)); + return self; +} + +void Conv2DLayer_destroy(Conv2DLayerHandle self) { + free(self->weights); + free(self); +} + +bool Conv2DLayer_setWeights(Conv2DLayerHandle self, fxp32* weights) { + memcpy(self->weights, weights, self->dim.x * self->dim.y * sizeof(fxp32)); + // TODO: make sure we get the right size of weights + return true; +} + +// Could be optimized +void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, int kerx, int kery, bool valid) { + + // float max = 0; + // float min = 0; + + // get kernel center + if (kerx % 2 != 1 || kery % 2 != 1) { + printf("Kernel size must be odd\n"); + exit(EXIT_FAILURE); + } + int cx = kerx / 2; + int cy = kery / 2; + + int mMax, mMin; + int nMax, nMin; + fxp32 sum, w, in; + + int iMin = 0; + int jMin = 0; + int iMax = inx; + int jMax = iny; + + if (valid) { + iMin = cx; + jMin = cy; + iMax = inx - cx; + jMax = iny - cy; + } + + for (int i = iMin; i < iMax; ++i) { + for (int j = jMin; j < jMax; ++j) { + mMin = i - cx >= 0 ? i - cx : 0; + mMax = i + cx + 1 < inx ? i + cx + 1 : inx; + nMin = j - cy >= 0 ? j - cy : 0; + nMax = j + cy + 1 < iny ? 
j + cy + 1 : iny; + sum = 0; + for (int m = mMin; m < mMax; ++m) { + for (int n = nMin; n < nMax; ++n) { + // printf("input access: %d, %d\n", m, n); + in = input[m * iny + n]; + // printf("kernel access: %d, %d\n", m - i + cx, n - j + + // cy); + w = kernel[(m - i + cx) * kery + (n - j + cy)]; + sum += fxp32_mul(w, in); + // if (sum > max) { + // max = sum; + // } + // if (sum < min) { + // min = sum; + // } + // printf("factor: %.2f\n", w * in); + } + } + if (valid) + output[(i - cx) * (iny - kery + 1) + (j - cy)] = sum; + else + output[i * iny + j] = sum; + // printf("access output: %d\n", (i-cx)*(iny - kery + 1) + (j-cy)); + // printf("sum = %.2f\n", sum); + } + } + // printf("max: %.6f, min: %.6f\n", max, min); +} + +// Could be optimized +void convolve2D(float* input, float* output, float* kernel, int inx, int iny, int kerx, int kery, bool valid) { + // get kernel center + if (kerx % 2 != 1 || kery % 2 != 1) { + printf("Kernel size must be odd\n"); + exit(EXIT_FAILURE); + } + int cx = kerx / 2; + int cy = kery / 2; + + int mMax, mMin; + int nMax, nMin; + float sum, w, in; + + int iMin = 0; + int jMin = 0; + int iMax = inx; + int jMax = iny; + + if (valid) { + iMin = cx; + jMin = cy; + iMax = inx - cx; + jMax = iny - cy; + } + + for (int i = iMin; i < iMax; ++i) { + for (int j = jMin; j < jMax; ++j) { + mMin = i - cx >= 0 ? i - cx : 0; + mMax = i + cx + 1 < inx ? i + cx + 1 : inx; + nMin = j - cy >= 0 ? j - cy : 0; + nMax = j + cy + 1 < iny ? 
j + cy + 1 : iny; + sum = .0f; + for (int m = mMin; m < mMax; ++m) { + for (int n = nMin; n < nMax; ++n) { + // printf("input access: %d, %d\n", m, n); + in = input[m * iny + n]; + // printf("kernel access: %d, %d\n", m - i + cx, n - j + + // cy); + w = kernel[(m - i + cx) * kery + (n - j + cy)]; + sum += w * in; + // printf("factor: %.2f\n", w * in); + } + } + if (valid) + output[(i - cx) * (iny - kery + 1) + (j - cy)] = sum; + else + output[i * iny + j] = sum; + // printf("access output: %d\n", (i-cx)*(iny - kery + 1) + (j-cy)); + // printf("sum = %.2f\n", sum); + } + } +} + +void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output) { + convolve2DFxp(input, output, self->weights, inputDim.x, inputDim.y, self->dim.x, self->dim.y, + self->padding == VALID); +} \ No newline at end of file diff --git a/sw/applications/l_cnn/conv2dlayer.h b/sw/applications/l_cnn/conv2dlayer.h new file mode 100644 index 000000000..3656037b4 --- /dev/null +++ b/sw/applications/l_cnn/conv2dlayer.h @@ -0,0 +1,42 @@ +#ifndef CONV2DLAYER_H +#define CONV2DLAYER_H + +#include +#include + +#include "fxp32.h" + +// TODO: perhaps we could optimize but we need 256 as value... 
+/** + * @brief a 2D dimension + * @param x the rows of a matrix + * @param y the columns of a matrix + */ +typedef struct __Dim2D { + uint16_t x; + uint16_t y; +} Dim2D; + +typedef enum __Conv2DPadding { + VALID, + SAME +} Conv2DPadding; + +typedef struct __Conv2DLayer { + Dim2D dim; + Conv2DPadding padding; + fxp32* weights; +} Conv2DLayer; + +typedef struct __Conv2DLayer* Conv2DLayerHandle; + +Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, Conv2DPadding padding); +void Conv2DLayer_destroy(Conv2DLayerHandle self); + +bool Conv2DLayer_setWeights(Conv2DLayerHandle self, fxp32* weights); +void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output); + +void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, int kerx, int kery, bool valid); +void convolve2D(float* input, float* output, float* kernel, int inx, int iny, int kerx, int kery, bool valid); + +#endif // CONV2DLAYER_H \ No newline at end of file diff --git a/sw/applications/app/fxp32.c b/sw/applications/l_cnn/fxp32.c similarity index 100% rename from sw/applications/app/fxp32.c rename to sw/applications/l_cnn/fxp32.c diff --git a/sw/applications/app/fxp32.h b/sw/applications/l_cnn/fxp32.h similarity index 100% rename from sw/applications/app/fxp32.h rename to sw/applications/l_cnn/fxp32.h diff --git a/sw/applications/l_cnn/main.c b/sw/applications/l_cnn/main.c new file mode 100644 index 000000000..ea80ae9a2 --- /dev/null +++ b/sw/applications/l_cnn/main.c @@ -0,0 +1,131 @@ +#include "cnn.h" +#include "fxp32.h" +#include "utils.h" +#include +#include +#include + +#include "test_cnnWeights.h" +// #include "testdata_s0.h" +// #include "testdata_s1.h" +// #include "testdata_s2.h" + +#define COMP_PREC 0.01f +#define COMP_PREC_I32 512 + +void compareVectors(float* a, float* b, int size, float prec) { + for (int i = 0; i < size; ++i) { + assert_closef(a[i], b[i], prec, i); + } +} + +void compareVectorsFxp(fxp32* a, fxp32* b, int size, float prec) { + for 
(int i = 0; i < size; ++i) { + assert_closei32(a[i], b[i], prec, i); + } +} + +void test_same_layer1() { + // fxp32* result_fxp = (fxp32*)calloc(xin1*yin1, sizeof(fxp32)); + float* result = (float*)calloc(xin1*yin1, sizeof(float)); + + // convolve2DFxp(input1_fxp, result_fxp, kernel1_fxp, xin1, yin1, xker1, yker1, false); + convolve2D(input1, result, kernel1, xin1, yin1, xker1, yker1, false); + + // compareVectorsFxp(result1_fxp, result_fxp, xin1*yin1, COMP_PREC_I32); + compareVectors(result1, result, xin1*yin1, COMP_PREC); + + // free(result_fxp); + free(result); +} + +void test_same_layer2() { + // fxp32* result_fxp = (fxp32*)calloc(xin2*yin2, sizeof(fxp32)); + float* result = (float*)calloc(xin2*yin2, sizeof(float)); + + // convolve2DFxp(input2_fxp, result_fxp, kernel2_fxp, xin2, yin2, xker2, yker2, false); + convolve2D(input2, result, kernel2, xin2, yin2, xker2, yker2, false); + + // compareVectorsFxp(result2_fxp, result_fxp, xin2*yin2, COMP_PREC_I32); + compareVectors(result2, result, xin2*yin2, COMP_PREC); + + // free(result_fxp); + free(result); +} + +void test_same_layer3() { + // fxp32* result_fxp = (fxp32*)calloc(xin3*yin3, sizeof(fxp32)); + float* result = (float*)calloc(xin3*yin3, sizeof(float)); + + // convolve2DFxp(input3_fxp, result_fxp, kernel3_fxp, xin3, yin3, xker3, yker3, false); + convolve2D(input3, result, kernel3, xin3, yin3, xker3, yker3, false); + + // compareVectorsFxp(result3_fxp, result_fxp, xin3*yin3, COMP_PREC_I32); + compareVectors(result3, result, xin3*yin3, COMP_PREC); + + // free(result_fxp); + free(result); +} + +void test_valid_layer1() { + // fxp32* result_fxp = (fxp32*)calloc(yout4*yout4, sizeof(fxp32)); + float* result = (float*)calloc(xout4*yout4, sizeof(float)); + + // convolve2DFxp(input4_fxp, result_fxp, kernel4_fxp, xin4, yin4, xker4, yker4, true); + convolve2D(input4, result, kernel4, xin4, yin4, xker4, yker4, true); + + // compareVectorsFxp(result4_fxp, result_fxp, xout4*yout4, COMP_PREC_I32); + compareVectors(result4, 
result, xout4*yout4, COMP_PREC); + + // free(result_fxp); + free(result); +} + +void test_valid_layer2() { + // fxp32* result_fxp = (fxp32*)calloc(xout5*yout5, sizeof(fxp32)); + float* result = (float*)calloc(xout5*yout5, sizeof(float)); + + // convolve2DFxp(input5_fxp, result_fxp, kernel5_fxp, xin5, yin5, xker5, yker5, true); + convolve2D(input5, result, kernel5, xin5, yin5, xker5, yker5, true); + + // compareVectorsFxp(result5_fxp, result_fxp, xout5*yout5, COMP_PREC_I32); + compareVectors(result5, result, xout5*yout5, COMP_PREC); + + // free(result_fxp); + free(result); +} + +void test_valid_layer3() { + // fxp32* result_fxp = (fxp32*)calloc(xout6*yout6, sizeof(fxp32)); + float* result = (float*)calloc(xout6*yout6, sizeof(float)); + + // convolve2DFxp(input6_fxp, result_fxp, kernel6_fxp, xin6, yin6, xker6, yker6, true); + convolve2D(input6, result, kernel6, xin6, yin6, xker6, yker6, true); + + // compareVectorsFxp(result6_fxp, result_fxp, xout6*yout6, COMP_PREC_I32); + compareVectors(result6, result, xout6*yout6, COMP_PREC); + + // free(result_fxp); + free(result); +} + +int main() { + PRINTF("\033[1;93m====== Test CNN =========\n"); + PRINTF("\033[0m====== Test Same ========\n"); + test_same_layer1(); + PRINTF("\033[1;32m====== Test 1 passed ====\n"); + test_same_layer2(); + PRINTF("\033[1;32m====== Test 2 passed ====\n"); + test_same_layer3(); + PRINTF("\033[1;32m====== Test 3 passed ====\n"); + PRINTF("\033[0m====== Test Same end ====\n\n"); + PRINTF("\033[0m====== Test Valid =======\n"); + test_valid_layer1(); + PRINTF("\033[1;32m====== Test 1 passed ====\n"); + test_valid_layer2(); + PRINTF("\033[1;32m====== Test 2 passed ====\n"); + test_valid_layer3(); + PRINTF("\033[1;32m====== Test 3 passed ====\n"); + PRINTF("\033[0m====== Test Valid end ===\n\n"); + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/sw/applications/l_cnn/test_cnnWeights.h b/sw/applications/l_cnn/test_cnnWeights.h new file mode 100644 index 000000000..3768c1d04 --- 
/dev/null +++ b/sw/applications/l_cnn/test_cnnWeights.h @@ -0,0 +1,92 @@ +#ifndef TEST_CNNWEIGHTS_H +#define TEST_CNNWEIGHTS_H + +#include + +#pragma GCC diagnostic ignored "-Wunused-variable" + +//// Case 1 mode: same +static uint16_t xin1 = 3u; +static uint16_t yin1 = 5u; +static uint16_t xker1 = 1u; +static uint16_t yker1 = 3u; +static float input1[] = { 0.4488318299689684, -0.7634520066109527, 1.4589411306665614, -0.6241278873730751, 3.917730007820797, 4.636627605010293, -1.1655848117422227, 2.917250380826646, 0.28894919752904435, 0.6804456109393229, 4.25596638292661, -4.289639418021131, -4.1287070029845925, -4.797816025596743, 3.32619845547938,}; +static float kernel1[] = { 0.48813503927324753, 2.151893663724195, 1.027633760716439,}; +static float result1[] = { 0.18128931430783346, 0.07548016813612674, 2.12544361152851, 3.3950950613321194, 8.12587968919733, 8.779735260745717, 2.752950806946225, 6.005583772654533, 2.7450489586482236, 1.605292826573081, 4.750208805184423, -11.396160270199067, -15.908859470489649, -8.92164263255216, 4.815643266554799,}; +static int32_t input1_fxp[] = { 3765074, -6404299, 12238485, -5235564, 32864302, 38894852, -9777634, 24471670, 2423881, 5707991, 35701632, -35984104, -34634104, -40246996, 27902174,}; +static int32_t kernel1_fxp[] = { 4094773, 18051392, 8620417,}; +static int32_t result1_fxp[] = { 1520765, 633173, 17829514, 28480122, 68164816, 73649760, 23093426, 50378488, 23027140, 13466172, 39847640, -95597920, -133453184, -74840160, 40396544,}; + +//// Case 2 mode: same +static uint16_t xin2 = 5u; +static uint16_t yin2 = 5u; +static uint16_t xker2 = 3u; +static uint16_t yker2 = 3u; +static float input2[] = { 0.38816734003356945, -0.8080548559670522, 1.8521950039675952, -2.9554775026848255, 3.7811743639094537, -4.726124068020738, 1.704675101784022, -0.8269519763287301, 0.5868982844575168, -3.596130614047662, -3.0189851091512123, 3.007445686755366, 4.682615757193975, -1.8657582184075716, 1.9232261566931408, 3.7638915229603835, 
3.9460666350384734, -4.149557886302221, -4.609452167671177, -3.3016958043543108, 3.7814250342941307, -4.016531661669499, -0.7889237499494781, 4.578895301505019, 0.33165284973017073,}; +static float kernel2[] = { -0.8297799529742598, 2.2032449344215808, -4.998856251826551, -1.9766742736816023, -3.5324410918288693, -4.0766140523120225, -3.137397886223291, -1.5443927295695223, -1.0323252576933006,}; +static float result2[] = { 7.462162004395981, 7.585168841573074, 2.4258572583235107, -3.23507218200551, -3.802254814609295, 16.197866692278705, -4.676687562908773, 1.9430859639149014, -26.523794841323774, 25.209671456054078, -30.416593954047023, -25.553426393591323, -22.263511994498945, 32.99627549623894, 8.044960659650508, -52.76130634645628, -23.585886440515488, 51.88998308860701, 15.39566943678927, 11.681919811790573, -8.41685670030438, 36.24364652628926, 2.6849419534286167, -6.1750827833904935, -13.672142251992868,}; +static int32_t input2_fxp[] = { 3256183, -6778455, 15537338, -24792342, 31718790, -39645604, 14299851, -6936976, 4923259, -30166530, -25325082, 25228282, 39280628, -15651114, 16133190, 31573810, 33102006, -34809016, -38666888, -27696632, 31720892, -33693108, -6617972, 38410556, 2782105,}; +static int32_t kernel2_fxp[] = { -6960699, 18482158, -41933444, -16581546, -29632264, -34197116, -26318402, -12955305, -8659772,}; +static int32_t result2_fxp[] = { 62597152, 63629008, 20349566, -27137752, -31895626, 135877552, -39230900, 16299786, -222497712, 211474048, -255152880, -214357680, -186759872, 276792832, 67486024, -442593920, -197852752, 435284736, 129148232, 97995048, -70605712, 304033728, 22522926, -51800348, -114690240,}; + +//// Case 3 mode: same +static uint16_t xin3 = 3u; +static uint16_t yin3 = 61u; +static uint16_t xker3 = 3u; +static uint16_t yker3 = 31u; +static float input3[] = { 1.5751238765904398, -2.8536425360004634, -0.8324655980466291, 1.43841934148195, 1.614813267154414, -3.295228665162293, 3.8165223574854856, 2.780081598084683, 
-3.6604579182830896, 3.6891662643207876, 2.4877787827685447, 2.9858565409908087, 0.43345230583902694, -2.791620849385381, 4.184586465896006, 0.9208457766637634, -1.5376209054485357, -2.3622147071998025, 4.13915477165132, -0.8026453869317933, 0.4019151566713086, 1.0844215780504962, 3.2624982843415786, 1.2356318456213078, -3.2328783841557396, 0.9125735265354589, -0.1073383300514088, 0.4790778009037666, 1.9952061979054818, -2.541888360958693, -3.133728544135198, -3.894168522038389, -2.2594074746394632, -4.897499606004091, 1.2935972282958215, -2.0482769478025755, -3.1271345529810226, -4.047119477391963, -2.1624419253474536, -2.8507561586930477, -2.1439626608909768, -0.2859014253032104, 0.49496821088408005, 3.4511311557470155, 4.885097007841644, -4.5113191377064865, -2.6788174856383606, 1.433114272482352, -3.385334371083233, 3.7014589336900148, -2.825975736949741, 2.4175503888344894, 1.5302051323110106, 2.988855115046296, -4.687524377347384, -2.7042597125565484, 2.0462749912045917, -4.124374909988772, -4.694105168056807, -1.428650663004393, 0.8978199147176378, -4.477772726158397, -4.343363337936225, -4.5649881394028435, -1.048492415324962, 1.6842704601605778, -3.019728881838304, 3.762664517041591, -0.6761090455515264, 1.1964146370996467, -2.0957325706501164, 1.1525457346427626, 4.536599623823227, -0.5198528347591038, -2.9295016174668573, -0.7463270809461351, -0.5542573523624226, 0.07617486671961338, 0.25733249810957926, -4.575708081174539, -3.3558091382988, -0.49772820382626737, 2.079711706189208, 2.7758790830553357, 2.7710299946964465, 0.02778752509221416, 4.567475933072476, -1.6812628746012415, -0.1854183765396673, 2.4837399934103637, 3.5783951843951396, -0.8553994486708847, 3.486845142673234, -0.5542905824929081, 2.1574708428635425, -4.915516364211802, -4.74798576329251, 4.404504935420885, -3.978620517688065, 1.6196779043970766, -2.167412561788792, -2.991576562245546, -1.1166581788524086, 4.259015965084993, 0.7027420975071905, 4.167146119578826, 2.0226424081972496, 
0.012164516870869946, 0.06086833512638812, -2.8117921367062726, -4.953364772649501, -0.8216375599728423, 0.6337611033903512, 3.765618684233271, 1.7758484022790944, 3.4233304610347126, 4.2616128692492, 4.411158736336223, 3.1601915192439396, -3.686053945205562, -1.53369985009736, -2.914975054744775, 3.673815371565235, 2.923302193062579, -1.5099835931614058, 0.6914053205563544, 3.051315402557016, 3.3316124473397615, -2.94206323049928, 2.306802005261156, 3.0341416415944895, -4.568709636466373, -3.928125957684201, -0.6491635014117234, -2.791187781392559, 3.8062206616568073, 2.1968753772441127, 2.1300729354698102, 2.6985965699270196, -1.6925297383059523, -2.7138974816690107, 1.263439592010264, -0.8552075047461081, 4.36781072234186, 1.424524189027922, -1.1309965698763182, 3.5511966080419057, -1.1920741628379194, -3.216901722222798, 2.8165942566608138, -0.2779141258419857, -2.405195955957229, 1.92399245881231, 4.8048507899877855, -2.5374641103221016, 2.9024380259404525, 2.6233524422405887, -3.7996020782258597, 3.3890259455900065, -0.3826109524346837, -3.7421861744691243, 0.36343473096068557, -2.03487249205371, -3.242758067432694, -4.207618292992004, -3.0110090817050086, -0.7041138094358619, 1.4181176362883168, -4.126208212102036, 2.1573058777096517, -3.9825518903661794, -3.244009290911781, 2.133215621574106, 3.852636744104892, 4.8313836104307235, 1.529069507012145, -0.22263646993624597, -4.122124451150055, 0.5895079469727671, -3.1884977059135355, 0.1440206047122059, 1.0893014893439155, 3.3321249012260417,}; +static float kernel3[] = { -0.6400509785799624, -4.740737681721087, 0.49662477878709144, -0.6467760738172315, -0.7963219791251097, -1.6966517899612588, -2.953513659621575, 1.192709663506637, -2.0034532632547686, -2.3317272489713337, 1.2113383276929488, 0.2914209427703902, -3.6542005465506646, 0.13578121265746468, -3.155601343530847, 2.8533514781667346, 3.539752926394888, -0.05763162618072215, 3.46561485357468, -4.20354522990939, 0.05246090121703961, -4.347134956131219, 
-0.7187767240261058, -4.034690843393874, -3.7284002829872254, 0.9674530897859581, -2.7398799939576413, -3.9305431569001703, -2.796937929294403, -1.5017371499670071, -0.32212515417699805, -2.9825677373503465, 1.4040672521491482, -0.1693016444824833, 0.05236720018549157, -1.131073488814407, 2.936374544415771, 0.8000417887780662, -3.3770140149768615, 2.0075234660715626, 4.645510800892552, 8.361170216719671e-05, 3.89520063946145, -1.5838634732889032, 0.6714412762770925, -0.7245403670388999, -0.63252736973201, 2.7655918499710026, 0.35604173497656344, 4.537422269448667, 0.4420816014810214, -4.179050777249752, -1.336575983249796, 3.5085050400450175, -0.9372495695204917, -4.727976341051504, -2.528227610026467, -4.328556292630453, 4.938520114212729, 4.7058031337717345, 3.0025835113258683, 1.0181712140546741, 2.6495986045168154, -3.3077455341582205, -2.069767681805602, 0.240668753003642, -1.433757188772411, -4.543210347545999, 4.831534453572127, -0.5864508064515324, 0.040004393791526205, -1.7645868246530116, -2.4025524725286296, -1.1311011467923757, 3.320168996345009, 2.3674705628711292, -1.2078943305836254, -4.869826633054494, 2.974049390325936, -2.306112024036988, 0.826848885290346, -4.744490581503072, 1.6220201926832578, -1.1247657412671854, -0.029262012654873715, -0.8509416262531282, -1.491280986983595, 0.509779053244193, 4.729106898748249, -3.872237847619285, -1.8674147157028642, -4.5820229018469, 2.383997586209004,}; +static float result3[] = { 20.938379585920273, 8.02302877465514, -16.857694443813095, 4.581030951067322, -47.684895622509956, 25.342934266840597, 68.36750478864711, 2.5676380052143335, 29.055295803968534, -28.04374791458958, 41.83114391463987, -26.50924062032297, -30.51934104241301, 84.46348958070554, 40.264328143733174, 34.31532066976898, -2.511563242949011, -118.27980904392957, -13.571811349883715, -32.09344069285111, 71.49481770295496, -22.06395380628139, -21.131452617454165, -38.37649565949362, -0.308430518672953, 6.564041191235514, 
12.525132993990898, 31.79873177667102, 50.146841838223985, -48.03904602832188, -9.240245071834234, 81.75909358754869, 22.224903966714834, -102.75317844603369, -36.0440186033606, 0.4617001857888705, -51.90829635455982, 19.583409528784266, -118.55050489358963, -26.758792800151088, 36.963275595009, -17.143593036238077, -157.05536586299988, -143.16188227276268, -29.809500883012944, 38.09167352342915, 41.28100796765678, -9.532238623212233, 83.53722296123598, 29.477448838273464, -1.3946555128049916, -19.694261623611865, -106.38617304550061, 78.21896118647508, 3.5221474774463024, -37.63561922915278, -20.289313489154242, -22.60404782786653, -0.9210687069457979, 26.983132426984596, 2.419086747570052, -86.98459645783491, -145.07037665385383, -46.325015600230536, 18.816364649088612, 34.3594532282602, -89.6152853592831, -83.96484146396111, -66.0178965718038, 71.80911903889299, 29.92814314806981, 34.92300062419566, -23.130910037195996, -75.96390223746815, -21.00109946515495, -71.85887873345892, -22.328707246431552, 57.26708051045838, -12.24282331844404, -7.1714798944457945, -30.534493818296973, 27.523283778596163, -70.57989303251122, 58.15567049761824, 33.94735267374952, 38.62037691759168, 39.95618959955201, 95.21166639406364, -7.204825910354527, 49.21666388406004, 101.77680517404558, 99.64834752754254, 9.278473282359952, -16.02033177437054, 36.914168164198315, 6.687573994493457, -9.177060033952387, -36.05265657393037, -60.868336303905366, -22.917308368712966, 101.7498445942521, 100.61491816352485, 33.81388073239066, 210.15189920300776, 103.46315808071684, -60.18392583924689, -59.36631256697667, 11.274377955219325, -113.55011114921368, 92.37440604721446, 182.30644467551863, 34.14469184281109, 95.75779516619747, 29.04656936675967, 62.81447648353387, 60.8592166174747, 20.61627755180531, -6.857905961501912, -66.21411470934667, -56.966329923220634, 44.929314745232105, 60.345150906171696, 0.7796203920941505, 132.41714347101967, -30.52398067257131, 27.23418453373192, 
32.7731215542053, 6.408595431273242, 49.27582417569214, 63.438697156687184, 23.039260818042102, 39.783514258951904, 50.77600710118369, 31.393895099039494, 15.080702964409335, -60.97851929404653, 34.34640168372713, -22.530726723004733, -124.39234555627823, -30.170425348290156, 6.38116061686459, -12.741724457526818, 15.305365422729299, 60.98560408737588, -26.56097389044882, -8.254757881856829, 6.138376489617752, -56.23954121746415, 43.581883038015484, 40.86606982571536, 37.993522967048726, -4.626738134589028, 2.5182535809600917, -0.4150455049801065, -54.0913458236004, 94.63587924530772, -108.43645668517142, 6.520016156047285, -65.42753241808275, 60.454244741479215, 53.104516440459065, -100.63167603344571, 44.0241053142488, -11.781082893294116, -124.76747909867008, -45.95018405728458, 25.025227680595158, -120.48169114286, -13.862453835535531, -65.28234182954981, -91.37730985309311, 39.48321142614181, -9.869784175782478, 25.04695690346387, 0.9868548102024803, 2.766199922778613, 23.602380623388747, -12.446914947467487, 23.768689599761093, 32.84198722546352, -22.20616112949299, -28.841533549735036, -19.479887199191595,}; +static int32_t input3_fxp[] = { 13213097, -23938088, -6983227, 12066336, 13546035, -27642382, 32015310, 23321014, -30706146, 30946970, 20869000, 25047180, 3636061, -23417812, 35102856, 7724614, -12898499, -19815694, 34721748, -6733077, 3371508, 9096788, 27367820, 10365231, -27119350, 7655221, -900419, 4018795, 16737003, -21322906, -26287620, -32666654, -18953284, -41083204, 10851480, -17182192, -26232306, -33949700, -18139878, -23913876, -17984862, -2398315, 4152094, 28950186, 40979164, -37843688, -22471550, 12021834, -28398242, 31050088, -23706002, 20279882, 12836291, 25072334, -39321804, -22684974, 17165398, -34597764, -39377008, -11984390, 7531459, -37562280, -36434772, -38293896, -8795392, 14128685, -25331322, 31563518, -5671613, 10036253, -17580280, 9668254, 38055756, -4360841, -24574440, -6260645, -4649447, 639001, 2158661, -38383820, -28150568, 
-4175246, 17445886, 23285762, 23245084, 233098, 38314764, -14103455, -1555402, 20835122, 30017754, -7175610, 29249778, -4649726, 18098178, -41234340, -39828992, 36947664, -33375088, 13586843, -18181574, -25095164, -9367208, 35727216, 5895028, 34956556, 16967154, 102043, 510600, -23587022, -41551836, -6892395, 5316373, 31588300, 14896896, 28716978, 35749000, 37003480, 26509608, -30920862, -12865607, -24452584, 30818198, 24522436, -12666660, 5799928, 25596288, 27947590, -24679816, 19350858, 25452224, -38325116, -32951508, -5445578, -23414180, 31928894, 18428726, 17868346, 22637468, -14197969, -22765822, 10598499, -7174000, 36639852, 11949775, -9487487, 29789596, -9999843, -26985328, 23627306, -2331312, -20176246, 16139619, 40306008, -21285792, 24347414, 22006276, -31873372, 28429210, -3209573, -31391732, 3048711, -17069748, -27202226, -35296060, -25258174, -5906534, 11896033, -34613144, 18096794, -33408066, -27212722, 17894710, 32318260, 40528584, 12826765, -1867610, -34578888, 4945151, -26747058, 1208132, 9137723, 27951890}; +static int32_t kernel3_fxp[] = { -5369137, -39768192, 4165990, -5425551, -6680033, -14232547, -24775868, 10005174, -16806184, -19559946, 10161442, 2444616, -30653656, 1139015, -26471102, 23935648, 29693600, -483449, 29071684, -35261892, 440073, -36466412, -6029536, -33845440, -31276088, 8115584, -22983780, -32971786, -23462416, -12597484, -2702181, -25019592, 11778170, -1420205, 439287, -9488132, 24632094, 6711237, -28328446, 16840328, 38969368, 701, 32675312, -13286410, 5632457, -6077885, -5306024, 23199466, 2986694, 38062656, 3708449, -35056420, -11212012, 29431474, -7862219, -39661140, -21208310, -36310560, 41427308, 39475136, 25187496, 8541039, 22226444, -27747380, -17362470, 2018875, -12027227, -38111212, 40529848, -4919506, 335581, -14802427, -20154070, -9488364, 27851596, 19859782, -10132552, -40851068, 24948134, -19345070, 6936111, -39799672, 13606492, -9435219, -245467, -7138215, -12509772, 4276336, 39670624, -32482686, -15665010, 
-38436792, 19998422}; +static int32_t result3_fxp[] = { 175643856, 67302040, -141412592, 38428472, -400009888, 212591936, 573508224, 21538908, 243733488, -235248016, 350905056, -222375632, -256014784, 708531136, 337761664, 287857760, -21068520, -992202944, -113848608, -269219296, 599742016, -185085856, -177263472, -321925376, -2587302, 55063168, 105068432, 266747088, 420662208, -402980736, -77512792, 685844992, 186436000, -861956160, -302359136, 3873021, -435438336, 164277552, -994473728, -224469024, 310070432, -143810880, -1317475840, -1200928896, -250060224, 319536128, 346290208, -79962216, 700761024, 247274768, -11699218, -165207440, -892431872, 656148224, 29545914, -315710464, -170199104, -189616496, -7726484, 226350928, 20292770, -729679680, -1216938496, -388602400, 157843104, 288228000, -751747520, -704348160, -553798272, 602378560, 251055456, 292955360, -194036144, -637231424, -176169984, -602795968, -187306768, 480391104, -102700248, -60158732, -256141904, 230882032, -592067072, 487845120, 284771040, 323971200, 335176800, 798693376, -60438460, 412859296, 853765696, 835910912, 77833472, -134388288, 309658496, 56099436, -76982760, -302431616, -510600608, -192244320, 853539584, 844019136, 283651392, 1762881920, 867911872, -504859360, -498000736, 94576336, -952527360, 774892672, 1529297280, 286426432, 803274624, 243660288, 526926016, 510524096, 172941872, -57528284, -555444224, -477868224, 376894400, 506211808, 6539930, 1110795520, -256053712, 228456896, 274920864, 53759196, 413355584, 532162368, 193267328, 333728320, 425940032, 263351072, 126506104, -511524896, 288118496, -189001440, -1043478656, -253087872, 53529056, -106885328, 128390712, 511584320, -222809600, -69245928, 51492436, -471771456, 365591328, 342809440, 318712768, -38811892, 21124642, -3481654, -453751104, 793863296, -909630912, 54693860, -548845952, 507126976, 445472960, -844159680, 369300960, -98826888, -1046625472, -385458080, 209926832, -1010673664, -116286688, -547627968, -766528448, 
331209184, -82793752, 210109104, 8278338, 23204566, 197991120, -104412288, 199386224, 275498560, -186278784, -241940320, -163409136,}; + +//// Case 4 mode: valid +static uint16_t xin4 = 5u; +static uint16_t yin4 = 5u; +static uint16_t xker4 = 3u; +static uint16_t yker4 = 3u; +static uint16_t xout4 = 3u; +static uint16_t yout4 = 3u; +static float input4[] = { -0.5919015634936358, -4.701237891214331, -0.43166775605288876, 1.4914404761476074, -2.215127173520247, 1.7625490198013125, 0.9086281741635087, -4.760181176228347, 0.5885408799088196, -2.407475530925346, -0.8489880298993038, -2.164749182286813, 1.9313791831299634, -0.5954628232926051, -3.4313226152503673, 0.4464901803184471, 2.80314764511367, -1.9363646762382025, -2.780421160678186, -1.1202874244435126, 4.363836498604304, 4.759954224729338, 1.7238367591281367, 4.02834108538398, 3.4575087129317925,}; +static float kernel4[] = { 0.5079790257457546, 2.081478226181048, -2.0909526108705565, 0.10827605197663015, 3.9294695434765465, 3.9629308893343804, -3.7441468953616375, -2.9275712186181324, -4.485327966991701,}; +static float result4[] = { -23.43327726779893, -17.55769120994077, 9.675929089779544, 10.602713570309055, 1.9624163946072903, 8.526826051029296, -43.59194373770315, -55.09548172766729, -42.41408763118508,}; +static int32_t input4_fxp[] = { -4965230, -39436840, -3621091, 12511110, -18581834, 14785333, 7622125, -39931292, 4937038, -20195368, -7121828, -18159232, 16201583, -4995104, -28784020, 3745431, 23514506, -16243404, -23323864, -9397652, 36606512, 39929392, 14460591, 33792176, 29003686,}; +static int32_t kernel4_fxp[] = { 4261237, 17460704, -17540182, 908285, 32962780, 33243474, -31408180, -24558248, -37625660,}; +static int32_t result4_fxp[] = { -196572576, -147284592, 81167576, 88942008, 16461942, 71528200, -365675712, -462174400, -355795168,}; + +//// Case 5 mode: valid +static uint16_t xin5 = 3u; +static uint16_t yin5 = 5u; +static uint16_t xker5 = 1u; +static uint16_t yker5 = 3u; +static uint16_t 
xout5 = 3u; +static uint16_t yout5 = 3u; +static float input5[] = { 2.148159936743647, 1.9772882459727086, -2.839105044196236, 4.762744547762418, -4.937697447954101, -2.4701763761655604, -0.6520846759555425, 2.7938292179375246, -3.0231492539974694, 3.6299323559922225, 4.834006771753128, -3.361577585953013, 0.9733394393285923, -4.91013902332445, -1.1342871735637061,}; +static float kernel5[] = { 4.670298390136766, 0.4723224917572235, 4.726843599648843,}; +static float result5[] = { -2.453539901299475, 30.406301525520632, -34.34963992380085, 1.3615387453760093, -16.015795336628752, 28.77823723432869, 25.58932864147245, -38.44929949447413, -3.1349815483414165,}; +static int32_t input5_fxp[] = { 18020072, 16586696, -23816140, 39952796, -41420408, -20721342, -5470082, 23436338, -25360014, 30450080, 40550588, -28198956, 8164963, -41189232, -9515090,}; +static int32_t kernel5_fxp[] = { 39177304, 3962128, 39651640,}; +static int32_t result5_fxp[] = { -20581784, 255066544, -288145664, 11421415, -134350224, 241409344, 214658848, -322536096, -26298132,}; + +//// Case 6 mode: valid +static uint16_t xin6 = 3u; +static uint16_t yin6 = 31u; +static uint16_t xker6 = 3u; +static uint16_t yker6 = 1u; +static uint16_t xout6 = 1u; +static uint16_t yout6 = 31u; +static float input6[] = { 4.186109079379216, -0.11588811205170835, 1.1174386290264575, 2.6590785648031554, 0.18417987872943264, -2.0319949842377802, -3.122787713387484, -4.192587312351251, 2.3844029619897, -0.5869077710404689, -3.416901322873488, 3.7993703120127886, -2.259135380077754, -0.8576498091894873, -2.0392006726635206, 1.287879088794833, 0.7983781018954508, 0.999291966249876, -2.341808824644928, -2.153141193586362, -2.464117942262125, -1.7243605231126589, -3.5583569934657957, -3.343871387987324, 4.6393052906794185, 4.602267152856939, -3.1158534440406482, -4.75693438370513, -2.9544445362004934, 1.9984361412655751, 2.7951458555552975, -4.770669075609185, 0.77662858129756, -4.983578272839547, 0.15472611905393485, 
1.3979517613085708, 4.856244028041889, -2.4090240358889425, 3.024968852628703, 3.704830870014831, 4.227496139456699, -4.977857874675564, -0.30511628242422706, 4.814687376060105, -1.0105519609714952, 3.137324775869182, 0.46456497972477706, 2.7085408714022456, -0.15068925011541978, -4.708884363272823, -4.134743115977949, -3.885461876219767, -2.4875488830945236, 4.64915292526365, 1.3176605273774546, 3.166602026153619, 0.6608199609275633, 1.353562055818931, 3.1190239118041063, 4.2668261524387425, 4.126267637037831, 3.2481072043852546, -4.057972678226774, -1.389515815361909, -4.6449096823966975, 0.46358348540806915, 2.9614272085600426, -4.488571968986529, -3.113322642068921, -1.345222320878352, -2.557091330420916, 2.9508747292103195, -1.4790506423363148, 1.3887768206036535, -0.06584948103240329, 0.8349974372005651, 4.392993519648517, 4.4354008201587956, -3.883075728164286, 3.435549661102238, -1.5397184824404766, -3.9917272713735485, -1.1659093393288478, 0.10354797339247934, 4.6110308196512975, -1.2848738467199272, -4.876305883996423, 3.597068869881916, -3.888892504596347, -0.2166095601321505, 3.499800323861935, 0.14737967000679042, -0.5339217207768678,}; +static float kernel6[] = { -2.780068289102605, 3.7073230617737636, -2.932808446605736,}; +static float result6[] = { -17.42282404360473, 7.276573635261671, -7.959660008373655, -8.178401848526025, -4.014672567672883, 36.816872081897245, 8.881311672638013, 26.815495210093946, 14.605660913908213, 8.649987283561435, -4.617536065501945, -15.76668994325529, 24.323276089618997, -3.8110050890636913, 4.416385118733144, -14.86628034137447, 19.210207686882818, -13.412562703854679, -6.431267771954796, 2.3640225039054252, -4.134857509100443, -4.731993703294383, 13.605117160950932, 17.9494729397995, 13.143302281753002, -20.894257853421145, 25.085753963769875, 25.42310605950678, 13.767816611263237, 9.309381884658203, 5.836976522005776,}; +static int32_t input6_fxp[] = { 35115628, -972139, 9373755, 22305968, 1545012, -17045610, 
-26195842, -35169972, 20001822, -4923339, -28663046, 31871428, -18951002, -7194488, -17106056, 10803513, 6697281, 8382668, -19644516, -18061858, -20670520, -14464984, -29849662, -28050426, 38917312, 38606616, -26137674, -39904056, -24783678, 16764097, 23447382, -40019272, 6514832, -41805284, 1297936, 11726869, 40737128, -20208358, 25375278, 31078374, 35462808, -41757300, -2559501, 40388524, -8477124, 26317788, 3897053, 22720888, -1264073, -39500984, -34684740, -32593616, -20867072, 38999920, 11053338, 26563384, 5543359, 11354501, 26164268, 35792732, 34613640, 27247098, -34040744, -11656103, -38964328, 3888820, 24842252, -37652872, -26116444, -11284543, -21450436, 24753732, -12407176, 11649904, -552385, 7004466, 36851100, 37206840, -32573600, 28819480, -12916095, -33485036, -9780356, 868623, 38680132, -10778303, -40905420, 30174400, -32622394, -1817052, 29358452, 1236310, -4478860,}; +static int32_t kernel6_fxp[] = { -23320904, 31099280, -24602180,}; +static int32_t result6_fxp[] = { -146153248, 61040324, -66770468, -68605408, -33677516, 308842304, 74501840, 224944672, 122521160, 72561352, -38734700, -132260584, 204038432, -31969028, 37047324, -124707400, 161146896, -112512728, -53949384, 19830858, -34685700, -39694840, 114127992, 150571088, 110254008, -175273744, 210434560, 213264464, 115492816, 78092752, 48964108,}; + +//// Test full cnn +static float inp[] = { 0.10687026183436554, 0.6433775638335413, 1.5719371774211934, 1.8606301880573626, 1.079730707565259, 1.0363964731573039, 0.8401961894177536, 0.8062113303254894, 1.0693255244711337, 1.8973979155425758, -0.5051419304161229, -1.6677822278462204, -1.0414382415511003, -1.1140689598280193, -0.5456008127835759, 1.241256928449407, -1.7596153205346008, -0.20102576841377573, 1.2526821099333798, -0.9430465018374048, -1.7463960721065255, -1.0315693109762605, -1.6597181529991376, 1.231109741389408, -1.3189996974476714, -1.2186214977502452, 1.2585680462314413, 1.2411421167299825, 0.35749551293497017, 1.658937360469848, 
-1.7607134529814616, 1.859986561081329, 0.28390086742322307, -0.7899275499984229, 1.3028233059934027, 0.637669121413285, 1.9460057656135836, -1.5700418656996042, 0.3236741105335814, -0.10868737977085274, 0.6090831439508024, -1.0325637836102834, -1.8754797463263757, 0.17692941098474302, -0.54115928483046,}; +static float ke1[] = { 1.2019056830022787, -1.9184344862073974, 0.2904745952311969, -0.3544655368824938, 1.9405471907832181, 1.2056061216442804, -1.7841515914194126, -1.2380889061779894, -0.19032461673209822, 0.8117683084719127, -0.6718074018572904, -0.5600672193147695, 1.685882263836708, 1.8145220229093884, -0.36925708529142565,}; +static float ke2[] = { 1.594284619538893, -0.6789869921619749, -1.6690457236800493,}; +static float res[] = { -5.646923391743088, -12.486867124009787, 1.116749990770437, 3.790885823459984, -7.655608446879853, -5.758159525901699, -16.320022556866068, 10.253133245571195, -8.825685721563698, -3.9221345076618483, -11.844371518436887, 16.970633341131517, 5.631829596306385, 4.730155342366402, 6.236770944902686,}; + +#endif // TEST_CNNWEIGHTS_H \ No newline at end of file diff --git a/sw/applications/l_cnn/utils.h b/sw/applications/l_cnn/utils.h new file mode 100644 index 000000000..7c574ef6e --- /dev/null +++ b/sw/applications/l_cnn/utils.h @@ -0,0 +1,57 @@ +#ifndef UTILS_H +#define UTILS_H + +#include +#include +#include + +// Define SIMULATION if you want to disable printing +#define SIMULATION + +// Enable or disable printing +#ifndef SIMULATION +#define PRINTF(...) printf(__VA_ARGS__) +#else +#define PRINTF(...) 
+#endif + +// Assert functions, always print if failing +void assert_closef(float a, float b, float prec, int idx) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { + printf("AF %d %d %d %d\n", __LINE__, (int)(a*100000), (int)(b*100000), idx); + exit(EXIT_FAILURE); + } +} + +void assert_closei32(int32_t a, int32_t b, int32_t prec, int idx) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { + printf("AI %d %d %d %d\n", __LINE__, a, b, idx); + exit(EXIT_FAILURE); + } +} + +// Vector export for plots +#ifndef SIMULATION +void vectorExport(float* a, int size, char filename[]) { + FILE *filePointer; + filePointer = fopen(filename, "w"); + if (filePointer == NULL) { + printf("Failed to create file.\n"); + return; + } + for (int i=0; i + +DataMS Data_normalizeAccPpg(fxp32* accx, fxp32* accy, fxp32* accz, fxp32* ppg, int size) { + DataMS mv = Data_normalizeMV(ppg, size); + Data_normalize(accx, size); + Data_normalize(accy, size); + Data_normalize(accz, size); + return mv; +} + +DataMSf Data_normalizeAccPpgf(float* accx, float* accy, float accz, float* ppg, int size) { + DataMSf mv = Data_normalizeMVf(ppg, size); + Data_normalizef(accx, size); + Data_normalizef(accy, size); + accz = (accz - mv.mean) / mv.std; + return mv; +} + +bool closef(float a, float b, float prec) { + float diff = a - b; + if (diff < 0) + diff = -diff; + return diff <= prec; +} + +float sqrtf(float x) { + float guess = x / 2; + float eps = 0.00001; + while (closef(guess, x / guess, eps) == false) { + guess = (guess + x / guess) / 2; + } + return guess; +} + +// FIXME: +DataMS Data_normalizeMV(fxp32* data, int size) { + // calculate mean + DataMS mv; + fxpMul sum = 0; + for (int i = 0; i < size; ++i) { + sum += (fxpMul)data[i]; + } + mv.mean = (fxp32)((sum << FRACTIONAL_BITS) / fxp32_fromInt(size)); + + // calculate variance + fxpMul std = 0; + for (int i = 0; i < size; ++i) { + std += fxp32_mul(data[i] - mv.mean, data[i] - mv.mean); + printf("data: 
%f\n", fxp32_toFloat(data[i])); + printf("data - mean: %f\n", fxp32_toFloat(data[i] - mv.mean)); + printf("std: %f\n", fxp32_toFloat((fxp32)std)); + } + mv.std = fxp32_sqrt((fxp32)((std << FRACTIONAL_BITS) / fxp32_fromInt(size))); + + // normalize data + for (int i = 0; i < size; ++i) { + data[i] = fxp32_div(data[i] - mv.mean, mv.std); + } + + return mv; +} + +DataMSf Data_normalizeMVf(float* data, int size) { + // calculate mean + DataMSf mv; + float sum = 0; + for (int i = 0; i < size; ++i) { + sum += data[i]; + } + mv.mean = sum / size; + + // calculate variance + float std = 0; + for (int i = 0; i < size; ++i) { + std += (data[i] - mv.mean) * (data[i] - mv.mean); + } + mv.std = sqrtf(std / size); + + // normalize data + for (int i = 0; i < size; ++i) { + data[i] = (data[i] - mv.mean) / mv.std; + } + + return mv; +} + +// FIXME: +void Data_normalize(fxp32* data, int size) { + // calculate mean + fxp32 sum = 0; + for (int i = 0; i < size; ++i) { + sum += data[i]; + } + fxp32 mean = fxp32_div(sum, fxp32_fromInt(size)); + + // calculate variance + fxp32 var = 0; + for (int i = 0; i < size; ++i) { + var += fxp32_mul(data[i] - mean, data[i] - mean); + } + var = fxp32_sqrt(fxp32_div(var, fxp32_fromInt(size))); + + // normalize data + for (int i = 0; i < size; ++i) { + data[i] = fxp32_div(data[i] - mean, var); + } +} + +void Data_normalizef(float* data, int size) { + // calculate mean + float sum = 0; + for (int i = 0; i < size; ++i) { + sum += data[i]; + } + float mean = sum / size; + + // calculate variance + float var = 0; + for (int i = 0; i < size; ++i) { + var += (data[i] - mean) * (data[i] - mean); + } + var = sqrtf(var / size); + + // normalize data + for (int i = 0; i < size; ++i) { + data[i] = (data[i] - mean) / var; + } +} + +void Data_denormalize(fxp32* data, int size, DataMS mv) { + for (int i = 0; i < size; ++i) { + data[i] = fxp32_mul(data[i], mv.std) + mv.mean; + } +} + +void Data_denormalizef(float* data, int size, DataMSf mv) { + for (int i = 0; i < 
size; ++i) { + data[i] = data[i] * mv.std + mv.mean; + } +} \ No newline at end of file diff --git a/sw/applications/l_data/datahelper.h b/sw/applications/l_data/datahelper.h new file mode 100644 index 000000000..2b59a2bf9 --- /dev/null +++ b/sw/applications/l_data/datahelper.h @@ -0,0 +1,36 @@ +#ifndef DATAHELPER_H +#define DATAHELPER_H + +#include "fxp32.h" + +typedef struct DataMS { + fxp32 mean; + fxp32 std; +} DataMS; + +typedef struct DataMSf { + float mean; + float std; +} DataMSf; + +/** + * Normalize data and return the mean and variance of ppg data + */ +DataMS Data_normalizeAccPpg(fxp32* accx, fxp32* accy, fxp32* accz, fxp32* ppg, int size); +DataMSf Data_normalizeAccPpgf(float* accx, float* accy, float accz, float* ppg, int size); + +// Helper functions +bool closef(float a, float b, float prec); +float sqrtf(float x); +DataMS Data_normalizeMV(fxp32* data, int size); +void Data_normalize(fxp32* data, int size); +DataMSf Data_normalizeMVf(float* data, int size); +void Data_normalizef(float* data, int size); + +/** + * Denormalize data with given mean and variance + */ +void Data_denormalize(fxp32* data, int size, DataMS mv); +void Data_denormalizef(float* data, int size, DataMSf mv); + +#endif // DATAHELPER_H \ No newline at end of file diff --git a/sw/applications/l_data/fxp32.c b/sw/applications/l_data/fxp32.c new file mode 100644 index 000000000..20f899928 --- /dev/null +++ b/sw/applications/l_data/fxp32.c @@ -0,0 +1,48 @@ +#include "fxp32.h" + +fxp32 fxp32_fromFloat(float f) { + return (fxp32)(f * (1 << FRACTIONAL_BITS)); +} + +float fxp32_toFloat(fxp32 x) { + return (float)x / (1 << FRACTIONAL_BITS); +} + +fxp32 fxp32_fromInt(int i) { + return i << FRACTIONAL_BITS; +} + +fxp32 fxp32_mul(fxp32 a, fxp32 b) { + return (fxp32)(((fxpMul)a * (fxpMul)b) >> FRACTIONAL_BITS); +} + +fxpMul fxp32_mul64(fxp32 a, fxp32 b) { + return ((fxpMul)a * (fxpMul)b) >> FRACTIONAL_BITS; +} + +fxp32 fxp32_div(fxp32 a, fxp32 b) { + return (fxp32)(((fxpMul)a << 
FRACTIONAL_BITS) / b); +} + +bool fxp32_closefxp(fxp32 a, fxp32 b, fxp32 prec) { + fxp32 diff = a - b; + if (diff < 0) + diff = -diff; + return diff <= prec; +} + +fxp32 fxp32_sqrt(fxp32 a) { + fxp32 guess = a >> 1; + fxp32 eps = fxp32_fromFloat(0.0001); + while (fxp32_closefxp(guess, fxp32_div(a, guess), eps) == false) { + guess = (guess + fxp32_div(a, guess)) >> 1; + } + return guess; +} + +bool fxp32_close(fxp32 a, fxp32 b, float prec) { + fxp32 diff = a - b; + if (diff < 0) + diff = -diff; + return diff <= fxp32_fromFloat(prec); +} diff --git a/sw/applications/l_data/fxp32.h b/sw/applications/l_data/fxp32.h new file mode 100644 index 000000000..169a287b8 --- /dev/null +++ b/sw/applications/l_data/fxp32.h @@ -0,0 +1,24 @@ +#ifndef FXP_H +#define FXP_H + +#include +#include + +#define FRACTIONAL_BITS 23 +typedef int32_t fxp32; +typedef int64_t fxpMul; + +fxp32 fxp32_fromFloat(float f); +float fxp32_toFloat(fxp32 x); +fxp32 fxp32_fromInt(int i); + +fxp32 fxp32_mul(fxp32 a, fxp32 b); +fxpMul fxp32_mul64(fxp32 a, fxp32 b); + +fxp32 fxp32_div(fxp32 a, fxp32 b); + +fxp32 fxp32_sqrt(fxp32 a); + +bool fxp32_close(fxp32 a, fxp32 b, float prec); + +#endif // FXP_H \ No newline at end of file diff --git a/sw/applications/l_data/main.c b/sw/applications/l_data/main.c new file mode 100644 index 000000000..0a0133e29 --- /dev/null +++ b/sw/applications/l_data/main.c @@ -0,0 +1,71 @@ +#include "datahelper.h" +#include "utils.h" +#include +#include +#include +#include +#include + +#define COMP_PREC 0.00001f + +static float a1[] = { 123.04050162592358, -128.71307015233148, 135.3744134753319, 103.11344412842786, -46.7358607709171, 116.00302080210878, 82.62329400495238, -76.20943485909036, -90.71184986089287, 41.93325453498443,}; +float mean = 25.971771292849713; +float std = 96.0113380509814; + +int len = 256; +static float a2[] = { 36.40057228916555, -17.4873357823476, -127.25548939081504, -66.02360370727564, -115.35054254378875, 65.73317391590518, 39.6949855266501, 
-145.09952906073406, 138.76658756925195, 136.14229311557222, 149.64786973323032, -64.23458088252782, 17.671085816474374, -8.62910339947615, -14.73183117957484, 53.96449574858704, 92.446904437044, 68.97978386740812, 16.74350286170096, -94.40070777490078, 116.65733781875741, 124.1243397609166, 125.68590292547839, 134.85723645020056, -135.0245927377151, 112.09498882902591, -58.53839219009234, -111.71841610369637, 113.19041310960239, 109.79335415905297, 20.2605781235022, -100.54927797366389, 25.02276994149426, 71.27468330174392, 32.80112840449547, -0.20022823602866424, -102.62475576256529, -23.487294095268467, 73.0374692535255, 50.458234758543, 85.48229956353077, -46.055447986430735, -6.566758014056262, 2.5167432144677093, 78.64098835139595, -25.677090531806698, 15.109131868753877, -4.395924509437549, -113.44452582042501, -21.243077142712707, -14.74825334934053, -35.725321576837715, 30.364162895528978, -80.95320140964742, 114.24107647970163, 18.66090475050899, -110.99028355341278, -67.24633889812246, -91.2489710325416, -111.82450407627948, 31.589279036576414, -55.247504773469174, -58.476559650457745, -107.309058855176, 35.17204234335952, -149.6360920619463, 147.10690080516474, -122.01002100818047, 140.56206843670248, -52.14519294211374, 41.499937542445, -101.97676942639455, 134.7644732505678, -25.73898973783534, 9.235056563443976, -91.6564063811775, -77.67441611363908, 68.59854728249084, -59.08651010389188, -52.0164041508058, -64.95272996504757, -129.44357734905816, 8.264954133645062, -106.71647342293014, 119.39100698873267, 78.36363524286313, -39.49332364947908, -121.79035268053981, 32.574542780391226, 128.0971677828387, -21.626110308379282, -102.69055420528288, 67.4656767004661, -123.21323138826772, -120.49920470166975, -76.53548442575733, -72.77121451373446, 28.58146810851855, -28.537533893781514, 75.96696982700661, 146.0634156929783, -33.399647769565334, 75.51169508833624, 83.36387184275691, -68.10958431147702, 75.24042423399985, -71.97962718797561, 
-32.17028705192459, -95.42791421796835, -11.996529807894888, 42.27941921543939, 41.7125096915548, -112.04688924854452, -81.55447055320181, -148.42021976204154, -5.111254520550375, -126.68728743945219, -130.126537788591, -99.72941117018394, -98.2031169251074, 71.294757755586, -1.9341868013485168, -98.4659236352466, 149.00213908914589, 117.24063932453572, -39.73132407748264, 15.091902394279572, 94.91645496147964, -135.58034176776664, -59.25830714184035, -33.641433737941625, -133.6021860559324, -139.54537424274582, 113.69495768322861, -45.257593138771384, 21.983571358037523, -149.12416998415887, -102.9644852364416, -118.57312058834336, -91.24912914087662, 65.41443112820448, 30.4657292518161, 33.4278969140432, -59.33266130882568, 124.20848116332098, -19.74551275436113, -58.72246576618829, -24.915437801168608, 89.20433967079151, 55.19538921266104, -52.15596277773072, -12.547448271854591, -83.69341343012596, 104.13118666721093, 90.69054617716188, 136.31941626173398, 42.93480361810333, 127.05473574597863, -144.1834644180072, -69.45246058460488, -89.76739198374209, -88.42516037596269, -28.344011427569654, -76.36762914821325, 102.82285072212568, -95.77937873505073, -73.88271820881415, -53.296799332230506, -81.95310166563637, 20.96966578559531, 97.19016968338028, 86.41715047563864, 27.116739877975732, 82.08118811104484, -136.6808944624087, -134.2188597368059, -0.7095783113232983, -47.736595751619745, -41.596456306179235, -38.51073306766949, 12.94547416158801, -23.602828947640305, -59.92223516738801, -74.54134353941554, -57.98743506675068, 3.729766894213526, -27.29977236913703, -10.662489375175426, -123.45778266402866, -40.3977773609477, -12.777049318291745, 88.97632585057187, -123.00624392769492, 44.49190355377513, 126.17689393233866, 67.81614514412152, -105.79449434434196, 4.437560052603715, 18.422700909937504, 98.85309726181973, 42.4882072462361, -53.424870567329194, 8.172918229775462, 144.58333289139807, -21.166346997422778, -50.16647607514486, -4.56970898860601, 
30.738608514800035, 121.10008566874626, 72.27634952511264, -19.372257247347335, 68.43210539602745, 43.3525071907024, -140.57910192783348, 95.18689839911093, -121.0466797029768, 65.23580169950878, 72.16859627562576, -10.328434919163385, -149.23978600604835, 71.77105027840398, -74.79512320335074, -44.32231374630257, 96.10046289502418, 59.48511414005412, 34.802002192617834, -102.77662804130051, -102.77396235621572, -61.65058214367913, -17.406994931962743, 93.53697575530498, 117.88803127314878, 141.63767049706166, 131.6182043100075, -96.95088443464903, 126.75096198251231, 58.85097613184519, -147.81721491115476, -103.51325596033976, -26.141082353455715, 86.85750303364668, -8.798406372863809, -107.34828370364257, 6.03896424247128, 142.1130272880979, -15.296031442415313, 61.922444380523444, 147.24446407910665, -141.14842801389815, -50.34133098334529, -107.74027398819815, 121.2313290997945, -68.58210567618356, 23.287102140421638, -60.86204674564932, -136.0875262693496,}; +float mean2 = -7.1283687740938095; +float std2 = 85.26595529678028; + +void test_normalizemvf() { + float* a = (float*)malloc(10 * sizeof(float)); + memcpy(a, a1, 10 * sizeof(float)); + DataMSf mv = Data_normalizeMVf(a, 10); + assert_closef(mv.mean, mean, COMP_PREC); + assert_closef(mv.std, std, COMP_PREC); + Data_denormalizef(a, 10, mv); + for (int i = 0; i < 10; ++i) { + assert_closef(a[i], a1[i], COMP_PREC); + } +} + +void test_normalizemvf2() { + float* a = (float*)malloc(len * sizeof(float)); + memcpy(a, a2, len * sizeof(float)); + DataMSf mv = Data_normalizeMVf(a2, len); + assert_closef(mv.mean, mean2, COMP_PREC); + assert_closef(mv.std, std2, COMP_PREC); + Data_denormalizef(a2, len, mv); + for (int i = 0; i < len; ++i) { + assert_closef(a2[i], a[i], COMP_PREC); + } +} + +//TODO: +// void test_normalizemv() { +// fxp32* a = (fxp32*)malloc(10 * sizeof(fxp32)); +// for (int i = 0; i < 10; ++i) { +// a[i] = fxp32_fromFloat(a1[i]); +// } +// DataMS mv = Data_normalizeMV(a, 10); +// 
my_assert(fxp32_close(mv.mean, fxp32_fromFloat(mean), fxp32_fromFloat(COMP_PREC))); +// my_assert(fxp32_close(mv.std, fxp32_fromFloat(std), fxp32_fromFloat(COMP_PREC))); +// for(int i = 0; i < 10; ++i) { +// printf("a[%d]: %f\n", i, fxp32_toFloat(a[i])); +// } +// Data_denormalize(a, 10, mv); +// for (int i = 0; i < 10; ++i) { +// my_assert(fxp32_close(a[i], fxp32_fromFloat(a1[i]), fxp32_fromFloat(COMP_PREC))); +// } +// } + +int main() { + PRINTF("\033[1;93m====== Test Datahelper =====\n"); + PRINTF("\033[0m====== Test Norm =========\n"); + test_normalizemvf(); + PRINTF("\033[1;32m====== Test 1 passed =====\n"); + test_normalizemvf2(); + PRINTF("\033[1;32m====== Test 2 passed =====\n"); + PRINTF("\033[0m====== Test Norm end =====\n\n"); + return 0; +} \ No newline at end of file diff --git a/sw/applications/l_data/utils.h b/sw/applications/l_data/utils.h new file mode 100644 index 000000000..acfe5bb47 --- /dev/null +++ b/sw/applications/l_data/utils.h @@ -0,0 +1,37 @@ +#ifndef UTILS_H +#define UTILS_H + +#include +#include +#include + +// Define SIMULATION if you want to disable printing +#define SIMULATION + +// Enable or disable printing +#ifndef SIMULATION +#define PRINTF(...) printf(__VA_ARGS__) +#else +#define PRINTF(...) 
+#endif + +// Assert functions, always print if failing +void assert_closef(float a, float b, float prec) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { + printf("AF %d %f %f\n", __LINE__, a, b); + exit(EXIT_FAILURE); + } +} + +void assert_closei32(int32_t a, int32_t b, int32_t prec) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { + printf("AI %d %d %d\n", __LINE__, a, b); + exit(EXIT_FAILURE); + } +} + +#endif /* UTILS_H */ diff --git a/sw/applications/l_fxp/fxp32.c b/sw/applications/l_fxp/fxp32.c new file mode 100644 index 000000000..b08fb9f47 --- /dev/null +++ b/sw/applications/l_fxp/fxp32.c @@ -0,0 +1,57 @@ +#include "fxp32.h" + +#include + +fxp32 fxp32_fromFloat(float f) { + return (fxp32)(f * (1 << FRACTIONAL_BITS)); +} + +float fxp32_toFloat(fxp32 x) { + return (float)x / (1 << FRACTIONAL_BITS); +} + +fxp32 fxp32_fromInt(int i) { + return i << FRACTIONAL_BITS; +} + +fxp32 fxp32_mul(fxp32 a, fxp32 b) { + return (fxp32)(((fxpMul)a * (fxpMul)b) >> FRACTIONAL_BITS); +} + +fxpMul fxp32_mul64(fxp32 a, fxp32 b) { + return ((fxpMul)a * (fxpMul)b) >> FRACTIONAL_BITS; +} + +fxp32 fxp32_div(fxp32 a, fxp32 b) { + return (fxp32)(((fxpMul)a << FRACTIONAL_BITS) / b); +} + +bool fxp32_closefxp(fxp32 a, fxp32 b, fxp32 prec) { + fxp32 diff = a - b; + if (diff < 0) + diff = -diff; + return diff <= prec; +} + +fxp32 fxp32_sqrt(fxp32 a) { + fxp32 guess = a >> 1; + fxp32 eps = fxp32_fromFloat(0.0001); + while (fxp32_closefxp(guess, fxp32_div(a, guess), eps) == false) { + guess = (guess + fxp32_div(a, guess)) >> 1; + } + return guess; +} + +bool fxp32_close(fxp32 a, fxp32 b, float prec) { + fxp32 diff = a - b; + if (diff < 0) + diff = -diff; + return diff <= fxp32_fromFloat(prec); +} + +void assert_fxp32Close(fxp32 a, fxp32 b, float prec) { + if (!fxp32_close(a, b, prec)) { + printf("F %s:%d, %d != %d\n", __FILE__, __LINE__, a, b); + exit(EXIT_FAILURE); + } +} diff --git a/sw/applications/l_fxp/fxp32.h 
b/sw/applications/l_fxp/fxp32.h new file mode 100644 index 000000000..33f7f483e --- /dev/null +++ b/sw/applications/l_fxp/fxp32.h @@ -0,0 +1,26 @@ +#ifndef FXP_H +#define FXP_H + +#include +#include + +#define FRACTIONAL_BITS 23 +typedef int32_t fxp32; +typedef int64_t fxpMul; + +fxp32 fxp32_fromFloat(float f); +float fxp32_toFloat(fxp32 x); +fxp32 fxp32_fromInt(int i); + +fxp32 fxp32_mul(fxp32 a, fxp32 b); +fxpMul fxp32_mul64(fxp32 a, fxp32 b); + +fxp32 fxp32_div(fxp32 a, fxp32 b); + +fxp32 fxp32_sqrt(fxp32 a); + +bool fxp32_close(fxp32 a, fxp32 b, float prec); + +void assert_fxp32Close(fxp32 a, fxp32 b, float prec); + +#endif // FXP_H \ No newline at end of file diff --git a/sw/applications/l_fxp/main.c b/sw/applications/l_fxp/main.c new file mode 100644 index 000000000..4b1a89b85 --- /dev/null +++ b/sw/applications/l_fxp/main.c @@ -0,0 +1,65 @@ +#include "fxp32.h" +#include "utils.h" +#include +#include +#include +#include +#include + +#define COMP_PREC 0.000001f + +void test_assert() { + assert_closef(1.0, 1.0, 0.0001f); + assert_closef(1.0, 1.0001, 0.0002f); + assert_closef(1.0, 1.0002, 0.0003f); + assert_closef(0.01, 0.0109, 0.001f); + assert_closei32(1, 1, 1); + assert_closei32(1000, 1002, 3); + assert_closei32(-1000, -1003, 4); +} + +void test_fxp_basic() { + float f1 = 3.14159286537; + float f2 = 2.71828723519; + + // Convert floats to fxp32 + fxp32 x = fxp32_fromFloat(f1); + fxp32 y = fxp32_fromFloat(f2); + + // Perform addition and multiplication + fxp32 result_add = x + y; + fxp32 result_multiply = fxp32_mul(x, y); + fxp32 result_divide = fxp32_div(x, y); + + // Convert fxp32 back to floats + float result_add_float = fxp32_toFloat(result_add); + float result_multiply_float = fxp32_toFloat(result_multiply); + float result_divide_float = fxp32_toFloat(result_divide); + + // Print results + assert_closef(result_add_float, f1 + f2, COMP_PREC); + assert_closef(result_multiply_float, f1 * f2, COMP_PREC); + assert_closef(result_divide_float, f1 / f2, 
COMP_PREC); +} + +void test_fxp_sqrt() { + fxp32 x = fxp32_fromFloat(2.0); + fxp32 result = fxp32_sqrt(x); + float result_float = fxp32_toFloat(result); + assert_closef(result_float, 1.41421356237, 10*COMP_PREC); +} + +int main() { + PRINTF("\033[1;93m====== Test FXP ==========\n"); + PRINTF("\033[0m====== Test Assert =======\n"); + test_assert(); + PRINTF("\033[1;32m====== Test passed =======\n"); + PRINTF("\033[0m====== Test Basic ========\n"); + test_fxp_basic(); + PRINTF("\033[1;32m====== Test passed =======\n"); + PRINTF("\033[0m====== Test Sqrt =========\n"); + test_fxp_sqrt(); + PRINTF("\033[1;32m====== Test passed =======\n"); + PRINTF("\033[0m====== Test FXP end ======\n\n"); + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/sw/applications/l_fxp/utils.h b/sw/applications/l_fxp/utils.h new file mode 100644 index 000000000..acfe5bb47 --- /dev/null +++ b/sw/applications/l_fxp/utils.h @@ -0,0 +1,37 @@ +#ifndef UTILS_H +#define UTILS_H + +#include +#include +#include + +// Define SIMULATION if you want to disable printing +#define SIMULATION + +// Enable or disable printing +#ifndef SIMULATION +#define PRINTF(...) printf(__VA_ARGS__) +#else +#define PRINTF(...) 
+#endif + +// Assert functions, always print if failing +void assert_closef(float a, float b, float prec) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { + printf("AF %d %f %f\n", __LINE__, a, b); + exit(EXIT_FAILURE); + } +} + +void assert_closei32(int32_t a, int32_t b, int32_t prec) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { + printf("AI %d %d %d\n", __LINE__, a, b); + exit(EXIT_FAILURE); + } +} + +#endif /* UTILS_H */ From 486b7af7dbb5432394d6cd331e0257233d162fa3 Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Mon, 22 Apr 2024 16:18:24 +0200 Subject: [PATCH 05/27] all tests work now --- sw/applications/l_cnn/cnn.c | 25 +++++- sw/applications/l_cnn/cnn.h | 8 +- sw/applications/l_cnn/conv2dlayer.c | 31 ++++++-- sw/applications/l_cnn/conv2dlayer.h | 11 ++- sw/applications/l_cnn/main.c | 100 +++++++++++++++--------- sw/applications/l_cnn/test_cnnWeights.h | 4 + 6 files changed, 128 insertions(+), 51 deletions(-) diff --git a/sw/applications/l_cnn/cnn.c b/sw/applications/l_cnn/cnn.c index 70654b819..0bcb03b23 100644 --- a/sw/applications/l_cnn/cnn.c +++ b/sw/applications/l_cnn/cnn.c @@ -21,16 +21,35 @@ void Cnn_destroy(CnnHandle self) { free(self); } -void Cnn_forward(CnnHandle self, fxp32* input, fxp32* output) { +void Cnn_forwardFxp(CnnHandle self, fxp32* input, fxp32* output) { fxp32* layer1Output = (fxp32*)calloc(self->inputDim.x * self->inputDim.y, sizeof(fxp32)); Conv2DLayer_forwardFxp(self->layer1, self->inputDim, input, layer1Output); Conv2DLayer_forwardFxp(self->layer2, self->inputDim, layer1Output, output); free(layer1Output); } -void Cnn_predict(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output) { - Cnn_forward(self, acc, output); +void Cnn_forwardFloat(CnnHandle self, float* input, float* output) { + float* layer1Output = (float*)calloc(self->inputDim.x * self->inputDim.y, sizeof(float)); + Conv2DLayer_forwardFloat(self->layer1, self->inputDim, input, layer1Output); + 
Conv2DLayer_forwardFloat(self->layer2, self->inputDim, layer1Output, output); + free(layer1Output); +} + +void Cnn_predictFxp(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output) { + Cnn_forwardFxp(self, acc, output); + for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { + output[i] = ppg[i] - output[i]; + } +} + +void Cnn_predictFloat(CnnHandle self, float* acc, float* ppg, float* output) { + Cnn_forwardFloat(self, acc, output); for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { output[i] = ppg[i] - output[i]; } +} + +void Cnn_freezeModel(CnnHandle self) { + Conv2DLayer_transformWeightsToFxp(self->layer1); + Conv2DLayer_transformWeightsToFxp(self->layer2); } \ No newline at end of file diff --git a/sw/applications/l_cnn/cnn.h b/sw/applications/l_cnn/cnn.h index b127f42e7..3905c6fac 100644 --- a/sw/applications/l_cnn/cnn.h +++ b/sw/applications/l_cnn/cnn.h @@ -20,8 +20,12 @@ CnnHandle Cnn_create(Dim2D inputDim, Dim2D layer1Dim, Dim2D layer2Dim, Conv2DPad Conv2DPadding layer2Pad); void Cnn_destroy(CnnHandle self); -void Cnn_forward(CnnHandle self, fxp32* input, fxp32* output); +void Cnn_forwardFxp(CnnHandle self, fxp32* input, fxp32* output); +void Cnn_forwardFloat(CnnHandle self, float* input, float* output); -void Cnn_predict(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output); +void Cnn_predictFxp(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output); +void Cnn_predictFloat(CnnHandle self, float* acc, float* ppg, float* output); + +void Cnn_freezeModel(CnnHandle self); #endif // CNN_H \ No newline at end of file diff --git a/sw/applications/l_cnn/conv2dlayer.c b/sw/applications/l_cnn/conv2dlayer.c index 48b37b31a..16a2e4c21 100644 --- a/sw/applications/l_cnn/conv2dlayer.c +++ b/sw/applications/l_cnn/conv2dlayer.c @@ -18,21 +18,35 @@ Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, Conv2DPadding padding) { Conv2DLayerHandle self = (Conv2DLayerHandle)malloc(sizeof(Conv2DLayer)); self->dim = dim; self->padding = padding; - 
self->weights = (fxp32*)calloc(dim.x * dim.y, sizeof(fxp32)); + self->weightsFxp = (fxp32*)calloc(dim.x * dim.y, sizeof(fxp32)); + self->weightsFloat = (float*)calloc(dim.x * dim.y, sizeof(float)); return self; } void Conv2DLayer_destroy(Conv2DLayerHandle self) { - free(self->weights); + free(self->weightsFxp); + free(self->weightsFloat); free(self); } -bool Conv2DLayer_setWeights(Conv2DLayerHandle self, fxp32* weights) { - memcpy(self->weights, weights, self->dim.x * self->dim.y * sizeof(fxp32)); +bool Conv2DLayer_setWeightsFxp(Conv2DLayerHandle self, fxp32* weights) { + memcpy(self->weightsFxp, weights, self->dim.x * self->dim.y * sizeof(fxp32)); // TODO: make sure we get the right size of weights return true; } +bool Conv2DLayer_setWeightsFloat(Conv2DLayerHandle self, float* weights) { + memcpy(self->weightsFloat, weights, self->dim.x * self->dim.y * sizeof(float)); + // TODO: make sure we get the right size of weights + return true; +} + +void Conv2DLayer_transformWeightsToFxp(Conv2DLayerHandle self) { + for (int i = 0; i < self->dim.x * self->dim.y; ++i) { + self->weightsFxp[i] = fxp32_fromFloat(self->weightsFloat[i]); + } +} + // Could be optimized void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, int kerx, int kery, bool valid) { @@ -99,7 +113,7 @@ void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, } // Could be optimized -void convolve2D(float* input, float* output, float* kernel, int inx, int iny, int kerx, int kery, bool valid) { +void convolve2DFloat(float* input, float* output, float* kernel, int inx, int iny, int kerx, int kery, bool valid) { // get kernel center if (kerx % 2 != 1 || kery % 2 != 1) { printf("Kernel size must be odd\n"); @@ -153,6 +167,11 @@ void convolve2D(float* input, float* output, float* kernel, int inx, int iny, in } void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output) { - convolve2DFxp(input, output, self->weights, inputDim.x, 
inputDim.y, self->dim.x, self->dim.y, + convolve2DFxp(input, output, self->weightsFxp, inputDim.x, inputDim.y, self->dim.x, self->dim.y, self->padding == VALID); +} + +void Conv2DLayer_forwardFloat(Conv2DLayerHandle self, Dim2D inputDim, float* input, float* output) { + convolve2DFloat(input, output, self->weightsFloat, inputDim.x, inputDim.y, self->dim.x, self->dim.y, + self->padding == VALID); } \ No newline at end of file diff --git a/sw/applications/l_cnn/conv2dlayer.h b/sw/applications/l_cnn/conv2dlayer.h index 3656037b4..2b47aee2e 100644 --- a/sw/applications/l_cnn/conv2dlayer.h +++ b/sw/applications/l_cnn/conv2dlayer.h @@ -25,7 +25,8 @@ typedef enum __Conv2DPadding { typedef struct __Conv2DLayer { Dim2D dim; Conv2DPadding padding; - fxp32* weights; + fxp32* weightsFxp; + float* weightsFloat; } Conv2DLayer; typedef struct __Conv2DLayer* Conv2DLayerHandle; @@ -33,10 +34,14 @@ typedef struct __Conv2DLayer* Conv2DLayerHandle; Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, Conv2DPadding padding); void Conv2DLayer_destroy(Conv2DLayerHandle self); -bool Conv2DLayer_setWeights(Conv2DLayerHandle self, fxp32* weights); +bool Conv2DLayer_setWeightsFxp(Conv2DLayerHandle self, fxp32* weights); +bool Conv2DLayer_setWeightsFloat(Conv2DLayerHandle self, float* weights); +void Conv2DLayer_transformWeightsToFxp(Conv2DLayerHandle self); + void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output); +void Conv2DLayer_forwardFloat(Conv2DLayerHandle self, Dim2D inputDim, float* input, float* output); void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, int kerx, int kery, bool valid); -void convolve2D(float* input, float* output, float* kernel, int inx, int iny, int kerx, int kery, bool valid); +void convolve2DFloat(float* input, float* output, float* kernel, int inx, int iny, int kerx, int kery, bool valid); #endif // CONV2DLAYER_H \ No newline at end of file diff --git a/sw/applications/l_cnn/main.c 
b/sw/applications/l_cnn/main.c index ea80ae9a2..44f179b41 100644 --- a/sw/applications/l_cnn/main.c +++ b/sw/applications/l_cnn/main.c @@ -13,7 +13,7 @@ #define COMP_PREC 0.01f #define COMP_PREC_I32 512 -void compareVectors(float* a, float* b, int size, float prec) { +void compareVectorsFloat(float* a, float* b, int size, float prec) { for (int i = 0; i < size; ++i) { assert_closef(a[i], b[i], prec, i); } @@ -26,89 +26,111 @@ void compareVectorsFxp(fxp32* a, fxp32* b, int size, float prec) { } void test_same_layer1() { - // fxp32* result_fxp = (fxp32*)calloc(xin1*yin1, sizeof(fxp32)); + fxp32* result_fxp = (fxp32*)calloc(xin1*yin1, sizeof(fxp32)); float* result = (float*)calloc(xin1*yin1, sizeof(float)); - // convolve2DFxp(input1_fxp, result_fxp, kernel1_fxp, xin1, yin1, xker1, yker1, false); - convolve2D(input1, result, kernel1, xin1, yin1, xker1, yker1, false); + convolve2DFxp(input1_fxp, result_fxp, kernel1_fxp, xin1, yin1, xker1, yker1, false); + convolve2DFloat(input1, result, kernel1, xin1, yin1, xker1, yker1, false); - // compareVectorsFxp(result1_fxp, result_fxp, xin1*yin1, COMP_PREC_I32); - compareVectors(result1, result, xin1*yin1, COMP_PREC); + compareVectorsFxp(result1_fxp, result_fxp, xin1*yin1, COMP_PREC_I32); + compareVectorsFloat(result1, result, xin1*yin1, COMP_PREC); - // free(result_fxp); + free(result_fxp); free(result); } void test_same_layer2() { - // fxp32* result_fxp = (fxp32*)calloc(xin2*yin2, sizeof(fxp32)); + fxp32* result_fxp = (fxp32*)calloc(xin2*yin2, sizeof(fxp32)); float* result = (float*)calloc(xin2*yin2, sizeof(float)); - // convolve2DFxp(input2_fxp, result_fxp, kernel2_fxp, xin2, yin2, xker2, yker2, false); - convolve2D(input2, result, kernel2, xin2, yin2, xker2, yker2, false); + convolve2DFxp(input2_fxp, result_fxp, kernel2_fxp, xin2, yin2, xker2, yker2, false); + convolve2DFloat(input2, result, kernel2, xin2, yin2, xker2, yker2, false); - // compareVectorsFxp(result2_fxp, result_fxp, xin2*yin2, COMP_PREC_I32); - 
compareVectors(result2, result, xin2*yin2, COMP_PREC); + compareVectorsFxp(result2_fxp, result_fxp, xin2*yin2, COMP_PREC_I32); + compareVectorsFloat(result2, result, xin2*yin2, COMP_PREC); - // free(result_fxp); + free(result_fxp); free(result); } void test_same_layer3() { - // fxp32* result_fxp = (fxp32*)calloc(xin3*yin3, sizeof(fxp32)); + fxp32* result_fxp = (fxp32*)calloc(xin3*yin3, sizeof(fxp32)); float* result = (float*)calloc(xin3*yin3, sizeof(float)); - // convolve2DFxp(input3_fxp, result_fxp, kernel3_fxp, xin3, yin3, xker3, yker3, false); - convolve2D(input3, result, kernel3, xin3, yin3, xker3, yker3, false); + convolve2DFxp(input3_fxp, result_fxp, kernel3_fxp, xin3, yin3, xker3, yker3, false); + convolve2DFloat(input3, result, kernel3, xin3, yin3, xker3, yker3, false); - // compareVectorsFxp(result3_fxp, result_fxp, xin3*yin3, COMP_PREC_I32); - compareVectors(result3, result, xin3*yin3, COMP_PREC); + compareVectorsFxp(result3_fxp, result_fxp, xin3*yin3, COMP_PREC_I32); + compareVectorsFloat(result3, result, xin3*yin3, COMP_PREC); - // free(result_fxp); + free(result_fxp); free(result); } void test_valid_layer1() { - // fxp32* result_fxp = (fxp32*)calloc(yout4*yout4, sizeof(fxp32)); + fxp32* result_fxp = (fxp32*)calloc(yout4*yout4, sizeof(fxp32)); float* result = (float*)calloc(xout4*yout4, sizeof(float)); - // convolve2DFxp(input4_fxp, result_fxp, kernel4_fxp, xin4, yin4, xker4, yker4, true); - convolve2D(input4, result, kernel4, xin4, yin4, xker4, yker4, true); + convolve2DFxp(input4_fxp, result_fxp, kernel4_fxp, xin4, yin4, xker4, yker4, true); + convolve2DFloat(input4, result, kernel4, xin4, yin4, xker4, yker4, true); - // compareVectorsFxp(result4_fxp, result_fxp, xout4*yout4, COMP_PREC_I32); - compareVectors(result4, result, xout4*yout4, COMP_PREC); + compareVectorsFxp(result4_fxp, result_fxp, xout4*yout4, COMP_PREC_I32); + compareVectorsFloat(result4, result, xout4*yout4, COMP_PREC); - // free(result_fxp); + free(result_fxp); free(result); } void 
test_valid_layer2() { - // fxp32* result_fxp = (fxp32*)calloc(xout5*yout5, sizeof(fxp32)); + fxp32* result_fxp = (fxp32*)calloc(xout5*yout5, sizeof(fxp32)); float* result = (float*)calloc(xout5*yout5, sizeof(float)); - // convolve2DFxp(input5_fxp, result_fxp, kernel5_fxp, xin5, yin5, xker5, yker5, true); - convolve2D(input5, result, kernel5, xin5, yin5, xker5, yker5, true); + convolve2DFxp(input5_fxp, result_fxp, kernel5_fxp, xin5, yin5, xker5, yker5, true); + convolve2DFloat(input5, result, kernel5, xin5, yin5, xker5, yker5, true); - // compareVectorsFxp(result5_fxp, result_fxp, xout5*yout5, COMP_PREC_I32); - compareVectors(result5, result, xout5*yout5, COMP_PREC); + compareVectorsFxp(result5_fxp, result_fxp, xout5*yout5, COMP_PREC_I32); + compareVectorsFloat(result5, result, xout5*yout5, COMP_PREC); - // free(result_fxp); + free(result_fxp); free(result); } void test_valid_layer3() { - // fxp32* result_fxp = (fxp32*)calloc(xout6*yout6, sizeof(fxp32)); + fxp32* result_fxp = (fxp32*)calloc(xout6*yout6, sizeof(fxp32)); float* result = (float*)calloc(xout6*yout6, sizeof(float)); - // convolve2DFxp(input6_fxp, result_fxp, kernel6_fxp, xin6, yin6, xker6, yker6, true); - convolve2D(input6, result, kernel6, xin6, yin6, xker6, yker6, true); + convolve2DFxp(input6_fxp, result_fxp, kernel6_fxp, xin6, yin6, xker6, yker6, true); + convolve2DFloat(input6, result, kernel6, xin6, yin6, xker6, yker6, true); - // compareVectorsFxp(result6_fxp, result_fxp, xout6*yout6, COMP_PREC_I32); - compareVectors(result6, result, xout6*yout6, COMP_PREC); + compareVectorsFxp(result6_fxp, result_fxp, xout6*yout6, COMP_PREC_I32); + compareVectorsFloat(result6, result, xout6*yout6, COMP_PREC); - // free(result_fxp); + free(result_fxp); free(result); } +void test_cnn() { + CnnHandle cnn = Cnn_create((Dim2D){3u, 15u}, (Dim2D){3u, 5u}, (Dim2D){3u, 1u}, SAME, VALID); + + Conv2DLayer_setWeightsFloat(cnn->layer1, ke1); + Conv2DLayer_setWeightsFloat(cnn->layer2, ke2); + 
Conv2DLayer_setWeightsFxp(cnn->layer1, ke1_fxp); + Conv2DLayer_setWeightsFxp(cnn->layer2, ke2_fxp); + + float* result = (float*)calloc(1*15, sizeof(float)); + fxp32* result_fxp = (fxp32*)calloc(1*15, sizeof(fxp32)); + + Cnn_forwardFloat(cnn, inp, result); + Cnn_forwardFxp(cnn, inp_fxp, result_fxp); + + compareVectorsFloat(result, res, 15, COMP_PREC*10); + compareVectorsFxp(result_fxp, res_fxp, 15, COMP_PREC_I32*10); + + free(result); + free(result_fxp); + Cnn_destroy(cnn); +} + int main() { PRINTF("\033[1;93m====== Test CNN =========\n"); PRINTF("\033[0m====== Test Same ========\n"); @@ -127,5 +149,9 @@ int main() { test_valid_layer3(); PRINTF("\033[1;32m====== Test 3 passed ====\n"); PRINTF("\033[0m====== Test Valid end ===\n\n"); + PRINTF("\033[0m====== Test CNN =========\n"); + test_cnn(); + PRINTF("\033[1;32m====== Test CNN passed ==\n"); + PRINTF("\033[0m====== Test CNN end =====\n\n"); return EXIT_SUCCESS; } \ No newline at end of file diff --git a/sw/applications/l_cnn/test_cnnWeights.h b/sw/applications/l_cnn/test_cnnWeights.h index 3768c1d04..f3d6efb8e 100644 --- a/sw/applications/l_cnn/test_cnnWeights.h +++ b/sw/applications/l_cnn/test_cnnWeights.h @@ -88,5 +88,9 @@ static float inp[] = { 0.10687026183436554, 0.6433775638335413, 1.57193717742119 static float ke1[] = { 1.2019056830022787, -1.9184344862073974, 0.2904745952311969, -0.3544655368824938, 1.9405471907832181, 1.2056061216442804, -1.7841515914194126, -1.2380889061779894, -0.19032461673209822, 0.8117683084719127, -0.6718074018572904, -0.5600672193147695, 1.685882263836708, 1.8145220229093884, -0.36925708529142565,}; static float ke2[] = { 1.594284619538893, -0.6789869921619749, -1.6690457236800493,}; static float res[] = { -5.646923391743088, -12.486867124009787, 1.116749990770437, 3.790885823459984, -7.655608446879853, -5.758159525901699, -16.320022556866068, 10.253133245571195, -8.825685721563698, -3.9221345076618483, -11.844371518436887, 16.970633341131517, 5.631829596306385, 4.730155342366402, 
6.236770944902686,}; +static int32_t inp_fxp[] = { 896492, 5397042, 13186365, 15608097, 9057438, 8693924, 7048076, 6762991, 8970153, 15916527, -4237437, -13990371, -8736217, -9345488, -4576831, 10412418, -14760723, -1686326, 10508259, -7910847, -14649832, -8653431, -13922725, 10327297, -11064571, -10222538, 10557634, 10411455, 2998889, 13916175, -14769935, 15602698, 2381533, -6626392, 10928874, 5349156, 16324280, -13170466, 2715175, -911735, 5109359, -8661773, -15732664, 1484191, -4539573,}; +static int32_t ke1_fxp[] = { 10082316, -16092995, 2436677, -2973472, 16278490, 10113357, -14966548, -10385843, -1596558, 6809606, -5635529, -4698184, 14142205, 15221314, -3097553,}; +static int32_t ke2_fxp[] = { 13373829, -5695755, -14000970,}; +static int32_t res_fxp[] = { -47369828, -104747432, 9367978, 31800256, -64219900, -48302944, -136902272, 86009512, -74035216, -32901248, -99357792, 142359984, 47243212, 39679420, 52317828,}; #endif // TEST_CNNWEIGHTS_H \ No newline at end of file From bdc1bf8eb09f89e8cd36b0d316ead8a11203890c Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Mon, 22 Apr 2024 21:25:22 +0200 Subject: [PATCH 06/27] create target test case --- sw/applications/l_cnn/main.c | 59 +++++++++++++++++++++++------ sw/applications/l_cnn/testdata_s2.h | 30 +++++++++++++++ 2 files changed, 77 insertions(+), 12 deletions(-) create mode 100644 sw/applications/l_cnn/testdata_s2.h diff --git a/sw/applications/l_cnn/main.c b/sw/applications/l_cnn/main.c index 44f179b41..baaf18e77 100644 --- a/sw/applications/l_cnn/main.c +++ b/sw/applications/l_cnn/main.c @@ -5,10 +5,8 @@ #include #include -#include "test_cnnWeights.h" -// #include "testdata_s0.h" -// #include "testdata_s1.h" -// #include "testdata_s2.h" +// #include "test_cnnWeights.h" +#include "testdata_s2.h" #define COMP_PREC 0.01f #define COMP_PREC_I32 512 @@ -24,7 +22,7 @@ void compareVectorsFxp(fxp32* a, fxp32* b, int size, float prec) { assert_closei32(a[i], b[i], prec, i); } } - +/* void test_same_layer1() { 
fxp32* result_fxp = (fxp32*)calloc(xin1*yin1, sizeof(fxp32)); float* result = (float*)calloc(xin1*yin1, sizeof(float)); @@ -130,28 +128,65 @@ void test_cnn() { free(result_fxp); Cnn_destroy(cnn); } +*/ +void compare_tf_conv_3() { + CnnHandle cnn = Cnn_create((Dim2D){3u, 256u}, (Dim2D){3u, 21u}, (Dim2D){3u, 1u}, SAME, VALID); + + Conv2DLayer_setWeightsFloat(cnn->layer1, weights1_2); + Conv2DLayer_setWeightsFloat(cnn->layer2, weights2_2); + + float* result = (float*)calloc(1*256, sizeof(float)); + Cnn_forwardFloat(cnn, xin_2, result); + + compareVectorsFloat(result, xout_2, 256, COMP_PREC); + + Cnn_predictFloat(cnn, xin_2, ppg_2, result); + + compareVectorsFloat(result, ppgf_2, 256, COMP_PREC); + + free(result); + + // Cnn_freezeModel(cnn); + + // fxp32* result_fxp = (fxp32*)calloc(1*256, sizeof(fxp32)); + // Cnn_forwardFxp(cnn, xin_2_fxp, result_fxp); + + // compareVectorsFxp(result_fxp, xout_2_fxp, 256, COMP_PREC_I32); + + // Cnn_predictFxp(cnn, xin_2_fxp, ppg_2_fxp, result_fxp); + + // compareVectorsFxp(result_fxp, ppgf_2_fxp, 256, COMP_PREC_I32); + + // free(result_fxp); + // Cnn_destroy(cnn); +} int main() { PRINTF("\033[1;93m====== Test CNN =========\n"); PRINTF("\033[0m====== Test Same ========\n"); - test_same_layer1(); + // test_same_layer1(); PRINTF("\033[1;32m====== Test 1 passed ====\n"); - test_same_layer2(); + // test_same_layer2(); PRINTF("\033[1;32m====== Test 2 passed ====\n"); - test_same_layer3(); + // test_same_layer3(); PRINTF("\033[1;32m====== Test 3 passed ====\n"); PRINTF("\033[0m====== Test Same end ====\n\n"); PRINTF("\033[0m====== Test Valid =======\n"); - test_valid_layer1(); + // test_valid_layer1(); PRINTF("\033[1;32m====== Test 1 passed ====\n"); - test_valid_layer2(); + // test_valid_layer2(); PRINTF("\033[1;32m====== Test 2 passed ====\n"); - test_valid_layer3(); + // test_valid_layer3(); PRINTF("\033[1;32m====== Test 3 passed ====\n"); PRINTF("\033[0m====== Test Valid end ===\n\n"); PRINTF("\033[0m====== Test CNN =========\n"); - 
test_cnn(); + // test_cnn(); PRINTF("\033[1;32m====== Test CNN passed ==\n"); PRINTF("\033[0m====== Test CNN end =====\n\n"); + PRINTF("\033[0m====== Comp TF ==========\n"); + PRINTF("\033[0m====== Sample 3 =========\n"); + compare_tf_conv_3(); + PRINTF("\033[1;32m====== Comp TF passed ===\n"); + PRINTF("\033[0m====== Comp TF end ======\n"); return EXIT_SUCCESS; } \ No newline at end of file diff --git a/sw/applications/l_cnn/testdata_s2.h b/sw/applications/l_cnn/testdata_s2.h new file mode 100644 index 000000000..9ea486952 --- /dev/null +++ b/sw/applications/l_cnn/testdata_s2.h @@ -0,0 +1,30 @@ +#ifndef TESTDATA_S2_H +#define TESTDATA_S2_H + +#pragma GCC diagnostic ignored "-Wunused-variable" + +static float weights1_2[] = { -0.12116f, 0.15478f, 0.15130f, -0.15055f, -0.10036f, 0.14400f, -0.00204f, -0.03408f, -0.18936f, 0.03390f, 0.12200f, -0.13891f, 0.05332f, -0.21001f, -0.18876f, -0.06263f, -0.00274f, -0.14772f, 0.06332f, 0.05349f, -0.05071f, 0.13313f, -0.07974f, -0.06264f, -0.03501f, -0.21790f, 0.05925f, -0.14536f, -0.21762f, 0.08203f, -0.08571f, 0.16886f, 0.12439f, 0.12544f, 0.04466f, -0.16849f, 0.04673f, 0.03604f, -0.04811f, 0.21726f, 0.17702f, 0.16138f, 0.08862f, 0.02971f, -0.05572f, -0.07597f, -0.02538f, 0.06561f, -0.08724f, -0.20014f, -0.09443f, -0.13890f, -0.00194f, 0.03216f, 0.14760f, 0.09256f, 0.13031f, -0.09160f, 0.03522f, -0.03609f, -0.08631f, -0.02040f, -0.19688f,}; + +static float weights2_2[] = { -0.71606f, 0.06657f, -0.07737f,}; + +static float xin_2[] = { -1.30298f, -0.96491f, -0.51415f, -0.17608f, 0.27468f, 0.50006f, 0.04930f, -0.73953f, -0.96491f, -1.97912f, -2.65526f, -2.31719f, -2.54257f, -2.88064f, -3.21871f, -2.76795f, -2.31719f, -2.20450f, 1.17620f, 1.51427f, 0.95082f, 0.38737f, -0.17608f, 0.04930f, 0.61275f, 0.61275f, 0.50006f, 0.50006f, 0.50006f, -0.17608f, -0.40146f, -0.73953f, -0.73953f, -0.73953f, -0.85222f, -0.73953f, -0.85222f, -0.73953f, -0.17608f, 0.27468f, 0.50006f, 0.61275f, 0.95082f, 1.06351f, 1.06351f, 0.83813f, 0.50006f, 
0.16199f, 0.04930f, 0.04930f, 0.50006f, 0.50006f, 0.72544f, 0.83813f, 0.83813f, 0.61275f, 0.04930f, -0.06339f, 0.04930f, 0.61275f, 0.83813f, 1.06351f, 0.95082f, 0.72544f, 0.61275f, 0.38737f, 0.27468f, 0.50006f, 0.50006f, 0.38737f, 0.27468f, 0.27468f, 0.72544f, 0.61275f, 0.50006f, 0.50006f, 0.38737f, 0.04930f, 0.04930f, 0.38737f, 0.38737f, -0.06339f, -0.17608f, -0.17608f, -0.06339f, -0.17608f, -0.51415f, -0.73953f, -0.96491f, -0.73953f, -0.62684f, -0.62684f, 0.16199f, 0.83813f, 1.06351f, 1.28889f, 1.17620f, 0.61275f, 0.38737f, 0.27468f, 0.16199f, 0.38737f, 0.50006f, 0.50006f, 0.38737f, 0.27468f, 0.38737f, 0.50006f, 0.61275f, 0.50006f, 0.50006f, 0.61275f, 0.61275f, 0.16199f, -0.06339f, -0.06339f, -0.06339f, 0.04930f, 0.04930f, -0.17608f, -0.40146f, -0.62684f, -0.73953f, -0.62684f, -0.17608f, 0.16199f, 0.61275f, 0.61275f, 0.72544f, 0.50006f, 0.50006f, 0.27468f, 0.27468f, 0.27468f, 0.38737f, 0.50006f, 0.50006f, 0.38737f, 0.38737f, 0.38737f, 0.27468f, 0.16199f, 0.04930f, 0.27468f, 0.50006f, 0.61275f, 0.72544f, 0.61275f, 0.50006f, -0.06339f, -0.28877f, -0.17608f, -0.06339f, 0.04930f, -0.06339f, -0.17608f, -0.40146f, -0.62684f, -0.62684f, -0.40146f, -0.06339f, 0.27468f, 0.72544f, 0.95082f, 1.06351f, 0.72544f, 0.50006f, 0.38737f, 0.27468f, 0.27468f, 0.27468f, 0.38737f, 0.38737f, 0.27468f, -0.06339f, 0.04930f, 0.27468f, 0.38737f, 0.61275f, 0.61275f, 0.27468f, 0.38737f, 0.50006f, 0.61275f, 0.38737f, -0.06339f, -0.17608f, 0.04930f, 0.27468f, 0.38737f, 0.16199f, -0.06339f, -0.28877f, -0.28877f, -0.40146f, -0.40146f, -0.17608f, 0.16199f, 0.38737f, 0.95082f, 1.17620f, 1.17620f, 0.95082f, 0.27468f, 0.04930f, -0.06339f, 0.16199f, 0.27468f, 0.04930f, -0.28877f, -0.28877f, -0.28877f, -0.17608f, -0.28877f, -0.73953f, -0.85222f, -0.85222f, -0.51415f, -0.17608f, 0.38737f, 0.61275f, 0.72544f, 0.83813f, 0.95082f, 1.06351f, 1.06351f, 0.95082f, 0.95082f, 0.50006f, 0.38737f, 0.04930f, 0.16199f, 0.27468f, -0.40146f, -1.19029f, -2.09181f, -1.86643f, -0.28877f, -2.65526f, -9.75473f, -2.65526f, 
-1.07760f, -0.06339f, 0.16199f, 0.61275f, 0.38737f, 0.04930f, 0.27468f, 0.04930f, -0.51415f, -0.62684f, -0.51415f, -0.51415f, -0.85222f, -1.19029f, -1.19029f, 0.33916f, 0.92881f, 0.92881f, 0.63399f, -0.25048f, -0.76643f, -1.65090f, -2.09313f, -2.09313f, -1.79831f, -1.65090f, -1.94572f, -2.01942f, -1.06125f, -0.39790f, 1.00251f, 3.72963f, 3.36110f, 1.07622f, 1.07622f, 0.85510f, 0.63399f, -0.17678f, -0.61901f, -1.35607f, -1.65090f, -1.72460f, -1.50348f, -1.28237f, -0.54531f, -0.25048f, 0.48657f, 1.14993f, 1.37104f, 1.66587f, 2.25551f, 1.73957f, 1.07622f, 0.70769f, 0.41287f, 0.19175f, -0.17678f, -0.61901f, -1.57719f, -1.79831f, -1.65090f, -1.28237f, -0.54531f, -0.17678f, 0.56028f, 1.07622f, 1.37104f, 1.59216f, 1.51846f, 1.73957f, 1.44475f, 0.48657f, 0.26546f, 0.19175f, 0.85510f, 0.56028f, -0.25048f, -0.61901f, -0.76643f, -0.91384f, -1.20866f, -0.98754f, -0.61901f, -0.02937f, 0.26546f, 1.07622f, 1.22363f, 1.81328f, 1.44475f, 0.04434f, -0.10307f, 0.33916f, -0.02937f, -0.25048f, -0.61901f, -0.54531f, -0.47160f, -0.32419f, -0.25048f, -0.02937f, 0.48657f, 0.85510f, 1.00251f, 1.37104f, 0.04434f, -0.69272f, -0.25048f, 0.56028f, 0.26546f, 0.04434f, 0.33916f, 0.26546f, -0.32419f, -0.32419f, -0.25048f, -0.17678f, 0.41287f, 0.78140f, 0.92881f, 1.44475f, 1.14993f, -0.10307f, -0.32419f, 0.19175f, 0.26546f, -0.02937f, -0.54531f, -0.76643f, -0.76643f, -0.76643f, -0.69272f, -0.54531f, 0.04434f, 0.19175f, 0.63399f, 0.70769f, 1.07622f, 0.85510f, 0.19175f, 0.11804f, 0.33916f, 0.63399f, 0.56028f, 0.11804f, -0.17678f, -0.47160f, -0.47160f, -0.61901f, -0.69272f, -0.47160f, -0.17678f, 0.04434f, 1.00251f, 1.22363f, 1.51846f, 1.51846f, 1.07622f, -0.32419f, -0.47160f, -0.02937f, -0.32419f, -0.54531f, -0.98754f, -1.13495f, -0.98754f, -0.98754f, -0.69272f, -0.39790f, 0.26546f, 0.70769f, 0.70769f, 0.92881f, 1.59216f, 0.41287f, 0.11804f, 0.33916f, 0.41287f, 0.56028f, 0.04434f, -0.17678f, -0.39790f, -0.61901f, -1.06125f, -1.06125f, -0.76643f, -0.54531f, 0.11804f, 0.85510f, 1.07622f, 1.51846f, 
1.96069f, 1.51846f, 0.63399f, -0.54531f, 0.11804f, 0.11804f, -0.39790f, -0.61901f, -0.91384f, -0.98754f, -1.06125f, -0.98754f, -0.47160f, -0.17678f, 0.04434f, 0.56028f, 0.70769f, 0.92881f, 1.29734f, 1.73957f, 1.00251f, -0.69272f, -0.17678f, 0.11804f, 0.33916f, 0.04434f, -0.54531f, -0.84013f, -1.06125f, -1.28237f, -1.79831f, -1.57719f, -1.28237f, -0.54531f, 0.04434f, 0.63399f, 0.78140f, 1.44475f, 1.66587f, 2.18181f, 2.03440f, 1.44475f, 0.33916f, 0.48657f, 0.63399f, 0.41287f, -0.32419f, -0.47160f, -0.39790f, -0.84013f, -1.06125f, -1.35607f, -1.42978f, -1.50348f, -1.57719f, -1.42978f, -1.28237f, -1.20866f, -0.84013f, -0.69272f, -0.69272f, -0.69272f, -0.17678f, 0.48657f, 1.51846f, 1.00251f, 0.56028f, -0.02937f, 0.33916f, 0.33916f, 0.33916f, -0.10307f, -0.54531f, -1.20866f, -1.50348f, -1.42978f, -1.42978f, -1.35607f, -1.06125f, -0.84013f, 0.04434f, 1.10447f, 1.23962f, 0.69900f, 0.42869f, -0.11193f, -0.24708f, -0.65255f, -0.78770f, -1.05801f, -1.19317f, -0.92286f, -0.24708f, -0.24708f, 0.56385f, 0.96931f, 0.96931f, 0.29354f, 1.64509f, 2.18571f, 0.69900f, -0.51739f, -0.92286f, -1.32832f, -1.19317f, -0.65255f, -0.24708f, -0.24708f, -0.38224f, -0.65255f, -1.32832f, -1.32832f, -1.19317f, -1.32832f, -1.59863f, -2.13925f, -2.27441f, -2.13925f, -1.32832f, -0.92286f, -1.32832f, -0.78770f, -0.38224f, -0.38224f, -0.24708f, -0.11193f, 0.02323f, 0.15838f, 0.83416f, 1.23962f, 1.91540f, 2.45602f, 2.86148f, 3.40211f, 3.40211f, 2.59117f, 2.45602f, 1.64509f, 1.23962f, 0.42869f, 0.02323f, -0.11193f, -0.51739f, -0.38224f, -0.24708f, -0.51739f, -0.92286f, -0.92286f, -0.65255f, -0.11193f, 0.15838f, 0.69900f, 0.69900f, -0.11193f, -0.11193f, 0.69900f, 0.69900f, 0.15838f, 0.29354f, 0.15838f, 0.15838f, 0.29354f, 0.42869f, 0.29354f, 0.15838f, 0.02323f, 0.15838f, 0.29354f, 0.15838f, 0.56385f, 0.69900f, 0.56385f, 0.56385f, 0.42869f, 0.02323f, -0.11193f, 0.15838f, 0.15838f, -0.24708f, -0.51739f, -0.92286f, -1.05801f, -0.78770f, -0.65255f, -0.51739f, -0.51739f, -0.38224f, 0.29354f, 0.29354f, 
0.42869f, 0.29354f, 0.15838f, 0.02323f, 0.15838f, 0.15838f, 0.02323f, -0.51739f, -0.51739f, -0.11193f, 0.02323f, 0.15838f, 0.15838f, 0.15838f, 1.10447f, 0.83416f, 0.42869f, 0.29354f, 0.02323f, 0.02323f, -0.24708f, -0.38224f, -0.38224f, -0.65255f, -0.78770f, -1.19317f, -0.92286f, -0.65255f, -0.51739f, -0.24708f, -0.24708f, -0.24708f, -0.11193f, 0.29354f, 1.10447f, 0.83416f, 0.69900f, 0.56385f, 0.42869f, 0.29354f, 0.29354f, 0.02323f, 0.02323f, -0.24708f, -0.11193f, 0.29354f, 0.42869f, 0.56385f, 0.42869f, 0.69900f, 1.23962f, 0.96931f, 0.69900f, 0.56385f, 0.29354f, -0.24708f, -0.38224f, -0.38224f, -0.38224f, -0.92286f, -1.05801f, -1.32832f, -1.32832f, -0.92286f, -0.51739f, -0.38224f, -0.11193f, 0.02323f, 0.02323f, 0.29354f, 0.83416f, 0.42869f, 0.29354f, 0.15838f, 0.15838f, 0.15838f, 0.15838f, 0.15838f, 0.02323f, 0.02323f, 0.29354f, 0.42869f, 0.56385f, 0.56385f, 0.56385f, 0.56385f, 1.10447f, 1.37478f, 0.56385f, 0.42869f, 0.42869f, 0.02323f, -0.24708f, -0.51739f, -0.65255f, -0.65255f, -0.92286f, -1.46348f, -1.46348f, -1.32832f, -0.92286f, -0.78770f, -0.78770f, -0.65255f, -0.24708f, -0.11193f, 0.02323f, -0.51739f, -0.51739f, 0.29354f, 0.29354f, 0.42869f, 0.29354f, -0.24708f, -0.24708f, -0.24708f, -0.24708f, -0.38224f, -0.38224f, -0.51739f, -0.38224f, -0.24708f, 0.15838f, 0.69900f, 0.96931f, 1.10447f, 0.83416f, 0.83416f, 0.83416f, 1.23962f, 2.18571f, 7.18645f, 1.78024f, 0.29354f, -1.46348f, -2.00410f, -1.59863f, -1.59863f, -1.05801f, -0.92286f, -1.32832f, -1.19317f, -1.05801f, -1.05801f, -1.32832f, -1.19317f, -0.92286f, -0.78770f,}; + +static float xout_2[] = { 0.39249f, 0.48451f, 0.73383f, 0.73852f, 1.10356f, 1.51326f, 1.76721f, 1.83251f, 1.31946f, 0.81513f, 0.02872f, 0.09573f, -0.31142f, -1.19899f, -1.17897f, -1.59128f, -1.68418f, -1.58064f, -1.61839f, -0.72027f, -0.35841f, 0.28717f, 1.03874f, 1.20660f, 1.62155f, 1.03099f, 0.66879f, 0.72900f, 0.17669f, -0.18862f, -0.35105f, -0.40824f, -0.50307f, -0.60400f, -0.51233f, -0.42268f, -0.19640f, -0.11939f, -0.08316f, 0.09401f, 
-0.03073f, 0.05474f, 0.00948f, -0.14916f, -0.29699f, -0.46144f, -0.41988f, -0.36489f, -0.28950f, -0.25506f, -0.41477f, -0.55599f, -0.60255f, -0.37059f, -0.06804f, 0.10718f, 0.32548f, 0.28716f, 0.30881f, 0.11494f, 0.19378f, 0.36549f, 0.49032f, 0.39248f, -0.11330f, -0.16019f, -0.25388f, -0.31549f, -0.40915f, -0.69023f, -0.56602f, -0.27259f, -0.03218f, 0.30087f, 0.40573f, 0.69364f, 0.80954f, 0.71345f, 0.81998f, 0.72108f, 0.49698f, 0.16719f, -0.10408f, -0.19166f, -0.49458f, -0.48501f, -0.35172f, -0.08854f, 0.20221f, 0.22341f, 0.01684f, -0.12514f, -0.43663f, -0.55029f, -0.36411f, -0.09131f, -0.07201f, -0.16178f, -0.05504f, 0.10212f, -0.12362f, -0.37001f, -0.33644f, -0.23595f, -0.00672f, 0.11853f, 0.06942f, 0.20967f, 0.21001f, 0.23882f, 0.36883f, 0.66184f, 0.71750f, 0.42784f, 0.21827f, 0.07058f, -0.03356f, -0.28731f, -0.41314f, -0.34149f, -0.14244f, 0.07088f, 0.01262f, -0.05669f, -0.19078f, -0.33944f, -0.39268f, -0.18032f, 0.07529f, 0.33591f, 0.39141f, 0.32471f, 0.16026f, -0.03295f, -0.25818f, -0.64155f, -0.79985f, -0.93584f, -0.66718f, -0.20500f, 0.17531f, 0.48883f, 0.47296f, 0.40285f, 0.35651f, 0.18949f, 0.29055f, 0.61037f, 0.46581f, 0.27488f, -0.01284f, -0.15280f, -0.17538f, -0.58741f, -0.57524f, -0.47580f, -0.21400f, -0.06589f, -0.10277f, -0.08980f, -0.12787f, -0.20503f, -0.26596f, -0.03148f, 0.32823f, 0.54433f, 0.48218f, 0.34929f, 0.02126f, -0.20263f, -0.49552f, -0.67534f, -0.62950f, -0.68360f, -0.35470f, -0.08379f, 0.14763f, 0.26742f, 0.29058f, 0.28133f, 0.20008f, 0.14494f, 0.41903f, 0.54173f, 0.52405f, 0.29799f, -0.07209f, -0.14607f, -0.46453f, -0.58305f, -0.57501f, -0.69478f, -0.46508f, -0.23234f, 0.07366f, 0.14385f, 0.05992f, -0.20362f, -0.39168f, -0.48250f, -0.07809f, 0.42712f, 0.71610f, 0.81517f, 0.82542f, 0.97280f, 0.77453f, 0.36336f, 0.04949f, -0.26119f, -0.45929f, -0.60788f, -0.69065f, -0.31942f, -0.13976f, 0.09962f, 0.01459f, -0.23469f, -0.43849f, -0.63786f, -0.63111f, -0.59381f, -0.51142f, -0.44169f, -0.36079f, 0.11435f, 0.52288f, 0.56995f, 0.72460f, 
1.52536f, 1.96464f, 1.74206f, 0.39785f, 0.85132f, 0.47575f, -0.53417f, 0.15703f, 0.37793f, 0.68815f, 0.26998f, -0.36431f, -0.00991f, -1.89429f, -1.31357f, -0.37315f, -0.73792f, 0.22693f, -0.13869f, -0.04304f, 0.91745f, 0.28913f, 0.00824f, -0.09014f, -0.22067f, -0.39314f, -0.55479f,}; + +static float ppg_2[] = { 0.46327f, 0.32949f, 0.07201f, -0.29688f, -0.62103f, -0.64516f, -0.27495f, 0.24132f, 0.54530f, 0.49617f, 0.18035f, -0.26311f, -0.72587f, -1.14389f, -1.46848f, -1.61454f, -1.46102f, -1.00265f, -0.46707f, -0.07844f, 0.23123f, 0.65189f, 1.15061f, 1.54846f, 1.77962f, 1.84234f, 1.68487f, 1.28177f, 0.72119f, 0.13912f, -0.37057f, -0.75262f, -0.92106f, -0.80702f, -0.53024f, -0.32013f, -0.19117f, 0.04964f, 0.44617f, 0.83831f, 1.10851f, 1.21904f, 1.13570f, 0.90103f, 0.59486f, 0.20316f, -0.28679f, -0.74780f, -1.03247f, -1.10836f, -1.04695f, -0.96712f, -0.96142f, -0.95659f, -0.78377f, -0.44865f, -0.14380f, 0.03078f, 0.13561f, 0.21939f, 0.30010f, 0.43695f, 0.63083f, 0.75716f, 0.72338f, 0.56635f, 0.33563f, 0.00841f, -0.39163f, -0.67148f, -0.58419f, -0.11046f, 0.53038f, 1.13658f, 1.57872f, 1.68092f, 1.36379f, 0.82866f, 0.33212f, -0.07932f, -0.46620f, -0.83991f, -1.20091f, -1.53734f, -1.77377f, -1.83781f, -1.73824f, -1.49787f, -1.13511f, -0.75526f, -0.49339f, -0.37189f, -0.32057f, -0.27758f, -0.18547f, -0.01659f, 0.15754f, 0.23167f, 0.19878f, 0.11763f, 0.00490f, -0.15038f, -0.30127f, -0.28197f, 0.12903f, 0.97034f, 1.93402f, 2.56521f, 2.62969f, 2.21650f, 1.58793f, 0.99797f, 0.55670f, 0.18079f, -0.25433f, -0.73201f, -1.14652f, -1.47374f, -1.73648f, -1.85492f, -1.69262f, -1.25530f, -0.75262f, -0.41400f, -0.28372f, -0.25390f, -0.23898f, -0.23591f, -0.24337f, -0.23372f, -0.20477f, -0.17889f, -0.16924f, -0.19249f, -0.29381f, -0.44251f, -0.44909f, -0.11573f, 0.54179f, 1.30633f, 1.97086f, 2.42310f, 2.54723f, 2.23185f, 1.54012f, 0.72251f, -0.00212f, -0.55436f, -0.94782f, -1.27811f, -1.66060f, -2.05800f, -2.24311f, -2.00405f, -1.36847f, -0.59866f, 0.01893f, 0.34660f, 0.38344f, 
0.21808f, 0.01586f, -0.09994f, -0.15038f, -0.19249f, -0.21223f, -0.21924f, -0.30873f, -0.49646f, -0.61182f, -0.48769f, -0.15257f, 0.26633f, 0.70189f, 1.12386f, 1.46906f, 1.69321f, 1.80988f, 1.82217f, 1.65329f, 1.23659f, 0.63917f, -0.00124f, -0.61752f, -1.16319f, -1.48558f, -1.43953f, -1.09388f, -0.68244f, -0.39163f, -0.23328f, -0.11704f, 0.00753f, 0.09087f, 0.11280f, 0.12552f, 0.13561f, 0.09613f, -0.01616f, -0.26618f, -0.68815f, -0.99387f, -0.72192f, 0.13210f, 1.01376f, 1.45108f, 1.42871f, 1.11026f, 0.58521f, -0.03677f, -0.55261f, -0.79868f, -0.76710f, -0.55568f, -0.30302f, -0.15871f, -0.13897f, -0.00300f, 0.50143f, 1.26905f, 1.93840f, 2.29326f, 2.33712f, 2.12044f, 1.71207f, 1.24931f, 0.88787f, 0.68522f, 0.52337f, 0.20053f, -0.38417f, -1.12722f, -1.75578f, -2.00668f, -1.83562f, -1.42110f, -0.94255f, -0.41356f, 0.19527f, 0.57732f, 0.19132f, -0.83114f, -1.50488f, -1.29566f, -0.74561f, -0.39163f, -0.17714f, -0.09511f, -0.24337f, -0.29601f, 0.11807f, 0.73567f, 1.00543f, 0.84269f, 0.53345f, 0.17465f, -0.34645f,}; + +static float ppgf_2[] = { 0.07078f, -0.15502f, -0.66182f, -1.03541f, -1.72459f, -2.15842f, -2.04216f, -1.59119f, -0.77417f, -0.31896f, 0.15163f, -0.35884f, -0.41444f, 0.05510f, -0.28951f, -0.02326f, 0.22316f, 0.57799f, 1.15132f, 0.64183f, 0.58965f, 0.36472f, 0.11188f, 0.34186f, 0.15807f, 0.81135f, 1.01608f, 0.55277f, 0.54450f, 0.32774f, -0.01952f, -0.34439f, -0.41799f, -0.20301f, -0.01791f, 0.10255f, 0.00523f, 0.16903f, 0.52932f, 0.74430f, 1.13924f, 1.16430f, 1.12622f, 1.05019f, 0.89185f, 0.66460f, 0.13308f, -0.38291f, -0.74298f, -0.85330f, -0.63218f, -0.41113f, -0.35886f, -0.58600f, -0.71572f, -0.55583f, -0.46928f, -0.25638f, -0.17320f, 0.10445f, 0.10632f, 0.07146f, 0.14051f, 0.36468f, 0.83668f, 0.72655f, 0.58951f, 0.32389f, 0.01752f, 0.01875f, -0.01817f, 0.16213f, 0.56257f, 0.83571f, 1.17299f, 0.98729f, 0.55425f, 0.11521f, -0.48786f, -0.80040f, -0.96317f, -1.00710f, -1.09683f, -1.34569f, -1.27919f, -1.35280f, -1.38652f, -1.40933f, -1.33732f, -0.97867f, 
-0.51023f, -0.24675f, 0.11606f, 0.27271f, 0.17864f, 0.07471f, 0.22955f, 0.39345f, 0.25381f, 0.01551f, 0.12852f, 0.21963f, 0.03517f, -0.04602f, 0.13576f, 0.85180f, 1.86460f, 2.35555f, 2.41968f, 1.97768f, 1.21910f, 0.33612f, -0.16080f, -0.24705f, -0.47261f, -0.80259f, -1.11296f, -1.18643f, -1.32334f, -1.51343f, -1.55018f, -1.32618f, -0.76524f, -0.35730f, -0.09294f, 0.08555f, 0.15369f, -0.05559f, -0.31866f, -0.56963f, -0.59618f, -0.50360f, -0.32950f, -0.15953f, -0.03563f, 0.19904f, 0.35076f, 0.82012f, 1.20897f, 1.51133f, 1.79555f, 1.93426f, 2.07427f, 1.82900f, 1.18362f, 0.53301f, -0.29267f, -1.16473f, -1.41363f, -1.55299f, -1.64776f, -1.90520f, -2.06773f, -1.41664f, -0.79323f, -0.12286f, 0.23294f, 0.41249f, 0.48621f, 0.30788f, 0.14374f, 0.10509f, 0.11559f, -0.16101f, -0.54046f, -0.76358f, -0.79090f, -0.84575f, -0.63308f, -0.28506f, 0.34295f, 0.94167f, 1.33139f, 1.80746f, 1.82377f, 1.77700f, 1.66225f, 1.55474f, 1.36271f, 0.95526f, 0.43909f, -0.14618f, -1.03655f, -1.70492f, -2.00963f, -1.73752f, -1.02179f, -0.53637f, 0.07291f, 0.34977f, 0.45797f, 0.70231f, 0.55596f, 0.34514f, 0.05186f, -0.00824f, 0.03621f, 0.18746f, 0.12550f, -0.20564f, -0.91578f, -1.14904f, -0.58400f, 0.19859f, 0.62565f, 0.45591f, 0.33573f, 0.22186f, -0.08626f, -0.29142f, -0.33939f, -0.15922f, 0.13497f, 0.01640f, -0.01895f, -0.23859f, -0.01758f, 0.73613f, 1.70753f, 2.57627f, 2.92437f, 2.93094f, 2.63186f, 2.15376f, 1.61009f, 0.77352f, 0.16235f, -0.04659f, -0.52407f, -1.90953f, -3.09186f, -3.49785f, -2.40454f, -2.68693f, -1.89685f, -0.40838f, -0.57059f, -0.18267f, -0.11083f, -0.07867f, -0.46683f, -1.49498f, 0.59864f, 0.56797f, -0.01848f, 0.56078f, -0.32204f, -0.10468f, -0.25296f, -0.79938f, 0.44654f, 0.99719f, 0.93283f, 0.75412f, 0.56779f, 0.20835f,}; +/* +static int32_t weights1_2_fxp[] = { -1016363, 1298388, 1269196, -1262904, -841880, 1207959, -17112, -285883, -1588466, 284373, 1023410, -1165261, 447280, -1761691, -1583433, -525378, -22984, -1239165, 531166, 448706, -425386, 1116775, -668907, -525462, 
-293685, -1827877, 497025, -1219368, -1825528, 688117, -718987, 1416500, 1043458, 1052267, 374635, -1413396, 391999, 302325, -403575, 1822509, 1484951, 1353753, 743398, 249225, -467413, -637282, -212902, 550376, -731822, -1678896, -792136, -1165177, -16273, 269777, 1238158, 776449, 1093119, -768396, 295446, -302744, -724020, -171127, -1651549,}; + +static int32_t weights2_2_fxp[] = { -6006746, 558429, -649026,}; + +static int32_t xin_2_fxp[] = { -10930188, -8094251, -4313003, -1477066, 2304182, 4194807, 413558, -6203627, -8094251, -16602062, -22273936, -19437998, -21328624, -24164560, -27000496, -23219248, -19437998, -18492686, 9866681, 12702617, 7976056, 3249495, -1477066, 413558, 5140119, 5140119, 4194807, 4194807, 4194807, -1477066, -3367690, -6203627, -6203627, -6203627, -7148939, -6203627, -7148939, -6203627, -1477066, 2304182, 4194807, 5140119, 7976056, 8921368, 8921368, 7030744, 4194807, 1358870, 413558, 413558, 4194807, 4194807, 6085432, 7030744, 7030744, 5140119, 413558, -531753, 413558, 5140119, 7030744, 8921368, 7976056, 6085432, 5140119, 3249495, 2304182, 4194807, 4194807, 3249495, 2304182, 2304182, 6085432, 5140119, 4194807, 4194807, 3249495, 413558, 413558, 3249495, 3249495, -531753, -1477066, -1477066, -531753, -1477066, -4313003, -6203627, -8094251, -6203627, -5258315, -5258315, 1358870, 7030744, 8921368, 10811993, 9866681, 5140119, 3249495, 2304182, 1358870, 3249495, 4194807, 4194807, 3249495, 2304182, 3249495, 4194807, 5140119, 4194807, 4194807, 5140119, 5140119, 1358870, -531753, -531753, -531753, 413558, 413558, -1477066, -3367690, -5258315, -6203627, -5258315, -1477066, 1358870, 5140119, 5140119, 6085432, 4194807, 4194807, 2304182, 2304182, 2304182, 3249495, 4194807, 4194807, 3249495, 3249495, 3249495, 2304182, 1358870, 413558, 2304182, 4194807, 5140119, 6085432, 5140119, 4194807, -531753, -2422378, -1477066, -531753, 413558, -531753, -1477066, -3367690, -5258315, -5258315, -3367690, -531753, 2304182, 6085432, 7976056, 8921368, 6085432, 
4194807, 3249495, 2304182, 2304182, 2304182, 3249495, 3249495, 2304182, -531753, 413558, 2304182, 3249495, 5140119, 5140119, 2304182, 3249495, 4194807, 5140119, 3249495, -531753, -1477066, 413558, 2304182, 3249495, 1358870, -531753, -2422378, -2422378, -3367690, -3367690, -1477066, 1358870, 3249495, 7976056, 9866681, 9866681, 7976056, 2304182, 413558, -531753, 1358870, 2304182, 413558, -2422378, -2422378, -2422378, -1477066, -2422378, -6203627, -7148939, -7148939, -4313003, -1477066, 3249495, 5140119, 6085432, 7030744, 7976056, 8921368, 8921368, 7976056, 7976056, 4194807, 3249495, 413558, 1358870, 2304182, -3367690, -9984876, -17547374, -15656750, -2422378, -22273936, -81828608, -22273936, -9039564, -531753, 1358870, 5140119, 3249495, 413558, 2304182, 413558, -4313003, -5258315, -4313003, -4313003, -7148939, -9984876, -9984876, 2845080, 7791423, 7791423, 5318293, -2101178, -6429281, -13848753, -17558448, -17558448, -15085318, -13848753, -16321882, -16940122, -8902410, -3337827, 8409663, 31286404, 28194950, 9027988, 9027988, 7173098, 5318293, -1482938, -5192632, -11375540, -13848753, -14466993, -12612104, -10757299, -4574392, -2101178, 4081645, 9646312, 11501117, 13974330, 18920590, 14592571, 9027988, 5936534, 3463404, 1608515, -1482938, -5192632, -13230429, -15085318, -13848753, -10757299, -4574392, -1482938, 4699969, 9027988, 11501117, 13356006, 12737766, 14592571, 12119441, 4081645, 2226840, 1608515, 7173098, 4699969, -2101178, -5192632, -6429281, -7665845, -10138975, -8284086, -5192632, -246373, 2226840, 9027988, 10264552, 15210895, 12119441, 371950, -864613, 2845080, -246373, -2101178, -5192632, -4574392, -3956067, -2719502, -2101178, -246373, 4081645, 7173098, 8409663, 11501117, 371950, -5810956, -2101178, 4699969, 2226840, 371950, 2845080, 2226840, -2719502, -2719502, -2101178, -1482938, 3463404, 6554858, 7791423, 12119441, 9646312, -864613, -2719502, 1608515, 2226840, -246373, -4574392, -6429281, -6429281, -6429281, -5810956, -4574392, 371950, 1608515, 
5318293, 5936534, 9027988, 7173098, 1608515, 990191, 2845080, 5318293, 4699969, 990191, -1482938, -3956067, -3956067, -5192632, -5810956, -3956067, -1482938, 371950, 8409663, 10264552, 12737766, 12737766, 9027988, -2719502, -3956067, -246373, -2719502, -4574392, -8284086, -9520651, -8284086, -8284086, -5810956, -3337827, 2226840, 5936534, 5936534, 7791423, 13356006, 3463404, 990191, 2845080, 3463404, 4699969, 371950, -1482938, -3337827, -5192632, -8902410, -8902410, -6429281, -4574392, 990191, 7173098, 9027988, 12737766, 16447460, 12737766, 5318293, -4574392, 990191, 990191, -3337827, -5192632, -7665845, -8284086, -8902410, -8284086, -3956067, -1482938, 371950, 4699969, 5936534, 7791423, 10882877, 14592571, 8409663, -5810956, -1482938, 990191, 2845080, 371950, -4574392, -7047521, -8902410, -10757299, -15085318, -13230429, -10757299, -4574392, 371950, 5318293, 6554858, 12119441, 13974330, 18302348, 17065784, 12119441, 2845080, 4081645, 5318293, 3463404, -2719502, -3956067, -3337827, -7047521, -8902410, -11375540, -11993864, -12612104, -13230429, -11993864, -10757299, -10138975, -7047521, -5810956, -5810956, -5810956, -1482938, 4081645, 12737766, 8409663, 4699969, -246373, 2845080, 2845080, 2845080, -864613, -4574392, -10138975, -12612104, -11993864, -11993864, -11375540, -8902410, -7047521, 371950, 9264966, 10398686, 5863637, 3596112, -938936, -2072657, -5473986, -6607706, -8875231, -10009035, -7741511, -2072657, -2072657, 4729916, 8131161, 8131161, 2462392, 13800015, 18335064, 5863637, -4340182, -7741511, -11142756, -10009035, -5473986, -2072657, -2072657, -3206461, -5473986, -11142756, -11142756, -10009035, -11142756, -13410280, -17945330, -19079134, -17945330, -11142756, -7741511, -11142756, -6607706, -3206461, -3206461, -2072657, -938936, 194867, 1328587, 6997441, 10398686, 16067540, 20602590, 24003834, 28538968, 28538968, 21736310, 20602590, 13800015, 10398686, 3596112, 194867, -938936, -4340182, -3206461, -2072657, -4340182, -7741511, -7741511, -5473986, 
-938936, 1328587, 5863637, 5863637, -938936, -938936, 5863637, 5863637, 1328587, 2462392, 1328587, 1328587, 2462392, 3596112, 2462392, 1328587, 194867, 1328587, 2462392, 1328587, 4729916, 5863637, 4729916, 4729916, 3596112, 194867, -938936, 1328587, 1328587, -2072657, -4340182, -7741511, -8875231, -6607706, -5473986, -4340182, -4340182, -3206461, 2462392, 2462392, 3596112, 2462392, 1328587, 194867, 1328587, 1328587, 194867, -4340182, -4340182, -938936, 194867, 1328587, 1328587, 1328587, 9264966, 6997441, 3596112, 2462392, 194867, 194867, -2072657, -3206461, -3206461, -5473986, -6607706, -10009035, -7741511, -5473986, -4340182, -2072657, -2072657, -2072657, -938936, 2462392, 9264966, 6997441, 5863637, 4729916, 3596112, 2462392, 2462392, 194867, 194867, -2072657, -938936, 2462392, 3596112, 4729916, 3596112, 5863637, 10398686, 8131161, 5863637, 4729916, 2462392, -2072657, -3206461, -3206461, -3206461, -7741511, -8875231, -11142756, -11142756, -7741511, -4340182, -3206461, -938936, 194867, 194867, 2462392, 6997441, 3596112, 2462392, 1328587, 1328587, 1328587, 1328587, 1328587, 194867, 194867, 2462392, 3596112, 4729916, 4729916, 4729916, 4729916, 9264966, 11532491, 4729916, 3596112, 3596112, 194867, -2072657, -4340182, -5473986, -5473986, -7741511, -12276560, -12276560, -11142756, -7741511, -6607706, -6607706, -5473986, -2072657, -938936, 194867, -4340182, -4340182, 2462392, 2462392, 3596112, 2462392, -2072657, -2072657, -2072657, -2072657, -3206461, -3206461, -4340182, -3206461, -2072657, 1328587, 5863637, 8131161, 9264966, 6997441, 6997441, 6997441, 10398686, 18335064, 60284312, 14933736, 2462392, -12276560, -16811610, -13410280, -13410280, -8875231, -7741511, -11142756, -10009035, -8875231, -8875231, -11142756, -10009035, -7741511, -6607706,}; + +static int32_t xout_2_fxp[] = { 3292444, 4064364, 6155812, 6195155, 9257332, 12694145, 14824432, 15372208, 11068433, 6837806, 240920, 803041, -2612380, -10057857, -9889917, -13348624, -14127926, -13259369, -13576039, 
-6042062, -3006561, 2408956, 8713583, 10121694, 13602547, 8648571, 5610217, 6115295, 1482183, -1582259, -2944820, -3424565, -4220057, -5066719, -4297735, -3545696, -1647522, -1001515, -697596, 788613, -257781, 459192, 79524, -1251244, -2491332, -3870839, -3522208, -3060919, -2428502, -2139598, -3479343, -4663982, -5054556, -3108734, -570760, 899091, 2730324, 2408872, 2590486, 964186, 1625544, 3065952, 4113102, 3292360, -950429, -1343771, -2129699, -2646522, -3432199, -5790069, -4748120, -2286650, -269945, 2523880, 3403510, 5818674, 6790913, 5984852, 6878491, 6048857, 4168970, 1402491, -873086, -1607760, -4148837, -4068558, -2950441, -742727, 1696260, 1874098, 141264, -1049750, -3662718, -4616167, -3054376, -765963, -604063, -1357109, -461708, 856644, -1036999, -3103868, -2822263, -1979292, -56371, 994301, 582337, 1758839, 1761691, 2003367, 3093970, 5551916, 6018826, 3588982, 1830981, 592067, -281521, -2410131, -3465669, -2864625, -1194873, 594584, 105864, -475550, -1600378, -2847429, -3294038, -1512633, 631578, 2817817, 3283385, 2723865, 1344358, -276404, -2165770, -5381711, -6709628, -7850395, -5596711, -1719664, 1470606, 4100603, 3967476, 3379350, 2990622, 1589557, 2437310, 5120154, 3907497, 2305860, -107709, -1281779, -1471194, -4927552, -4825463, -3991299, -1795162, -552725, -862097, -753297, -1072651, -1719916, -2231034, -264073, 2753392, 4566171, 4044819, 2930057, 178341, -1699783, -4156723, -5665162, -5280628, -5734452, -2975439, -702881, 1238410, 2243281, 2437561, 2359967, 1678392, 1215844, 3515078, 4544360, 4396050, 2499721, -604734, -1225324, -3896760, -4890978, -4823533, -5828237, -3901373, -1949009, 617904, 1206701, 502645, -1708088, -3285650, -4047503, -655066, 3582942, 6007082, 6838141, 6924125, 8160438, 6497228, 3048084, 415152, -2191020, -3852803, -5099267, -5793592, -2679489, -1172391, 835673, 122389, -1968722, -3678320, -5350757, -5294134, -4981239, -4290102, -3705164, -3026526, 959237, 4386235, 4781087, 6078385, 12795647, 16480595, 14613458, 
3337407, 7141390, 3990880, -4480942, 1317263, 3170306, 5772620, 2264756, -3056053, -83131, -15890456, -11019024, -3130209, -6190121, 1903626, -1163416, -361045, 7696128, 2425398, 69122, -756149, -1851114, -3297897, -4653916,}; + +static int32_t ppg_2_fxp[] = { 3886190, 2763962, 604063, -2490410, -5209577, -5411994, -2306447, 2024338, 4574308, 4162175, 1512885, -2207126, -6089039, -9595645, -12318503, -13543743, -12255924, -8410838, -3918067, -658002, 1939697, 5468449, 9652016, 12989424, 14928535, 15454668, 14133714, 10752266, 6049780, 1167023, -3108566, -6313434, -7726411, -6769774, -4447975, -2685445, -1603650, 416410, 3742745, 7032254, 9298856, 10226049, 9526942, 7558387, 4990047, 1704229, -2405769, -6273001, -8660986, -9297598, -8782453, -8112790, -8064975, -8024458, -6574739, -3763549, -1206281, 258201, 1137579, 1840376, 2517421, 3665402, 5291785, 6351518, 6068151, 4750888, 2815468, 70548, -3285230, -5632782, -4900541, -926605, 4449150, 9534324, 13243263, 14100579, 11440300, 6951304, 2786024, -665384, -3910769, -7045675, -10073963, -12896143, -14879461, -15416668, -14581414, -12565044, -9521993, -6335580, -4138855, -3119639, -2689136, -2328509, -1555835, -139167, 1321541, 1943388, 1667487, 986751, 41104, -1261478, -2527236, -2365335, 1082382, 8139802, 16223736, 21518542, 22059438, 18593350, 13320522, 8371579, 4669938, 1516576, -2133474, -6140545, -9617707, -12362627, -14566650, -15560197, -14198726, -10530220, -6313434, -3472883, -2380015, -2129867, -2004709, -1978956, -2041535, -1960585, -1717735, -1500638, -1419688, -1614723, -2464657, -3712043, -3767240, -970813, 4544864, 10958290, 16532772, 20326436, 21367714, 18722114, 12919463, 6060853, -17783, -4650308, -7950890, -10721564, -13930122, -17263756, -18816570, -16811190, -11479558, -5021924, 158796, 2907491, 3216527, 1829387, 133043, -838357, -1261478, -1614723, -1780314, -1839118, -2589815, -4164608, -5132318, -4091040, -1279849, 2234138, 5887880, 9427621, 12323368, 14203675, 15182374, 15285470, 13868802, 
10373269, 5361746, -10401, -5180133, -9757545, -12461948, -12075653, -9176131, -5724721, -3285230, -1956894, -981802, 63166, 762272, 946235, 1052938, 1137579, 806396, -135559, -2232879, -5772620, -8337186, -6055904, 1108135, 8504035, 12172541, 11984888, 9313536, 4909097, -308449, -4635628, -6699813, -6434901, -4661381, -2541916, -1331356, -1165764, -25165, 4206299, 10645563, 16260478, 19237260, 19605184, 17787540, 14361884, 10479972, 7447993, 5748042, 4390346, 1682167, -3222651, -9455807, -14728550, -16833252, -15398297, -11921051, -7906682, -3469192, 1638043, 4842911, 1604908, -6972107, -12623848, -10868784, -6254630, -3285230, -1485958, -797840, -2041535, -2483111, 990442, 6171247, 8434158, 7068996, 4474903, 1465070, -2906233,}; + +static int32_t ppgf_2_fxp[] = { 593745, -1300402, -5551748, -8685649, -14466909, -18106140, -17130880, -13347869, -6494208, -2675630, 1271964, -3010168, -3476574, 462212, -2428586, -195119, 1872001, 4848531, 9657972, 5384060, 4946342, 3059493, 938517, 2867729, 1325987, 6806097, 8523497, 4636971, 4567597, 2749282, -163745, -2888952, -3506354, -1702971, -150239, 860251, 43872, 1417926, 4440258, 6243641, 9556638, 9766856, 9447418, 8809632, 7481380, 5575069, 1116356, -3212082, -6232568, -7157999, -5303110, -3448808, -3010335, -4915724, -6003894, -4662640, -3936606, -2150671, -1452906, 876190, 891876, 599449, 1178683, 3059157, 7018580, 6094743, 4945168, 2716986, 146968, 157286, -152421, 1360045, 4719179, 7010443, 9839753, 8281989, 4649386, 966451, -4092466, -6714242, -8079655, -8448167, -9200877, -11288466, -10730623, -11348109, -11630973, -11822317, -11218253, -8209679, -4280119, -2069889, 973581, 2287657, 1498540, 626712, 1925605, 3300497, 2129112, 130107, 1078103, 1842390, 295027, -386043, 1138837, 7145416, 15641398, 19759786, 20297748, 16589982, 10226552, 2819579, -1348888, -2072405, -3964540, -6732613, -9336185, -9952496, -11100981, -12695571, -13003852, -11124804, -6419298, -2997249, -779637, 717645, 1289245, -466322, -2673113, 
-4778403, -5001120, -4224503, -2764046, -1338234, -298886, 1669668, 2942388, 6879665, 10141575, 12677955, 15062165, 16225749, 17400238, 15342764, 9928924, 4471212, -2455094, -9770463, -11858388, -13027424, -13822413, -15981976, -17345376, -11883638, -6654095, -1030624, 1954042, 3460217, 4078625, 2582684, 1205778, 881558, 969639, -1350649, -4533707, -6405373, -6634550, -7094665, -5310660, -2391256, 2876873, 7899300, 11168509, 15162073, 15298892, 14906556, 13943964, 13042104, 11431240, 8013301, 3683354, -1226246, -8695212, -14301906, -16857998, -14575374, -8571396, -4499397, 611613, 2934083, 3841730, 5891403, 4663730, 2895244, 435033, -69122, 303751, 1572528, 1052770, -1725033, -7682119, -9638846, -4898947, 1665893, 5248332, 3824450, 2816307, 1861096, -723601, -2444608, -2847009, -1335634, 1132210, 137573, -158964, -2001438, -147471, 6175106, 14323800, 21611320, 24531394, 24586506, 22077642, 18067048, 13506414, 6488756, 1361890, -390825, -4396218, -16018299, -25936402, -29342092, -20170744, -22539602, -15911931, -3425739, -4786456, -1532347, -929709, -659931, -3916053, -12540801, 5021756, 4764477, -155021, 4704163, -2701467, -878119, -2121982, -6705685, 3745849, 8365036, 7825145, 6326017, 4762967, 1747766,}; +*/ +#endif From 236eed8b35467bcfcc561deb16e2a74354fdff7d Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Mon, 29 Apr 2024 14:36:08 +0200 Subject: [PATCH 07/27] modify helloworld to always print --- .gitignore | 1 + sw/applications/hello_world/main.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index f2b85af8d..512cfb0b5 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,4 @@ flow/OpenROAD-flow-scripts # User-dependent configuration files .vscode private/ +vivado.jou diff --git a/sw/applications/hello_world/main.c b/sw/applications/hello_world/main.c index e61339734..c9ff225c5 100644 --- a/sw/applications/hello_world/main.c +++ b/sw/applications/hello_world/main.c @@ -22,7 +22,8 @@ int main(int argc, char 
*argv[]) { /* write something to stdout */ - printf("hello world!\n"); + while(1) + printf("hello world!\n"); return EXIT_SUCCESS; } From ec91f7624eba74d24fa14e833845859da355df53 Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Thu, 9 May 2024 10:54:59 +0200 Subject: [PATCH 08/27] try to find the error --- .env | 4 ++ sw/applications/hello_world/main.c | 3 +- sw/applications/l_cnn/conv2dlayer.c | 7 +++- sw/applications/l_cnn/main.c | 58 +++++++++++++++-------------- sw/applications/l_cnn/utils.h | 15 +++++--- sw/applications/l_data/utils.h | 3 +- sw/applications/l_fxp/main.c | 6 +-- sw/applications/l_fxp/utils.h | 4 +- 8 files changed, 58 insertions(+), 42 deletions(-) create mode 100644 .env diff --git a/.env b/.env new file mode 100644 index 000000000..d73f6b18b --- /dev/null +++ b/.env @@ -0,0 +1,4 @@ +conda activate core-v-mini-mcu +export RISCV=/home/linus/tools/riscv +export VERILATOR_VERSION=4.210 +export PATH=/home/$USER/tools/verilator/$VERILATOR_VERSION/bin:$PATH diff --git a/sw/applications/hello_world/main.c b/sw/applications/hello_world/main.c index c9ff225c5..e61339734 100644 --- a/sw/applications/hello_world/main.c +++ b/sw/applications/hello_world/main.c @@ -22,8 +22,7 @@ int main(int argc, char *argv[]) { /* write something to stdout */ - while(1) - printf("hello world!\n"); + printf("hello world!\n"); return EXIT_SUCCESS; } diff --git a/sw/applications/l_cnn/conv2dlayer.c b/sw/applications/l_cnn/conv2dlayer.c index 16a2e4c21..0ec81ee54 100644 --- a/sw/applications/l_cnn/conv2dlayer.c +++ b/sw/applications/l_cnn/conv2dlayer.c @@ -56,6 +56,7 @@ void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, // get kernel center if (kerx % 2 != 1 || kery % 2 != 1) { printf("Kernel size must be odd\n"); + printf("kerx: %d, kery: %d\n", kerx, kery); exit(EXIT_FAILURE); } int cx = kerx / 2; @@ -92,6 +93,7 @@ void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, // cy); w = kernel[(m - i + cx) * kery + (n - j 
+ cy)]; sum += fxp32_mul(w, in); + // printf("m: %d, n: %d, sum: %d\n", m, n, sum); // if (sum > max) { // max = sum; // } @@ -105,8 +107,8 @@ void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, output[(i - cx) * (iny - kery + 1) + (j - cy)] = sum; else output[i * iny + j] = sum; - // printf("access output: %d\n", (i-cx)*(iny - kery + 1) + (j-cy)); - // printf("sum = %.2f\n", sum); + // printf("access output: %d, address: %d\n", i * iny + j, &output[i * iny + j]); + // printf("sum = %d\n", sum); } } // printf("max: %.6f, min: %.6f\n", max, min); @@ -117,6 +119,7 @@ void convolve2DFloat(float* input, float* output, float* kernel, int inx, int in // get kernel center if (kerx % 2 != 1 || kery % 2 != 1) { printf("Kernel size must be odd\n"); + printf("kerx: %d, kery: %d\n", kerx, kery); exit(EXIT_FAILURE); } int cx = kerx / 2; diff --git a/sw/applications/l_cnn/main.c b/sw/applications/l_cnn/main.c index baaf18e77..daaebc6b2 100644 --- a/sw/applications/l_cnn/main.c +++ b/sw/applications/l_cnn/main.c @@ -5,8 +5,8 @@ #include #include -// #include "test_cnnWeights.h" -#include "testdata_s2.h" +#include "test_cnnWeights.h" +// #include "testdata_s2.h" #define COMP_PREC 0.01f #define COMP_PREC_I32 512 @@ -17,12 +17,13 @@ void compareVectorsFloat(float* a, float* b, int size, float prec) { } } -void compareVectorsFxp(fxp32* a, fxp32* b, int size, float prec) { +void compareVectorsFxp(fxp32* a, fxp32* b, int size, int32_t prec) { for (int i = 0; i < size; ++i) { + printf("b[%d] address: %d\n", i, &b[i]); assert_closei32(a[i], b[i], prec, i); } } -/* + void test_same_layer1() { fxp32* result_fxp = (fxp32*)calloc(xin1*yin1, sizeof(fxp32)); float* result = (float*)calloc(xin1*yin1, sizeof(float)); @@ -39,22 +40,23 @@ void test_same_layer1() { void test_same_layer2() { fxp32* result_fxp = (fxp32*)calloc(xin2*yin2, sizeof(fxp32)); - float* result = (float*)calloc(xin2*yin2, sizeof(float)); + // float* result = (float*)calloc(xin2*yin2, 
sizeof(float)); convolve2DFxp(input2_fxp, result_fxp, kernel2_fxp, xin2, yin2, xker2, yker2, false); - convolve2DFloat(input2, result, kernel2, xin2, yin2, xker2, yker2, false); - + // convolve2DFloat(input2, result, kernel2, xin2, yin2, xker2, yker2, false); + compareVectorsFxp(result2_fxp, result_fxp, xin2*yin2, COMP_PREC_I32); - compareVectorsFloat(result2, result, xin2*yin2, COMP_PREC); + // compareVectorsFloat(result2, result, xin2*yin2, COMP_PREC); free(result_fxp); - free(result); + // free(result); } void test_same_layer3() { fxp32* result_fxp = (fxp32*)calloc(xin3*yin3, sizeof(fxp32)); float* result = (float*)calloc(xin3*yin3, sizeof(float)); + convolve2DFxp(input3_fxp, result_fxp, kernel3_fxp, xin3, yin3, xker3, yker3, false); convolve2DFloat(input3, result, kernel3, xin3, yin3, xker3, yker3, false); @@ -128,23 +130,23 @@ void test_cnn() { free(result_fxp); Cnn_destroy(cnn); } -*/ -void compare_tf_conv_3() { - CnnHandle cnn = Cnn_create((Dim2D){3u, 256u}, (Dim2D){3u, 21u}, (Dim2D){3u, 1u}, SAME, VALID); - Conv2DLayer_setWeightsFloat(cnn->layer1, weights1_2); - Conv2DLayer_setWeightsFloat(cnn->layer2, weights2_2); +// void compare_tf_conv_3() { +// CnnHandle cnn = Cnn_create((Dim2D){3u, 256u}, (Dim2D){3u, 21u}, (Dim2D){3u, 1u}, SAME, VALID); - float* result = (float*)calloc(1*256, sizeof(float)); - Cnn_forwardFloat(cnn, xin_2, result); +// Conv2DLayer_setWeightsFloat(cnn->layer1, weights1_2); +// Conv2DLayer_setWeightsFloat(cnn->layer2, weights2_2); - compareVectorsFloat(result, xout_2, 256, COMP_PREC); +// float* result = (float*)calloc(1*256, sizeof(float)); +// Cnn_forwardFloat(cnn, xin_2, result); - Cnn_predictFloat(cnn, xin_2, ppg_2, result); +// compareVectorsFloat(result, xout_2, 256, COMP_PREC); - compareVectorsFloat(result, ppgf_2, 256, COMP_PREC); +// Cnn_predictFloat(cnn, xin_2, ppg_2, result); - free(result); +// compareVectorsFloat(result, ppgf_2, 256, COMP_PREC); + +// free(result); // Cnn_freezeModel(cnn); @@ -159,7 +161,7 @@ void 
compare_tf_conv_3() { // free(result_fxp); // Cnn_destroy(cnn); -} +// } int main() { PRINTF("\033[1;93m====== Test CNN =========\n"); @@ -168,24 +170,26 @@ int main() { PRINTF("\033[1;32m====== Test 1 passed ====\n"); // test_same_layer2(); PRINTF("\033[1;32m====== Test 2 passed ====\n"); - // test_same_layer3(); + test_same_layer3(); PRINTF("\033[1;32m====== Test 3 passed ====\n"); + return EXIT_SUCCESS; PRINTF("\033[0m====== Test Same end ====\n\n"); PRINTF("\033[0m====== Test Valid =======\n"); - // test_valid_layer1(); + test_valid_layer1(); PRINTF("\033[1;32m====== Test 1 passed ====\n"); - // test_valid_layer2(); + test_valid_layer2(); PRINTF("\033[1;32m====== Test 2 passed ====\n"); - // test_valid_layer3(); + test_valid_layer3(); PRINTF("\033[1;32m====== Test 3 passed ====\n"); PRINTF("\033[0m====== Test Valid end ===\n\n"); PRINTF("\033[0m====== Test CNN =========\n"); - // test_cnn(); + test_cnn(); PRINTF("\033[1;32m====== Test CNN passed ==\n"); PRINTF("\033[0m====== Test CNN end =====\n\n"); + return EXIT_SUCCESS; PRINTF("\033[0m====== Comp TF ==========\n"); PRINTF("\033[0m====== Sample 3 =========\n"); - compare_tf_conv_3(); + // compare_tf_conv_3(); PRINTF("\033[1;32m====== Comp TF passed ===\n"); PRINTF("\033[0m====== Comp TF end ======\n"); return EXIT_SUCCESS; diff --git a/sw/applications/l_cnn/utils.h b/sw/applications/l_cnn/utils.h index 7c574ef6e..c90bcfdec 100644 --- a/sw/applications/l_cnn/utils.h +++ b/sw/applications/l_cnn/utils.h @@ -6,22 +6,25 @@ #include // Define SIMULATION if you want to disable printing -#define SIMULATION +// #define SIMULATION +#define TARGET // Enable or disable printing #ifndef SIMULATION #define PRINTF(...) printf(__VA_ARGS__) #else +#pragma message ("Simulation mode, no printing") #define PRINTF(...) 
#endif + // Assert functions, always print if failing void assert_closef(float a, float b, float prec, int idx) { float diff = a - b; if (diff < 0) diff = -diff; if (diff > prec) { printf("AF %d %d %d %d\n", __LINE__, (int)(a*100000), (int)(b*100000), idx); - exit(EXIT_FAILURE); + // exit(EXIT_FAILURE); } } @@ -30,12 +33,14 @@ void assert_closei32(int32_t a, int32_t b, int32_t prec, int idx) { if (diff < 0) diff = -diff; if (diff > prec) { printf("AI %d %d %d %d\n", __LINE__, a, b, idx); - exit(EXIT_FAILURE); + // exit(EXIT_FAILURE); } } // Vector export for plots -#ifndef SIMULATION +#if defined(SIMULATION) || defined(TARGET) +#define VECTOR_EXPORT(...) +#else void vectorExport(float* a, int size, char filename[]) { FILE *filePointer; filePointer = fopen(filename, "w"); @@ -50,8 +55,6 @@ void vectorExport(float* a, int size, char filename[]) { } #define VECTOR_EXPORT(...) vectorExport(__VA_ARGS__) -#else -#define VECTOR_EXPORT(...) #endif #endif /* UTILS_H */ diff --git a/sw/applications/l_data/utils.h b/sw/applications/l_data/utils.h index acfe5bb47..7568a27e4 100644 --- a/sw/applications/l_data/utils.h +++ b/sw/applications/l_data/utils.h @@ -6,12 +6,13 @@ #include // Define SIMULATION if you want to disable printing -#define SIMULATION +// #define SIMULATION // Enable or disable printing #ifndef SIMULATION #define PRINTF(...) printf(__VA_ARGS__) #else +#pragma message ("Simulation mode, no printing") #define PRINTF(...) 
#endif diff --git a/sw/applications/l_fxp/main.c b/sw/applications/l_fxp/main.c index 4b1a89b85..d62b16974 100644 --- a/sw/applications/l_fxp/main.c +++ b/sw/applications/l_fxp/main.c @@ -52,13 +52,13 @@ void test_fxp_sqrt() { int main() { PRINTF("\033[1;93m====== Test FXP ==========\n"); PRINTF("\033[0m====== Test Assert =======\n"); - test_assert(); + // test_assert(); PRINTF("\033[1;32m====== Test passed =======\n"); PRINTF("\033[0m====== Test Basic ========\n"); - test_fxp_basic(); + // test_fxp_basic(); PRINTF("\033[1;32m====== Test passed =======\n"); PRINTF("\033[0m====== Test Sqrt =========\n"); - test_fxp_sqrt(); + // test_fxp_sqrt(); PRINTF("\033[1;32m====== Test passed =======\n"); PRINTF("\033[0m====== Test FXP end ======\n\n"); return EXIT_SUCCESS; diff --git a/sw/applications/l_fxp/utils.h b/sw/applications/l_fxp/utils.h index acfe5bb47..50c5427ad 100644 --- a/sw/applications/l_fxp/utils.h +++ b/sw/applications/l_fxp/utils.h @@ -6,15 +6,17 @@ #include // Define SIMULATION if you want to disable printing -#define SIMULATION +// #define SIMULATION // Enable or disable printing #ifndef SIMULATION #define PRINTF(...) printf(__VA_ARGS__) #else +#pragma message ("Simulation mode, no printing") #define PRINTF(...) 
#endif + // Assert functions, always print if failing void assert_closef(float a, float b, float prec) { float diff = a - b; From 1ca79f7509e08d6737f3528e360a56dde64affb4 Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Thu, 9 May 2024 11:57:48 +0200 Subject: [PATCH 09/27] float tests all pass --- sw/applications/l_cnn/main.c | 47 ++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/sw/applications/l_cnn/main.c b/sw/applications/l_cnn/main.c index daaebc6b2..67a4d322d 100644 --- a/sw/applications/l_cnn/main.c +++ b/sw/applications/l_cnn/main.c @@ -19,51 +19,51 @@ void compareVectorsFloat(float* a, float* b, int size, float prec) { void compareVectorsFxp(fxp32* a, fxp32* b, int size, int32_t prec) { for (int i = 0; i < size; ++i) { - printf("b[%d] address: %d\n", i, &b[i]); + // printf("b[%d] address: %d\n", i, &b[i]); assert_closei32(a[i], b[i], prec, i); } } void test_same_layer1() { - fxp32* result_fxp = (fxp32*)calloc(xin1*yin1, sizeof(fxp32)); + // fxp32* result_fxp = (fxp32*)calloc(xin1*yin1, sizeof(fxp32)); float* result = (float*)calloc(xin1*yin1, sizeof(float)); - convolve2DFxp(input1_fxp, result_fxp, kernel1_fxp, xin1, yin1, xker1, yker1, false); + // convolve2DFxp(input1_fxp, result_fxp, kernel1_fxp, xin1, yin1, xker1, yker1, false); convolve2DFloat(input1, result, kernel1, xin1, yin1, xker1, yker1, false); - compareVectorsFxp(result1_fxp, result_fxp, xin1*yin1, COMP_PREC_I32); + // compareVectorsFxp(result1_fxp, result_fxp, xin1*yin1, COMP_PREC_I32); compareVectorsFloat(result1, result, xin1*yin1, COMP_PREC); - free(result_fxp); + // free(result_fxp); free(result); } void test_same_layer2() { - fxp32* result_fxp = (fxp32*)calloc(xin2*yin2, sizeof(fxp32)); - // float* result = (float*)calloc(xin2*yin2, sizeof(float)); + // fxp32* result_fxp = (fxp32*)calloc(xin2*yin2, sizeof(fxp32)); + float* result = (float*)calloc(xin2*yin2, sizeof(float)); - convolve2DFxp(input2_fxp, result_fxp, kernel2_fxp, xin2, yin2, 
xker2, yker2, false); - // convolve2DFloat(input2, result, kernel2, xin2, yin2, xker2, yker2, false); + // convolve2DFxp(input2_fxp, result_fxp, kernel2_fxp, xin2, yin2, xker2, yker2, false); + convolve2DFloat(input2, result, kernel2, xin2, yin2, xker2, yker2, false); - compareVectorsFxp(result2_fxp, result_fxp, xin2*yin2, COMP_PREC_I32); - // compareVectorsFloat(result2, result, xin2*yin2, COMP_PREC); + // compareVectorsFxp(result2_fxp, result_fxp, xin2*yin2, COMP_PREC_I32); + compareVectorsFloat(result2, result, xin2*yin2, COMP_PREC); - free(result_fxp); - // free(result); + // free(result_fxp); + free(result); } void test_same_layer3() { - fxp32* result_fxp = (fxp32*)calloc(xin3*yin3, sizeof(fxp32)); + // fxp32* result_fxp = (fxp32*)calloc(xin3*yin3, sizeof(fxp32)); float* result = (float*)calloc(xin3*yin3, sizeof(float)); - convolve2DFxp(input3_fxp, result_fxp, kernel3_fxp, xin3, yin3, xker3, yker3, false); + // convolve2DFxp(input3_fxp, result_fxp, kernel3_fxp, xin3, yin3, xker3, yker3, false); convolve2DFloat(input3, result, kernel3, xin3, yin3, xker3, yker3, false); - compareVectorsFxp(result3_fxp, result_fxp, xin3*yin3, COMP_PREC_I32); + // compareVectorsFxp(result3_fxp, result_fxp, xin3*yin3, COMP_PREC_I32); compareVectorsFloat(result3, result, xin3*yin3, COMP_PREC); - free(result_fxp); + // free(result_fxp); free(result); } @@ -118,16 +118,16 @@ void test_cnn() { Conv2DLayer_setWeightsFxp(cnn->layer2, ke2_fxp); float* result = (float*)calloc(1*15, sizeof(float)); - fxp32* result_fxp = (fxp32*)calloc(1*15, sizeof(fxp32)); + // fxp32* result_fxp = (fxp32*)calloc(1*15, sizeof(fxp32)); Cnn_forwardFloat(cnn, inp, result); - Cnn_forwardFxp(cnn, inp_fxp, result_fxp); + // Cnn_forwardFxp(cnn, inp_fxp, result_fxp); compareVectorsFloat(result, res, 15, COMP_PREC*10); - compareVectorsFxp(result_fxp, res_fxp, 15, COMP_PREC_I32*10); + // compareVectorsFxp(result_fxp, res_fxp, 15, COMP_PREC_I32*10); free(result); - free(result_fxp); + // free(result_fxp); 
Cnn_destroy(cnn); } @@ -166,13 +166,12 @@ void test_cnn() { int main() { PRINTF("\033[1;93m====== Test CNN =========\n"); PRINTF("\033[0m====== Test Same ========\n"); - // test_same_layer1(); + test_same_layer1(); PRINTF("\033[1;32m====== Test 1 passed ====\n"); - // test_same_layer2(); + test_same_layer2(); PRINTF("\033[1;32m====== Test 2 passed ====\n"); test_same_layer3(); PRINTF("\033[1;32m====== Test 3 passed ====\n"); - return EXIT_SUCCESS; PRINTF("\033[0m====== Test Same end ====\n\n"); PRINTF("\033[0m====== Test Valid =======\n"); test_valid_layer1(); From cb147dc331f62b0695631099028fce3ca7a31aed Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Thu, 9 May 2024 12:05:13 +0200 Subject: [PATCH 10/27] try one whole sample --- sw/applications/l_cnn/main.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/sw/applications/l_cnn/main.c b/sw/applications/l_cnn/main.c index 67a4d322d..887ab8c00 100644 --- a/sw/applications/l_cnn/main.c +++ b/sw/applications/l_cnn/main.c @@ -6,7 +6,7 @@ #include #include "test_cnnWeights.h" -// #include "testdata_s2.h" +#include "testdata_s2.h" #define COMP_PREC 0.01f #define COMP_PREC_I32 512 @@ -131,22 +131,22 @@ void test_cnn() { Cnn_destroy(cnn); } -// void compare_tf_conv_3() { -// CnnHandle cnn = Cnn_create((Dim2D){3u, 256u}, (Dim2D){3u, 21u}, (Dim2D){3u, 1u}, SAME, VALID); +void compare_tf_conv_3() { + CnnHandle cnn = Cnn_create((Dim2D){3u, 256u}, (Dim2D){3u, 21u}, (Dim2D){3u, 1u}, SAME, VALID); -// Conv2DLayer_setWeightsFloat(cnn->layer1, weights1_2); -// Conv2DLayer_setWeightsFloat(cnn->layer2, weights2_2); + Conv2DLayer_setWeightsFloat(cnn->layer1, weights1_2); + Conv2DLayer_setWeightsFloat(cnn->layer2, weights2_2); -// float* result = (float*)calloc(1*256, sizeof(float)); -// Cnn_forwardFloat(cnn, xin_2, result); + float* result = (float*)calloc(1*256, sizeof(float)); + Cnn_forwardFloat(cnn, xin_2, result); -// compareVectorsFloat(result, xout_2, 256, COMP_PREC); + 
compareVectorsFloat(result, xout_2, 256, COMP_PREC); -// Cnn_predictFloat(cnn, xin_2, ppg_2, result); + Cnn_predictFloat(cnn, xin_2, ppg_2, result); -// compareVectorsFloat(result, ppgf_2, 256, COMP_PREC); + compareVectorsFloat(result, ppgf_2, 256, COMP_PREC); -// free(result); + free(result); // Cnn_freezeModel(cnn); @@ -160,8 +160,8 @@ void test_cnn() { // compareVectorsFxp(result_fxp, ppgf_2_fxp, 256, COMP_PREC_I32); // free(result_fxp); - // Cnn_destroy(cnn); -// } + Cnn_destroy(cnn); +} int main() { PRINTF("\033[1;93m====== Test CNN =========\n"); From 05a2976d375d6138d2d6577355668005e91f7ccd Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Mon, 13 May 2024 12:39:31 +0200 Subject: [PATCH 11/27] create an easy example showing the problem --- sw/applications/l_malloc/main.c | 28 +++++++++++ sw/applications/l_malloc/utils.h | 80 ++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 sw/applications/l_malloc/main.c create mode 100644 sw/applications/l_malloc/utils.h diff --git a/sw/applications/l_malloc/main.c b/sw/applications/l_malloc/main.c new file mode 100644 index 000000000..0256979f5 --- /dev/null +++ b/sw/applications/l_malloc/main.c @@ -0,0 +1,28 @@ +#include "utils.h" + +typedef struct __Dim2D { + int x; + int y; +} Dim2D; + +Dim2D* alloc_struct() { + printf("allocate struct\n"); + return (Dim2D*)my_malloc(sizeof(Dim2D)); +} + +void alloc_array() { + printf("allocate arrays\n"); + int* arr = (int*)my_malloc(10 * sizeof(int)); + int* arr2 = (int*)my_malloc(10 * sizeof(int)); + int* arr3 = (int*)my_malloc(10 * sizeof(int)); + + free(arr3); + free(arr2); + free(arr); +} + +int main(int argc, char *argv[]) { + Dim2D* dim = alloc_struct(); + alloc_array(); + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/sw/applications/l_malloc/utils.h b/sw/applications/l_malloc/utils.h new file mode 100644 index 000000000..afdfc6bd2 --- /dev/null +++ b/sw/applications/l_malloc/utils.h @@ -0,0 +1,80 @@ +#ifndef UTILS_H 
+#define UTILS_H + +#include +#include +#include + +// Define SIMULATION if you want to disable printing +// #define SIMULATION +#define TARGET + +// Enable or disable printing +#ifndef SIMULATION +#define PRINTF(...) printf(__VA_ARGS__) +#else +#pragma message ("Simulation mode, no printing") +#define PRINTF(...) +#endif + + +// Assert functions, always print if failing +void assert_closef(float a, float b, float prec, int idx) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { + printf("AF %d %d %d %d\n", __LINE__, (int)(a*100000), (int)(b*100000), idx); + // exit(EXIT_FAILURE); + } +} + +void assert_closei32(int32_t a, int32_t b, int32_t prec, int idx) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { + printf("AI %d %d %d %d\n", __LINE__, a, b, idx); + // exit(EXIT_FAILURE); + } +} + +void* my_calloc(size_t num, size_t size) { + void *ptr = calloc(num, size); + if (ptr != NULL) { + printf("Allocated memory range: [%p - %p]\n", ptr, (char*)ptr + num * size - 1); + } else { + printf("Failed to allocate memory\n"); + } + return ptr; +} + +void* my_malloc(size_t size) { + void *ptr = malloc(size); + if (ptr != NULL) { + printf("Allocated memory range: [%p - %p]\n", ptr, (char*)ptr + size - 1); + } else { + printf("Failed to allocate memory\n"); + } + return ptr; +} + +// Vector export for plots +#if defined(SIMULATION) || defined(TARGET) +#define VECTOR_EXPORT(...) 
+#else +void vectorExport(float* a, int size, char filename[]) { + FILE *filePointer; + filePointer = fopen(filename, "w"); + if (filePointer == NULL) { + printf("Failed to create file.\n"); + return; + } + for (int i=0; i Date: Mon, 13 May 2024 12:42:18 +0200 Subject: [PATCH 12/27] make example even easier --- sw/applications/l_malloc/main.c | 27 +++++++++-- sw/applications/l_malloc/utils.h | 80 -------------------------------- 2 files changed, 24 insertions(+), 83 deletions(-) delete mode 100644 sw/applications/l_malloc/utils.h diff --git a/sw/applications/l_malloc/main.c b/sw/applications/l_malloc/main.c index 0256979f5..2ef871e99 100644 --- a/sw/applications/l_malloc/main.c +++ b/sw/applications/l_malloc/main.c @@ -1,4 +1,25 @@ -#include "utils.h" +#include +#include + +void* my_calloc(size_t num, size_t size) { + void *ptr = calloc(num, size); + if (ptr != NULL) { + printf("Allocated memory range: [%p - %p]\n", ptr, (char*)ptr + num * size - 1); + } else { + printf("Failed to allocate memory\n"); + } + return ptr; +} + +void* my_malloc(size_t size) { + void *ptr = malloc(size); + if (ptr != NULL) { + printf("Allocated memory range: [%p - %p]\n", ptr, (char*)ptr + size - 1); + } else { + printf("Failed to allocate memory\n"); + } + return ptr; +} typedef struct __Dim2D { int x; @@ -13,8 +34,8 @@ Dim2D* alloc_struct() { void alloc_array() { printf("allocate arrays\n"); int* arr = (int*)my_malloc(10 * sizeof(int)); - int* arr2 = (int*)my_malloc(10 * sizeof(int)); - int* arr3 = (int*)my_malloc(10 * sizeof(int)); + int* arr2 = (int*)my_malloc(5 * sizeof(int)); + int* arr3 = (int*)my_malloc(15 * sizeof(int)); free(arr3); free(arr2); diff --git a/sw/applications/l_malloc/utils.h b/sw/applications/l_malloc/utils.h deleted file mode 100644 index afdfc6bd2..000000000 --- a/sw/applications/l_malloc/utils.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef UTILS_H -#define UTILS_H - -#include -#include -#include - -// Define SIMULATION if you want to disable printing -// #define 
SIMULATION -#define TARGET - -// Enable or disable printing -#ifndef SIMULATION -#define PRINTF(...) printf(__VA_ARGS__) -#else -#pragma message ("Simulation mode, no printing") -#define PRINTF(...) -#endif - - -// Assert functions, always print if failing -void assert_closef(float a, float b, float prec, int idx) { - float diff = a - b; - if (diff < 0) diff = -diff; - if (diff > prec) { - printf("AF %d %d %d %d\n", __LINE__, (int)(a*100000), (int)(b*100000), idx); - // exit(EXIT_FAILURE); - } -} - -void assert_closei32(int32_t a, int32_t b, int32_t prec, int idx) { - int32_t diff = a - b; - if (diff < 0) diff = -diff; - if (diff > prec) { - printf("AI %d %d %d %d\n", __LINE__, a, b, idx); - // exit(EXIT_FAILURE); - } -} - -void* my_calloc(size_t num, size_t size) { - void *ptr = calloc(num, size); - if (ptr != NULL) { - printf("Allocated memory range: [%p - %p]\n", ptr, (char*)ptr + num * size - 1); - } else { - printf("Failed to allocate memory\n"); - } - return ptr; -} - -void* my_malloc(size_t size) { - void *ptr = malloc(size); - if (ptr != NULL) { - printf("Allocated memory range: [%p - %p]\n", ptr, (char*)ptr + size - 1); - } else { - printf("Failed to allocate memory\n"); - } - return ptr; -} - -// Vector export for plots -#if defined(SIMULATION) || defined(TARGET) -#define VECTOR_EXPORT(...) 
-#else -void vectorExport(float* a, int size, char filename[]) { - FILE *filePointer; - filePointer = fopen(filename, "w"); - if (filePointer == NULL) { - printf("Failed to create file.\n"); - return; - } - for (int i=0; i Date: Mon, 13 May 2024 13:16:25 +0200 Subject: [PATCH 13/27] explain problem --- sw/applications/l_malloc/main.c | 11 ----------- sw/applications/l_malloc/problem.md | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 11 deletions(-) create mode 100644 sw/applications/l_malloc/problem.md diff --git a/sw/applications/l_malloc/main.c b/sw/applications/l_malloc/main.c index 2ef871e99..84fdcd2ff 100644 --- a/sw/applications/l_malloc/main.c +++ b/sw/applications/l_malloc/main.c @@ -21,16 +21,6 @@ void* my_malloc(size_t size) { return ptr; } -typedef struct __Dim2D { - int x; - int y; -} Dim2D; - -Dim2D* alloc_struct() { - printf("allocate struct\n"); - return (Dim2D*)my_malloc(sizeof(Dim2D)); -} - void alloc_array() { printf("allocate arrays\n"); int* arr = (int*)my_malloc(10 * sizeof(int)); @@ -43,7 +33,6 @@ void alloc_array() { } int main(int argc, char *argv[]) { - Dim2D* dim = alloc_struct(); alloc_array(); return EXIT_SUCCESS; } \ No newline at end of file diff --git a/sw/applications/l_malloc/problem.md b/sw/applications/l_malloc/problem.md new file mode 100644 index 000000000..b64e04803 --- /dev/null +++ b/sw/applications/l_malloc/problem.md @@ -0,0 +1,20 @@ +So this simple program using dynamic memory allocation does give the following result: +```bash +Allocated memory range: [0xd590 - 0xd5a3] +allocate arrays +Allocated memory range: [0xd218 - 0xd23f] +Allocated memory range: [0xd218 - 0xd22b] +Allocated memory range: [0xd218 - 0xd253] +``` +So after the second allocation all the new arrays are allocated at the same address which then obviously leads to memory corruption + +If I coment out the call to the first function I get this output: +```bash +allocate arrays +Allocated memory range: [0xd590 - 0xd5b7] +Allocated memory range: 
[0xd218 - 0xd22b] +Allocated memory range: [0xd218 - 0xd253] +``` +The issue persists... + +I don't really know how this could occur and how to fix it. \ No newline at end of file From 507daaf219d2eb97608bc0e1056c00491db95ccf Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Mon, 13 May 2024 13:17:03 +0200 Subject: [PATCH 14/27] get back original example --- sw/applications/l_malloc/main.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sw/applications/l_malloc/main.c b/sw/applications/l_malloc/main.c index 84fdcd2ff..11fd984d9 100644 --- a/sw/applications/l_malloc/main.c +++ b/sw/applications/l_malloc/main.c @@ -21,6 +21,17 @@ void* my_malloc(size_t size) { return ptr; } +int* alloc_array_ret() { + printf("allocate array and return\n"); + int* myarr = (int*)my_malloc(5 * sizeof(int)); + + for (int i = 0; i < 5; ++i) { + myarr[i] = i; + } + + return myarr; +} + void alloc_array() { printf("allocate arrays\n"); int* arr = (int*)my_malloc(10 * sizeof(int)); @@ -33,6 +44,7 @@ void alloc_array() { } int main(int argc, char *argv[]) { + int* ret = alloc_array_ret(); alloc_array(); return EXIT_SUCCESS; } \ No newline at end of file From aa609154f5b1563b6800177f28c782e474e8abab Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Mon, 13 May 2024 15:21:31 +0200 Subject: [PATCH 15/27] Add static test and do debug on dynamic --- hw/core-v-mini-mcu/core_v_mini_mcu.sv | 16 +- sw/applications/l_cnn/cnn.c | 79 ++++++++-- sw/applications/l_cnn/cnn.h | 38 ++++- sw/applications/l_cnn/conv2dlayer.c | 104 ++++++------- sw/applications/l_cnn/conv2dlayer.h | 52 +++---- sw/applications/l_cnn/main.c | 165 ++------------------- sw/applications/l_cnn/test_cnnWeights.h | 78 ---------- sw/applications/l_cnn/utils.h | 20 +++ sw/applications/l_cnn_static/cnn.c | 81 ++++++++++ sw/applications/l_cnn_static/cnn.h | 42 ++++++ sw/applications/l_cnn_static/conv2dlayer.c | 161 ++++++++++++++++++++ sw/applications/l_cnn_static/conv2dlayer.h | 47 ++++++ 
sw/applications/l_cnn_static/fxp32.c | 56 +++++++ sw/applications/l_cnn_static/fxp32.h | 26 ++++ sw/applications/l_cnn_static/main.c | 83 +++++++++++ sw/applications/l_cnn_static/utils.h | 124 ++++++++++++++++ 16 files changed, 822 insertions(+), 350 deletions(-) create mode 100644 sw/applications/l_cnn_static/cnn.c create mode 100644 sw/applications/l_cnn_static/cnn.h create mode 100644 sw/applications/l_cnn_static/conv2dlayer.c create mode 100644 sw/applications/l_cnn_static/conv2dlayer.h create mode 100644 sw/applications/l_cnn_static/fxp32.c create mode 100644 sw/applications/l_cnn_static/fxp32.h create mode 100644 sw/applications/l_cnn_static/main.c create mode 100644 sw/applications/l_cnn_static/utils.h diff --git a/hw/core-v-mini-mcu/core_v_mini_mcu.sv b/hw/core-v-mini-mcu/core_v_mini_mcu.sv index ecaa6cffb..0b90c20cd 100644 --- a/hw/core-v-mini-mcu/core_v_mini_mcu.sv +++ b/hw/core-v-mini-mcu/core_v_mini_mcu.sv @@ -566,11 +566,9 @@ module core_v_mini_mcu .spi_flash_sck_en_o(spi_flash_sck_oe_o), .spi_flash_csb_o({spi_flash_cs_1_o,spi_flash_cs_0_o}), .spi_flash_csb_en_o({spi_flash_cs_1_oe_o, spi_flash_cs_0_oe_o}), - .spi_flash_sd_o({spi_flash_sd_3_o, spi_flash_sd_2_o, spi_flash_sd_1_o, spi_flash_sd_0_o}), - .spi_flash_sd_en_o({ - spi_flash_sd_3_oe_o, spi_flash_sd_2_oe_o, spi_flash_sd_1_oe_o, spi_flash_sd_0_oe_o - }), - .spi_flash_sd_i({spi_flash_sd_3_i, spi_flash_sd_2_i, spi_flash_sd_1_i, spi_flash_sd_0_i}), + .spi_flash_sd_o({spi_flash_sd_3_o,spi_flash_sd_2_o, spi_flash_sd_1_o, spi_flash_sd_0_o}), + .spi_flash_sd_en_o({spi_flash_sd_3_oe_o,spi_flash_sd_2_oe_o, spi_flash_sd_1_oe_o, spi_flash_sd_0_oe_o}), + .spi_flash_sd_i({spi_flash_sd_3_i,spi_flash_sd_2_i, spi_flash_sd_1_i, spi_flash_sd_0_i}), .intr_i(intr), .intr_vector_ext_i, .core_sleep_i(core_sleep), @@ -661,11 +659,11 @@ module core_v_mini_mcu .cio_sda_en_o(i2c_sda_oe_o), .spi_sck_o, .spi_sck_en_o(spi_sck_oe_o), - .spi_csb_o({spi_cs_1_o, spi_cs_0_o}), + .spi_csb_o({spi_cs_1_o,spi_cs_0_o}), 
.spi_csb_en_o({spi_cs_1_oe_o, spi_cs_0_oe_o}), - .spi_sd_o({spi_sd_3_o, spi_sd_2_o, spi_sd_1_o, spi_sd_0_o}), - .spi_sd_en_o({spi_sd_3_oe_o, spi_sd_2_oe_o, spi_sd_1_oe_o, spi_sd_0_oe_o}), - .spi_sd_i({spi_sd_3_i, spi_sd_2_i, spi_sd_1_i, spi_sd_0_i}), + .spi_sd_o({spi_sd_3_o,spi_sd_2_o, spi_sd_1_o, spi_sd_0_o}), + .spi_sd_en_o({spi_sd_3_oe_o,spi_sd_2_oe_o, spi_sd_1_oe_o, spi_sd_0_oe_o}), + .spi_sd_i({spi_sd_3_i,spi_sd_2_i, spi_sd_1_i, spi_sd_0_i}), .spi_intr_event_o(spi_intr), .spi_rx_valid_o(spi_rx_valid), .spi_tx_ready_o(spi_tx_ready), diff --git a/sw/applications/l_cnn/cnn.c b/sw/applications/l_cnn/cnn.c index 0bcb03b23..09a7740f5 100644 --- a/sw/applications/l_cnn/cnn.c +++ b/sw/applications/l_cnn/cnn.c @@ -1,40 +1,75 @@ +// clang-format off #include "cnn.h" #include +#include +// clang-format on CnnHandle Cnn_create(Dim2D inputDim, Dim2D layer1Dim, Dim2D layer2Dim, Conv2DPadding layer1Pad, Conv2DPadding layer2Pad) { - CnnHandle self = (CnnHandle)malloc(sizeof(Cnn)); + CnnHandle self = (CnnHandle)my_malloc(sizeof(Cnn)); // Create layers - self->layer1 = Conv2DLayer_create(layer1Dim, layer1Pad); - self->layer2 = Conv2DLayer_create(layer2Dim, layer2Pad); - self->inputDim = inputDim; + self->layer1Dim = layer1Dim; + self->layer2Dim = layer2Dim; // TODO: calculate self->outputDim = (Dim2D){1u, 256u}; + + self->layer1Pad = layer1Pad; + self->layer2Pad = layer2Pad; + + self->layer1Weights = NULL; + self->layer2Weights = NULL; + return self; } void Cnn_destroy(CnnHandle self) { - Conv2DLayer_destroy(self->layer1); - Conv2DLayer_destroy(self->layer2); + // free(self->layer1Weights); + // free(self->layer2Weights); free(self); } void Cnn_forwardFxp(CnnHandle self, fxp32* input, fxp32* output) { - fxp32* layer1Output = (fxp32*)calloc(self->inputDim.x * self->inputDim.y, sizeof(fxp32)); - Conv2DLayer_forwardFxp(self->layer1, self->inputDim, input, layer1Output); - Conv2DLayer_forwardFxp(self->layer2, self->inputDim, layer1Output, output); - free(layer1Output); + // fxp32* 
layer1Output = (fxp32*)my_calloc(self->inputDim.x * self->inputDim.y, sizeof(fxp32)); + // Conv2DLayer_forwardFxp(self->layer1, self->inputDim, input, layer1Output); + // Conv2DLayer_forwardFxp(self->layer2, self->inputDim, layer1Output, output); + // free(layer1Output); + printf("Cnn_forwardFxp not implemented\n"); } void Cnn_forwardFloat(CnnHandle self, float* input, float* output) { - float* layer1Output = (float*)calloc(self->inputDim.x * self->inputDim.y, sizeof(float)); - Conv2DLayer_forwardFloat(self->layer1, self->inputDim, input, layer1Output); - Conv2DLayer_forwardFloat(self->layer2, self->inputDim, layer1Output, output); + float* layer1Output = (float*)my_calloc(self->inputDim.x * self->inputDim.y, sizeof(float)); + convolve2DFloat(input, layer1Output, self->layer1Weights, self->inputDim.x, self->inputDim.y, self->layer1Dim.x, + self->layer1Dim.y, self->layer1Pad == VALID); + convolve2DFloat(layer1Output, output, self->layer2Weights, self->inputDim.x, self->inputDim.y, self->layer2Dim.x, + self->layer2Dim.y, self->layer2Pad == VALID); + for (int i = 0; i < 15; ++i) { + printf("layer2 output[%d] = %d\n", i, (int)(100000*output[i])); + } free(layer1Output); } +void Cnn_setWeights1Float(CnnHandle self, float* weights) { + self->layer1Weights = weights; +} + +void Cnn_setWeights2Float(CnnHandle self, float* weights) { + self->layer2Weights = weights; +} + +// void Cnn_setWeights1Fxp(CnnHandle self, fxp32* weights) { +// for (int i = 0; i < self->layer1Dim.x * self->layer1Dim.y; ++i) { +// self->layer1Weights[i] = fxp32_fxpMulToFloat(weights[i]); +// } +// } + +// void Cnn_setWeights2Fxp(CnnHandle self, fxp32* weights) { +// for (int i = 0; i < self->layer2Dim.x * self->layer2Dim.y; ++i) { +// self->layer2Weights[i] = fxp32_fxpMulToFloat(weights[i]); +// } +// } + void Cnn_predictFxp(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output) { Cnn_forwardFxp(self, acc, output); for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { @@ -49,7 +84,21 @@ 
void Cnn_predictFloat(CnnHandle self, float* acc, float* ppg, float* output) { } } +float Cnn_sampleLoss(CnnHandle self, complex_t* ypred, complex_t* ytrue) { + float loss = 0.0f; + // NOTE: could be optimized by reusing .r and .i for the abs + fxpMul* abs = (fxpMul*)my_calloc(self->outputDim.y * self->outputDim.x, sizeof(fxpMul)); + for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { + abs[i] = fxp32_pow2(ytrue[i].r - ypred[i].r) + fxp32_pow2(ytrue[i].i - ypred[i].i); + } + for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { + loss += fxp32_fxpMulToFloat(abs[i]); + } + return loss; +} + void Cnn_freezeModel(CnnHandle self) { - Conv2DLayer_transformWeightsToFxp(self->layer1); - Conv2DLayer_transformWeightsToFxp(self->layer2); + // Conv2DLayer_transformWeightsToFxp(self->layer1); + // Conv2DLayer_transformWeightsToFxp(self->layer2); + printf("Cnn_freezeModel not implemented\n"); } \ No newline at end of file diff --git a/sw/applications/l_cnn/cnn.h b/sw/applications/l_cnn/cnn.h index 3905c6fac..d654014e4 100644 --- a/sw/applications/l_cnn/cnn.h +++ b/sw/applications/l_cnn/cnn.h @@ -4,28 +4,60 @@ #include "conv2dlayer.h" #include "fxp32.h" +// TODO: perhaps we could optimize but we need 256 as value... 
+/** + * @brief a 2D dimension + * @param x the rows of a matrix + * @param y the columns of a matrix + */ +typedef struct __Dim2D { + uint16_t x; + uint16_t y; +} Dim2D; + +typedef enum __Conv2DPadding { + VALID, + SAME +} Conv2DPadding; + /** * @brief a two layer cnn model */ typedef struct __Cnn { - Conv2DLayerHandle layer1; - Conv2DLayerHandle layer2; Dim2D inputDim; Dim2D outputDim; + Dim2D layer1Dim; + Dim2D layer2Dim; + Conv2DPadding layer1Pad; + Conv2DPadding layer2Pad; + float* layer1Weights; + float* layer2Weights; } Cnn; typedef struct __Cnn* CnnHandle; +typedef struct __complex_t { + int32_t r; + int32_t i; +} complex_t; + CnnHandle Cnn_create(Dim2D inputDim, Dim2D layer1Dim, Dim2D layer2Dim, Conv2DPadding layer1Pad, Conv2DPadding layer2Pad); void Cnn_destroy(CnnHandle self); -void Cnn_forwardFxp(CnnHandle self, fxp32* input, fxp32* output); +// void Cnn_forwardFxp(CnnHandle self, fxp32* input, fxp32* output); void Cnn_forwardFloat(CnnHandle self, float* input, float* output); +void Cnn_setWeights1Float(CnnHandle self, float* weights); +void Cnn_setWeights2Float(CnnHandle self, float* weights); +// void Cnn_setWeights1Fxp(CnnHandle self, fxp32* weights); +// void Cnn_setWeights2Fxp(CnnHandle self, fxp32* weights); + void Cnn_predictFxp(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output); void Cnn_predictFloat(CnnHandle self, float* acc, float* ppg, float* output); +float Cnn_sampleLoss(CnnHandle self, complex_t* ypred, complex_t* ytrue); + void Cnn_freezeModel(CnnHandle self); #endif // CNN_H \ No newline at end of file diff --git a/sw/applications/l_cnn/conv2dlayer.c b/sw/applications/l_cnn/conv2dlayer.c index 0ec81ee54..8a6d9d8f6 100644 --- a/sw/applications/l_cnn/conv2dlayer.c +++ b/sw/applications/l_cnn/conv2dlayer.c @@ -4,48 +4,38 @@ #include #include -#define my_assert(condition) \ - do { \ - if (!(condition)) { \ - printf("Assertion failed: \n"); \ - printf("File: %s\n", __FILE__); \ - printf("Line: %d\n", __LINE__); \ - exit(EXIT_FAILURE); \ 
- } \ - } while (0) - -Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, Conv2DPadding padding) { - Conv2DLayerHandle self = (Conv2DLayerHandle)malloc(sizeof(Conv2DLayer)); - self->dim = dim; - self->padding = padding; - self->weightsFxp = (fxp32*)calloc(dim.x * dim.y, sizeof(fxp32)); - self->weightsFloat = (float*)calloc(dim.x * dim.y, sizeof(float)); - return self; -} - -void Conv2DLayer_destroy(Conv2DLayerHandle self) { - free(self->weightsFxp); - free(self->weightsFloat); - free(self); -} - -bool Conv2DLayer_setWeightsFxp(Conv2DLayerHandle self, fxp32* weights) { - memcpy(self->weightsFxp, weights, self->dim.x * self->dim.y * sizeof(fxp32)); - // TODO: make sure we get the right size of weights - return true; -} - -bool Conv2DLayer_setWeightsFloat(Conv2DLayerHandle self, float* weights) { - memcpy(self->weightsFloat, weights, self->dim.x * self->dim.y * sizeof(float)); - // TODO: make sure we get the right size of weights - return true; -} - -void Conv2DLayer_transformWeightsToFxp(Conv2DLayerHandle self) { - for (int i = 0; i < self->dim.x * self->dim.y; ++i) { - self->weightsFxp[i] = fxp32_fromFloat(self->weightsFloat[i]); - } -} +// Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, Conv2DPadding padding) { +// Conv2DLayerHandle self = (Conv2DLayerHandle)my_malloc(sizeof(Conv2DLayer)); +// self->dim = dim; +// self->padding = padding; +// self->weightsFxp = (fxp32*)my_calloc(dim.x * dim.y, sizeof(fxp32)); +// self->weightsFloat = (float*)my_calloc(dim.x * dim.y, sizeof(float)); +// return self; +// } + +// void Conv2DLayer_destroy(Conv2DLayerHandle self) { +// free(self->weightsFxp); +// free(self->weightsFloat); +// free(self); +// } + +// bool Conv2DLayer_setWeightsFxp(Conv2DLayerHandle self, fxp32* weights) { +// memcpy(self->weightsFxp, weights, self->dim.x * self->dim.y * sizeof(fxp32)); +// // TODO: make sure we get the right size of weights +// return true; +// } + +// bool Conv2DLayer_setWeightsFloat(Conv2DLayerHandle self, float* weights) { +// 
memcpy(self->weightsFloat, weights, self->dim.x * self->dim.y * sizeof(float)); +// // TODO: make sure we get the right size of weights +// return true; +// } + +// void Conv2DLayer_transformWeightsToFxp(Conv2DLayerHandle self) { +// for (int i = 0; i < self->dim.x * self->dim.y; ++i) { +// self->weightsFxp[i] = fxp32_fromFloat(self->weightsFloat[i]); +// } +// } // Could be optimized void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, int kerx, int kery, bool valid) { @@ -56,7 +46,6 @@ void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, // get kernel center if (kerx % 2 != 1 || kery % 2 != 1) { printf("Kernel size must be odd\n"); - printf("kerx: %d, kery: %d\n", kerx, kery); exit(EXIT_FAILURE); } int cx = kerx / 2; @@ -93,7 +82,6 @@ void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, // cy); w = kernel[(m - i + cx) * kery + (n - j + cy)]; sum += fxp32_mul(w, in); - // printf("m: %d, n: %d, sum: %d\n", m, n, sum); // if (sum > max) { // max = sum; // } @@ -107,8 +95,8 @@ void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, output[(i - cx) * (iny - kery + 1) + (j - cy)] = sum; else output[i * iny + j] = sum; - // printf("access output: %d, address: %d\n", i * iny + j, &output[i * iny + j]); - // printf("sum = %d\n", sum); + // printf("access output: %d\n", (i-cx)*(iny - kery + 1) + (j-cy)); + // printf("sum = %.2f\n", sum); } } // printf("max: %.6f, min: %.6f\n", max, min); @@ -119,7 +107,6 @@ void convolve2DFloat(float* input, float* output, float* kernel, int inx, int in // get kernel center if (kerx % 2 != 1 || kery % 2 != 1) { printf("Kernel size must be odd\n"); - printf("kerx: %d, kery: %d\n", kerx, kery); exit(EXIT_FAILURE); } int cx = kerx / 2; @@ -150,31 +137,26 @@ void convolve2DFloat(float* input, float* output, float* kernel, int inx, int in sum = .0f; for (int m = mMin; m < mMax; ++m) { for (int n = nMin; n < nMax; ++n) { - // 
printf("input access: %d, %d\n", m, n); in = input[m * iny + n]; - // printf("kernel access: %d, %d\n", m - i + cx, n - j + - // cy); w = kernel[(m - i + cx) * kery + (n - j + cy)]; sum += w * in; - // printf("factor: %.2f\n", w * in); } } + // printf("sum = %d\n", (int)(100000*sum)); if (valid) output[(i - cx) * (iny - kery + 1) + (j - cy)] = sum; else output[i * iny + j] = sum; - // printf("access output: %d\n", (i-cx)*(iny - kery + 1) + (j-cy)); - // printf("sum = %.2f\n", sum); } } } -void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output) { - convolve2DFxp(input, output, self->weightsFxp, inputDim.x, inputDim.y, self->dim.x, self->dim.y, - self->padding == VALID); -} +// void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output) { +// convolve2DFxp(input, output, self->weightsFxp, inputDim.x, inputDim.y, self->dim.x, self->dim.y, +// self->padding == VALID); +// } -void Conv2DLayer_forwardFloat(Conv2DLayerHandle self, Dim2D inputDim, float* input, float* output) { - convolve2DFloat(input, output, self->weightsFloat, inputDim.x, inputDim.y, self->dim.x, self->dim.y, - self->padding == VALID); -} \ No newline at end of file +// void Conv2DLayer_forwardFloat(Conv2DLayerHandle self, Dim2D inputDim, float* input, float* output) { +// convolve2DFloat(input, output, self->weightsFloat, inputDim.x, inputDim.y, self->dim.x, self->dim.y, +// self->padding == VALID); +// } \ No newline at end of file diff --git a/sw/applications/l_cnn/conv2dlayer.h b/sw/applications/l_cnn/conv2dlayer.h index 2b47aee2e..cf7f49d42 100644 --- a/sw/applications/l_cnn/conv2dlayer.h +++ b/sw/applications/l_cnn/conv2dlayer.h @@ -6,40 +6,24 @@ #include "fxp32.h" -// TODO: perhaps we could optimize but we need 256 as value... 
-/** - * @brief a 2D dimension - * @param x the rows of a matrix - * @param y the columns of a matrix - */ -typedef struct __Dim2D { - uint16_t x; - uint16_t y; -} Dim2D; - -typedef enum __Conv2DPadding { - VALID, - SAME -} Conv2DPadding; - -typedef struct __Conv2DLayer { - Dim2D dim; - Conv2DPadding padding; - fxp32* weightsFxp; - float* weightsFloat; -} Conv2DLayer; - -typedef struct __Conv2DLayer* Conv2DLayerHandle; - -Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, Conv2DPadding padding); -void Conv2DLayer_destroy(Conv2DLayerHandle self); - -bool Conv2DLayer_setWeightsFxp(Conv2DLayerHandle self, fxp32* weights); -bool Conv2DLayer_setWeightsFloat(Conv2DLayerHandle self, float* weights); -void Conv2DLayer_transformWeightsToFxp(Conv2DLayerHandle self); - -void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output); -void Conv2DLayer_forwardFloat(Conv2DLayerHandle self, Dim2D inputDim, float* input, float* output); +// typedef struct __Conv2DLayer { +// Dim2D dim; +// Conv2DPadding padding; +// fxp32* weightsFxp; +// float* weightsFloat; +// } Conv2DLayer; + +// typedef struct __Conv2DLayer* Conv2DLayerHandle; + +// Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, Conv2DPadding padding); +// void Conv2DLayer_destroy(Conv2DLayerHandle self); + +// bool Conv2DLayer_setWeightsFxp(Conv2DLayerHandle self, fxp32* weights); +// bool Conv2DLayer_setWeightsFloat(Conv2DLayerHandle self, float* weights); +// void Conv2DLayer_transformWeightsToFxp(Conv2DLayerHandle self); + +// void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output); +// void Conv2DLayer_forwardFloat(Conv2DLayerHandle self, Dim2D inputDim, float* input, float* output); void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, int kerx, int kery, bool valid); void convolve2DFloat(float* input, float* output, float* kernel, int inx, int iny, int kerx, int kery, bool valid); diff --git 
a/sw/applications/l_cnn/main.c b/sw/applications/l_cnn/main.c index 887ab8c00..0139ff3b6 100644 --- a/sw/applications/l_cnn/main.c +++ b/sw/applications/l_cnn/main.c @@ -6,7 +6,6 @@ #include #include "test_cnnWeights.h" -#include "testdata_s2.h" #define COMP_PREC 0.01f #define COMP_PREC_I32 512 @@ -19,108 +18,29 @@ void compareVectorsFloat(float* a, float* b, int size, float prec) { void compareVectorsFxp(fxp32* a, fxp32* b, int size, int32_t prec) { for (int i = 0; i < size; ++i) { - // printf("b[%d] address: %d\n", i, &b[i]); assert_closei32(a[i], b[i], prec, i); } } -void test_same_layer1() { - // fxp32* result_fxp = (fxp32*)calloc(xin1*yin1, sizeof(fxp32)); - float* result = (float*)calloc(xin1*yin1, sizeof(float)); - - // convolve2DFxp(input1_fxp, result_fxp, kernel1_fxp, xin1, yin1, xker1, yker1, false); - convolve2DFloat(input1, result, kernel1, xin1, yin1, xker1, yker1, false); - - // compareVectorsFxp(result1_fxp, result_fxp, xin1*yin1, COMP_PREC_I32); - compareVectorsFloat(result1, result, xin1*yin1, COMP_PREC); - - // free(result_fxp); - free(result); -} - -void test_same_layer2() { - // fxp32* result_fxp = (fxp32*)calloc(xin2*yin2, sizeof(fxp32)); - float* result = (float*)calloc(xin2*yin2, sizeof(float)); - - // convolve2DFxp(input2_fxp, result_fxp, kernel2_fxp, xin2, yin2, xker2, yker2, false); - convolve2DFloat(input2, result, kernel2, xin2, yin2, xker2, yker2, false); - - // compareVectorsFxp(result2_fxp, result_fxp, xin2*yin2, COMP_PREC_I32); - compareVectorsFloat(result2, result, xin2*yin2, COMP_PREC); - - // free(result_fxp); - free(result); -} - -void test_same_layer3() { - // fxp32* result_fxp = (fxp32*)calloc(xin3*yin3, sizeof(fxp32)); - float* result = (float*)calloc(xin3*yin3, sizeof(float)); - - - // convolve2DFxp(input3_fxp, result_fxp, kernel3_fxp, xin3, yin3, xker3, yker3, false); - convolve2DFloat(input3, result, kernel3, xin3, yin3, xker3, yker3, false); - - // compareVectorsFxp(result3_fxp, result_fxp, xin3*yin3, COMP_PREC_I32); - 
compareVectorsFloat(result3, result, xin3*yin3, COMP_PREC); - - // free(result_fxp); - free(result); -} - -void test_valid_layer1() { - fxp32* result_fxp = (fxp32*)calloc(yout4*yout4, sizeof(fxp32)); - float* result = (float*)calloc(xout4*yout4, sizeof(float)); - - convolve2DFxp(input4_fxp, result_fxp, kernel4_fxp, xin4, yin4, xker4, yker4, true); - convolve2DFloat(input4, result, kernel4, xin4, yin4, xker4, yker4, true); - - compareVectorsFxp(result4_fxp, result_fxp, xout4*yout4, COMP_PREC_I32); - compareVectorsFloat(result4, result, xout4*yout4, COMP_PREC); - - free(result_fxp); - free(result); -} - -void test_valid_layer2() { - fxp32* result_fxp = (fxp32*)calloc(xout5*yout5, sizeof(fxp32)); - float* result = (float*)calloc(xout5*yout5, sizeof(float)); - - convolve2DFxp(input5_fxp, result_fxp, kernel5_fxp, xin5, yin5, xker5, yker5, true); - convolve2DFloat(input5, result, kernel5, xin5, yin5, xker5, yker5, true); - - compareVectorsFxp(result5_fxp, result_fxp, xout5*yout5, COMP_PREC_I32); - compareVectorsFloat(result5, result, xout5*yout5, COMP_PREC); - - free(result_fxp); - free(result); -} - -void test_valid_layer3() { - fxp32* result_fxp = (fxp32*)calloc(xout6*yout6, sizeof(fxp32)); - float* result = (float*)calloc(xout6*yout6, sizeof(float)); - - convolve2DFxp(input6_fxp, result_fxp, kernel6_fxp, xin6, yin6, xker6, yker6, true); - convolve2DFloat(input6, result, kernel6, xin6, yin6, xker6, yker6, true); - - compareVectorsFxp(result6_fxp, result_fxp, xout6*yout6, COMP_PREC_I32); - compareVectorsFloat(result6, result, xout6*yout6, COMP_PREC); - - free(result_fxp); - free(result); -} - void test_cnn() { - CnnHandle cnn = Cnn_create((Dim2D){3u, 15u}, (Dim2D){3u, 5u}, (Dim2D){3u, 1u}, SAME, VALID); + Dim2D inputDim = {3u, 15u}; + Dim2D layer1Dim = {3u, 5u}; + Dim2D layer2Dim = {3u, 1u}; + CnnHandle cnn = Cnn_create(inputDim, layer1Dim, layer2Dim, SAME, VALID); - Conv2DLayer_setWeightsFloat(cnn->layer1, ke1); - Conv2DLayer_setWeightsFloat(cnn->layer2, ke2); - 
Conv2DLayer_setWeightsFxp(cnn->layer1, ke1_fxp); - Conv2DLayer_setWeightsFxp(cnn->layer2, ke2_fxp); + Cnn_setWeights1Float(cnn, ke1); + Cnn_setWeights2Float(cnn, ke2); + // Conv2DLayer_setWeightsFxp(cnn->layer1, ke1_fxp); + // Conv2DLayer_setWeightsFxp(cnn->layer2, ke2_fxp); - float* result = (float*)calloc(1*15, sizeof(float)); - // fxp32* result_fxp = (fxp32*)calloc(1*15, sizeof(fxp32)); + float* result = (float*)my_calloc(15, sizeof(float)); + float* inter = (float*)my_malloc(2*15* sizeof(float)); + // fxp32* result_fxp = (fxp32*)my_calloc(1*15, sizeof(fxp32)); Cnn_forwardFloat(cnn, inp, result); + for (int i = 0; i < 15; ++i) { + printf("layer2 output[%d] = %d\n", i, (int)(100000*result[i])); + } // Cnn_forwardFxp(cnn, inp_fxp, result_fxp); compareVectorsFloat(result, res, 15, COMP_PREC*10); @@ -131,65 +51,10 @@ void test_cnn() { Cnn_destroy(cnn); } -void compare_tf_conv_3() { - CnnHandle cnn = Cnn_create((Dim2D){3u, 256u}, (Dim2D){3u, 21u}, (Dim2D){3u, 1u}, SAME, VALID); - - Conv2DLayer_setWeightsFloat(cnn->layer1, weights1_2); - Conv2DLayer_setWeightsFloat(cnn->layer2, weights2_2); - - float* result = (float*)calloc(1*256, sizeof(float)); - Cnn_forwardFloat(cnn, xin_2, result); - - compareVectorsFloat(result, xout_2, 256, COMP_PREC); - - Cnn_predictFloat(cnn, xin_2, ppg_2, result); - - compareVectorsFloat(result, ppgf_2, 256, COMP_PREC); - - free(result); - - // Cnn_freezeModel(cnn); - - // fxp32* result_fxp = (fxp32*)calloc(1*256, sizeof(fxp32)); - // Cnn_forwardFxp(cnn, xin_2_fxp, result_fxp); - - // compareVectorsFxp(result_fxp, xout_2_fxp, 256, COMP_PREC_I32); - - // Cnn_predictFxp(cnn, xin_2_fxp, ppg_2_fxp, result_fxp); - - // compareVectorsFxp(result_fxp, ppgf_2_fxp, 256, COMP_PREC_I32); - - // free(result_fxp); - Cnn_destroy(cnn); -} - -int main() { - PRINTF("\033[1;93m====== Test CNN =========\n"); - PRINTF("\033[0m====== Test Same ========\n"); - test_same_layer1(); - PRINTF("\033[1;32m====== Test 1 passed ====\n"); - test_same_layer2(); - 
PRINTF("\033[1;32m====== Test 2 passed ====\n"); - test_same_layer3(); - PRINTF("\033[1;32m====== Test 3 passed ====\n"); - PRINTF("\033[0m====== Test Same end ====\n\n"); - PRINTF("\033[0m====== Test Valid =======\n"); - test_valid_layer1(); - PRINTF("\033[1;32m====== Test 1 passed ====\n"); - test_valid_layer2(); - PRINTF("\033[1;32m====== Test 2 passed ====\n"); - test_valid_layer3(); - PRINTF("\033[1;32m====== Test 3 passed ====\n"); - PRINTF("\033[0m====== Test Valid end ===\n\n"); +int main(int argc, char *argv[]) { PRINTF("\033[0m====== Test CNN =========\n"); test_cnn(); PRINTF("\033[1;32m====== Test CNN passed ==\n"); PRINTF("\033[0m====== Test CNN end =====\n\n"); return EXIT_SUCCESS; - PRINTF("\033[0m====== Comp TF ==========\n"); - PRINTF("\033[0m====== Sample 3 =========\n"); - // compare_tf_conv_3(); - PRINTF("\033[1;32m====== Comp TF passed ===\n"); - PRINTF("\033[0m====== Comp TF end ======\n"); - return EXIT_SUCCESS; } \ No newline at end of file diff --git a/sw/applications/l_cnn/test_cnnWeights.h b/sw/applications/l_cnn/test_cnnWeights.h index f3d6efb8e..f1723849e 100644 --- a/sw/applications/l_cnn/test_cnnWeights.h +++ b/sw/applications/l_cnn/test_cnnWeights.h @@ -5,84 +5,6 @@ #pragma GCC diagnostic ignored "-Wunused-variable" -//// Case 1 mode: same -static uint16_t xin1 = 3u; -static uint16_t yin1 = 5u; -static uint16_t xker1 = 1u; -static uint16_t yker1 = 3u; -static float input1[] = { 0.4488318299689684, -0.7634520066109527, 1.4589411306665614, -0.6241278873730751, 3.917730007820797, 4.636627605010293, -1.1655848117422227, 2.917250380826646, 0.28894919752904435, 0.6804456109393229, 4.25596638292661, -4.289639418021131, -4.1287070029845925, -4.797816025596743, 3.32619845547938,}; -static float kernel1[] = { 0.48813503927324753, 2.151893663724195, 1.027633760716439,}; -static float result1[] = { 0.18128931430783346, 0.07548016813612674, 2.12544361152851, 3.3950950613321194, 8.12587968919733, 8.779735260745717, 2.752950806946225, 
6.005583772654533, 2.7450489586482236, 1.605292826573081, 4.750208805184423, -11.396160270199067, -15.908859470489649, -8.92164263255216, 4.815643266554799,}; -static int32_t input1_fxp[] = { 3765074, -6404299, 12238485, -5235564, 32864302, 38894852, -9777634, 24471670, 2423881, 5707991, 35701632, -35984104, -34634104, -40246996, 27902174,}; -static int32_t kernel1_fxp[] = { 4094773, 18051392, 8620417,}; -static int32_t result1_fxp[] = { 1520765, 633173, 17829514, 28480122, 68164816, 73649760, 23093426, 50378488, 23027140, 13466172, 39847640, -95597920, -133453184, -74840160, 40396544,}; - -//// Case 2 mode: same -static uint16_t xin2 = 5u; -static uint16_t yin2 = 5u; -static uint16_t xker2 = 3u; -static uint16_t yker2 = 3u; -static float input2[] = { 0.38816734003356945, -0.8080548559670522, 1.8521950039675952, -2.9554775026848255, 3.7811743639094537, -4.726124068020738, 1.704675101784022, -0.8269519763287301, 0.5868982844575168, -3.596130614047662, -3.0189851091512123, 3.007445686755366, 4.682615757193975, -1.8657582184075716, 1.9232261566931408, 3.7638915229603835, 3.9460666350384734, -4.149557886302221, -4.609452167671177, -3.3016958043543108, 3.7814250342941307, -4.016531661669499, -0.7889237499494781, 4.578895301505019, 0.33165284973017073,}; -static float kernel2[] = { -0.8297799529742598, 2.2032449344215808, -4.998856251826551, -1.9766742736816023, -3.5324410918288693, -4.0766140523120225, -3.137397886223291, -1.5443927295695223, -1.0323252576933006,}; -static float result2[] = { 7.462162004395981, 7.585168841573074, 2.4258572583235107, -3.23507218200551, -3.802254814609295, 16.197866692278705, -4.676687562908773, 1.9430859639149014, -26.523794841323774, 25.209671456054078, -30.416593954047023, -25.553426393591323, -22.263511994498945, 32.99627549623894, 8.044960659650508, -52.76130634645628, -23.585886440515488, 51.88998308860701, 15.39566943678927, 11.681919811790573, -8.41685670030438, 36.24364652628926, 2.6849419534286167, -6.1750827833904935, 
-13.672142251992868,}; -static int32_t input2_fxp[] = { 3256183, -6778455, 15537338, -24792342, 31718790, -39645604, 14299851, -6936976, 4923259, -30166530, -25325082, 25228282, 39280628, -15651114, 16133190, 31573810, 33102006, -34809016, -38666888, -27696632, 31720892, -33693108, -6617972, 38410556, 2782105,}; -static int32_t kernel2_fxp[] = { -6960699, 18482158, -41933444, -16581546, -29632264, -34197116, -26318402, -12955305, -8659772,}; -static int32_t result2_fxp[] = { 62597152, 63629008, 20349566, -27137752, -31895626, 135877552, -39230900, 16299786, -222497712, 211474048, -255152880, -214357680, -186759872, 276792832, 67486024, -442593920, -197852752, 435284736, 129148232, 97995048, -70605712, 304033728, 22522926, -51800348, -114690240,}; - -//// Case 3 mode: same -static uint16_t xin3 = 3u; -static uint16_t yin3 = 61u; -static uint16_t xker3 = 3u; -static uint16_t yker3 = 31u; -static float input3[] = { 1.5751238765904398, -2.8536425360004634, -0.8324655980466291, 1.43841934148195, 1.614813267154414, -3.295228665162293, 3.8165223574854856, 2.780081598084683, -3.6604579182830896, 3.6891662643207876, 2.4877787827685447, 2.9858565409908087, 0.43345230583902694, -2.791620849385381, 4.184586465896006, 0.9208457766637634, -1.5376209054485357, -2.3622147071998025, 4.13915477165132, -0.8026453869317933, 0.4019151566713086, 1.0844215780504962, 3.2624982843415786, 1.2356318456213078, -3.2328783841557396, 0.9125735265354589, -0.1073383300514088, 0.4790778009037666, 1.9952061979054818, -2.541888360958693, -3.133728544135198, -3.894168522038389, -2.2594074746394632, -4.897499606004091, 1.2935972282958215, -2.0482769478025755, -3.1271345529810226, -4.047119477391963, -2.1624419253474536, -2.8507561586930477, -2.1439626608909768, -0.2859014253032104, 0.49496821088408005, 3.4511311557470155, 4.885097007841644, -4.5113191377064865, -2.6788174856383606, 1.433114272482352, -3.385334371083233, 3.7014589336900148, -2.825975736949741, 2.4175503888344894, 1.5302051323110106, 
2.988855115046296, -4.687524377347384, -2.7042597125565484, 2.0462749912045917, -4.124374909988772, -4.694105168056807, -1.428650663004393, 0.8978199147176378, -4.477772726158397, -4.343363337936225, -4.5649881394028435, -1.048492415324962, 1.6842704601605778, -3.019728881838304, 3.762664517041591, -0.6761090455515264, 1.1964146370996467, -2.0957325706501164, 1.1525457346427626, 4.536599623823227, -0.5198528347591038, -2.9295016174668573, -0.7463270809461351, -0.5542573523624226, 0.07617486671961338, 0.25733249810957926, -4.575708081174539, -3.3558091382988, -0.49772820382626737, 2.079711706189208, 2.7758790830553357, 2.7710299946964465, 0.02778752509221416, 4.567475933072476, -1.6812628746012415, -0.1854183765396673, 2.4837399934103637, 3.5783951843951396, -0.8553994486708847, 3.486845142673234, -0.5542905824929081, 2.1574708428635425, -4.915516364211802, -4.74798576329251, 4.404504935420885, -3.978620517688065, 1.6196779043970766, -2.167412561788792, -2.991576562245546, -1.1166581788524086, 4.259015965084993, 0.7027420975071905, 4.167146119578826, 2.0226424081972496, 0.012164516870869946, 0.06086833512638812, -2.8117921367062726, -4.953364772649501, -0.8216375599728423, 0.6337611033903512, 3.765618684233271, 1.7758484022790944, 3.4233304610347126, 4.2616128692492, 4.411158736336223, 3.1601915192439396, -3.686053945205562, -1.53369985009736, -2.914975054744775, 3.673815371565235, 2.923302193062579, -1.5099835931614058, 0.6914053205563544, 3.051315402557016, 3.3316124473397615, -2.94206323049928, 2.306802005261156, 3.0341416415944895, -4.568709636466373, -3.928125957684201, -0.6491635014117234, -2.791187781392559, 3.8062206616568073, 2.1968753772441127, 2.1300729354698102, 2.6985965699270196, -1.6925297383059523, -2.7138974816690107, 1.263439592010264, -0.8552075047461081, 4.36781072234186, 1.424524189027922, -1.1309965698763182, 3.5511966080419057, -1.1920741628379194, -3.216901722222798, 2.8165942566608138, -0.2779141258419857, -2.405195955957229, 
1.92399245881231, 4.8048507899877855, -2.5374641103221016, 2.9024380259404525, 2.6233524422405887, -3.7996020782258597, 3.3890259455900065, -0.3826109524346837, -3.7421861744691243, 0.36343473096068557, -2.03487249205371, -3.242758067432694, -4.207618292992004, -3.0110090817050086, -0.7041138094358619, 1.4181176362883168, -4.126208212102036, 2.1573058777096517, -3.9825518903661794, -3.244009290911781, 2.133215621574106, 3.852636744104892, 4.8313836104307235, 1.529069507012145, -0.22263646993624597, -4.122124451150055, 0.5895079469727671, -3.1884977059135355, 0.1440206047122059, 1.0893014893439155, 3.3321249012260417,}; -static float kernel3[] = { -0.6400509785799624, -4.740737681721087, 0.49662477878709144, -0.6467760738172315, -0.7963219791251097, -1.6966517899612588, -2.953513659621575, 1.192709663506637, -2.0034532632547686, -2.3317272489713337, 1.2113383276929488, 0.2914209427703902, -3.6542005465506646, 0.13578121265746468, -3.155601343530847, 2.8533514781667346, 3.539752926394888, -0.05763162618072215, 3.46561485357468, -4.20354522990939, 0.05246090121703961, -4.347134956131219, -0.7187767240261058, -4.034690843393874, -3.7284002829872254, 0.9674530897859581, -2.7398799939576413, -3.9305431569001703, -2.796937929294403, -1.5017371499670071, -0.32212515417699805, -2.9825677373503465, 1.4040672521491482, -0.1693016444824833, 0.05236720018549157, -1.131073488814407, 2.936374544415771, 0.8000417887780662, -3.3770140149768615, 2.0075234660715626, 4.645510800892552, 8.361170216719671e-05, 3.89520063946145, -1.5838634732889032, 0.6714412762770925, -0.7245403670388999, -0.63252736973201, 2.7655918499710026, 0.35604173497656344, 4.537422269448667, 0.4420816014810214, -4.179050777249752, -1.336575983249796, 3.5085050400450175, -0.9372495695204917, -4.727976341051504, -2.528227610026467, -4.328556292630453, 4.938520114212729, 4.7058031337717345, 3.0025835113258683, 1.0181712140546741, 2.6495986045168154, -3.3077455341582205, -2.069767681805602, 0.240668753003642, 
-1.433757188772411, -4.543210347545999, 4.831534453572127, -0.5864508064515324, 0.040004393791526205, -1.7645868246530116, -2.4025524725286296, -1.1311011467923757, 3.320168996345009, 2.3674705628711292, -1.2078943305836254, -4.869826633054494, 2.974049390325936, -2.306112024036988, 0.826848885290346, -4.744490581503072, 1.6220201926832578, -1.1247657412671854, -0.029262012654873715, -0.8509416262531282, -1.491280986983595, 0.509779053244193, 4.729106898748249, -3.872237847619285, -1.8674147157028642, -4.5820229018469, 2.383997586209004,}; -static float result3[] = { 20.938379585920273, 8.02302877465514, -16.857694443813095, 4.581030951067322, -47.684895622509956, 25.342934266840597, 68.36750478864711, 2.5676380052143335, 29.055295803968534, -28.04374791458958, 41.83114391463987, -26.50924062032297, -30.51934104241301, 84.46348958070554, 40.264328143733174, 34.31532066976898, -2.511563242949011, -118.27980904392957, -13.571811349883715, -32.09344069285111, 71.49481770295496, -22.06395380628139, -21.131452617454165, -38.37649565949362, -0.308430518672953, 6.564041191235514, 12.525132993990898, 31.79873177667102, 50.146841838223985, -48.03904602832188, -9.240245071834234, 81.75909358754869, 22.224903966714834, -102.75317844603369, -36.0440186033606, 0.4617001857888705, -51.90829635455982, 19.583409528784266, -118.55050489358963, -26.758792800151088, 36.963275595009, -17.143593036238077, -157.05536586299988, -143.16188227276268, -29.809500883012944, 38.09167352342915, 41.28100796765678, -9.532238623212233, 83.53722296123598, 29.477448838273464, -1.3946555128049916, -19.694261623611865, -106.38617304550061, 78.21896118647508, 3.5221474774463024, -37.63561922915278, -20.289313489154242, -22.60404782786653, -0.9210687069457979, 26.983132426984596, 2.419086747570052, -86.98459645783491, -145.07037665385383, -46.325015600230536, 18.816364649088612, 34.3594532282602, -89.6152853592831, -83.96484146396111, -66.0178965718038, 71.80911903889299, 29.92814314806981, 
34.92300062419566, -23.130910037195996, -75.96390223746815, -21.00109946515495, -71.85887873345892, -22.328707246431552, 57.26708051045838, -12.24282331844404, -7.1714798944457945, -30.534493818296973, 27.523283778596163, -70.57989303251122, 58.15567049761824, 33.94735267374952, 38.62037691759168, 39.95618959955201, 95.21166639406364, -7.204825910354527, 49.21666388406004, 101.77680517404558, 99.64834752754254, 9.278473282359952, -16.02033177437054, 36.914168164198315, 6.687573994493457, -9.177060033952387, -36.05265657393037, -60.868336303905366, -22.917308368712966, 101.7498445942521, 100.61491816352485, 33.81388073239066, 210.15189920300776, 103.46315808071684, -60.18392583924689, -59.36631256697667, 11.274377955219325, -113.55011114921368, 92.37440604721446, 182.30644467551863, 34.14469184281109, 95.75779516619747, 29.04656936675967, 62.81447648353387, 60.8592166174747, 20.61627755180531, -6.857905961501912, -66.21411470934667, -56.966329923220634, 44.929314745232105, 60.345150906171696, 0.7796203920941505, 132.41714347101967, -30.52398067257131, 27.23418453373192, 32.7731215542053, 6.408595431273242, 49.27582417569214, 63.438697156687184, 23.039260818042102, 39.783514258951904, 50.77600710118369, 31.393895099039494, 15.080702964409335, -60.97851929404653, 34.34640168372713, -22.530726723004733, -124.39234555627823, -30.170425348290156, 6.38116061686459, -12.741724457526818, 15.305365422729299, 60.98560408737588, -26.56097389044882, -8.254757881856829, 6.138376489617752, -56.23954121746415, 43.581883038015484, 40.86606982571536, 37.993522967048726, -4.626738134589028, 2.5182535809600917, -0.4150455049801065, -54.0913458236004, 94.63587924530772, -108.43645668517142, 6.520016156047285, -65.42753241808275, 60.454244741479215, 53.104516440459065, -100.63167603344571, 44.0241053142488, -11.781082893294116, -124.76747909867008, -45.95018405728458, 25.025227680595158, -120.48169114286, -13.862453835535531, -65.28234182954981, -91.37730985309311, 39.48321142614181, 
-9.869784175782478, 25.04695690346387, 0.9868548102024803, 2.766199922778613, 23.602380623388747, -12.446914947467487, 23.768689599761093, 32.84198722546352, -22.20616112949299, -28.841533549735036, -19.479887199191595,}; -static int32_t input3_fxp[] = { 13213097, -23938088, -6983227, 12066336, 13546035, -27642382, 32015310, 23321014, -30706146, 30946970, 20869000, 25047180, 3636061, -23417812, 35102856, 7724614, -12898499, -19815694, 34721748, -6733077, 3371508, 9096788, 27367820, 10365231, -27119350, 7655221, -900419, 4018795, 16737003, -21322906, -26287620, -32666654, -18953284, -41083204, 10851480, -17182192, -26232306, -33949700, -18139878, -23913876, -17984862, -2398315, 4152094, 28950186, 40979164, -37843688, -22471550, 12021834, -28398242, 31050088, -23706002, 20279882, 12836291, 25072334, -39321804, -22684974, 17165398, -34597764, -39377008, -11984390, 7531459, -37562280, -36434772, -38293896, -8795392, 14128685, -25331322, 31563518, -5671613, 10036253, -17580280, 9668254, 38055756, -4360841, -24574440, -6260645, -4649447, 639001, 2158661, -38383820, -28150568, -4175246, 17445886, 23285762, 23245084, 233098, 38314764, -14103455, -1555402, 20835122, 30017754, -7175610, 29249778, -4649726, 18098178, -41234340, -39828992, 36947664, -33375088, 13586843, -18181574, -25095164, -9367208, 35727216, 5895028, 34956556, 16967154, 102043, 510600, -23587022, -41551836, -6892395, 5316373, 31588300, 14896896, 28716978, 35749000, 37003480, 26509608, -30920862, -12865607, -24452584, 30818198, 24522436, -12666660, 5799928, 25596288, 27947590, -24679816, 19350858, 25452224, -38325116, -32951508, -5445578, -23414180, 31928894, 18428726, 17868346, 22637468, -14197969, -22765822, 10598499, -7174000, 36639852, 11949775, -9487487, 29789596, -9999843, -26985328, 23627306, -2331312, -20176246, 16139619, 40306008, -21285792, 24347414, 22006276, -31873372, 28429210, -3209573, -31391732, 3048711, -17069748, -27202226, -35296060, -25258174, -5906534, 11896033, -34613144, 18096794, 
-33408066, -27212722, 17894710, 32318260, 40528584, 12826765, -1867610, -34578888, 4945151, -26747058, 1208132, 9137723, 27951890}; -static int32_t kernel3_fxp[] = { -5369137, -39768192, 4165990, -5425551, -6680033, -14232547, -24775868, 10005174, -16806184, -19559946, 10161442, 2444616, -30653656, 1139015, -26471102, 23935648, 29693600, -483449, 29071684, -35261892, 440073, -36466412, -6029536, -33845440, -31276088, 8115584, -22983780, -32971786, -23462416, -12597484, -2702181, -25019592, 11778170, -1420205, 439287, -9488132, 24632094, 6711237, -28328446, 16840328, 38969368, 701, 32675312, -13286410, 5632457, -6077885, -5306024, 23199466, 2986694, 38062656, 3708449, -35056420, -11212012, 29431474, -7862219, -39661140, -21208310, -36310560, 41427308, 39475136, 25187496, 8541039, 22226444, -27747380, -17362470, 2018875, -12027227, -38111212, 40529848, -4919506, 335581, -14802427, -20154070, -9488364, 27851596, 19859782, -10132552, -40851068, 24948134, -19345070, 6936111, -39799672, 13606492, -9435219, -245467, -7138215, -12509772, 4276336, 39670624, -32482686, -15665010, -38436792, 19998422}; -static int32_t result3_fxp[] = { 175643856, 67302040, -141412592, 38428472, -400009888, 212591936, 573508224, 21538908, 243733488, -235248016, 350905056, -222375632, -256014784, 708531136, 337761664, 287857760, -21068520, -992202944, -113848608, -269219296, 599742016, -185085856, -177263472, -321925376, -2587302, 55063168, 105068432, 266747088, 420662208, -402980736, -77512792, 685844992, 186436000, -861956160, -302359136, 3873021, -435438336, 164277552, -994473728, -224469024, 310070432, -143810880, -1317475840, -1200928896, -250060224, 319536128, 346290208, -79962216, 700761024, 247274768, -11699218, -165207440, -892431872, 656148224, 29545914, -315710464, -170199104, -189616496, -7726484, 226350928, 20292770, -729679680, -1216938496, -388602400, 157843104, 288228000, -751747520, -704348160, -553798272, 602378560, 251055456, 292955360, -194036144, -637231424, -176169984, 
-602795968, -187306768, 480391104, -102700248, -60158732, -256141904, 230882032, -592067072, 487845120, 284771040, 323971200, 335176800, 798693376, -60438460, 412859296, 853765696, 835910912, 77833472, -134388288, 309658496, 56099436, -76982760, -302431616, -510600608, -192244320, 853539584, 844019136, 283651392, 1762881920, 867911872, -504859360, -498000736, 94576336, -952527360, 774892672, 1529297280, 286426432, 803274624, 243660288, 526926016, 510524096, 172941872, -57528284, -555444224, -477868224, 376894400, 506211808, 6539930, 1110795520, -256053712, 228456896, 274920864, 53759196, 413355584, 532162368, 193267328, 333728320, 425940032, 263351072, 126506104, -511524896, 288118496, -189001440, -1043478656, -253087872, 53529056, -106885328, 128390712, 511584320, -222809600, -69245928, 51492436, -471771456, 365591328, 342809440, 318712768, -38811892, 21124642, -3481654, -453751104, 793863296, -909630912, 54693860, -548845952, 507126976, 445472960, -844159680, 369300960, -98826888, -1046625472, -385458080, 209926832, -1010673664, -116286688, -547627968, -766528448, 331209184, -82793752, 210109104, 8278338, 23204566, 197991120, -104412288, 199386224, 275498560, -186278784, -241940320, -163409136,}; - -//// Case 4 mode: valid -static uint16_t xin4 = 5u; -static uint16_t yin4 = 5u; -static uint16_t xker4 = 3u; -static uint16_t yker4 = 3u; -static uint16_t xout4 = 3u; -static uint16_t yout4 = 3u; -static float input4[] = { -0.5919015634936358, -4.701237891214331, -0.43166775605288876, 1.4914404761476074, -2.215127173520247, 1.7625490198013125, 0.9086281741635087, -4.760181176228347, 0.5885408799088196, -2.407475530925346, -0.8489880298993038, -2.164749182286813, 1.9313791831299634, -0.5954628232926051, -3.4313226152503673, 0.4464901803184471, 2.80314764511367, -1.9363646762382025, -2.780421160678186, -1.1202874244435126, 4.363836498604304, 4.759954224729338, 1.7238367591281367, 4.02834108538398, 3.4575087129317925,}; -static float kernel4[] = { 0.5079790257457546, 
2.081478226181048, -2.0909526108705565, 0.10827605197663015, 3.9294695434765465, 3.9629308893343804, -3.7441468953616375, -2.9275712186181324, -4.485327966991701,}; -static float result4[] = { -23.43327726779893, -17.55769120994077, 9.675929089779544, 10.602713570309055, 1.9624163946072903, 8.526826051029296, -43.59194373770315, -55.09548172766729, -42.41408763118508,}; -static int32_t input4_fxp[] = { -4965230, -39436840, -3621091, 12511110, -18581834, 14785333, 7622125, -39931292, 4937038, -20195368, -7121828, -18159232, 16201583, -4995104, -28784020, 3745431, 23514506, -16243404, -23323864, -9397652, 36606512, 39929392, 14460591, 33792176, 29003686,}; -static int32_t kernel4_fxp[] = { 4261237, 17460704, -17540182, 908285, 32962780, 33243474, -31408180, -24558248, -37625660,}; -static int32_t result4_fxp[] = { -196572576, -147284592, 81167576, 88942008, 16461942, 71528200, -365675712, -462174400, -355795168,}; - -//// Case 5 mode: valid -static uint16_t xin5 = 3u; -static uint16_t yin5 = 5u; -static uint16_t xker5 = 1u; -static uint16_t yker5 = 3u; -static uint16_t xout5 = 3u; -static uint16_t yout5 = 3u; -static float input5[] = { 2.148159936743647, 1.9772882459727086, -2.839105044196236, 4.762744547762418, -4.937697447954101, -2.4701763761655604, -0.6520846759555425, 2.7938292179375246, -3.0231492539974694, 3.6299323559922225, 4.834006771753128, -3.361577585953013, 0.9733394393285923, -4.91013902332445, -1.1342871735637061,}; -static float kernel5[] = { 4.670298390136766, 0.4723224917572235, 4.726843599648843,}; -static float result5[] = { -2.453539901299475, 30.406301525520632, -34.34963992380085, 1.3615387453760093, -16.015795336628752, 28.77823723432869, 25.58932864147245, -38.44929949447413, -3.1349815483414165,}; -static int32_t input5_fxp[] = { 18020072, 16586696, -23816140, 39952796, -41420408, -20721342, -5470082, 23436338, -25360014, 30450080, 40550588, -28198956, 8164963, -41189232, -9515090,}; -static int32_t kernel5_fxp[] = { 39177304, 3962128, 
39651640,}; -static int32_t result5_fxp[] = { -20581784, 255066544, -288145664, 11421415, -134350224, 241409344, 214658848, -322536096, -26298132,}; - -//// Case 6 mode: valid -static uint16_t xin6 = 3u; -static uint16_t yin6 = 31u; -static uint16_t xker6 = 3u; -static uint16_t yker6 = 1u; -static uint16_t xout6 = 1u; -static uint16_t yout6 = 31u; -static float input6[] = { 4.186109079379216, -0.11588811205170835, 1.1174386290264575, 2.6590785648031554, 0.18417987872943264, -2.0319949842377802, -3.122787713387484, -4.192587312351251, 2.3844029619897, -0.5869077710404689, -3.416901322873488, 3.7993703120127886, -2.259135380077754, -0.8576498091894873, -2.0392006726635206, 1.287879088794833, 0.7983781018954508, 0.999291966249876, -2.341808824644928, -2.153141193586362, -2.464117942262125, -1.7243605231126589, -3.5583569934657957, -3.343871387987324, 4.6393052906794185, 4.602267152856939, -3.1158534440406482, -4.75693438370513, -2.9544445362004934, 1.9984361412655751, 2.7951458555552975, -4.770669075609185, 0.77662858129756, -4.983578272839547, 0.15472611905393485, 1.3979517613085708, 4.856244028041889, -2.4090240358889425, 3.024968852628703, 3.704830870014831, 4.227496139456699, -4.977857874675564, -0.30511628242422706, 4.814687376060105, -1.0105519609714952, 3.137324775869182, 0.46456497972477706, 2.7085408714022456, -0.15068925011541978, -4.708884363272823, -4.134743115977949, -3.885461876219767, -2.4875488830945236, 4.64915292526365, 1.3176605273774546, 3.166602026153619, 0.6608199609275633, 1.353562055818931, 3.1190239118041063, 4.2668261524387425, 4.126267637037831, 3.2481072043852546, -4.057972678226774, -1.389515815361909, -4.6449096823966975, 0.46358348540806915, 2.9614272085600426, -4.488571968986529, -3.113322642068921, -1.345222320878352, -2.557091330420916, 2.9508747292103195, -1.4790506423363148, 1.3887768206036535, -0.06584948103240329, 0.8349974372005651, 4.392993519648517, 4.4354008201587956, -3.883075728164286, 3.435549661102238, -1.5397184824404766, 
-3.9917272713735485, -1.1659093393288478, 0.10354797339247934, 4.6110308196512975, -1.2848738467199272, -4.876305883996423, 3.597068869881916, -3.888892504596347, -0.2166095601321505, 3.499800323861935, 0.14737967000679042, -0.5339217207768678,}; -static float kernel6[] = { -2.780068289102605, 3.7073230617737636, -2.932808446605736,}; -static float result6[] = { -17.42282404360473, 7.276573635261671, -7.959660008373655, -8.178401848526025, -4.014672567672883, 36.816872081897245, 8.881311672638013, 26.815495210093946, 14.605660913908213, 8.649987283561435, -4.617536065501945, -15.76668994325529, 24.323276089618997, -3.8110050890636913, 4.416385118733144, -14.86628034137447, 19.210207686882818, -13.412562703854679, -6.431267771954796, 2.3640225039054252, -4.134857509100443, -4.731993703294383, 13.605117160950932, 17.9494729397995, 13.143302281753002, -20.894257853421145, 25.085753963769875, 25.42310605950678, 13.767816611263237, 9.309381884658203, 5.836976522005776,}; -static int32_t input6_fxp[] = { 35115628, -972139, 9373755, 22305968, 1545012, -17045610, -26195842, -35169972, 20001822, -4923339, -28663046, 31871428, -18951002, -7194488, -17106056, 10803513, 6697281, 8382668, -19644516, -18061858, -20670520, -14464984, -29849662, -28050426, 38917312, 38606616, -26137674, -39904056, -24783678, 16764097, 23447382, -40019272, 6514832, -41805284, 1297936, 11726869, 40737128, -20208358, 25375278, 31078374, 35462808, -41757300, -2559501, 40388524, -8477124, 26317788, 3897053, 22720888, -1264073, -39500984, -34684740, -32593616, -20867072, 38999920, 11053338, 26563384, 5543359, 11354501, 26164268, 35792732, 34613640, 27247098, -34040744, -11656103, -38964328, 3888820, 24842252, -37652872, -26116444, -11284543, -21450436, 24753732, -12407176, 11649904, -552385, 7004466, 36851100, 37206840, -32573600, 28819480, -12916095, -33485036, -9780356, 868623, 38680132, -10778303, -40905420, 30174400, -32622394, -1817052, 29358452, 1236310, -4478860,}; -static int32_t kernel6_fxp[] = 
{ -23320904, 31099280, -24602180,}; -static int32_t result6_fxp[] = { -146153248, 61040324, -66770468, -68605408, -33677516, 308842304, 74501840, 224944672, 122521160, 72561352, -38734700, -132260584, 204038432, -31969028, 37047324, -124707400, 161146896, -112512728, -53949384, 19830858, -34685700, -39694840, 114127992, 150571088, 110254008, -175273744, 210434560, 213264464, 115492816, 78092752, 48964108,}; - //// Test full cnn static float inp[] = { 0.10687026183436554, 0.6433775638335413, 1.5719371774211934, 1.8606301880573626, 1.079730707565259, 1.0363964731573039, 0.8401961894177536, 0.8062113303254894, 1.0693255244711337, 1.8973979155425758, -0.5051419304161229, -1.6677822278462204, -1.0414382415511003, -1.1140689598280193, -0.5456008127835759, 1.241256928449407, -1.7596153205346008, -0.20102576841377573, 1.2526821099333798, -0.9430465018374048, -1.7463960721065255, -1.0315693109762605, -1.6597181529991376, 1.231109741389408, -1.3189996974476714, -1.2186214977502452, 1.2585680462314413, 1.2411421167299825, 0.35749551293497017, 1.658937360469848, -1.7607134529814616, 1.859986561081329, 0.28390086742322307, -0.7899275499984229, 1.3028233059934027, 0.637669121413285, 1.9460057656135836, -1.5700418656996042, 0.3236741105335814, -0.10868737977085274, 0.6090831439508024, -1.0325637836102834, -1.8754797463263757, 0.17692941098474302, -0.54115928483046,}; static float ke1[] = { 1.2019056830022787, -1.9184344862073974, 0.2904745952311969, -0.3544655368824938, 1.9405471907832181, 1.2056061216442804, -1.7841515914194126, -1.2380889061779894, -0.19032461673209822, 0.8117683084719127, -0.6718074018572904, -0.5600672193147695, 1.685882263836708, 1.8145220229093884, -0.36925708529142565,}; diff --git a/sw/applications/l_cnn/utils.h b/sw/applications/l_cnn/utils.h index c90bcfdec..afdfc6bd2 100644 --- a/sw/applications/l_cnn/utils.h +++ b/sw/applications/l_cnn/utils.h @@ -37,6 +37,26 @@ void assert_closei32(int32_t a, int32_t b, int32_t prec, int idx) { } } +void* 
my_calloc(size_t num, size_t size) { + void *ptr = calloc(num, size); + if (ptr != NULL) { + printf("Allocated memory range: [%p - %p]\n", ptr, (char*)ptr + num * size - 1); + } else { + printf("Failed to allocate memory\n"); + } + return ptr; +} + +void* my_malloc(size_t size) { + void *ptr = malloc(size); + if (ptr != NULL) { + printf("Allocated memory range: [%p - %p]\n", ptr, (char*)ptr + size - 1); + } else { + printf("Failed to allocate memory\n"); + } + return ptr; +} + // Vector export for plots #if defined(SIMULATION) || defined(TARGET) #define VECTOR_EXPORT(...) diff --git a/sw/applications/l_cnn_static/cnn.c b/sw/applications/l_cnn_static/cnn.c new file mode 100644 index 000000000..60cc088da --- /dev/null +++ b/sw/applications/l_cnn_static/cnn.c @@ -0,0 +1,81 @@ +// clang-format off + +#include "cnn.h" + +#include +// clang-format on + +// CnnHandle Cnn_create(Dim2D inputDim, Dim2D layer1Dim, Dim2D layer2Dim, Conv2DPadding layer1Pad, +// Conv2DPadding layer2Pad) { +// CnnHandle self = (CnnHandle)malloc(sizeof(Cnn)); +// // Create layers +// self->layer1 = Conv2DLayer_create(layer1Dim, layer1Pad); +// self->layer2 = Conv2DLayer_create(layer2Dim, layer2Pad); + +// self->inputDim = inputDim; +// // TODO: calculate +// self->outputDim = (Dim2D){1u, 256u}; +// return self; +// } + +// void Cnn_destroy(CnnHandle self) { +// Conv2DLayer_destroy(self->layer1); +// Conv2DLayer_destroy(self->layer2); +// free(self); +// } + +void Cnn_forwardFxp(CnnHandle self, fxp32* input, fxp32* output) { +#ifdef DYN_ALLOCATION + fxp32* layer1Output = (fxp32*)calloc(self->inputDim.x * self->inputDim.y, sizeof(fxp32)); + Conv2DLayer_forwardFxp(self->layer1, self->inputDim, input, layer1Output); + Conv2DLayer_forwardFxp(self->layer2, self->inputDim, layer1Output, output); + free(layer1Output); +#else + Conv2DLayer_forwardFxp(self->layer1, self->inputDim, input, self->layer1OutputFxp); + Conv2DLayer_forwardFxp(self->layer2, self->inputDim, self->layer1OutputFxp, output); +#endif +} 
+ +void Cnn_forwardFloat(CnnHandle self, float* input, float* output) { +#ifdef DYN_ALLOCATION + float* layer1Output = (float*)calloc(self->inputDim.x * self->inputDim.y, sizeof(float)); + Conv2DLayer_forwardFloat(self->layer1, self->inputDim, input, layer1Output); + Conv2DLayer_forwardFloat(self->layer2, self->inputDim, layer1Output, output); + free(layer1Output); +#else + Conv2DLayer_forwardFloat(self->layer1, self->inputDim, input, self->layer1Output); + Conv2DLayer_forwardFloat(self->layer2, self->inputDim, self->layer1Output, output); +#endif +} + +void Cnn_predictFxp(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output) { + Cnn_forwardFxp(self, acc, output); + for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { + output[i] = ppg[i] - output[i]; + } +} + +void Cnn_predictFloat(CnnHandle self, float* acc, float* ppg, float* output) { + Cnn_forwardFloat(self, acc, output); + for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { + output[i] = ppg[i] - output[i]; + } +} + +// float Cnn_sampleLoss(CnnHandle self, complex_t* ypred, complex_t* ytrue) { +// float loss = 0.0f; +// // NOTE: could be optimized by reusing .r and .i for the abs +// fxpMul* abs = (fxpMul*)calloc(self->outputDim.y * self->outputDim.x, sizeof(fxpMul)); +// for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { +// abs[i] = fxp32_pow2(ytrue[i].r - ypred[i].r) + fxp32_pow2(ytrue[i].i - ypred[i].i); +// } +// for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { +// loss += fxp32_fxpMulToFloat(abs[i]); +// } +// return loss; +// } + +void Cnn_freezeModel(CnnHandle self) { + Conv2DLayer_transformWeightsToFxp(self->layer1); + Conv2DLayer_transformWeightsToFxp(self->layer2); +} \ No newline at end of file diff --git a/sw/applications/l_cnn_static/cnn.h b/sw/applications/l_cnn_static/cnn.h new file mode 100644 index 000000000..857c5a065 --- /dev/null +++ b/sw/applications/l_cnn_static/cnn.h @@ -0,0 +1,42 @@ +#ifndef CNN_H +#define CNN_H + +#include 
"conv2dlayer.h" +#include "fxp32.h" + +/** + * @brief a two layer cnn model + */ +typedef struct __Cnn { + Conv2DLayerHandle layer1; + Conv2DLayerHandle layer2; + Dim2D inputDim; + Dim2D outputDim; +#ifndef DYN_ALLOCATION + float* layer1Output; + fxp32* layer1OutputFxp; +#endif +} Cnn; + +typedef struct __Cnn* CnnHandle; + +typedef struct __complex_t { + int32_t r; + int32_t i; +} complex_t; + +// CnnHandle Cnn_create(Dim2D inputDim, Dim2D layer1Dim, Dim2D layer2Dim, Conv2DPadding layer1Pad, +// Conv2DPadding layer2Pad); +// void Cnn_destroy(CnnHandle self); + +void Cnn_forwardFxp(CnnHandle self, fxp32* input, fxp32* output); +void Cnn_forwardFloat(CnnHandle self, float* input, float* output); + +// void Cnn_predictFxp(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output); +// void Cnn_predictFloat(CnnHandle self, float* acc, float* ppg, float* output); + +// float Cnn_sampleLoss(CnnHandle self, complex_t* ypred, complex_t* ytrue); + +void Cnn_freezeModel(CnnHandle self); + +#endif // CNN_H \ No newline at end of file diff --git a/sw/applications/l_cnn_static/conv2dlayer.c b/sw/applications/l_cnn_static/conv2dlayer.c new file mode 100644 index 000000000..38bcf2e18 --- /dev/null +++ b/sw/applications/l_cnn_static/conv2dlayer.c @@ -0,0 +1,161 @@ +#include "conv2dlayer.h" + +#include +#include +#include + +// Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, Conv2DPadding padding) { +// Conv2DLayerHandle self = (Conv2DLayerHandle)malloc(sizeof(Conv2DLayer)); +// self->dim = dim; +// self->padding = padding; +// self->weightsFxp = (fxp32*)calloc(dim.x * dim.y, sizeof(fxp32)); +// self->weightsFloat = (float*)calloc(dim.x * dim.y, sizeof(float)); +// return self; +// } + +// void Conv2DLayer_destroy(Conv2DLayerHandle self) { +// free(self->weightsFxp); +// free(self->weightsFloat); +// free(self); +// } + +bool Conv2DLayer_setWeightsFxp(Conv2DLayerHandle self, fxp32* weights) { + memcpy(self->weightsFxp, weights, self->dim.x * self->dim.y * sizeof(fxp32)); + // TODO: 
make sure we get the right size of weights + return true; +} + +bool Conv2DLayer_setWeightsFloat(Conv2DLayerHandle self, float* weights) { + memcpy(self->weightsFloat, weights, self->dim.x * self->dim.y * sizeof(float)); + // TODO: make sure we get the right size of weights + return true; +} + +void Conv2DLayer_transformWeightsToFxp(Conv2DLayerHandle self) { + for (int i = 0; i < self->dim.x * self->dim.y; ++i) { + self->weightsFxp[i] = fxp32_fromFloat(self->weightsFloat[i]); + } +} + +// Could be optimized +void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, int kerx, int kery, bool valid) { + + // float max = 0; + // float min = 0; + + // get kernel center + if (kerx % 2 != 1 || kery % 2 != 1) { + printf("Kernel size must be odd\n"); + exit(EXIT_FAILURE); + } + int cx = kerx / 2; + int cy = kery / 2; + + int mMax, mMin; + int nMax, nMin; + fxp32 sum, w, in; + + int iMin = 0; + int jMin = 0; + int iMax = inx; + int jMax = iny; + + if (valid) { + iMin = cx; + jMin = cy; + iMax = inx - cx; + jMax = iny - cy; + } + + for (int i = iMin; i < iMax; ++i) { + for (int j = jMin; j < jMax; ++j) { + mMin = i - cx >= 0 ? i - cx : 0; + mMax = i + cx + 1 < inx ? i + cx + 1 : inx; + nMin = j - cy >= 0 ? j - cy : 0; + nMax = j + cy + 1 < iny ? 
j + cy + 1 : iny; + sum = 0; + for (int m = mMin; m < mMax; ++m) { + for (int n = nMin; n < nMax; ++n) { + // printf("input access: %d, %d\n", m, n); + in = input[m * iny + n]; + // printf("kernel access: %d, %d\n", m - i + cx, n - j + + // cy); + w = kernel[(m - i + cx) * kery + (n - j + cy)]; + sum += fxp32_mul(w, in); + // if (sum > max) { + // max = sum; + // } + // if (sum < min) { + // min = sum; + // } + // printf("factor: %.2f\n", w * in); + } + } + if (valid) + output[(i - cx) * (iny - kery + 1) + (j - cy)] = sum; + else + output[i * iny + j] = sum; + // printf("access output: %d\n", (i-cx)*(iny - kery + 1) + (j-cy)); + // printf("sum = %.2f\n", sum); + } + } + // printf("max: %.6f, min: %.6f\n", max, min); +} + +// Could be optimized +void convolve2DFloat(float* input, float* output, float* kernel, int inx, int iny, int kerx, int kery, bool valid) { + // get kernel center + if (kerx % 2 != 1 || kery % 2 != 1) { + printf("Kernel size must be odd\n"); + exit(EXIT_FAILURE); + } + int cx = kerx / 2; + int cy = kery / 2; + + int mMax, mMin; + int nMax, nMin; + float sum, w, in; + + int iMin = 0; + int jMin = 0; + int iMax = inx; + int jMax = iny; + + if (valid) { + iMin = cx; + jMin = cy; + iMax = inx - cx; + jMax = iny - cy; + } + + for (int i = iMin; i < iMax; ++i) { + for (int j = jMin; j < jMax; ++j) { + mMin = i - cx >= 0 ? i - cx : 0; + mMax = i + cx + 1 < inx ? i + cx + 1 : inx; + nMin = j - cy >= 0 ? j - cy : 0; + nMax = j + cy + 1 < iny ? 
j + cy + 1 : iny; + sum = .0f; + for (int m = mMin; m < mMax; ++m) { + for (int n = nMin; n < nMax; ++n) { + in = input[m * iny + n]; + w = kernel[(m - i + cx) * kery + (n - j + cy)]; + sum += w * in; + } + } + if (valid) + output[(i - cx) * (iny - kery + 1) + (j - cy)] = sum; + else + output[i * iny + j] = sum; + } + } +} + +void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output) { + convolve2DFxp(input, output, self->weightsFxp, inputDim.x, inputDim.y, self->dim.x, self->dim.y, + self->padding == VALID); +} + +void Conv2DLayer_forwardFloat(Conv2DLayerHandle self, Dim2D inputDim, float* input, float* output) { + convolve2DFloat(input, output, self->weightsFloat, inputDim.x, inputDim.y, self->dim.x, self->dim.y, + self->padding == VALID); +} \ No newline at end of file diff --git a/sw/applications/l_cnn_static/conv2dlayer.h b/sw/applications/l_cnn_static/conv2dlayer.h new file mode 100644 index 000000000..ce5254d48 --- /dev/null +++ b/sw/applications/l_cnn_static/conv2dlayer.h @@ -0,0 +1,47 @@ +#ifndef CONV2DLAYER_H +#define CONV2DLAYER_H + +#include +#include + +#include "fxp32.h" + +// TODO: perhaps we could optimize but we need 256 as value... 
+/**
+ * @brief a 2D dimension
+ * @param x the rows of a matrix
+ * @param y the columns of a matrix
+ */
+typedef struct __Dim2D {
+    uint16_t x;
+    uint16_t y;
+} Dim2D;
+
+typedef enum __Conv2DPadding {
+    VALID,
+    SAME
+} Conv2DPadding;
+
+typedef struct __Conv2DLayer {
+    Dim2D dim;
+    Conv2DPadding padding;
+    fxp32* weightsFxp;
+    float* weightsFloat;
+} Conv2DLayer;
+
+typedef struct __Conv2DLayer* Conv2DLayerHandle;
+
+// Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, Conv2DPadding padding);
+// void Conv2DLayer_destroy(Conv2DLayerHandle self);
+
+bool Conv2DLayer_setWeightsFxp(Conv2DLayerHandle self, fxp32* weights);
+bool Conv2DLayer_setWeightsFloat(Conv2DLayerHandle self, float* weights);
+void Conv2DLayer_transformWeightsToFxp(Conv2DLayerHandle self);
+
+void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output);
+void Conv2DLayer_forwardFloat(Conv2DLayerHandle self, Dim2D inputDim, float* input, float* output);
+
+void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, int kerx, int kery, bool valid);
+void convolve2DFloat(float* input, float* output, float* kernel, int inx, int iny, int kerx, int kery, bool valid);
+
+#endif // CONV2DLAYER_H
\ No newline at end of file
diff --git a/sw/applications/l_cnn_static/fxp32.c b/sw/applications/l_cnn_static/fxp32.c
new file mode 100644
index 000000000..2755f99a9
--- /dev/null
+++ b/sw/applications/l_cnn_static/fxp32.c
@@ -0,0 +1,56 @@
+#include "fxp32.h"
+
+fxp32 fxp32_fromFloat(float f) {  // float -> fixed point: scale by 2^FRACTIONAL_BITS, truncate toward zero
+    return (fxp32)(f * (1 << FRACTIONAL_BITS));
+}
+
+float fxp32_toFloat(fxp32 x) {  // fixed point -> float: divide by 2^FRACTIONAL_BITS
+    return (float)x / (1 << FRACTIONAL_BITS);
+}
+
+float fxp32_fxpMulToFloat(fxpMul x) {  // same conversion for a 64-bit fixed-point value
+    return (float)x / ((fxpMul)1 << FRACTIONAL_BITS);
+}
+
+fxp32 fxp32_fromInt(int i) {  // integer -> fixed point
+    return (fxp32)(i * (1 << FRACTIONAL_BITS));  // multiply instead of shifting: i << n is undefined for negative i
+}
+
+fxp32 fxp32_mul(fxp32 a, fxp32 b) {  // product rescaled to fixed point and narrowed to 32 bits
+    return (fxp32)(((fxpMul)a * (fxpMul)b) >> FRACTIONAL_BITS);  // widen first so the raw product fits in 64 bits
+}
+
+fxpMul fxp32_mul64(fxp32 a, fxp32 b) {  // like fxp32_mul, but the result keeps its 64-bit width
+    return ((fxpMul)a * (fxpMul)b) >> FRACTIONAL_BITS;
+}
+
+fxpMul fxp32_pow2(fxp32 a) {  // a * a as a 64-bit fixed-point value
+    return ((fxpMul)a * (fxpMul)a) >> FRACTIONAL_BITS;
+}
+
+fxp32 fxp32_div(fxp32 a, fxp32 b) {  // a / b in fixed point; b must be non-zero (not checked here)
+    return (fxp32)(((fxpMul)a * ((fxpMul)1 << FRACTIONAL_BITS)) / b);  // pre-scale by multiplying: shifting a negative a is undefined
+}
+
+bool fxp32_closefxp(fxp32 a, fxp32 b, fxp32 prec) {  // true when |a - b| <= prec (prec in raw fixed-point units)
+    fxpMul diff = (fxpMul)a - (fxpMul)b;  // 64-bit difference: a - b can overflow 32 bits
+    if (diff < 0)
+        diff = -diff;
+    return diff <= prec;
+}
+
+fxp32 fxp32_sqrt(fxp32 a) {  // iterative square root; returns 0 for a <= 0
+    fxp32 guess = (a >> 1) > 0 ? (a >> 1) : 1;  // the guess is used as a divisor below, so it must never start at 0
+    fxp32 eps = fxp32_fromFloat(0.0001);
+    while (a > 0 && fxp32_closefxp(guess, fxp32_div(a, guess), eps) == false) {  // stop once guess and a / guess agree
+        guess = (guess + fxp32_div(a, guess)) >> 1;  // next guess: mean of guess and a / guess
+    }
+    return a > 0 ? guess : 0;  // no iteration is run for zero or negative input
+}
+
+bool fxp32_close(fxp32 a, fxp32 b, float prec) {  // true when |a - b| <= prec, with prec given as a float
+    fxpMul diff = (fxpMul)a - (fxpMul)b;  // 64-bit difference: a - b can overflow 32 bits
+    if (diff < 0)
+        diff = -diff;
+    return diff <= fxp32_fromFloat(prec);
+}
diff --git a/sw/applications/l_cnn_static/fxp32.h b/sw/applications/l_cnn_static/fxp32.h
new file mode 100644
index 000000000..878d82855
--- /dev/null
+++ b/sw/applications/l_cnn_static/fxp32.h
@@ -0,0 +1,26 @@
+#ifndef FXP_H
+#define FXP_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#define FRACTIONAL_BITS 23  // number of fractional bits in an fxp32 value
+typedef int32_t fxp32;   // 32-bit fixed-point value
+typedef int64_t fxpMul;  // 64-bit type for intermediate products and differences
+
+fxp32 fxp32_fromFloat(float f);
+float fxp32_toFloat(fxp32 x);
+float fxp32_fxpMulToFloat(fxpMul x);
+fxp32 fxp32_fromInt(int i);
+
+fxp32 fxp32_mul(fxp32 a, fxp32 b);
+fxpMul fxp32_mul64(fxp32 a, fxp32 b);
+fxpMul fxp32_pow2(fxp32 a);
+
+fxp32 fxp32_div(fxp32 a, fxp32 b);
+
+fxp32 fxp32_sqrt(fxp32 a);
+
+bool fxp32_close(fxp32 a, fxp32 b, float prec);
+
+#endif // FXP_H
\ No newline at end of file
diff --git a/sw/applications/l_cnn_static/main.c b/sw/applications/l_cnn_static/main.c
new file mode 100644
index 000000000..c82867e17
--- /dev/null
+++ b/sw/applications/l_cnn_static/main.c
@@ -0,0 +1,83 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "cnn.h"
+#include "fxp32.h"
+#include "utils.h"
+
+// #define DYN_ALLOCATION
+#define COMP_PREC 0.0001f
+#define COMP_PREC_I32 512
+
+void compareVectorsFloat(float* a, float* b, int size, float prec) {
+    for (int i = 0; i < size; ++i) {
+        assert_closef(a[i], b[i], prec);
+    }
+}
+
+void compareVectorsFxp(fxp32* a, fxp32* b, int size, int32_t prec) {  // prec is an integer tolerance in raw fxp32 units
+    for (int i = 0; i < size; ++i) {
+        assert_closei32(a[i], b[i], prec);
+    }
+}
+
+// clang-format off
+#ifndef DYN_ALLOCATION
+
+// Create all arrays statically
+float inp[] = { 0.10687026183436554, 0.6433775638335413, 1.5719371774211934, 1.8606301880573626, 1.079730707565259, 1.0363964731573039, 0.8401961894177536, 0.8062113303254894, 1.0693255244711337, 1.8973979155425758, -0.5051419304161229, -1.6677822278462204, -1.0414382415511003, -1.1140689598280193, -0.5456008127835759, 1.241256928449407, -1.7596153205346008, -0.20102576841377573, 1.2526821099333798, -0.9430465018374048, -1.7463960721065255, -1.0315693109762605, -1.6597181529991376, 1.231109741389408, -1.3189996974476714, -1.2186214977502452, 1.2585680462314413, 1.2411421167299825, 0.35749551293497017, 1.658937360469848, -1.7607134529814616, 1.859986561081329, 0.28390086742322307, -0.7899275499984229, 1.3028233059934027, 0.637669121413285, 1.9460057656135836, -1.5700418656996042, 0.3236741105335814, -0.10868737977085274, 0.6090831439508024, -1.0325637836102834, -1.8754797463263757, 0.17692941098474302, -0.54115928483046,};
+float ke1[] = { 1.2019056830022787, -1.9184344862073974, 0.2904745952311969, -0.3544655368824938, 1.9405471907832181, 1.2056061216442804, -1.7841515914194126, -1.2380889061779894, -0.19032461673209822, 0.8117683084719127, -0.6718074018572904, -0.5600672193147695, 1.685882263836708, 1.8145220229093884, -0.36925708529142565,};
+float ke2[] = { 1.594284619538893, -0.6789869921619749, -1.6690457236800493,};
+float res[] = { -5.646923391743088, -12.486867124009787, 1.116749990770437, 3.790885823459984, -7.655608446879853, -5.758159525901699, -16.320022556866068, 10.253133245571195, -8.825685721563698, -3.9221345076618483, -11.844371518436887, 16.970633341131517, 5.631829596306385, 4.730155342366402, 6.236770944902686,};
+int32_t inp_fxp[] = { 896492, 5397042, 13186365, 15608097, 9057438, 8693924, 7048076, 6762991, 8970153, 15916527, -4237437, -13990371, -8736217, -9345488, -4576831, 10412418, -14760723, -1686326, 10508259, -7910847, -14649832, -8653431, -13922725, 10327297, -11064571, -10222538, 10557634, 10411455, 2998889, 13916175, -14769935, 15602698, 2381533, -6626392, 10928874, 5349156, 16324280, -13170466, 2715175, -911735, 5109359, -8661773, -15732664, 1484191, -4539573,};
+int32_t ke1_fxp[] = { 10082316, -16092995, 2436677, -2973472, 16278490, 10113357, -14966548, -10385843, -1596558, 6809606, -5635529, -4698184, 14142205, 15221314, -3097553,};
+int32_t ke2_fxp[] = { 13373829, -5695755, -14000970,};
+int32_t res_fxp[] = { -47369828, -104747432, 9367978, 31800256, -64219900, -48302944, -136902272, 86009512, -74035216, -32901248, -99357792, 142359984, 47243212, 39679420, 52317828,};
+
+float result[15];
+int32_t result_fxp[15];
+
+float layer1Output[3*15];
+int32_t layer1OutputFxp[3*15];
+
+#endif
+// clang-format on
+
+int main() {
+    printf("Running cnn test\n");
+
+    // Create the CNN
+    printf("Creating CNN\n");
+    Conv2DLayer layer1;
+    layer1.dim = (Dim2D){3u, 5u};
+    layer1.padding = SAME;
+    layer1.weightsFloat = ke1;
+    layer1.weightsFxp = ke1_fxp;
+
+    Conv2DLayer layer2;
+    layer2.dim = (Dim2D){3u, 1u};
+    layer2.padding = VALID;
+    layer2.weightsFloat = ke2;
+    layer2.weightsFxp = ke2_fxp;
+
+    Cnn cnn;
+    cnn.layer1 = &layer1;
+    cnn.layer2 = &layer2;
+    cnn.inputDim = (Dim2D){3u, 15u};
+    cnn.outputDim = (Dim2D){1u, 15u};
+    cnn.layer1Output = layer1Output;
+    cnn.layer1OutputFxp = layer1OutputFxp;
+
+    // Forward pass
+    printf("Forward pass\n");
+    Cnn_forwardFloat(&cnn, inp, result);
+    Cnn_forwardFxp(&cnn, inp_fxp, result_fxp);
+
+    printf("Comparing results\n");
+    compareVectorsFloat(result, res, 15, COMP_PREC * 10);
+    compareVectorsFxp(result_fxp, res_fxp, 15, COMP_PREC_I32 * 10);
+    printf("Test passed\n");
+
+    return 0;
+}
\ No newline at end of file
diff --git a/sw/applications/l_cnn_static/utils.h b/sw/applications/l_cnn_static/utils.h
new file mode 100644
index 000000000..bcaed0d25
--- /dev/null
+++ b/sw/applications/l_cnn_static/utils.h
@@ -0,0 +1,124 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Define SIMULATION if you want to disable printing
+// #define SIMULATION
+#define TARGET
+
+#ifdef SIMULATION
+#pragma message ("SIMULATION environment")
+#endif
+#ifdef TARGET
+#pragma message ("TARGET environment")
+#endif
+
+// Enable or disable printing
+#ifndef SIMULATION
+#define PRINTF(...) printf(__VA_ARGS__)
+#else
+#define PRINTF(...)
+#endif
+
+static float maxdiff = 0;       // largest float difference seen so far by the *_s / *_si helpers
+static int64_t maxdiffxp = 0;   // largest integer difference seen so far; 64 bit because |a - b| can exceed int32_t
+
+// Assert functions, always print if failing; static inline so this header can be included from several files
+static inline void assert_closef_si(float a, float b, float prec, int idx) {
+    float diff = a - b;
+    if (diff < 0) diff = -diff;
+    if (diff > maxdiff) {
+        maxdiff = diff;
+        if (maxdiff > 0.0001)
+            PRINTF("Max diff float: %f\n", maxdiff);
+    }
+    if (diff > prec) {
+        printf("AF %d %f %f %d\n", __LINE__, a, b, idx);
+        exit(EXIT_FAILURE);
+    }
+}
+
+static inline void assert_closei32_si(int32_t a, int32_t b, int32_t prec, int idx) {
+    int64_t diff = (int64_t)a - b;  // widen first: a - b can overflow int32_t
+    if (diff < 0) diff = -diff;
+    if (diff > maxdiffxp) {
+        maxdiffxp = diff;
+        if (maxdiffxp > 1)
+            PRINTF("Max diff fxp: %lld\n", (long long)maxdiffxp);
+    }
+    if (diff > prec) {
+        printf("AI %d %ld %ld %d\n", __LINE__, (long)a, (long)b, idx);  // casts make the arguments match %ld whatever int32_t is
+        exit(EXIT_FAILURE);
+    }
+}
+
+static inline void assert_closef_s(float a, float b, float prec) {
+    float diff = a - b;
+    if (diff < 0) diff = -diff;
+    if (diff > maxdiff) {
+        maxdiff = diff;
+        if (maxdiff > 0.0001)
+            PRINTF("Max diff float: %f\n", maxdiff);
+    }
+    if (diff > prec) {
+        printf("AF %d %f %f\n", __LINE__, a, b);
+        exit(EXIT_FAILURE);
+    }
+}
+
+static inline void assert_closei32_s(int32_t a, int32_t b, int32_t prec) {
+    int64_t diff = (int64_t)a - b;  // widen first: a - b can overflow int32_t
+    if (diff < 0) diff = -diff;
+    if (diff > maxdiffxp) {
+        maxdiffxp = diff;
+        if (maxdiffxp > 1)
+            PRINTF("Max diff fxp: %lld\n", (long long)maxdiffxp);
+    }
+    if (diff > prec) {
+        printf("AI %d %ld %ld\n", __LINE__, (long)a, (long)b);
+        exit(EXIT_FAILURE);
+    }
+}
+
+static inline void assert_closef(float a, float b, float prec) {
+    float diff = a - b;
+    if (diff < 0) diff = -diff;
+    if (diff > prec) {
+        printf("AF %d %f %f\n", __LINE__, a, b);
+        exit(EXIT_FAILURE);
+    }
+}
+
+static inline void assert_closei32(int32_t a, int32_t b, int32_t prec) {
+    int64_t diff = (int64_t)a - b;  // widen first: a - b can overflow int32_t
+    if (diff < 0) diff = -diff;
+    if (diff > prec) {
+        printf("AI %d %ld %ld\n", __LINE__, (long)a, (long)b);
+        exit(EXIT_FAILURE);
+    }
+}
+
+// Vector export for plots
+#if defined(SIMULATION) || defined(TARGET)
+#define VECTOR_EXPORT(...)
+#else
+void vectorExport(float* a, int size, char filename[]) {
+    FILE *filePointer;
+    filePointer = fopen(filename, "w");
+    if (filePointer == NULL) {
+        printf("Failed to create file.\n");
+        return;
+    }
+    for (int i=0; i Date: Mon, 13 May 2024 15:33:35 +0200
Subject: [PATCH 16/27] add real sample test and expand stack

---
 mcu_cfg.hjson | 2 +-
 sw/applications/l_cnn_static_realsample/cnn.c | 81 +++++++++
 sw/applications/l_cnn_static_realsample/cnn.h | 42 +++++
 .../l_cnn_static_realsample/conv2dlayer.c | 161 ++++++++++++++++++
 .../l_cnn_static_realsample/conv2dlayer.h | 47 +++++
 .../l_cnn_static_realsample/fxp32.c | 56 ++++++
 .../l_cnn_static_realsample/fxp32.h | 26 +++
 .../l_cnn_static_realsample/main.c | 79 +++++++++
 .../l_cnn_static_realsample/utils.h | 124 ++++++++++++++
 9 files changed, 617 insertions(+), 1 deletion(-)
 create mode 100644 sw/applications/l_cnn_static_realsample/cnn.c
 create mode 100644 sw/applications/l_cnn_static_realsample/cnn.h
 create mode 100644 sw/applications/l_cnn_static_realsample/conv2dlayer.c
 create mode 100644 sw/applications/l_cnn_static_realsample/conv2dlayer.h
 create mode 100644 sw/applications/l_cnn_static_realsample/fxp32.c
 create mode 100644 sw/applications/l_cnn_static_realsample/fxp32.h
 create mode 100644 sw/applications/l_cnn_static_realsample/main.c
 create mode 100644 sw/applications/l_cnn_static_realsample/utils.h

diff --git a/mcu_cfg.hjson b/mcu_cfg.hjson
index 2e6f33f19..9a780c8c9 100644
--- a/mcu_cfg.hjson
+++ b/mcu_cfg.hjson
@@ -8,7 +8,7 @@
     cpu_type: cv32e20
 
     linker_script: {
-        stack_size: 0x800,
+        stack_size: 0xC00,
         heap_size: 0x800,
     }
 
diff --git a/sw/applications/l_cnn_static_realsample/cnn.c b/sw/applications/l_cnn_static_realsample/cnn.c
new file mode 100644
index 000000000..60cc088da
--- /dev/null
+++ b/sw/applications/l_cnn_static_realsample/cnn.c
@@ -0,0 +1,81 @@
+// clang-format off
+
+#include "cnn.h"
+
+#include <stdlib.h>
+// clang-format on
+
+// CnnHandle Cnn_create(Dim2D inputDim, Dim2D layer1Dim, Dim2D layer2Dim, Conv2DPadding layer1Pad,
+// Conv2DPadding layer2Pad) {
+// CnnHandle self = (CnnHandle)malloc(sizeof(Cnn));
+// // Create layers
+// self->layer1 = Conv2DLayer_create(layer1Dim, layer1Pad);
+// self->layer2 = Conv2DLayer_create(layer2Dim, layer2Pad);
+
+// self->inputDim = inputDim;
+// // TODO: calculate
+// self->outputDim = (Dim2D){1u, 256u};
+// return self;
+// }
+
+// void Cnn_destroy(CnnHandle self) {
+// Conv2DLayer_destroy(self->layer1);
+// Conv2DLayer_destroy(self->layer2);
+// free(self);
+// }
+
+void Cnn_forwardFxp(CnnHandle self, fxp32* input, fxp32* output) {  // fixed-point forward pass: layer1, then layer2
+#ifdef DYN_ALLOCATION
+    fxp32* layer1Output = (fxp32*)calloc(self->inputDim.x * self->inputDim.y, sizeof(fxp32));  // NOTE(review): result is used without a NULL check
+    Conv2DLayer_forwardFxp(self->layer1, self->inputDim, input, layer1Output);
+    Conv2DLayer_forwardFxp(self->layer2, self->inputDim, layer1Output, output);
+    free(layer1Output);
+#else
+    Conv2DLayer_forwardFxp(self->layer1, self->inputDim, input, self->layer1OutputFxp);
+    Conv2DLayer_forwardFxp(self->layer2, self->inputDim, self->layer1OutputFxp, output);  // layer2 is given inputDim, so layer1 must keep the input size (SAME)
+#endif
+}
+
+void Cnn_forwardFloat(CnnHandle self, float* input, float* output) {  // float forward pass: layer1, then layer2
+#ifdef DYN_ALLOCATION
+    float* layer1Output = (float*)calloc(self->inputDim.x * self->inputDim.y, sizeof(float));  // NOTE(review): result is used without a NULL check
+    Conv2DLayer_forwardFloat(self->layer1, self->inputDim, input, layer1Output);
+    Conv2DLayer_forwardFloat(self->layer2, self->inputDim, layer1Output, output);
+    free(layer1Output);
+#else
+    Conv2DLayer_forwardFloat(self->layer1, self->inputDim, input, self->layer1Output);
+    Conv2DLayer_forwardFloat(self->layer2, self->inputDim, self->layer1Output, output);  // layer2 is given inputDim, so layer1 must keep the input size (SAME)
+#endif
+}
+
+void Cnn_predictFxp(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output) {  // output = ppg - forward(acc), element-wise
+    Cnn_forwardFxp(self, acc, output);
+    for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) {
+        output[i] = ppg[i] - output[i];
+    }
+}
+
+void Cnn_predictFloat(CnnHandle self, float* acc, float* ppg, float* output) {  // output = ppg - forward(acc), element-wise
+    Cnn_forwardFloat(self, acc, output);
+    for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) {
+        output[i] = ppg[i] - output[i];
+    }
+}
+
+// float Cnn_sampleLoss(CnnHandle self, complex_t* ypred, complex_t* ytrue) {
+// float loss = 0.0f;
+// // NOTE: could be optimized by reusing .r and .i for the abs
+// fxpMul* abs = (fxpMul*)calloc(self->outputDim.y * self->outputDim.x, sizeof(fxpMul));
+// for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) {
+// abs[i] = fxp32_pow2(ytrue[i].r - ypred[i].r) + fxp32_pow2(ytrue[i].i - ypred[i].i);
+// }
+// for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) {
+// loss += fxp32_fxpMulToFloat(abs[i]);
+// }
+// return loss;
+// }
+
+void Cnn_freezeModel(CnnHandle self) {  // regenerate both layers' fixed-point weights from their float weights
+    Conv2DLayer_transformWeightsToFxp(self->layer1);
+    Conv2DLayer_transformWeightsToFxp(self->layer2);
+}
\ No newline at end of file
diff --git a/sw/applications/l_cnn_static_realsample/cnn.h b/sw/applications/l_cnn_static_realsample/cnn.h
new file mode 100644
index 000000000..857c5a065
--- /dev/null
+++ b/sw/applications/l_cnn_static_realsample/cnn.h
@@ -0,0 +1,42 @@
+#ifndef CNN_H
+#define CNN_H
+
+#include "conv2dlayer.h"
+#include "fxp32.h"
+
+/**
+ * @brief a two layer cnn model
+ */
+typedef struct __Cnn {
+    Conv2DLayerHandle layer1;
+    Conv2DLayerHandle layer2;
+    Dim2D inputDim;
+    Dim2D outputDim;
+#ifndef DYN_ALLOCATION  // NOTE(review): changes the struct layout, so it must be set the same way for every file including cnn.h
+    float* layer1Output;     // caller-provided buffer for layer1's float output
+    fxp32* layer1OutputFxp;  // caller-provided buffer for layer1's fixed-point output
+#endif
+} Cnn;
+
+typedef struct __Cnn* CnnHandle;
+
+typedef struct __complex_t {
+    int32_t r;
+    int32_t i;
+} complex_t;
+
+// CnnHandle Cnn_create(Dim2D inputDim, Dim2D layer1Dim, Dim2D layer2Dim, Conv2DPadding layer1Pad,
+// Conv2DPadding layer2Pad);
+// void Cnn_destroy(CnnHandle self);
+
+void Cnn_forwardFxp(CnnHandle self, fxp32* input, fxp32* output);
+void Cnn_forwardFloat(CnnHandle self, float* input, float* output);
+
+void Cnn_predictFxp(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output);
+void Cnn_predictFloat(CnnHandle self, float* acc, float* ppg, float* output);
+
+// float Cnn_sampleLoss(CnnHandle self, complex_t* ypred, complex_t* ytrue);
+
+void Cnn_freezeModel(CnnHandle self);
+
+#endif // CNN_H
\ No newline at end of file
diff --git a/sw/applications/l_cnn_static_realsample/conv2dlayer.c b/sw/applications/l_cnn_static_realsample/conv2dlayer.c
new file mode 100644
index 000000000..38bcf2e18
--- /dev/null
+++ b/sw/applications/l_cnn_static_realsample/conv2dlayer.c
@@ -0,0 +1,161 @@
+#include "conv2dlayer.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, Conv2DPadding padding) {
+// Conv2DLayerHandle self = (Conv2DLayerHandle)malloc(sizeof(Conv2DLayer));
+// self->dim = dim;
+// self->padding = padding;
+// self->weightsFxp = (fxp32*)calloc(dim.x * dim.y, sizeof(fxp32));
+// self->weightsFloat = (float*)calloc(dim.x * dim.y, sizeof(float));
+// return self;
+// }
+
+// void Conv2DLayer_destroy(Conv2DLayerHandle self) {
+// free(self->weightsFxp);
+// free(self->weightsFloat);
+// free(self);
+// }
+
+bool Conv2DLayer_setWeightsFxp(Conv2DLayerHandle self, fxp32* weights) {  // copies dim.x * dim.y weights into the layer
+    memmove(self->weightsFxp, weights, (size_t)self->dim.x * self->dim.y * sizeof(fxp32));  // memmove: weights may be the layer's own buffer
+    // TODO: make sure we get the right size of weights
+    return true;
+}
+
+bool Conv2DLayer_setWeightsFloat(Conv2DLayerHandle self, float* weights) {  // copies dim.x * dim.y weights into the layer
+    memmove(self->weightsFloat, weights, (size_t)self->dim.x * self->dim.y * sizeof(float));  // memmove: weights may be the layer's own buffer
+    // TODO: make sure we get the right size of weights
+    return true;
+}
+
+void Conv2DLayer_transformWeightsToFxp(Conv2DLayerHandle self) {  // overwrites weightsFxp with the converted weightsFloat
+    for (int i = 0; i < self->dim.x * self->dim.y; ++i) {
+        self->weightsFxp[i] = fxp32_fromFloat(self->weightsFloat[i]);
+    }
+}
+
+// Could be optimized
+void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, int kerx, int kery, bool valid) {
+
+    // float max = 0;
+    // float min = 0;
+
+    // get kernel center
+    if (kerx % 2 != 1 || kery % 2 != 1) {
+        printf("Kernel size must be odd\n");
+        exit(EXIT_FAILURE);
+    }
+    int cx = kerx / 2;
+    int cy = kery / 2;
+
+    int mMax, mMin;
+    int nMax, nMin;
+    fxp32 sum, w, in;
+
+    int iMin = 0;
+    int jMin = 0;
+    int iMax = inx;
+    int jMax = iny;
+
+    if (valid) {  // valid: visit only centers where the whole kernel lies inside the input
+        iMin = cx;
+        jMin = cy;
+        iMax = inx - cx;
+        jMax = iny - cy;
+    }
+
+    for (int i = iMin; i < iMax; ++i) {
+        for (int j = jMin; j < jMax; ++j) {
+            mMin = i - cx >= 0 ? i - cx : 0;  // clamp the kernel window to the input; clipped taps add nothing
+            mMax = i + cx + 1 < inx ? i + cx + 1 : inx;
+            nMin = j - cy >= 0 ? j - cy : 0;
+            nMax = j + cy + 1 < iny ? j + cy + 1 : iny;
+            sum = 0;
+            for (int m = mMin; m < mMax; ++m) {
+                for (int n = nMin; n < nMax; ++n) {
+                    // printf("input access: %d, %d\n", m, n);
+                    in = input[m * iny + n];
+                    // printf("kernel access: %d, %d\n", m - i + cx, n - j +
+                    // cy);
+                    w = kernel[(m - i + cx) * kery + (n - j + cy)];
+                    sum += fxp32_mul(w, in);
+                    // if (sum > max) {
+                    // max = sum;
+                    // }
+                    // if (sum < min) {
+                    // min = sum;
+                    // }
+                    // printf("factor: %.2f\n", w * in);
+                }
+            }
+            if (valid)
+                output[(i - cx) * (iny - kery + 1) + (j - cy)] = sum;  // valid output has iny - kery + 1 columns
+            else
+                output[i * iny + j] = sum;  // otherwise the output has the input's layout
+            // printf("access output: %d\n", (i-cx)*(iny - kery + 1) + (j-cy));
+            // printf("sum = %.2f\n", sum);
+        }
+    }
+    // printf("max: %.6f, min: %.6f\n", max, min);
+}
+
+// Could be optimized
+void convolve2DFloat(float* input, float* output, float* kernel, int inx, int iny, int kerx, int kery, bool valid) {
+    // get kernel center
+    if (kerx % 2 != 1 || kery % 2 != 1) {
+        printf("Kernel size must be odd\n");
+        exit(EXIT_FAILURE);
+    }
+    int cx = kerx / 2;
+    int cy = kery / 2;
+
+    int mMax, mMin;
+    int nMax, nMin;
+    float sum, w, in;
+
+    int iMin = 0;
+    int jMin = 0;
+    int iMax = inx;
+    int jMax = iny;
+
+    if (valid) {  // valid: visit only centers where the whole kernel lies inside the input
+        iMin = cx;
+        jMin = cy;
+        iMax = inx - cx;
+        jMax = iny - cy;
+    }
+
+    for (int i = iMin; i < iMax; ++i) {
+        for (int j = jMin; j < jMax; ++j) {
+            mMin = i - cx >= 0 ? i - cx : 0;  // clamp the kernel window to the input; clipped taps add nothing
+            mMax = i + cx + 1 < inx ? i + cx + 1 : inx;
+            nMin = j - cy >= 0 ? j - cy : 0;
+            nMax = j + cy + 1 < iny ? j + cy + 1 : iny;
+            sum = .0f;
+            for (int m = mMin; m < mMax; ++m) {
+                for (int n = nMin; n < nMax; ++n) {
+                    in = input[m * iny + n];
+                    w = kernel[(m - i + cx) * kery + (n - j + cy)];
+                    sum += w * in;
+                }
+            }
+            if (valid)
+                output[(i - cx) * (iny - kery + 1) + (j - cy)] = sum;  // valid output has iny - kery + 1 columns
+            else
+                output[i * iny + j] = sum;  // otherwise the output has the input's layout
+        }
+    }
+}
+
+void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output) {
+    convolve2DFxp(input, output, self->weightsFxp, inputDim.x, inputDim.y, self->dim.x, self->dim.y,
+                  self->padding == VALID);
+}
+
+void Conv2DLayer_forwardFloat(Conv2DLayerHandle self, Dim2D inputDim, float* input, float* output) {
+    convolve2DFloat(input, output, self->weightsFloat, inputDim.x, inputDim.y, self->dim.x, self->dim.y,
+                    self->padding == VALID);
+}
\ No newline at end of file
diff --git a/sw/applications/l_cnn_static_realsample/conv2dlayer.h b/sw/applications/l_cnn_static_realsample/conv2dlayer.h
new file mode 100644
index 000000000..ce5254d48
--- /dev/null
+++ b/sw/applications/l_cnn_static_realsample/conv2dlayer.h
@@ -0,0 +1,47 @@
+#ifndef CONV2DLAYER_H
+#define CONV2DLAYER_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "fxp32.h"
+
+// TODO: perhaps we could optimize but we need 256 as value...
+/** + * @brief a 2D dimension + * @param x the rows of a matrix + * @param y the columns of a matrix + */ +typedef struct __Dim2D { + uint16_t x; + uint16_t y; +} Dim2D; + +typedef enum __Conv2DPadding { + VALID, + SAME +} Conv2DPadding; + +typedef struct __Conv2DLayer { + Dim2D dim; + Conv2DPadding padding; + fxp32* weightsFxp; + float* weightsFloat; +} Conv2DLayer; + +typedef struct __Conv2DLayer* Conv2DLayerHandle; + +// Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, Conv2DPadding padding); +// void Conv2DLayer_destroy(Conv2DLayerHandle self); + +bool Conv2DLayer_setWeightsFxp(Conv2DLayerHandle self, fxp32* weights); +bool Conv2DLayer_setWeightsFloat(Conv2DLayerHandle self, float* weights); +void Conv2DLayer_transformWeightsToFxp(Conv2DLayerHandle self); + +void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output); +void Conv2DLayer_forwardFloat(Conv2DLayerHandle self, Dim2D inputDim, float* input, float* output); + +void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, int kerx, int kery, bool valid); +void convolve2DFloat(float* input, float* output, float* kernel, int inx, int iny, int kerx, int kery, bool valid); + +#endif // CONV2DLAYER_H \ No newline at end of file diff --git a/sw/applications/l_cnn_static_realsample/fxp32.c b/sw/applications/l_cnn_static_realsample/fxp32.c new file mode 100644 index 000000000..2755f99a9 --- /dev/null +++ b/sw/applications/l_cnn_static_realsample/fxp32.c @@ -0,0 +1,56 @@ +#include "fxp32.h" + +fxp32 fxp32_fromFloat(float f) { + return (fxp32)(f * (1 << FRACTIONAL_BITS)); +} + +float fxp32_toFloat(fxp32 x) { + return (float)x / (1 << FRACTIONAL_BITS); +} + +float fxp32_fxpMulToFloat(fxpMul x) { + return (float)x / ((fxpMul)1 << FRACTIONAL_BITS); +} + +fxp32 fxp32_fromInt(int i) { + return i << FRACTIONAL_BITS; +} + +fxp32 fxp32_mul(fxp32 a, fxp32 b) { + return (fxp32)(((fxpMul)a * (fxpMul)b) >> FRACTIONAL_BITS); +} + +fxpMul fxp32_mul64(fxp32 a, fxp32 b) { 
+ return ((fxpMul)a * (fxpMul)b) >> FRACTIONAL_BITS; +} + +fxpMul fxp32_pow2(fxp32 a) { + return ((fxpMul)a * (fxpMul)a) >> FRACTIONAL_BITS; +} + +fxp32 fxp32_div(fxp32 a, fxp32 b) { + return (fxp32)(((fxpMul)a << FRACTIONAL_BITS) / b); +} + +bool fxp32_closefxp(fxp32 a, fxp32 b, fxp32 prec) { + fxp32 diff = a - b; + if (diff < 0) + diff = -diff; + return diff <= prec; +} + +fxp32 fxp32_sqrt(fxp32 a) { + fxp32 guess = a >> 1; + fxp32 eps = fxp32_fromFloat(0.0001); + while (fxp32_closefxp(guess, fxp32_div(a, guess), eps) == false) { + guess = (guess + fxp32_div(a, guess)) >> 1; + } + return guess; +} + +bool fxp32_close(fxp32 a, fxp32 b, float prec) { + fxp32 diff = a - b; + if (diff < 0) + diff = -diff; + return diff <= fxp32_fromFloat(prec); +} diff --git a/sw/applications/l_cnn_static_realsample/fxp32.h b/sw/applications/l_cnn_static_realsample/fxp32.h new file mode 100644 index 000000000..878d82855 --- /dev/null +++ b/sw/applications/l_cnn_static_realsample/fxp32.h @@ -0,0 +1,26 @@ +#ifndef FXP_H +#define FXP_H + +#include +#include + +#define FRACTIONAL_BITS 23 +typedef int32_t fxp32; +typedef int64_t fxpMul; + +fxp32 fxp32_fromFloat(float f); +float fxp32_toFloat(fxp32 x); +float fxp32_fxpMulToFloat(fxpMul x); +fxp32 fxp32_fromInt(int i); + +fxp32 fxp32_mul(fxp32 a, fxp32 b); +fxpMul fxp32_mul64(fxp32 a, fxp32 b); +fxpMul fxp32_pow2(fxp32 a); + +fxp32 fxp32_div(fxp32 a, fxp32 b); + +fxp32 fxp32_sqrt(fxp32 a); + +bool fxp32_close(fxp32 a, fxp32 b, float prec); + +#endif // FXP_H \ No newline at end of file diff --git a/sw/applications/l_cnn_static_realsample/main.c b/sw/applications/l_cnn_static_realsample/main.c new file mode 100644 index 000000000..b36062ce4 --- /dev/null +++ b/sw/applications/l_cnn_static_realsample/main.c @@ -0,0 +1,79 @@ +#include +#include +#include + +#include "cnn.h" +#include "fxp32.h" +#include "utils.h" + +// #define DYN_ALLOCATION +#define COMP_PREC 0.0001f +#define COMP_PREC_I32 512 + +void compareVectorsFloat(float* a, float* b, 
int size, float prec) { + for (int i = 0; i < size; ++i) { + assert_closef(a[i], b[i], prec); + } +} + +void compareVectorsFxp(fxp32* a, fxp32* b, int size, float prec) { + for (int i = 0; i < size; ++i) { + assert_closei32(a[i], b[i], prec); + } +} + +// clang-format off +#ifndef DYN_ALLOCATION + +// Create all arays statically +float weights1[] = { -0.12116f, 0.15478f, 0.15130f, -0.15055f, -0.10036f, 0.14400f, -0.00204f, -0.03408f, -0.18936f, 0.03390f, 0.12200f, -0.13891f, 0.05332f, -0.21001f, -0.18876f, -0.06263f, -0.00274f, -0.14772f, 0.06332f, 0.05349f, -0.05071f, 0.13313f, -0.07974f, -0.06264f, -0.03501f, -0.21790f, 0.05925f, -0.14536f, -0.21762f, 0.08203f, -0.08571f, 0.16886f, 0.12439f, 0.12544f, 0.04466f, -0.16849f, 0.04673f, 0.03604f, -0.04811f, 0.21726f, 0.17702f, 0.16138f, 0.08862f, 0.02971f, -0.05572f, -0.07597f, -0.02538f, 0.06561f, -0.08724f, -0.20014f, -0.09443f, -0.13890f, -0.00194f, 0.03216f, 0.14760f, 0.09256f, 0.13031f, -0.09160f, 0.03522f, -0.03609f, -0.08631f, -0.02040f, -0.19688f,}; +float weights2[] = { -0.71606f, 0.06657f, -0.07737f,}; +float xin[] = { -1.30298f, -0.96491f, -0.51415f, -0.17608f, 0.27468f, 0.50006f, 0.04930f, -0.73953f, -0.96491f, -1.97912f, -2.65526f, -2.31719f, -2.54257f, -2.88064f, -3.21871f, -2.76795f, -2.31719f, -2.20450f, 1.17620f, 1.51427f, 0.95082f, 0.38737f, -0.17608f, 0.04930f, 0.61275f, 0.61275f, 0.50006f, 0.50006f, 0.50006f, -0.17608f, -0.40146f, -0.73953f, -0.73953f, -0.73953f, -0.85222f, -0.73953f, -0.85222f, -0.73953f, -0.17608f, 0.27468f, 0.50006f, 0.61275f, 0.95082f, 1.06351f, 1.06351f, 0.83813f, 0.50006f, 0.16199f, 0.04930f, 0.04930f, 0.50006f, 0.50006f, 0.72544f, 0.83813f, 0.83813f, 0.61275f, 0.04930f, -0.06339f, 0.04930f, 0.61275f, 0.83813f, 1.06351f, 0.95082f, 0.72544f, 0.61275f, 0.38737f, 0.27468f, 0.50006f, 0.50006f, 0.38737f, 0.27468f, 0.27468f, 0.72544f, 0.61275f, 0.50006f, 0.50006f, 0.38737f, 0.04930f, 0.04930f, 0.38737f, 0.38737f, -0.06339f, -0.17608f, -0.17608f, -0.06339f, -0.17608f, -0.51415f, 
-0.73953f, -0.96491f, -0.73953f, -0.62684f, -0.62684f, 0.16199f, 0.83813f, 1.06351f, 1.28889f, 1.17620f, 0.61275f, 0.38737f, 0.27468f, 0.16199f, 0.38737f, 0.50006f, 0.50006f, 0.38737f, 0.27468f, 0.38737f, 0.50006f, 0.61275f, 0.50006f, 0.50006f, 0.61275f, 0.61275f, 0.16199f, -0.06339f, -0.06339f, -0.06339f, 0.04930f, 0.04930f, -0.17608f, -0.40146f, -0.62684f, -0.73953f, -0.62684f, -0.17608f, 0.16199f, 0.61275f, 0.61275f, 0.72544f, 0.50006f, 0.50006f, 0.27468f, 0.27468f, 0.27468f, 0.38737f, 0.50006f, 0.50006f, 0.38737f, 0.38737f, 0.38737f, 0.27468f, 0.16199f, 0.04930f, 0.27468f, 0.50006f, 0.61275f, 0.72544f, 0.61275f, 0.50006f, -0.06339f, -0.28877f, -0.17608f, -0.06339f, 0.04930f, -0.06339f, -0.17608f, -0.40146f, -0.62684f, -0.62684f, -0.40146f, -0.06339f, 0.27468f, 0.72544f, 0.95082f, 1.06351f, 0.72544f, 0.50006f, 0.38737f, 0.27468f, 0.27468f, 0.27468f, 0.38737f, 0.38737f, 0.27468f, -0.06339f, 0.04930f, 0.27468f, 0.38737f, 0.61275f, 0.61275f, 0.27468f, 0.38737f, 0.50006f, 0.61275f, 0.38737f, -0.06339f, -0.17608f, 0.04930f, 0.27468f, 0.38737f, 0.16199f, -0.06339f, -0.28877f, -0.28877f, -0.40146f, -0.40146f, -0.17608f, 0.16199f, 0.38737f, 0.95082f, 1.17620f, 1.17620f, 0.95082f, 0.27468f, 0.04930f, -0.06339f, 0.16199f, 0.27468f, 0.04930f, -0.28877f, -0.28877f, -0.28877f, -0.17608f, -0.28877f, -0.73953f, -0.85222f, -0.85222f, -0.51415f, -0.17608f, 0.38737f, 0.61275f, 0.72544f, 0.83813f, 0.95082f, 1.06351f, 1.06351f, 0.95082f, 0.95082f, 0.50006f, 0.38737f, 0.04930f, 0.16199f, 0.27468f, -0.40146f, -1.19029f, -2.09181f, -1.86643f, -0.28877f, -2.65526f, -9.75473f, -2.65526f, -1.07760f, -0.06339f, 0.16199f, 0.61275f, 0.38737f, 0.04930f, 0.27468f, 0.04930f, -0.51415f, -0.62684f, -0.51415f, -0.51415f, -0.85222f, -1.19029f, -1.19029f, 0.33916f, 0.92881f, 0.92881f, 0.63399f, -0.25048f, -0.76643f, -1.65090f, -2.09313f, -2.09313f, -1.79831f, -1.65090f, -1.94572f, -2.01942f, -1.06125f, -0.39790f, 1.00251f, 3.72963f, 3.36110f, 1.07622f, 1.07622f, 0.85510f, 0.63399f, -0.17678f, 
-0.61901f, -1.35607f, -1.65090f, -1.72460f, -1.50348f, -1.28237f, -0.54531f, -0.25048f, 0.48657f, 1.14993f, 1.37104f, 1.66587f, 2.25551f, 1.73957f, 1.07622f, 0.70769f, 0.41287f, 0.19175f, -0.17678f, -0.61901f, -1.57719f, -1.79831f, -1.65090f, -1.28237f, -0.54531f, -0.17678f, 0.56028f, 1.07622f, 1.37104f, 1.59216f, 1.51846f, 1.73957f, 1.44475f, 0.48657f, 0.26546f, 0.19175f, 0.85510f, 0.56028f, -0.25048f, -0.61901f, -0.76643f, -0.91384f, -1.20866f, -0.98754f, -0.61901f, -0.02937f, 0.26546f, 1.07622f, 1.22363f, 1.81328f, 1.44475f, 0.04434f, -0.10307f, 0.33916f, -0.02937f, -0.25048f, -0.61901f, -0.54531f, -0.47160f, -0.32419f, -0.25048f, -0.02937f, 0.48657f, 0.85510f, 1.00251f, 1.37104f, 0.04434f, -0.69272f, -0.25048f, 0.56028f, 0.26546f, 0.04434f, 0.33916f, 0.26546f, -0.32419f, -0.32419f, -0.25048f, -0.17678f, 0.41287f, 0.78140f, 0.92881f, 1.44475f, 1.14993f, -0.10307f, -0.32419f, 0.19175f, 0.26546f, -0.02937f, -0.54531f, -0.76643f, -0.76643f, -0.76643f, -0.69272f, -0.54531f, 0.04434f, 0.19175f, 0.63399f, 0.70769f, 1.07622f, 0.85510f, 0.19175f, 0.11804f, 0.33916f, 0.63399f, 0.56028f, 0.11804f, -0.17678f, -0.47160f, -0.47160f, -0.61901f, -0.69272f, -0.47160f, -0.17678f, 0.04434f, 1.00251f, 1.22363f, 1.51846f, 1.51846f, 1.07622f, -0.32419f, -0.47160f, -0.02937f, -0.32419f, -0.54531f, -0.98754f, -1.13495f, -0.98754f, -0.98754f, -0.69272f, -0.39790f, 0.26546f, 0.70769f, 0.70769f, 0.92881f, 1.59216f, 0.41287f, 0.11804f, 0.33916f, 0.41287f, 0.56028f, 0.04434f, -0.17678f, -0.39790f, -0.61901f, -1.06125f, -1.06125f, -0.76643f, -0.54531f, 0.11804f, 0.85510f, 1.07622f, 1.51846f, 1.96069f, 1.51846f, 0.63399f, -0.54531f, 0.11804f, 0.11804f, -0.39790f, -0.61901f, -0.91384f, -0.98754f, -1.06125f, -0.98754f, -0.47160f, -0.17678f, 0.04434f, 0.56028f, 0.70769f, 0.92881f, 1.29734f, 1.73957f, 1.00251f, -0.69272f, -0.17678f, 0.11804f, 0.33916f, 0.04434f, -0.54531f, -0.84013f, -1.06125f, -1.28237f, -1.79831f, -1.57719f, -1.28237f, -0.54531f, 0.04434f, 0.63399f, 0.78140f, 1.44475f, 
1.66587f, 2.18181f, 2.03440f, 1.44475f, 0.33916f, 0.48657f, 0.63399f, 0.41287f, -0.32419f, -0.47160f, -0.39790f, -0.84013f, -1.06125f, -1.35607f, -1.42978f, -1.50348f, -1.57719f, -1.42978f, -1.28237f, -1.20866f, -0.84013f, -0.69272f, -0.69272f, -0.69272f, -0.17678f, 0.48657f, 1.51846f, 1.00251f, 0.56028f, -0.02937f, 0.33916f, 0.33916f, 0.33916f, -0.10307f, -0.54531f, -1.20866f, -1.50348f, -1.42978f, -1.42978f, -1.35607f, -1.06125f, -0.84013f, 0.04434f, 1.10447f, 1.23962f, 0.69900f, 0.42869f, -0.11193f, -0.24708f, -0.65255f, -0.78770f, -1.05801f, -1.19317f, -0.92286f, -0.24708f, -0.24708f, 0.56385f, 0.96931f, 0.96931f, 0.29354f, 1.64509f, 2.18571f, 0.69900f, -0.51739f, -0.92286f, -1.32832f, -1.19317f, -0.65255f, -0.24708f, -0.24708f, -0.38224f, -0.65255f, -1.32832f, -1.32832f, -1.19317f, -1.32832f, -1.59863f, -2.13925f, -2.27441f, -2.13925f, -1.32832f, -0.92286f, -1.32832f, -0.78770f, -0.38224f, -0.38224f, -0.24708f, -0.11193f, 0.02323f, 0.15838f, 0.83416f, 1.23962f, 1.91540f, 2.45602f, 2.86148f, 3.40211f, 3.40211f, 2.59117f, 2.45602f, 1.64509f, 1.23962f, 0.42869f, 0.02323f, -0.11193f, -0.51739f, -0.38224f, -0.24708f, -0.51739f, -0.92286f, -0.92286f, -0.65255f, -0.11193f, 0.15838f, 0.69900f, 0.69900f, -0.11193f, -0.11193f, 0.69900f, 0.69900f, 0.15838f, 0.29354f, 0.15838f, 0.15838f, 0.29354f, 0.42869f, 0.29354f, 0.15838f, 0.02323f, 0.15838f, 0.29354f, 0.15838f, 0.56385f, 0.69900f, 0.56385f, 0.56385f, 0.42869f, 0.02323f, -0.11193f, 0.15838f, 0.15838f, -0.24708f, -0.51739f, -0.92286f, -1.05801f, -0.78770f, -0.65255f, -0.51739f, -0.51739f, -0.38224f, 0.29354f, 0.29354f, 0.42869f, 0.29354f, 0.15838f, 0.02323f, 0.15838f, 0.15838f, 0.02323f, -0.51739f, -0.51739f, -0.11193f, 0.02323f, 0.15838f, 0.15838f, 0.15838f, 1.10447f, 0.83416f, 0.42869f, 0.29354f, 0.02323f, 0.02323f, -0.24708f, -0.38224f, -0.38224f, -0.65255f, -0.78770f, -1.19317f, -0.92286f, -0.65255f, -0.51739f, -0.24708f, -0.24708f, -0.24708f, -0.11193f, 0.29354f, 1.10447f, 0.83416f, 0.69900f, 0.56385f, 0.42869f, 
0.29354f, 0.29354f, 0.02323f, 0.02323f, -0.24708f, -0.11193f, 0.29354f, 0.42869f, 0.56385f, 0.42869f, 0.69900f, 1.23962f, 0.96931f, 0.69900f, 0.56385f, 0.29354f, -0.24708f, -0.38224f, -0.38224f, -0.38224f, -0.92286f, -1.05801f, -1.32832f, -1.32832f, -0.92286f, -0.51739f, -0.38224f, -0.11193f, 0.02323f, 0.02323f, 0.29354f, 0.83416f, 0.42869f, 0.29354f, 0.15838f, 0.15838f, 0.15838f, 0.15838f, 0.15838f, 0.02323f, 0.02323f, 0.29354f, 0.42869f, 0.56385f, 0.56385f, 0.56385f, 0.56385f, 1.10447f, 1.37478f, 0.56385f, 0.42869f, 0.42869f, 0.02323f, -0.24708f, -0.51739f, -0.65255f, -0.65255f, -0.92286f, -1.46348f, -1.46348f, -1.32832f, -0.92286f, -0.78770f, -0.78770f, -0.65255f, -0.24708f, -0.11193f, 0.02323f, -0.51739f, -0.51739f, 0.29354f, 0.29354f, 0.42869f, 0.29354f, -0.24708f, -0.24708f, -0.24708f, -0.24708f, -0.38224f, -0.38224f, -0.51739f, -0.38224f, -0.24708f, 0.15838f, 0.69900f, 0.96931f, 1.10447f, 0.83416f, 0.83416f, 0.83416f, 1.23962f, 2.18571f, 7.18645f, 1.78024f, 0.29354f, -1.46348f, -2.00410f, -1.59863f, -1.59863f, -1.05801f, -0.92286f, -1.32832f, -1.19317f, -1.05801f, -1.05801f, -1.32832f, -1.19317f, -0.92286f, -0.78770f,}; +float xout[] = { 0.39249f, 0.48451f, 0.73383f, 0.73852f, 1.10356f, 1.51326f, 1.76721f, 1.83251f, 1.31946f, 0.81513f, 0.02872f, 0.09573f, -0.31142f, -1.19899f, -1.17897f, -1.59128f, -1.68418f, -1.58064f, -1.61839f, -0.72027f, -0.35841f, 0.28717f, 1.03874f, 1.20660f, 1.62155f, 1.03099f, 0.66879f, 0.72900f, 0.17669f, -0.18862f, -0.35105f, -0.40824f, -0.50307f, -0.60400f, -0.51233f, -0.42268f, -0.19640f, -0.11939f, -0.08316f, 0.09401f, -0.03073f, 0.05474f, 0.00948f, -0.14916f, -0.29699f, -0.46144f, -0.41988f, -0.36489f, -0.28950f, -0.25506f, -0.41477f, -0.55599f, -0.60255f, -0.37059f, -0.06804f, 0.10718f, 0.32548f, 0.28716f, 0.30881f, 0.11494f, 0.19378f, 0.36549f, 0.49032f, 0.39248f, -0.11330f, -0.16019f, -0.25388f, -0.31549f, -0.40915f, -0.69023f, -0.56602f, -0.27259f, -0.03218f, 0.30087f, 0.40573f, 0.69364f, 0.80954f, 0.71345f, 0.81998f, 
0.72108f, 0.49698f, 0.16719f, -0.10408f, -0.19166f, -0.49458f, -0.48501f, -0.35172f, -0.08854f, 0.20221f, 0.22341f, 0.01684f, -0.12514f, -0.43663f, -0.55029f, -0.36411f, -0.09131f, -0.07201f, -0.16178f, -0.05504f, 0.10212f, -0.12362f, -0.37001f, -0.33644f, -0.23595f, -0.00672f, 0.11853f, 0.06942f, 0.20967f, 0.21001f, 0.23882f, 0.36883f, 0.66184f, 0.71750f, 0.42784f, 0.21827f, 0.07058f, -0.03356f, -0.28731f, -0.41314f, -0.34149f, -0.14244f, 0.07088f, 0.01262f, -0.05669f, -0.19078f, -0.33944f, -0.39268f, -0.18032f, 0.07529f, 0.33591f, 0.39141f, 0.32471f, 0.16026f, -0.03295f, -0.25818f, -0.64155f, -0.79985f, -0.93584f, -0.66718f, -0.20500f, 0.17531f, 0.48883f, 0.47296f, 0.40285f, 0.35651f, 0.18949f, 0.29055f, 0.61037f, 0.46581f, 0.27488f, -0.01284f, -0.15280f, -0.17538f, -0.58741f, -0.57524f, -0.47580f, -0.21400f, -0.06589f, -0.10277f, -0.08980f, -0.12787f, -0.20503f, -0.26596f, -0.03148f, 0.32823f, 0.54433f, 0.48218f, 0.34929f, 0.02126f, -0.20263f, -0.49552f, -0.67534f, -0.62950f, -0.68360f, -0.35470f, -0.08379f, 0.14763f, 0.26742f, 0.29058f, 0.28133f, 0.20008f, 0.14494f, 0.41903f, 0.54173f, 0.52405f, 0.29799f, -0.07209f, -0.14607f, -0.46453f, -0.58305f, -0.57501f, -0.69478f, -0.46508f, -0.23234f, 0.07366f, 0.14385f, 0.05992f, -0.20362f, -0.39168f, -0.48250f, -0.07809f, 0.42712f, 0.71610f, 0.81517f, 0.82542f, 0.97280f, 0.77453f, 0.36336f, 0.04949f, -0.26119f, -0.45929f, -0.60788f, -0.69065f, -0.31942f, -0.13976f, 0.09962f, 0.01459f, -0.23469f, -0.43849f, -0.63786f, -0.63111f, -0.59381f, -0.51142f, -0.44169f, -0.36079f, 0.11435f, 0.52288f, 0.56995f, 0.72460f, 1.52536f, 1.96464f, 1.74206f, 0.39785f, 0.85132f, 0.47575f, -0.53417f, 0.15703f, 0.37793f, 0.68815f, 0.26998f, -0.36431f, -0.00991f, -1.89429f, -1.31357f, -0.37315f, -0.73792f, 0.22693f, -0.13869f, -0.04304f, 0.91745f, 0.28913f, 0.00824f, -0.09014f, -0.22067f, -0.39314f, -0.55479f,}; +float ppg[] = { 0.46327f, 0.32949f, 0.07201f, -0.29688f, -0.62103f, -0.64516f, -0.27495f, 0.24132f, 0.54530f, 0.49617f, 0.18035f, 
-0.26311f, -0.72587f, -1.14389f, -1.46848f, -1.61454f, -1.46102f, -1.00265f, -0.46707f, -0.07844f, 0.23123f, 0.65189f, 1.15061f, 1.54846f, 1.77962f, 1.84234f, 1.68487f, 1.28177f, 0.72119f, 0.13912f, -0.37057f, -0.75262f, -0.92106f, -0.80702f, -0.53024f, -0.32013f, -0.19117f, 0.04964f, 0.44617f, 0.83831f, 1.10851f, 1.21904f, 1.13570f, 0.90103f, 0.59486f, 0.20316f, -0.28679f, -0.74780f, -1.03247f, -1.10836f, -1.04695f, -0.96712f, -0.96142f, -0.95659f, -0.78377f, -0.44865f, -0.14380f, 0.03078f, 0.13561f, 0.21939f, 0.30010f, 0.43695f, 0.63083f, 0.75716f, 0.72338f, 0.56635f, 0.33563f, 0.00841f, -0.39163f, -0.67148f, -0.58419f, -0.11046f, 0.53038f, 1.13658f, 1.57872f, 1.68092f, 1.36379f, 0.82866f, 0.33212f, -0.07932f, -0.46620f, -0.83991f, -1.20091f, -1.53734f, -1.77377f, -1.83781f, -1.73824f, -1.49787f, -1.13511f, -0.75526f, -0.49339f, -0.37189f, -0.32057f, -0.27758f, -0.18547f, -0.01659f, 0.15754f, 0.23167f, 0.19878f, 0.11763f, 0.00490f, -0.15038f, -0.30127f, -0.28197f, 0.12903f, 0.97034f, 1.93402f, 2.56521f, 2.62969f, 2.21650f, 1.58793f, 0.99797f, 0.55670f, 0.18079f, -0.25433f, -0.73201f, -1.14652f, -1.47374f, -1.73648f, -1.85492f, -1.69262f, -1.25530f, -0.75262f, -0.41400f, -0.28372f, -0.25390f, -0.23898f, -0.23591f, -0.24337f, -0.23372f, -0.20477f, -0.17889f, -0.16924f, -0.19249f, -0.29381f, -0.44251f, -0.44909f, -0.11573f, 0.54179f, 1.30633f, 1.97086f, 2.42310f, 2.54723f, 2.23185f, 1.54012f, 0.72251f, -0.00212f, -0.55436f, -0.94782f, -1.27811f, -1.66060f, -2.05800f, -2.24311f, -2.00405f, -1.36847f, -0.59866f, 0.01893f, 0.34660f, 0.38344f, 0.21808f, 0.01586f, -0.09994f, -0.15038f, -0.19249f, -0.21223f, -0.21924f, -0.30873f, -0.49646f, -0.61182f, -0.48769f, -0.15257f, 0.26633f, 0.70189f, 1.12386f, 1.46906f, 1.69321f, 1.80988f, 1.82217f, 1.65329f, 1.23659f, 0.63917f, -0.00124f, -0.61752f, -1.16319f, -1.48558f, -1.43953f, -1.09388f, -0.68244f, -0.39163f, -0.23328f, -0.11704f, 0.00753f, 0.09087f, 0.11280f, 0.12552f, 0.13561f, 0.09613f, -0.01616f, -0.26618f, -0.68815f, 
-0.99387f, -0.72192f, 0.13210f, 1.01376f, 1.45108f, 1.42871f, 1.11026f, 0.58521f, -0.03677f, -0.55261f, -0.79868f, -0.76710f, -0.55568f, -0.30302f, -0.15871f, -0.13897f, -0.00300f, 0.50143f, 1.26905f, 1.93840f, 2.29326f, 2.33712f, 2.12044f, 1.71207f, 1.24931f, 0.88787f, 0.68522f, 0.52337f, 0.20053f, -0.38417f, -1.12722f, -1.75578f, -2.00668f, -1.83562f, -1.42110f, -0.94255f, -0.41356f, 0.19527f, 0.57732f, 0.19132f, -0.83114f, -1.50488f, -1.29566f, -0.74561f, -0.39163f, -0.17714f, -0.09511f, -0.24337f, -0.29601f, 0.11807f, 0.73567f, 1.00543f, 0.84269f, 0.53345f, 0.17465f, -0.34645f,}; +float ppgf[] = { 0.07078f, -0.15502f, -0.66182f, -1.03541f, -1.72459f, -2.15842f, -2.04216f, -1.59119f, -0.77417f, -0.31896f, 0.15163f, -0.35884f, -0.41444f, 0.05510f, -0.28951f, -0.02326f, 0.22316f, 0.57799f, 1.15132f, 0.64183f, 0.58965f, 0.36472f, 0.11188f, 0.34186f, 0.15807f, 0.81135f, 1.01608f, 0.55277f, 0.54450f, 0.32774f, -0.01952f, -0.34439f, -0.41799f, -0.20301f, -0.01791f, 0.10255f, 0.00523f, 0.16903f, 0.52932f, 0.74430f, 1.13924f, 1.16430f, 1.12622f, 1.05019f, 0.89185f, 0.66460f, 0.13308f, -0.38291f, -0.74298f, -0.85330f, -0.63218f, -0.41113f, -0.35886f, -0.58600f, -0.71572f, -0.55583f, -0.46928f, -0.25638f, -0.17320f, 0.10445f, 0.10632f, 0.07146f, 0.14051f, 0.36468f, 0.83668f, 0.72655f, 0.58951f, 0.32389f, 0.01752f, 0.01875f, -0.01817f, 0.16213f, 0.56257f, 0.83571f, 1.17299f, 0.98729f, 0.55425f, 0.11521f, -0.48786f, -0.80040f, -0.96317f, -1.00710f, -1.09683f, -1.34569f, -1.27919f, -1.35280f, -1.38652f, -1.40933f, -1.33732f, -0.97867f, -0.51023f, -0.24675f, 0.11606f, 0.27271f, 0.17864f, 0.07471f, 0.22955f, 0.39345f, 0.25381f, 0.01551f, 0.12852f, 0.21963f, 0.03517f, -0.04602f, 0.13576f, 0.85180f, 1.86460f, 2.35555f, 2.41968f, 1.97768f, 1.21910f, 0.33612f, -0.16080f, -0.24705f, -0.47261f, -0.80259f, -1.11296f, -1.18643f, -1.32334f, -1.51343f, -1.55018f, -1.32618f, -0.76524f, -0.35730f, -0.09294f, 0.08555f, 0.15369f, -0.05559f, -0.31866f, -0.56963f, -0.59618f, -0.50360f, 
-0.32950f, -0.15953f, -0.03563f, 0.19904f, 0.35076f, 0.82012f, 1.20897f, 1.51133f, 1.79555f, 1.93426f, 2.07427f, 1.82900f, 1.18362f, 0.53301f, -0.29267f, -1.16473f, -1.41363f, -1.55299f, -1.64776f, -1.90520f, -2.06773f, -1.41664f, -0.79323f, -0.12286f, 0.23294f, 0.41249f, 0.48621f, 0.30788f, 0.14374f, 0.10509f, 0.11559f, -0.16101f, -0.54046f, -0.76358f, -0.79090f, -0.84575f, -0.63308f, -0.28506f, 0.34295f, 0.94167f, 1.33139f, 1.80746f, 1.82377f, 1.77700f, 1.66225f, 1.55474f, 1.36271f, 0.95526f, 0.43909f, -0.14618f, -1.03655f, -1.70492f, -2.00963f, -1.73752f, -1.02179f, -0.53637f, 0.07291f, 0.34977f, 0.45797f, 0.70231f, 0.55596f, 0.34514f, 0.05186f, -0.00824f, 0.03621f, 0.18746f, 0.12550f, -0.20564f, -0.91578f, -1.14904f, -0.58400f, 0.19859f, 0.62565f, 0.45591f, 0.33573f, 0.22186f, -0.08626f, -0.29142f, -0.33939f, -0.15922f, 0.13497f, 0.01640f, -0.01895f, -0.23859f, -0.01758f, 0.73613f, 1.70753f, 2.57627f, 2.92437f, 2.93094f, 2.63186f, 2.15376f, 1.61009f, 0.77352f, 0.16235f, -0.04659f, -0.52407f, -1.90953f, -3.09186f, -3.49785f, -2.40454f, -2.68693f, -1.89685f, -0.40838f, -0.57059f, -0.18267f, -0.11083f, -0.07867f, -0.46683f, -1.49498f, 0.59864f, 0.56797f, -0.01848f, 0.56078f, -0.32204f, -0.10468f, -0.25296f, -0.79938f, 0.44654f, 0.99719f, 0.93283f, 0.75412f, 0.56779f, 0.20835f,}; + +float result[256]; + +float layer1Output[3*256]; + +#endif +// clang-format on + +int main() { + printf("Running cnn test\n"); + + // Create the CNN + printf("Creating CNN\n"); + Conv2DLayer layer1; + layer1.dim = (Dim2D){3u, 21u}; + layer1.padding = SAME; + layer1.weightsFloat = weights1; + layer1.weightsFxp = NULL; + + Conv2DLayer layer2; + layer2.dim = (Dim2D){3u, 1u}; + layer2.padding = VALID; + layer2.weightsFloat = weights2; + layer2.weightsFxp = NULL; + + Cnn cnn; + cnn.layer1 = &layer1; + cnn.layer2 = &layer2; + cnn.inputDim = (Dim2D){3u, 256u}; + cnn.outputDim = (Dim2D){1u, 256u}; + cnn.layer1Output = layer1Output; + cnn.layer1OutputFxp = NULL; + + // Forward pass + 
printf("Forward pass\n"); + Cnn_forwardFloat(&cnn, xin, result); + // Cnn_forwardFxp(&cnn, inp_fxp, result_fxp); + + printf("Comparing results\n"); + compareVectorsFloat(result, xout, 15, COMP_PREC); + // compareVectorsFxp(result_fxp, res_fxp, 15, COMP_PREC_I32 * 10); + printf("Test passed\n"); + + return 0; +} \ No newline at end of file diff --git a/sw/applications/l_cnn_static_realsample/utils.h b/sw/applications/l_cnn_static_realsample/utils.h new file mode 100644 index 000000000..bcaed0d25 --- /dev/null +++ b/sw/applications/l_cnn_static_realsample/utils.h @@ -0,0 +1,124 @@ +#ifndef UTILS_H +#define UTILS_H + +#include +#include +#include + +// Define SIMULATION if you want to disable printing +// #define SIMULATION +#define TARGET + +#ifdef SIMULATION +#pragma message ("SIMULATION environment") +#endif +#ifdef TARGET +#pragma message ("TARGET environment") +#endif + +// Enable or disable printing +#ifndef SIMULATION +#define PRINTF(...) printf(__VA_ARGS__) +#else +#define PRINTF(...) 
+#endif + +static float maxdiff = 0; +static int32_t maxdiffxp = 0; + +// Assert functions, always print if failing +void assert_closef_si(float a, float b, float prec, int idx) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiff) { + maxdiff = diff; + if (maxdiff > 0.0001) + PRINTF("Max diff float: %f\n", maxdiff); + } + if (diff > prec) { + printf("AF %d %f %f %d\n", __LINE__, a, b, idx); + exit(EXIT_FAILURE); + } +} + +void assert_closei32_si(int32_t a, int32_t b, int32_t prec, int idx) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiffxp) { + maxdiffxp = diff; + if (maxdiffxp > 1) + PRINTF("Max diff fxp: %d\n", maxdiffxp); + } + if (diff > prec) { + printf("AI %d %d %d %d\n", __LINE__, a, b, idx); + exit(EXIT_FAILURE); + } +} + +void assert_closef_s(float a, float b, float prec) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiff) { + maxdiff = diff; + if (maxdiff > 0.0001) + PRINTF("Max diff float: %f\n", maxdiff); + } + if (diff > prec) { + printf("AF %d %f %f\n", __LINE__, a, b); + exit(EXIT_FAILURE); + } +} + +void assert_closei32_s(int32_t a, int32_t b, int32_t prec) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiffxp) { + maxdiffxp = diff; + if (maxdiffxp > 1) + PRINTF("Max diff fxp: %d\n", maxdiffxp); + } + if (diff > prec) { + printf("AI %d %d %d\n", __LINE__, a, b); + exit(EXIT_FAILURE); + } +} + +void assert_closef(float a, float b, float prec) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { + printf("AF %d %f %f\n", __LINE__, a, b); + exit(EXIT_FAILURE); + } +} + +void assert_closei32(int32_t a, int32_t b, int32_t prec) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { + printf("AI %d %d %d\n", __LINE__, a, b); + exit(EXIT_FAILURE); + } +} + +// Vector export for plots +#if defined(SIMULATION) || defined(TARGET) +#define VECTOR_EXPORT(...) 
+#else +void vectorExport(float* a, int size, char filename[]) { + FILE *filePointer; + filePointer = fopen(filename, "w"); + if (filePointer == NULL) { + printf("Failed to create file.\n"); + return; + } + for (int i=0; i Date: Mon, 13 May 2024 15:45:19 +0200 Subject: [PATCH 17/27] Add vivado to env --- .env | 1 + 1 file changed, 1 insertion(+) diff --git a/.env b/.env index d73f6b18b..4e4b629ca 100644 --- a/.env +++ b/.env @@ -2,3 +2,4 @@ conda activate core-v-mini-mcu export RISCV=/home/linus/tools/riscv export VERILATOR_VERSION=4.210 export PATH=/home/$USER/tools/verilator/$VERILATOR_VERSION/bin:$PATH +source /home/linus/Xilinx/Vivado/2022.2/settings64.sh From e1cca3f2bb994cf57c6eeb342970d8d99596386f Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Mon, 13 May 2024 16:29:30 +0200 Subject: [PATCH 18/27] Test with real sample works --- sw/applications/l_cnn_static_realsample/cnn.h | 4 +- .../l_cnn_static_realsample/main.c | 42 +++++++++++++++---- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/sw/applications/l_cnn_static_realsample/cnn.h b/sw/applications/l_cnn_static_realsample/cnn.h index 857c5a065..0168884de 100644 --- a/sw/applications/l_cnn_static_realsample/cnn.h +++ b/sw/applications/l_cnn_static_realsample/cnn.h @@ -32,8 +32,8 @@ typedef struct __complex_t { void Cnn_forwardFxp(CnnHandle self, fxp32* input, fxp32* output); void Cnn_forwardFloat(CnnHandle self, float* input, float* output); -// void Cnn_predictFxp(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output); -// void Cnn_predictFloat(CnnHandle self, float* acc, float* ppg, float* output); +void Cnn_predictFxp(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output); +void Cnn_predictFloat(CnnHandle self, float* acc, float* ppg, float* output); // float Cnn_sampleLoss(CnnHandle self, complex_t* ypred, complex_t* ytrue); diff --git a/sw/applications/l_cnn_static_realsample/main.c b/sw/applications/l_cnn_static_realsample/main.c index b36062ce4..5b5c224a2 100644 --- 
a/sw/applications/l_cnn_static_realsample/main.c +++ b/sw/applications/l_cnn_static_realsample/main.c @@ -12,13 +12,13 @@ void compareVectorsFloat(float* a, float* b, int size, float prec) { for (int i = 0; i < size; ++i) { - assert_closef(a[i], b[i], prec); + assert_closef_si(a[i], b[i], prec, i); } } void compareVectorsFxp(fxp32* a, fxp32* b, int size, float prec) { for (int i = 0; i < size; ++i) { - assert_closei32(a[i], b[i], prec); + assert_closei32_si(a[i], b[i], prec, i); } } @@ -32,11 +32,20 @@ float xin[] = { -1.30298f, -0.96491f, -0.51415f, -0.17608f, 0.27468f, 0.50006f, float xout[] = { 0.39249f, 0.48451f, 0.73383f, 0.73852f, 1.10356f, 1.51326f, 1.76721f, 1.83251f, 1.31946f, 0.81513f, 0.02872f, 0.09573f, -0.31142f, -1.19899f, -1.17897f, -1.59128f, -1.68418f, -1.58064f, -1.61839f, -0.72027f, -0.35841f, 0.28717f, 1.03874f, 1.20660f, 1.62155f, 1.03099f, 0.66879f, 0.72900f, 0.17669f, -0.18862f, -0.35105f, -0.40824f, -0.50307f, -0.60400f, -0.51233f, -0.42268f, -0.19640f, -0.11939f, -0.08316f, 0.09401f, -0.03073f, 0.05474f, 0.00948f, -0.14916f, -0.29699f, -0.46144f, -0.41988f, -0.36489f, -0.28950f, -0.25506f, -0.41477f, -0.55599f, -0.60255f, -0.37059f, -0.06804f, 0.10718f, 0.32548f, 0.28716f, 0.30881f, 0.11494f, 0.19378f, 0.36549f, 0.49032f, 0.39248f, -0.11330f, -0.16019f, -0.25388f, -0.31549f, -0.40915f, -0.69023f, -0.56602f, -0.27259f, -0.03218f, 0.30087f, 0.40573f, 0.69364f, 0.80954f, 0.71345f, 0.81998f, 0.72108f, 0.49698f, 0.16719f, -0.10408f, -0.19166f, -0.49458f, -0.48501f, -0.35172f, -0.08854f, 0.20221f, 0.22341f, 0.01684f, -0.12514f, -0.43663f, -0.55029f, -0.36411f, -0.09131f, -0.07201f, -0.16178f, -0.05504f, 0.10212f, -0.12362f, -0.37001f, -0.33644f, -0.23595f, -0.00672f, 0.11853f, 0.06942f, 0.20967f, 0.21001f, 0.23882f, 0.36883f, 0.66184f, 0.71750f, 0.42784f, 0.21827f, 0.07058f, -0.03356f, -0.28731f, -0.41314f, -0.34149f, -0.14244f, 0.07088f, 0.01262f, -0.05669f, -0.19078f, -0.33944f, -0.39268f, -0.18032f, 0.07529f, 0.33591f, 0.39141f, 0.32471f, 
0.16026f, -0.03295f, -0.25818f, -0.64155f, -0.79985f, -0.93584f, -0.66718f, -0.20500f, 0.17531f, 0.48883f, 0.47296f, 0.40285f, 0.35651f, 0.18949f, 0.29055f, 0.61037f, 0.46581f, 0.27488f, -0.01284f, -0.15280f, -0.17538f, -0.58741f, -0.57524f, -0.47580f, -0.21400f, -0.06589f, -0.10277f, -0.08980f, -0.12787f, -0.20503f, -0.26596f, -0.03148f, 0.32823f, 0.54433f, 0.48218f, 0.34929f, 0.02126f, -0.20263f, -0.49552f, -0.67534f, -0.62950f, -0.68360f, -0.35470f, -0.08379f, 0.14763f, 0.26742f, 0.29058f, 0.28133f, 0.20008f, 0.14494f, 0.41903f, 0.54173f, 0.52405f, 0.29799f, -0.07209f, -0.14607f, -0.46453f, -0.58305f, -0.57501f, -0.69478f, -0.46508f, -0.23234f, 0.07366f, 0.14385f, 0.05992f, -0.20362f, -0.39168f, -0.48250f, -0.07809f, 0.42712f, 0.71610f, 0.81517f, 0.82542f, 0.97280f, 0.77453f, 0.36336f, 0.04949f, -0.26119f, -0.45929f, -0.60788f, -0.69065f, -0.31942f, -0.13976f, 0.09962f, 0.01459f, -0.23469f, -0.43849f, -0.63786f, -0.63111f, -0.59381f, -0.51142f, -0.44169f, -0.36079f, 0.11435f, 0.52288f, 0.56995f, 0.72460f, 1.52536f, 1.96464f, 1.74206f, 0.39785f, 0.85132f, 0.47575f, -0.53417f, 0.15703f, 0.37793f, 0.68815f, 0.26998f, -0.36431f, -0.00991f, -1.89429f, -1.31357f, -0.37315f, -0.73792f, 0.22693f, -0.13869f, -0.04304f, 0.91745f, 0.28913f, 0.00824f, -0.09014f, -0.22067f, -0.39314f, -0.55479f,}; float ppg[] = { 0.46327f, 0.32949f, 0.07201f, -0.29688f, -0.62103f, -0.64516f, -0.27495f, 0.24132f, 0.54530f, 0.49617f, 0.18035f, -0.26311f, -0.72587f, -1.14389f, -1.46848f, -1.61454f, -1.46102f, -1.00265f, -0.46707f, -0.07844f, 0.23123f, 0.65189f, 1.15061f, 1.54846f, 1.77962f, 1.84234f, 1.68487f, 1.28177f, 0.72119f, 0.13912f, -0.37057f, -0.75262f, -0.92106f, -0.80702f, -0.53024f, -0.32013f, -0.19117f, 0.04964f, 0.44617f, 0.83831f, 1.10851f, 1.21904f, 1.13570f, 0.90103f, 0.59486f, 0.20316f, -0.28679f, -0.74780f, -1.03247f, -1.10836f, -1.04695f, -0.96712f, -0.96142f, -0.95659f, -0.78377f, -0.44865f, -0.14380f, 0.03078f, 0.13561f, 0.21939f, 0.30010f, 0.43695f, 0.63083f, 0.75716f, 
0.72338f, 0.56635f, 0.33563f, 0.00841f, -0.39163f, -0.67148f, -0.58419f, -0.11046f, 0.53038f, 1.13658f, 1.57872f, 1.68092f, 1.36379f, 0.82866f, 0.33212f, -0.07932f, -0.46620f, -0.83991f, -1.20091f, -1.53734f, -1.77377f, -1.83781f, -1.73824f, -1.49787f, -1.13511f, -0.75526f, -0.49339f, -0.37189f, -0.32057f, -0.27758f, -0.18547f, -0.01659f, 0.15754f, 0.23167f, 0.19878f, 0.11763f, 0.00490f, -0.15038f, -0.30127f, -0.28197f, 0.12903f, 0.97034f, 1.93402f, 2.56521f, 2.62969f, 2.21650f, 1.58793f, 0.99797f, 0.55670f, 0.18079f, -0.25433f, -0.73201f, -1.14652f, -1.47374f, -1.73648f, -1.85492f, -1.69262f, -1.25530f, -0.75262f, -0.41400f, -0.28372f, -0.25390f, -0.23898f, -0.23591f, -0.24337f, -0.23372f, -0.20477f, -0.17889f, -0.16924f, -0.19249f, -0.29381f, -0.44251f, -0.44909f, -0.11573f, 0.54179f, 1.30633f, 1.97086f, 2.42310f, 2.54723f, 2.23185f, 1.54012f, 0.72251f, -0.00212f, -0.55436f, -0.94782f, -1.27811f, -1.66060f, -2.05800f, -2.24311f, -2.00405f, -1.36847f, -0.59866f, 0.01893f, 0.34660f, 0.38344f, 0.21808f, 0.01586f, -0.09994f, -0.15038f, -0.19249f, -0.21223f, -0.21924f, -0.30873f, -0.49646f, -0.61182f, -0.48769f, -0.15257f, 0.26633f, 0.70189f, 1.12386f, 1.46906f, 1.69321f, 1.80988f, 1.82217f, 1.65329f, 1.23659f, 0.63917f, -0.00124f, -0.61752f, -1.16319f, -1.48558f, -1.43953f, -1.09388f, -0.68244f, -0.39163f, -0.23328f, -0.11704f, 0.00753f, 0.09087f, 0.11280f, 0.12552f, 0.13561f, 0.09613f, -0.01616f, -0.26618f, -0.68815f, -0.99387f, -0.72192f, 0.13210f, 1.01376f, 1.45108f, 1.42871f, 1.11026f, 0.58521f, -0.03677f, -0.55261f, -0.79868f, -0.76710f, -0.55568f, -0.30302f, -0.15871f, -0.13897f, -0.00300f, 0.50143f, 1.26905f, 1.93840f, 2.29326f, 2.33712f, 2.12044f, 1.71207f, 1.24931f, 0.88787f, 0.68522f, 0.52337f, 0.20053f, -0.38417f, -1.12722f, -1.75578f, -2.00668f, -1.83562f, -1.42110f, -0.94255f, -0.41356f, 0.19527f, 0.57732f, 0.19132f, -0.83114f, -1.50488f, -1.29566f, -0.74561f, -0.39163f, -0.17714f, -0.09511f, -0.24337f, -0.29601f, 0.11807f, 0.73567f, 1.00543f, 0.84269f, 
0.53345f, 0.17465f, -0.34645f,}; float ppgf[] = { 0.07078f, -0.15502f, -0.66182f, -1.03541f, -1.72459f, -2.15842f, -2.04216f, -1.59119f, -0.77417f, -0.31896f, 0.15163f, -0.35884f, -0.41444f, 0.05510f, -0.28951f, -0.02326f, 0.22316f, 0.57799f, 1.15132f, 0.64183f, 0.58965f, 0.36472f, 0.11188f, 0.34186f, 0.15807f, 0.81135f, 1.01608f, 0.55277f, 0.54450f, 0.32774f, -0.01952f, -0.34439f, -0.41799f, -0.20301f, -0.01791f, 0.10255f, 0.00523f, 0.16903f, 0.52932f, 0.74430f, 1.13924f, 1.16430f, 1.12622f, 1.05019f, 0.89185f, 0.66460f, 0.13308f, -0.38291f, -0.74298f, -0.85330f, -0.63218f, -0.41113f, -0.35886f, -0.58600f, -0.71572f, -0.55583f, -0.46928f, -0.25638f, -0.17320f, 0.10445f, 0.10632f, 0.07146f, 0.14051f, 0.36468f, 0.83668f, 0.72655f, 0.58951f, 0.32389f, 0.01752f, 0.01875f, -0.01817f, 0.16213f, 0.56257f, 0.83571f, 1.17299f, 0.98729f, 0.55425f, 0.11521f, -0.48786f, -0.80040f, -0.96317f, -1.00710f, -1.09683f, -1.34569f, -1.27919f, -1.35280f, -1.38652f, -1.40933f, -1.33732f, -0.97867f, -0.51023f, -0.24675f, 0.11606f, 0.27271f, 0.17864f, 0.07471f, 0.22955f, 0.39345f, 0.25381f, 0.01551f, 0.12852f, 0.21963f, 0.03517f, -0.04602f, 0.13576f, 0.85180f, 1.86460f, 2.35555f, 2.41968f, 1.97768f, 1.21910f, 0.33612f, -0.16080f, -0.24705f, -0.47261f, -0.80259f, -1.11296f, -1.18643f, -1.32334f, -1.51343f, -1.55018f, -1.32618f, -0.76524f, -0.35730f, -0.09294f, 0.08555f, 0.15369f, -0.05559f, -0.31866f, -0.56963f, -0.59618f, -0.50360f, -0.32950f, -0.15953f, -0.03563f, 0.19904f, 0.35076f, 0.82012f, 1.20897f, 1.51133f, 1.79555f, 1.93426f, 2.07427f, 1.82900f, 1.18362f, 0.53301f, -0.29267f, -1.16473f, -1.41363f, -1.55299f, -1.64776f, -1.90520f, -2.06773f, -1.41664f, -0.79323f, -0.12286f, 0.23294f, 0.41249f, 0.48621f, 0.30788f, 0.14374f, 0.10509f, 0.11559f, -0.16101f, -0.54046f, -0.76358f, -0.79090f, -0.84575f, -0.63308f, -0.28506f, 0.34295f, 0.94167f, 1.33139f, 1.80746f, 1.82377f, 1.77700f, 1.66225f, 1.55474f, 1.36271f, 0.95526f, 0.43909f, -0.14618f, -1.03655f, -1.70492f, -2.00963f, -1.73752f, 
-1.02179f, -0.53637f, 0.07291f, 0.34977f, 0.45797f, 0.70231f, 0.55596f, 0.34514f, 0.05186f, -0.00824f, 0.03621f, 0.18746f, 0.12550f, -0.20564f, -0.91578f, -1.14904f, -0.58400f, 0.19859f, 0.62565f, 0.45591f, 0.33573f, 0.22186f, -0.08626f, -0.29142f, -0.33939f, -0.15922f, 0.13497f, 0.01640f, -0.01895f, -0.23859f, -0.01758f, 0.73613f, 1.70753f, 2.57627f, 2.92437f, 2.93094f, 2.63186f, 2.15376f, 1.61009f, 0.77352f, 0.16235f, -0.04659f, -0.52407f, -1.90953f, -3.09186f, -3.49785f, -2.40454f, -2.68693f, -1.89685f, -0.40838f, -0.57059f, -0.18267f, -0.11083f, -0.07867f, -0.46683f, -1.49498f, 0.59864f, 0.56797f, -0.01848f, 0.56078f, -0.32204f, -0.10468f, -0.25296f, -0.79938f, 0.44654f, 0.99719f, 0.93283f, 0.75412f, 0.56779f, 0.20835f,}; +int32_t xin_fxp[] = { -10930188, -8094251, -4313003, -1477066, 2304182, 4194807, 413558, -6203627, -8094251, -16602062, -22273936, -19437998, -21328624, -24164560, -27000496, -23219248, -19437998, -18492686, 9866681, 12702617, 7976056, 3249495, -1477066, 413558, 5140119, 5140119, 4194807, 4194807, 4194807, -1477066, -3367690, -6203627, -6203627, -6203627, -7148939, -6203627, -7148939, -6203627, -1477066, 2304182, 4194807, 5140119, 7976056, 8921368, 8921368, 7030744, 4194807, 1358870, 413558, 413558, 4194807, 4194807, 6085432, 7030744, 7030744, 5140119, 413558, -531753, 413558, 5140119, 7030744, 8921368, 7976056, 6085432, 5140119, 3249495, 2304182, 4194807, 4194807, 3249495, 2304182, 2304182, 6085432, 5140119, 4194807, 4194807, 3249495, 413558, 413558, 3249495, 3249495, -531753, -1477066, -1477066, -531753, -1477066, -4313003, -6203627, -8094251, -6203627, -5258315, -5258315, 1358870, 7030744, 8921368, 10811993, 9866681, 5140119, 3249495, 2304182, 1358870, 3249495, 4194807, 4194807, 3249495, 2304182, 3249495, 4194807, 5140119, 4194807, 4194807, 5140119, 5140119, 1358870, -531753, -531753, -531753, 413558, 413558, -1477066, -3367690, -5258315, -6203627, -5258315, -1477066, 1358870, 5140119, 5140119, 6085432, 4194807, 4194807, 2304182, 2304182, 
2304182, 3249495, 4194807, 4194807, 3249495, 3249495, 3249495, 2304182, 1358870, 413558, 2304182, 4194807, 5140119, 6085432, 5140119, 4194807, -531753, -2422378, -1477066, -531753, 413558, -531753, -1477066, -3367690, -5258315, -5258315, -3367690, -531753, 2304182, 6085432, 7976056, 8921368, 6085432, 4194807, 3249495, 2304182, 2304182, 2304182, 3249495, 3249495, 2304182, -531753, 413558, 2304182, 3249495, 5140119, 5140119, 2304182, 3249495, 4194807, 5140119, 3249495, -531753, -1477066, 413558, 2304182, 3249495, 1358870, -531753, -2422378, -2422378, -3367690, -3367690, -1477066, 1358870, 3249495, 7976056, 9866681, 9866681, 7976056, 2304182, 413558, -531753, 1358870, 2304182, 413558, -2422378, -2422378, -2422378, -1477066, -2422378, -6203627, -7148939, -7148939, -4313003, -1477066, 3249495, 5140119, 6085432, 7030744, 7976056, 8921368, 8921368, 7976056, 7976056, 4194807, 3249495, 413558, 1358870, 2304182, -3367690, -9984876, -17547374, -15656750, -2422378, -22273936, -81828608, -22273936, -9039564, -531753, 1358870, 5140119, 3249495, 413558, 2304182, 413558, -4313003, -5258315, -4313003, -4313003, -7148939, -9984876, -9984876, 2845080, 7791423, 7791423, 5318293, -2101178, -6429281, -13848753, -17558448, -17558448, -15085318, -13848753, -16321882, -16940122, -8902410, -3337827, 8409663, 31286404, 28194950, 9027988, 9027988, 7173098, 5318293, -1482938, -5192632, -11375540, -13848753, -14466993, -12612104, -10757299, -4574392, -2101178, 4081645, 9646312, 11501117, 13974330, 18920590, 14592571, 9027988, 5936534, 3463404, 1608515, -1482938, -5192632, -13230429, -15085318, -13848753, -10757299, -4574392, -1482938, 4699969, 9027988, 11501117, 13356006, 12737766, 14592571, 12119441, 4081645, 2226840, 1608515, 7173098, 4699969, -2101178, -5192632, -6429281, -7665845, -10138975, -8284086, -5192632, -246373, 2226840, 9027988, 10264552, 15210895, 12119441, 371950, -864613, 2845080, -246373, -2101178, -5192632, -4574392, -3956067, -2719502, -2101178, -246373, 4081645, 7173098, 
8409663, 11501117, 371950, -5810956, -2101178, 4699969, 2226840, 371950, 2845080, 2226840, -2719502, -2719502, -2101178, -1482938, 3463404, 6554858, 7791423, 12119441, 9646312, -864613, -2719502, 1608515, 2226840, -246373, -4574392, -6429281, -6429281, -6429281, -5810956, -4574392, 371950, 1608515, 5318293, 5936534, 9027988, 7173098, 1608515, 990191, 2845080, 5318293, 4699969, 990191, -1482938, -3956067, -3956067, -5192632, -5810956, -3956067, -1482938, 371950, 8409663, 10264552, 12737766, 12737766, 9027988, -2719502, -3956067, -246373, -2719502, -4574392, -8284086, -9520651, -8284086, -8284086, -5810956, -3337827, 2226840, 5936534, 5936534, 7791423, 13356006, 3463404, 990191, 2845080, 3463404, 4699969, 371950, -1482938, -3337827, -5192632, -8902410, -8902410, -6429281, -4574392, 990191, 7173098, 9027988, 12737766, 16447460, 12737766, 5318293, -4574392, 990191, 990191, -3337827, -5192632, -7665845, -8284086, -8902410, -8284086, -3956067, -1482938, 371950, 4699969, 5936534, 7791423, 10882877, 14592571, 8409663, -5810956, -1482938, 990191, 2845080, 371950, -4574392, -7047521, -8902410, -10757299, -15085318, -13230429, -10757299, -4574392, 371950, 5318293, 6554858, 12119441, 13974330, 18302348, 17065784, 12119441, 2845080, 4081645, 5318293, 3463404, -2719502, -3956067, -3337827, -7047521, -8902410, -11375540, -11993864, -12612104, -13230429, -11993864, -10757299, -10138975, -7047521, -5810956, -5810956, -5810956, -1482938, 4081645, 12737766, 8409663, 4699969, -246373, 2845080, 2845080, 2845080, -864613, -4574392, -10138975, -12612104, -11993864, -11993864, -11375540, -8902410, -7047521, 371950, 9264966, 10398686, 5863637, 3596112, -938936, -2072657, -5473986, -6607706, -8875231, -10009035, -7741511, -2072657, -2072657, 4729916, 8131161, 8131161, 2462392, 13800015, 18335064, 5863637, -4340182, -7741511, -11142756, -10009035, -5473986, -2072657, -2072657, -3206461, -5473986, -11142756, -11142756, -10009035, -11142756, -13410280, -17945330, -19079134, -17945330, 
-11142756, -7741511, -11142756, -6607706, -3206461, -3206461, -2072657, -938936, 194867, 1328587, 6997441, 10398686, 16067540, 20602590, 24003834, 28538968, 28538968, 21736310, 20602590, 13800015, 10398686, 3596112, 194867, -938936, -4340182, -3206461, -2072657, -4340182, -7741511, -7741511, -5473986, -938936, 1328587, 5863637, 5863637, -938936, -938936, 5863637, 5863637, 1328587, 2462392, 1328587, 1328587, 2462392, 3596112, 2462392, 1328587, 194867, 1328587, 2462392, 1328587, 4729916, 5863637, 4729916, 4729916, 3596112, 194867, -938936, 1328587, 1328587, -2072657, -4340182, -7741511, -8875231, -6607706, -5473986, -4340182, -4340182, -3206461, 2462392, 2462392, 3596112, 2462392, 1328587, 194867, 1328587, 1328587, 194867, -4340182, -4340182, -938936, 194867, 1328587, 1328587, 1328587, 9264966, 6997441, 3596112, 2462392, 194867, 194867, -2072657, -3206461, -3206461, -5473986, -6607706, -10009035, -7741511, -5473986, -4340182, -2072657, -2072657, -2072657, -938936, 2462392, 9264966, 6997441, 5863637, 4729916, 3596112, 2462392, 2462392, 194867, 194867, -2072657, -938936, 2462392, 3596112, 4729916, 3596112, 5863637, 10398686, 8131161, 5863637, 4729916, 2462392, -2072657, -3206461, -3206461, -3206461, -7741511, -8875231, -11142756, -11142756, -7741511, -4340182, -3206461, -938936, 194867, 194867, 2462392, 6997441, 3596112, 2462392, 1328587, 1328587, 1328587, 1328587, 1328587, 194867, 194867, 2462392, 3596112, 4729916, 4729916, 4729916, 4729916, 9264966, 11532491, 4729916, 3596112, 3596112, 194867, -2072657, -4340182, -5473986, -5473986, -7741511, -12276560, -12276560, -11142756, -7741511, -6607706, -6607706, -5473986, -2072657, -938936, 194867, -4340182, -4340182, 2462392, 2462392, 3596112, 2462392, -2072657, -2072657, -2072657, -2072657, -3206461, -3206461, -4340182, -3206461, -2072657, 1328587, 5863637, 8131161, 9264966, 6997441, 6997441, 6997441, 10398686, 18335064, 60284312, 14933736, 2462392, -12276560, -16811610, -13410280, -13410280, -8875231, -7741511, -11142756, 
-10009035, -8875231, -8875231, -11142756, -10009035, -7741511, -6607706,}; +int32_t xout_fxp[] = { 3292444, 4064364, 6155812, 6195155, 9257332, 12694145, 14824432, 15372208, 11068433, 6837806, 240920, 803041, -2612380, -10057857, -9889917, -13348624, -14127926, -13259369, -13576039, -6042062, -3006561, 2408956, 8713583, 10121694, 13602547, 8648571, 5610217, 6115295, 1482183, -1582259, -2944820, -3424565, -4220057, -5066719, -4297735, -3545696, -1647522, -1001515, -697596, 788613, -257781, 459192, 79524, -1251244, -2491332, -3870839, -3522208, -3060919, -2428502, -2139598, -3479343, -4663982, -5054556, -3108734, -570760, 899091, 2730324, 2408872, 2590486, 964186, 1625544, 3065952, 4113102, 3292360, -950429, -1343771, -2129699, -2646522, -3432199, -5790069, -4748120, -2286650, -269945, 2523880, 3403510, 5818674, 6790913, 5984852, 6878491, 6048857, 4168970, 1402491, -873086, -1607760, -4148837, -4068558, -2950441, -742727, 1696260, 1874098, 141264, -1049750, -3662718, -4616167, -3054376, -765963, -604063, -1357109, -461708, 856644, -1036999, -3103868, -2822263, -1979292, -56371, 994301, 582337, 1758839, 1761691, 2003367, 3093970, 5551916, 6018826, 3588982, 1830981, 592067, -281521, -2410131, -3465669, -2864625, -1194873, 594584, 105864, -475550, -1600378, -2847429, -3294038, -1512633, 631578, 2817817, 3283385, 2723865, 1344358, -276404, -2165770, -5381711, -6709628, -7850395, -5596711, -1719664, 1470606, 4100603, 3967476, 3379350, 2990622, 1589557, 2437310, 5120154, 3907497, 2305860, -107709, -1281779, -1471194, -4927552, -4825463, -3991299, -1795162, -552725, -862097, -753297, -1072651, -1719916, -2231034, -264073, 2753392, 4566171, 4044819, 2930057, 178341, -1699783, -4156723, -5665162, -5280628, -5734452, -2975439, -702881, 1238410, 2243281, 2437561, 2359967, 1678392, 1215844, 3515078, 4544360, 4396050, 2499721, -604734, -1225324, -3896760, -4890978, -4823533, -5828237, -3901373, -1949009, 617904, 1206701, 502645, -1708088, -3285650, -4047503, -655066, 3582942, 
6007082, 6838141, 6924125, 8160438, 6497228, 3048084, 415152, -2191020, -3852803, -5099267, -5793592, -2679489, -1172391, 835673, 122389, -1968722, -3678320, -5350757, -5294134, -4981239, -4290102, -3705164, -3026526, 959237, 4386235, 4781087, 6078385, 12795647, 16480595, 14613458, 3337407, 7141390, 3990880, -4480942, 1317263, 3170306, 5772620, 2264756, -3056053, -83131, -15890456, -11019024, -3130209, -6190121, 1903626, -1163416, -361045, 7696128, 2425398, 69122, -756149, -1851114, -3297897, -4653916,}; float result[256]; float layer1Output[3*256]; +fxp32* weights1Fxp[63]; +fxp32* weights2Fxp[3]; + +fxp32 layer1OutputFxp[3*256]; + +fxp32 resultFxp[256]; + #endif // clang-format on @@ -49,13 +58,13 @@ int main() { layer1.dim = (Dim2D){3u, 21u}; layer1.padding = SAME; layer1.weightsFloat = weights1; - layer1.weightsFxp = NULL; + layer1.weightsFxp = weights1Fxp; Conv2DLayer layer2; layer2.dim = (Dim2D){3u, 1u}; layer2.padding = VALID; layer2.weightsFloat = weights2; - layer2.weightsFxp = NULL; + layer2.weightsFxp = weights2Fxp; Cnn cnn; cnn.layer1 = &layer1; @@ -63,17 +72,34 @@ int main() { cnn.inputDim = (Dim2D){3u, 256u}; cnn.outputDim = (Dim2D){1u, 256u}; cnn.layer1Output = layer1Output; - cnn.layer1OutputFxp = NULL; + cnn.layer1OutputFxp = layer1OutputFxp; // Forward pass printf("Forward pass\n"); Cnn_forwardFloat(&cnn, xin, result); - // Cnn_forwardFxp(&cnn, inp_fxp, result_fxp); printf("Comparing results\n"); - compareVectorsFloat(result, xout, 15, COMP_PREC); - // compareVectorsFxp(result_fxp, res_fxp, 15, COMP_PREC_I32 * 10); + compareVectorsFloat(result, xout, 256, COMP_PREC); + printf("Test passed\n"); + + printf("Test predict method\n"); + Cnn_predictFloat(&cnn, xin, ppg, result); + + compareVectorsFloat(result, ppgf, 256, COMP_PREC); + printf("Test passed\n"); + + // Freeze model + printf("Freezing model\n"); + Cnn_freezeModel(&cnn); + + printf("Running cnn test with fixed point\n"); + Cnn_forwardFxp(&cnn, xin_fxp, resultFxp); + + printf("Comparing 
results\n"); + compareVectorsFxp(resultFxp, xout_fxp, 256, COMP_PREC_I32); printf("Test passed\n"); + printf("CNN test finished\n"); + return 0; } \ No newline at end of file From db5cfc468f8327b6d38e31970ab1c8ffaf78c16d Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Tue, 14 May 2024 17:18:10 +0200 Subject: [PATCH 19/27] Add loss function and test --- sw/applications/l_loss/cnn.c | 87 +++++++++++++ sw/applications/l_loss/cnn.h | 44 +++++++ sw/applications/l_loss/conv2dlayer.c | 163 ++++++++++++++++++++++++ sw/applications/l_loss/conv2dlayer.h | 49 +++++++ sw/applications/l_loss/fxp32.c | 56 ++++++++ sw/applications/l_loss/fxp32.h | 26 ++++ sw/applications/l_loss/main.c | 149 ++++++++++++++++++++++ sw/applications/l_loss/testdata_learn.h | 30 +++++ sw/applications/l_loss/utils.h | 162 +++++++++++++++++++++++ 9 files changed, 766 insertions(+) create mode 100644 sw/applications/l_loss/cnn.c create mode 100644 sw/applications/l_loss/cnn.h create mode 100644 sw/applications/l_loss/conv2dlayer.c create mode 100644 sw/applications/l_loss/conv2dlayer.h create mode 100644 sw/applications/l_loss/fxp32.c create mode 100644 sw/applications/l_loss/fxp32.h create mode 100644 sw/applications/l_loss/main.c create mode 100644 sw/applications/l_loss/testdata_learn.h create mode 100644 sw/applications/l_loss/utils.h diff --git a/sw/applications/l_loss/cnn.c b/sw/applications/l_loss/cnn.c new file mode 100644 index 000000000..3e724209c --- /dev/null +++ b/sw/applications/l_loss/cnn.c @@ -0,0 +1,87 @@ +// clang-format off + +#include "cnn.h" + +#include +// clang-format on + +#ifdef DYN_ALLOCATION +CnnHandle Cnn_create(Dim2D inputDim, Dim2D layer1Dim, Dim2D layer2Dim, Conv2DPadding layer1Pad, + Conv2DPadding layer2Pad) { + CnnHandle self = (CnnHandle)malloc(sizeof(Cnn)); + // Create layers + self->layer1 = Conv2DLayer_create(layer1Dim, layer1Pad); + self->layer2 = Conv2DLayer_create(layer2Dim, layer2Pad); + + self->inputDim = inputDim; + // TODO: calculate + self->outputDim = 
(Dim2D){1u, 256u}; + return self; +} + +void Cnn_destroy(CnnHandle self) { + Conv2DLayer_destroy(self->layer1); + Conv2DLayer_destroy(self->layer2); + free(self); +} +#endif + +void Cnn_forwardFxp(CnnHandle self, fxp32* input, fxp32* output) { +#ifdef DYN_ALLOCATION + fxp32* layer1Output = (fxp32*)calloc(self->inputDim.x * self->inputDim.y, sizeof(fxp32)); + Conv2DLayer_forwardFxp(self->layer1, self->inputDim, input, layer1Output); + Conv2DLayer_forwardFxp(self->layer2, self->inputDim, layer1Output, output); + free(layer1Output); +#else + Conv2DLayer_forwardFxp(self->layer1, self->inputDim, input, self->layer1OutputFxp); + Conv2DLayer_forwardFxp(self->layer2, self->inputDim, self->layer1OutputFxp, output); +#endif +} + +void Cnn_forwardFloat(CnnHandle self, float* input, float* output) { +#ifdef DYN_ALLOCATION + float* layer1Output = (float*)calloc(self->inputDim.x * self->inputDim.y, sizeof(float)); + Conv2DLayer_forwardFloat(self->layer1, self->inputDim, input, layer1Output); + Conv2DLayer_forwardFloat(self->layer2, self->inputDim, layer1Output, output); + free(layer1Output); +#else + Conv2DLayer_forwardFloat(self->layer1, self->inputDim, input, self->layer1Output); + Conv2DLayer_forwardFloat(self->layer2, self->inputDim, self->layer1Output, output); +#endif +} + +void Cnn_predictFxp(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output) { + Cnn_forwardFxp(self, acc, output); + for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { + output[i] = ppg[i] - output[i]; + } +} + +void Cnn_predictFloat(CnnHandle self, float* acc, float* ppg, float* output) { + Cnn_forwardFloat(self, acc, output); + for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { + output[i] = ppg[i] - output[i]; + } +} + +float Cnn_sampleLoss(CnnHandle self, complex_t* ypred, complex_t* ytrue) { + float loss = 0.0f; + // NOTE: could be optimized by reusing .r and .i for the abs +#ifdef DYN_ALLOCATION + fxpMul* abs = (fxpMul*)calloc(self->outputDim.y * self->outputDim.x, 
sizeof(fxpMul)); +#else // NOTE: this is on the stack could be pre-allocated as well or static... + fxpMul abs[self->outputDim.y * self->outputDim.x]; +#endif + for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { + abs[i] = fxp32_pow2(ytrue[i].r - ypred[i].r) + fxp32_pow2(ytrue[i].i - ypred[i].i); + } + for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { + loss += fxp32_fxpMulToFloat(abs[i]); + } + return loss; +} + +void Cnn_freezeModel(CnnHandle self) { + Conv2DLayer_transformWeightsToFxp(self->layer1); + Conv2DLayer_transformWeightsToFxp(self->layer2); +} \ No newline at end of file diff --git a/sw/applications/l_loss/cnn.h b/sw/applications/l_loss/cnn.h new file mode 100644 index 000000000..123a80ebf --- /dev/null +++ b/sw/applications/l_loss/cnn.h @@ -0,0 +1,44 @@ +#ifndef CNN_H +#define CNN_H + +#include "conv2dlayer.h" +#include "fxp32.h" + +/** + * @brief a two layer cnn model + */ +typedef struct __Cnn { + Conv2DLayerHandle layer1; + Conv2DLayerHandle layer2; + Dim2D inputDim; + Dim2D outputDim; +#ifndef DYN_ALLOCATION + float* layer1Output; + fxp32* layer1OutputFxp; +#endif +} Cnn; + +typedef struct __Cnn* CnnHandle; + +typedef struct __complex_t { + int32_t r; + int32_t i; +} complex_t; + +#ifdef DYN_ALLOCATION +CnnHandle Cnn_create(Dim2D inputDim, Dim2D layer1Dim, Dim2D layer2Dim, Conv2DPadding layer1Pad, + Conv2DPadding layer2Pad); +void Cnn_destroy(CnnHandle self); +#endif + +void Cnn_forwardFxp(CnnHandle self, fxp32* input, fxp32* output); +void Cnn_forwardFloat(CnnHandle self, float* input, float* output); + +// void Cnn_predictFxp(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output); +// void Cnn_predictFloat(CnnHandle self, float* acc, float* ppg, float* output); + +float Cnn_sampleLoss(CnnHandle self, complex_t* ypred, complex_t* ytrue); + +void Cnn_freezeModel(CnnHandle self); + +#endif // CNN_H \ No newline at end of file diff --git a/sw/applications/l_loss/conv2dlayer.c b/sw/applications/l_loss/conv2dlayer.c new 
file mode 100644 index 000000000..e64b597fa --- /dev/null +++ b/sw/applications/l_loss/conv2dlayer.c @@ -0,0 +1,163 @@ +#include "conv2dlayer.h" + +#include +#include +#include + +#ifdef DYN_ALLOCATION +Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, Conv2DPadding padding) { + Conv2DLayerHandle self = (Conv2DLayerHandle)malloc(sizeof(Conv2DLayer)); + self->dim = dim; + self->padding = padding; + self->weightsFxp = (fxp32*)calloc(dim.x * dim.y, sizeof(fxp32)); + self->weightsFloat = (float*)calloc(dim.x * dim.y, sizeof(float)); + return self; +} + +void Conv2DLayer_destroy(Conv2DLayerHandle self) { + free(self->weightsFxp); + free(self->weightsFloat); + free(self); +} +#endif + +bool Conv2DLayer_setWeightsFxp(Conv2DLayerHandle self, fxp32* weights) { + memcpy(self->weightsFxp, weights, self->dim.x * self->dim.y * sizeof(fxp32)); + // TODO: make sure we get the right size of weights + return true; +} + +bool Conv2DLayer_setWeightsFloat(Conv2DLayerHandle self, float* weights) { + memcpy(self->weightsFloat, weights, self->dim.x * self->dim.y * sizeof(float)); + // TODO: make sure we get the right size of weights + return true; +} + +void Conv2DLayer_transformWeightsToFxp(Conv2DLayerHandle self) { + for (int i = 0; i < self->dim.x * self->dim.y; ++i) { + self->weightsFxp[i] = fxp32_fromFloat(self->weightsFloat[i]); + } +} + +// Could be optimized +void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, int kerx, int kery, bool valid) { + + // float max = 0; + // float min = 0; + + // get kernel center + if (kerx % 2 != 1 || kery % 2 != 1) { + printf("Kernel size must be odd\n"); + exit(EXIT_FAILURE); + } + int cx = kerx / 2; + int cy = kery / 2; + + int mMax, mMin; + int nMax, nMin; + fxp32 sum, w, in; + + int iMin = 0; + int jMin = 0; + int iMax = inx; + int jMax = iny; + + if (valid) { + iMin = cx; + jMin = cy; + iMax = inx - cx; + jMax = iny - cy; + } + + for (int i = iMin; i < iMax; ++i) { + for (int j = jMin; j < jMax; ++j) { + mMin = i - 
cx >= 0 ? i - cx : 0; + mMax = i + cx + 1 < inx ? i + cx + 1 : inx; + nMin = j - cy >= 0 ? j - cy : 0; + nMax = j + cy + 1 < iny ? j + cy + 1 : iny; + sum = 0; + for (int m = mMin; m < mMax; ++m) { + for (int n = nMin; n < nMax; ++n) { + // printf("input access: %d, %d\n", m, n); + in = input[m * iny + n]; + // printf("kernel access: %d, %d\n", m - i + cx, n - j + + // cy); + w = kernel[(m - i + cx) * kery + (n - j + cy)]; + sum += fxp32_mul(w, in); + // if (sum > max) { + // max = sum; + // } + // if (sum < min) { + // min = sum; + // } + // printf("factor: %.2f\n", w * in); + } + } + if (valid) + output[(i - cx) * (iny - kery + 1) + (j - cy)] = sum; + else + output[i * iny + j] = sum; + // printf("access output: %d\n", (i-cx)*(iny - kery + 1) + (j-cy)); + // printf("sum = %.2f\n", sum); + } + } + // printf("max: %.6f, min: %.6f\n", max, min); +} + +// Could be optimized +void convolve2DFloat(float* input, float* output, float* kernel, int inx, int iny, int kerx, int kery, bool valid) { + // get kernel center + if (kerx % 2 != 1 || kery % 2 != 1) { + printf("Kernel size must be odd\n"); + exit(EXIT_FAILURE); + } + int cx = kerx / 2; + int cy = kery / 2; + + int mMax, mMin; + int nMax, nMin; + float sum, w, in; + + int iMin = 0; + int jMin = 0; + int iMax = inx; + int jMax = iny; + + if (valid) { + iMin = cx; + jMin = cy; + iMax = inx - cx; + jMax = iny - cy; + } + + for (int i = iMin; i < iMax; ++i) { + for (int j = jMin; j < jMax; ++j) { + mMin = i - cx >= 0 ? i - cx : 0; + mMax = i + cx + 1 < inx ? i + cx + 1 : inx; + nMin = j - cy >= 0 ? j - cy : 0; + nMax = j + cy + 1 < iny ? 
j + cy + 1 : iny; + sum = .0f; + for (int m = mMin; m < mMax; ++m) { + for (int n = nMin; n < nMax; ++n) { + in = input[m * iny + n]; + w = kernel[(m - i + cx) * kery + (n - j + cy)]; + sum += w * in; + } + } + if (valid) + output[(i - cx) * (iny - kery + 1) + (j - cy)] = sum; + else + output[i * iny + j] = sum; + } + } +} + +void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output) { + convolve2DFxp(input, output, self->weightsFxp, inputDim.x, inputDim.y, self->dim.x, self->dim.y, + self->padding == VALID); +} + +void Conv2DLayer_forwardFloat(Conv2DLayerHandle self, Dim2D inputDim, float* input, float* output) { + convolve2DFloat(input, output, self->weightsFloat, inputDim.x, inputDim.y, self->dim.x, self->dim.y, + self->padding == VALID); +} \ No newline at end of file diff --git a/sw/applications/l_loss/conv2dlayer.h b/sw/applications/l_loss/conv2dlayer.h new file mode 100644 index 000000000..d4e14f3ef --- /dev/null +++ b/sw/applications/l_loss/conv2dlayer.h @@ -0,0 +1,49 @@ +#ifndef CONV2DLAYER_H +#define CONV2DLAYER_H + +#include +#include + +#include "fxp32.h" + +// TODO: perhaps we could optimize but we need 256 as value... 
+/** + * @brief a 2D dimension + * @param x the rows of a matrix + * @param y the columns of a matrix + */ +typedef struct __Dim2D { + uint16_t x; + uint16_t y; +} Dim2D; + +typedef enum __Conv2DPadding { + VALID, + SAME +} Conv2DPadding; + +typedef struct __Conv2DLayer { + Dim2D dim; + Conv2DPadding padding; + fxp32* weightsFxp; + float* weightsFloat; +} Conv2DLayer; + +typedef struct __Conv2DLayer* Conv2DLayerHandle; + +#ifdef DYN_ALLOCATION +Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, Conv2DPadding padding); +void Conv2DLayer_destroy(Conv2DLayerHandle self); +#endif + +bool Conv2DLayer_setWeightsFxp(Conv2DLayerHandle self, fxp32* weights); +bool Conv2DLayer_setWeightsFloat(Conv2DLayerHandle self, float* weights); +void Conv2DLayer_transformWeightsToFxp(Conv2DLayerHandle self); + +void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output); +void Conv2DLayer_forwardFloat(Conv2DLayerHandle self, Dim2D inputDim, float* input, float* output); + +void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, int kerx, int kery, bool valid); +void convolve2DFloat(float* input, float* output, float* kernel, int inx, int iny, int kerx, int kery, bool valid); + +#endif // CONV2DLAYER_H \ No newline at end of file diff --git a/sw/applications/l_loss/fxp32.c b/sw/applications/l_loss/fxp32.c new file mode 100644 index 000000000..2755f99a9 --- /dev/null +++ b/sw/applications/l_loss/fxp32.c @@ -0,0 +1,56 @@ +#include "fxp32.h" + +fxp32 fxp32_fromFloat(float f) { + return (fxp32)(f * (1 << FRACTIONAL_BITS)); +} + +float fxp32_toFloat(fxp32 x) { + return (float)x / (1 << FRACTIONAL_BITS); +} + +float fxp32_fxpMulToFloat(fxpMul x) { + return (float)x / ((fxpMul)1 << FRACTIONAL_BITS); +} + +fxp32 fxp32_fromInt(int i) { + return i << FRACTIONAL_BITS; +} + +fxp32 fxp32_mul(fxp32 a, fxp32 b) { + return (fxp32)(((fxpMul)a * (fxpMul)b) >> FRACTIONAL_BITS); +} + +fxpMul fxp32_mul64(fxp32 a, fxp32 b) { + return ((fxpMul)a * 
(fxpMul)b) >> FRACTIONAL_BITS; +} + +fxpMul fxp32_pow2(fxp32 a) { + return ((fxpMul)a * (fxpMul)a) >> FRACTIONAL_BITS; +} + +fxp32 fxp32_div(fxp32 a, fxp32 b) { + return (fxp32)(((fxpMul)a << FRACTIONAL_BITS) / b); +} + +bool fxp32_closefxp(fxp32 a, fxp32 b, fxp32 prec) { + fxp32 diff = a - b; + if (diff < 0) + diff = -diff; + return diff <= prec; +} + +fxp32 fxp32_sqrt(fxp32 a) { + fxp32 guess = a >> 1; + fxp32 eps = fxp32_fromFloat(0.0001); + while (fxp32_closefxp(guess, fxp32_div(a, guess), eps) == false) { + guess = (guess + fxp32_div(a, guess)) >> 1; + } + return guess; +} + +bool fxp32_close(fxp32 a, fxp32 b, float prec) { + fxp32 diff = a - b; + if (diff < 0) + diff = -diff; + return diff <= fxp32_fromFloat(prec); +} diff --git a/sw/applications/l_loss/fxp32.h b/sw/applications/l_loss/fxp32.h new file mode 100644 index 000000000..878d82855 --- /dev/null +++ b/sw/applications/l_loss/fxp32.h @@ -0,0 +1,26 @@ +#ifndef FXP_H +#define FXP_H + +#include +#include + +#define FRACTIONAL_BITS 23 +typedef int32_t fxp32; +typedef int64_t fxpMul; + +fxp32 fxp32_fromFloat(float f); +float fxp32_toFloat(fxp32 x); +float fxp32_fxpMulToFloat(fxpMul x); +fxp32 fxp32_fromInt(int i); + +fxp32 fxp32_mul(fxp32 a, fxp32 b); +fxpMul fxp32_mul64(fxp32 a, fxp32 b); +fxpMul fxp32_pow2(fxp32 a); + +fxp32 fxp32_div(fxp32 a, fxp32 b); + +fxp32 fxp32_sqrt(fxp32 a); + +bool fxp32_close(fxp32 a, fxp32 b, float prec); + +#endif // FXP_H \ No newline at end of file diff --git a/sw/applications/l_loss/main.c b/sw/applications/l_loss/main.c new file mode 100644 index 000000000..67712f6a5 --- /dev/null +++ b/sw/applications/l_loss/main.c @@ -0,0 +1,149 @@ +#include "fxp32.h" +#include "cnn.h" +#include "conv2dlayer.h" +#include "utils.h" + +#include +#include +#include +#include + +#include "testdata_learn.h" + +#define COMP_PREC 0.000001f + +#ifndef DYN_ALLOCATION +complex_t ypred[256]; +complex_t ytrue[256]; +#endif + +void test_learn_sampleLoss() { +#ifdef DYN_ALLOCATION + complex_t* ypred = 
(complex_t*)calloc(256, sizeof(complex_t)); + complex_t* ytrue = (complex_t*)calloc(256, sizeof(complex_t)); +#endif + for (int i = 0; i < 256; ++i) { + ypred[i].r = fxp32_fromFloat(arp[i]); + ypred[i].i = fxp32_fromFloat(aip[i]); + ytrue[i].r = fxp32_fromFloat(art[i]); + ytrue[i].i = fxp32_fromFloat(ait[i]); + } + Cnn cnn; + cnn.outputDim = (Dim2D){1u, 256u}; + float loss = Cnn_sampleLoss(&cnn, ypred, ytrue); + PRINTF("Loss: %d\n", (int)(loss*SCL)); + PRINTF("Expected: %d\n", (int)(ares*SCL)); + PRINTF("Diff: %d\n", (int)((ares - loss)*SCL)); + assert_closef(loss, ares, 0.005f); +#ifdef DYN_ALLOCATION + free(ypred); + free(ytrue); +#endif +} + +void test_learn_sampleLoss_2() { +#ifdef DYN_ALLOCATION + complex_t* ypred = (complex_t*)calloc(256, sizeof(complex_t)); + complex_t* ytrue = (complex_t*)calloc(256, sizeof(complex_t)); +#endif + for (int i = 0; i < 256; ++i) { + ypred[i].r = fxp32_fromFloat(arp_2[i]); + ypred[i].i = fxp32_fromFloat(aip_2[i]); + ytrue[i].r = fxp32_fromFloat(art_2[i]); + ytrue[i].i = fxp32_fromFloat(ait_2[i]); + } + Cnn cnn; + cnn.outputDim = (Dim2D){1u, 256u}; + float loss = Cnn_sampleLoss(&cnn, ypred, ytrue); + PRINTF("Loss: %d\n", (int)(loss*SCL)); + PRINTF("Expected: %d\n", (int)(ares_2*SCL)); + PRINTF("Diff: %d\n", (int)((ares_2 - loss)*SCL)); + assert_closef(loss, ares_2, 0.005f); +#ifdef DYN_ALLOCATION + free(ypred); + free(ytrue); +#endif +} + +void test_learn_sampleLoss_3() { +#ifdef DYN_ALLOCATION + complex_t* ypred = (complex_t*)calloc(256, sizeof(complex_t)); + complex_t* ytrue = (complex_t*)calloc(256, sizeof(complex_t)); +#endif + for (int i = 0; i < 256; ++i) { + ypred[i].r = fxp32_fromFloat(arp_3[i]); + ypred[i].i = fxp32_fromFloat(aip_3[i]); + ytrue[i].r = fxp32_fromFloat(art_3[i]); + ytrue[i].i = fxp32_fromFloat(ait_3[i]); + } + Cnn cnn; + cnn.outputDim = (Dim2D){1u, 256u}; + float loss = Cnn_sampleLoss(&cnn, ypred, ytrue); + PRINTF("Loss: %d\n", (int)(loss*SCL)); + PRINTF("Expected: %d\n", (int)(ares_3*SCL)); + 
PRINTF("Diff: %d\n", (int)((ares_3 - loss)*SCL)); + assert_closef(loss, ares_3, 0.01f); +#ifdef DYN_ALLOCATION + free(ypred); + free(ytrue); +#endif +} + +void test_learn_sampleLoss_4() { +#ifdef DYN_ALLOCATION + complex_t* ypred = (complex_t*)calloc(256, sizeof(complex_t)); + complex_t* ytrue = (complex_t*)calloc(256, sizeof(complex_t)); +#endif + for (int i = 0; i < 256; ++i) { + ypred[i].r = fxp32_fromFloat(arp_4[i]); + ypred[i].i = fxp32_fromFloat(aip_4[i]); + ytrue[i].r = fxp32_fromFloat(art_4[i]); + ytrue[i].i = fxp32_fromFloat(ait_4[i]); + } + Cnn cnn; + cnn.outputDim = (Dim2D){1u, 256u}; + float loss = Cnn_sampleLoss(&cnn, ypred, ytrue); + PRINTF("Loss: %d\n", (int)(loss*SCL)); + PRINTF("Expected: %d\n", (int)(ares_4*SCL)); + PRINTF("Diff: %d\n", (int)((ares_4 - loss)*SCL)); + assert_closef(loss, ares_4, 0.01f); +#ifdef DYN_ALLOCATION + free(ypred); + free(ytrue); +#endif +} + +void test_sampleLossFloat() { + float loss = 0.0f; + for (int i = 0; i < 256; ++i) { + loss += (art[i] - arp[i])*(art[i] - arp[i]) + (ait[i] - aip[i])*(ait[i] - aip[i]); + } + PRINTF("Loss float: %d\n", (int)(loss*SCL)); +} + +void test_sampleLossFloat_2() { + float loss = 0.0f; + for (int i = 0; i < 256; ++i) { + loss += (art_2[i] - arp_2[i])*(art_2[i] - arp_2[i]) + (ait_2[i] - aip_2[i])*(ait_2[i] - aip_2[i]); + } + PRINTF("Loss 2 float: %d\n", (int)(loss*SCL)); +} + +int main() { + PRINTF("====== Test Learn ========\n"); + PRINTF("====== Test Loss =======\n"); + test_learn_sampleLoss(); + test_sampleLossFloat(); + test_learn_sampleLoss_2(); + test_sampleLossFloat_2(); + test_learn_sampleLoss_3(); + test_learn_sampleLoss_4(); + PRINTF("====== Test passed =======\n"); + PRINTF("====== Test Learn end ====\n\n"); + return EXIT_SUCCESS; +} + +/* +Note: Error is about 100x smaller than the last step with 1000 epochs so I think it's fine since we average +TODO: Figure out why the error is consistently a multiple of the same value +*/ \ No newline at end of file diff --git 
a/sw/applications/l_loss/testdata_learn.h b/sw/applications/l_loss/testdata_learn.h new file mode 100644 index 000000000..222f608a4 --- /dev/null +++ b/sw/applications/l_loss/testdata_learn.h @@ -0,0 +1,30 @@ +#ifndef TESTDATA_LEARN_H +#define TESTDATA_LEARN_H + +#pragma GCC diagnostic ignored "-Wunused-variable" + +float arp[] = { 3.8620986701, 4.1452136173, -0.2327150445, 0.1657330663, -4.6537758352, -5.3887113380, 1.8724638771, -9.4767683695, -3.0047269511, -8.4616192937, -9.8370617690, -8.2139992662, 3.1838127564, 5.0188572979, 5.5466502681, 6.0785824992, -9.6929461143, -6.8873507343, -6.3238766836, 3.7737047451, 0.8370054295, 1.7090216802, -3.2517527469, 7.6818595344, 2.6401835699, 6.6449582743, -3.7181929670, -2.9741664129, 4.0930120477, 9.5700598110, 8.6884214119, 5.1226560371, 3.0985291921, -3.1379207339, -4.2477646306, 4.9549153731, -1.7324186556, 9.4774042054, -8.7168798237, 4.0324174209, -4.6202880387, -3.8706768706, 8.2177343651, 6.1635247424, -9.4754010676, 2.7817199667, -3.0151139422, -4.1151017064, 9.0958754239, -6.4865660508, -9.2589833347, -6.0375861240, -3.7855234885, -5.6497650722, -4.3469652128, -0.7169717582, -9.6322521632, 9.5006526095, 0.2984179859, -3.9211316085, 4.3973829000, 7.5054741808, -7.4042987530, -1.4970097415, -7.2085582000, -3.3185901861, -0.8788032139, 8.5463661824, 4.3837046567, -0.9024541307, 9.3285306179, -0.9854229929, 1.7688779780, -9.4849076903, 0.6500411728, -5.9055421191, -7.8750751594, -4.4764114395, -6.2725840398, -7.9641815332, 6.4526843224, -0.8454012957, 0.4821503040, -3.6145357924, -4.3669942866, -0.5123830820, 9.3066033622, 7.1066955479, -9.8795154942, -5.8669824164, -8.0816313369, -9.8608154025, -0.4240679261, 7.5262423085, 7.9555968068, 1.4391275682, 6.1716204854, 0.3653752390, 4.5835614546, 2.7810827482, 1.6362777886, -8.4611476253, 6.2952487819, 6.2358647223, 5.1234954665, 2.1999246908, 5.2347343945, 5.5414910599, -1.2612095330, 3.3580604697, -3.1910857647, -5.5068942013, -2.3149891293, 8.8893099535, 
-9.6295577587, 1.9675986028, -6.5339927204, -8.3724538046, -4.4259209116, 8.3463780977, 1.1472919448, 0.1590388958, -1.3459256237, 4.0460269605, 1.0759483840, -8.4195630336, 9.8973265093, 6.7076895445, -7.4049458683, -1.8479420626, 3.4259084397, -7.7013560494, 7.7252306359, 8.7847825977, 6.5056826083, -8.2268122106, -8.2271324465, -8.0395079831, -0.8420331777, 1.5567536530, 8.1894506951, 9.0714095257, -2.2808145519, -7.6661282298, -0.4214896859, -3.3377951973, 2.9770145380, 2.8150138631, 3.7280385481, 4.5095620185, 0.7420588140, -4.8162854997, -0.1370541908, 0.4402096779, -1.5405872031, 8.8181769287, 3.3830251014, 5.3330642336, -4.3386856978, -5.3269329944, -0.6742515631, -2.6202036395, 4.8498689971, 1.8515460396, -6.3008634869, 1.4948282882, 1.9531430620, -4.1610736208, 6.2439655452, -0.5865809211, 7.5117601146, -1.1893284400, -9.7672539358, -4.6607924029, -3.4423414436, -3.5390793882, 6.1045718129, 2.2554731926, 6.2870108582, 2.3756614067, 1.1259892731, 7.8152516195, -7.3088116710, 2.4595287663, 2.6514728321, 6.4570080579, 1.8820700409, 0.0706323358, -4.7683066493, 3.5219585789, -3.4841430535, 3.7204630426, 3.9247417688, -4.9445687001, 8.4478655808, -0.6039699163, 3.2105398740, -6.7701855040, 2.5177816802, 7.2744943593, 5.6915893352, 1.2031848520, -4.0196127508, 1.7983276085, -9.8023256379, -0.1782207856, 9.7905303486, -8.5562875112, -1.4007090873, -4.4378571925, -4.8316948425, -6.2778697113, 2.1138822017, 4.2982130230, 6.0477259151, 1.7713210242, -1.6021948470, 3.6713045211, -6.1780705861, 2.6335023223, -6.3268407337, 7.8606891126, -1.4508800453, 4.6122409212, 3.1585626835, -3.9417771165, 6.7476064466, -5.6955153344, -8.8484649723, 6.8377941532, -4.5035479841, -7.9456291933, 2.9779808083, 0.5368780666, -1.1628147330, 1.5694840350, 9.1350293573, -8.9896694545, 8.5143125989, -9.1430273761, 8.0187018959, 7.0766542064, 8.2002951533, 8.6697088670, -2.8736505896, 8.4874961681, -9.9419675856, -9.9126072549, 7.7671751106, -6.1183121618, -9.5955006217, -9.8201315667, 
3.4160028879, -6.2076543223, 5.8909380655, 4.8318157214,}; +float aip[] = { -4.4373920397, 5.6172445040, 7.6589014495, 0.4190239616, -8.1083570179, -7.0089352668, -1.6725473163, 9.2234212340, -2.9897911415, 2.3208132203, 5.8541076871, -5.2936918523, -2.7806073641, -2.6621239255, 1.6585959976, 3.5049533099, 5.9289910212, -2.6979369676, 8.7745308400, -8.4163258147, 1.9272259229, 6.2184766952, -7.0617285677, -7.0768704553, 9.1258458784, 7.5867989331, 5.6427679800, 8.8495817118, 2.0510126289, 9.9957815744, -0.3913193373, 6.4314678024, 0.7648134777, 6.0690035622, -0.7763738621, -7.5430687320, -9.9321745889, 2.3139261073, 6.7663419315, -6.6038453768, -7.0046414756, -2.4244312004, 0.0469744253, 4.9087148371, -8.6771450894, -7.0135797203, -8.1146828502, -5.0501565061, 3.1023149290, -0.9799450907, 8.4377115874, 0.5507824910, 2.7325602686, -2.4370268768, 5.5986968987, -5.7773035544, -5.0797213957, -7.3255616344, -6.7876943292, -4.5430150410, -0.0366762531, 3.1238716780, 4.6857606026, 8.6796813565, -7.8899714272, 0.4973970463, 8.6671447385, -6.4318423650, 2.5101521211, 9.4386364654, 2.1218099984, -2.9224394802, 6.1260027814, 8.1073149051, 0.3055693951, -9.8504898734, -8.6626673050, -9.8506057756, 1.4009397648, -0.1389886927, -4.2188571211, 8.6093760528, -3.1142229011, 2.2576231181, -5.1850903333, -9.3943653260, -7.0351791406, -6.4720383470, -1.8034268579, 1.1239745014, -6.0508049091, 1.5733429533, 2.9722836451, -7.3893127268, 4.1898247247, 2.3423954993, 1.3795578750, -9.2078874823, -7.0909341886, 6.4416279557, -4.6388666188, -8.6251493108, 7.0568271308, 0.6543685493, -3.9099186362, -7.1735916591, 1.0851013208, -0.7550856903, 6.8406957408, -6.8384593129, -3.1815511239, -9.7533054324, 0.1116260023, 6.2472267009, -7.1369702153, -5.2850572311, -6.7313290910, 8.4338037293, -8.8891814611, 3.3863504808, -4.2808620084, -2.7811803241, 6.2763266481, -9.7802891367, 2.0076324483, 0.9116814467, 7.2907567979, -5.6782009804, -2.6931569900, -7.5607161480, 1.8518744797, 9.8864594507, 
-5.4902935310, 9.2746280820, 1.7560225684, 9.2359682736, 6.4505435737, 9.0976559809, 3.5526687382, 0.3426111254, -1.2962896471, 5.8601986427, 6.9472346597, -3.0637093731, 3.6017500676, 3.0695655630, -9.6251271256, -8.0284703849, 3.9907445966, -4.4217722592, -1.8144898335, 8.5024959073, -4.0434942816, 6.8706758295, 1.4382471606, -6.9694858733, -0.1005103121, -7.7060911499, -3.7857665331, -2.6285824190, -7.8942909490, 5.4566995334, 9.3234511625, 9.4665921858, 2.8772836522, -5.3797992743, 7.8647303587, -3.3082604126, -3.3286749662, 1.5431125264, -5.0402209320, -4.0413295994, 0.3665805070, -6.6798516420, -7.1613033600, -7.1448037565, 3.4512552868, -2.0338700733, 9.2069271511, 2.9831418838, -4.3497766540, 3.9052601527, -0.5342047576, -6.4792937820, -4.9602048191, 0.5331105864, 8.4889044357, 4.6917605219, -7.0913759963, 0.2414521078, -3.5347292334, -7.7416350531, 4.1907384327, -8.5574405486, 6.4268218180, -8.2498225011, 1.2498345579, 2.1027855942, -1.9667010078, 1.9040059306, -3.4031210085, 8.8557398540, -2.1294586193, -4.0050503961, -6.6493903411, -9.9236275916, 0.1476953552, -1.7970955463, 2.0414694097, -2.0969223247, 0.6025904283, -5.2375543410, 2.8506169801, -3.7038536018, 7.0504788912, 0.0041812043, -3.1318777780, 0.0406210072, 9.4136713118, -4.3374537974, 7.6535052246, -7.1866522735, -0.9247636109, 9.0419042675, -2.0183277096, 4.5250891726, 2.5919844679, -8.3906116878, 7.6545271748, 9.7958190644, -4.5146458948, -9.7346261457, -8.0471920338, -9.8964854783, 5.6181590828, -3.7744811576, 3.0075788312, -8.1240567340, 5.8890623157, 2.0499830861, 9.2935022619, 9.8759360530, -7.2875549341, -1.0690799807, -4.1226995364, -9.1444094530, -3.5231868830, 2.7235577162, -6.6002408405, 1.5018441154, 1.4823951848, -5.7437740172, 7.4023902109, 5.7612579412, 5.0161525239, -5.5946364326,}; +float art[] = { -6.7492393504, -7.3298555798, 0.8539318143, -1.4261566917, 2.0803185595, -1.3887044390, -4.0825360822, -0.6593325928, 5.6461504146, 4.7727519551, 0.0646925848, -4.9741489070, 
-1.9229377293, -2.9067049692, -0.3142968228, 1.8833826679, -9.1454342720, -6.0740635190, -7.4625715930, -9.3353812743, 3.7967361534, 4.1668166654, -6.5679814423, 4.7309852399, 9.6095062143, -2.6075815413, 0.6900264281, -4.6487916387, -7.6322589805, 3.2042905955, 7.8963032972, 1.6473116490, -6.6998082359, 7.6038350314, -7.1927389822, 9.1476275423, 6.2909998653, 1.4642273357, -9.5501951739, 9.8495042602, 9.2792346626, 6.5850142911, 9.1481147155, 3.2717620201, -6.2570477736, 0.6467967566, -2.0566205953, 7.8755278035, 3.3126740110, -3.8588718574, 4.2124045817, -0.8267380206, 5.4488904568, 7.9735717435, 9.7307764795, -8.7638531596, -0.4565630628, -2.0470666049, -1.7821981241, 9.0252381503, -9.9404990148, -3.4372928962, 0.7658226042, -0.5020838317, 4.1309557947, -2.4204393237, 4.7467520487, -3.1089515184, 9.4938709082, 5.8859356780, 1.3182867364, -3.5484082990, 6.4944248994, -3.1989073508, -7.6336633188, 2.7993483457, -7.3682584800, -6.6132548783, 9.3623132874, -4.6991657463, -8.8694207602, 8.2236648916, 3.4677703104, 3.4254663524, 8.0542122438, 7.5722176062, -6.9314342836, -4.6881167043, 3.0664938648, 1.3828401286, -9.8644341691, 2.4907992060, -9.8325521075, -2.6888567134, 5.8717572844, 3.0317293716, -7.5474649587, 0.2326022507, 7.9688787602, -0.6272644793, -6.7592933301, 7.8492304334, 9.3160130411, -7.2628638974, 0.6620503182, 8.8885477692, -1.7966417168, -4.9407696684, 2.1541596999, -7.3306150798, -7.0416472369, 0.0402018712, 9.7813799301, 3.4568444450, 5.3772560898, 6.3692002240, -8.8433299358, -9.5790631754, -1.2706177506, -6.3353947334, 2.2160104150, 3.5574375530, 5.8551014562, -5.9698041610, 8.0272411032, -8.4170546645, 8.8228822195, -7.6203297762, 4.2278748152, -0.8590296064, 0.1743294222, -7.7655534560, 7.9779930833, 5.2114767204, -0.0925560661, -4.7026881387, 1.5998927477, 4.9884579624, 2.4085562738, 5.0682935123, -3.7115924354, 9.9132124618, 1.2963322017, 3.1519005785, -5.4885511064, 2.5448671425, 5.3300817262, -3.7518507302, 1.3673966633, -4.7776782766, 
-8.0916328850, 1.9880079610, -8.2660613842, -7.2027734622, -2.9133227778, -9.6839500153, -3.4648830617, 3.0753727875, -9.8173585325, 3.8510961007, 3.2082679041, -9.3804983706, -5.7386316509, 5.6597211751, -5.0694205866, -1.6143137371, 9.7669102071, 2.7757115032, -1.4590048577, -1.1957623598, -5.5965712712, 9.2489001989, -2.1355473941, 2.9050186521, -1.9867750195, -0.5010356632, -1.8036885652, -6.5897825135, 5.5058460102, -2.5698410063, -7.6035369457, -6.4667362259, 9.5834578631, -3.9843398195, 6.9579880413, -1.7201388625, -2.9056093316, 6.2171039612, -6.4674119114, 0.5757446557, 3.5838332803, -9.2283128642, -4.8742060116, 5.9334443341, 9.4048010556, -6.6132317392, 6.1702760218, 2.5171760835, 6.1328179716, -9.7786343670, 0.2062657294, 4.1092915137, -8.4745553277, 0.2213582796, -2.6177797931, 2.0738129700, 3.0556529947, -4.3437090882, -4.0980243924, 4.8222484578, 2.3276243962, -0.2002996400, 5.2037207872, 3.6957769123, -4.0027024553, -7.4568627055, -9.6542687136, -2.4076397489, -4.1468258353, -9.9775646785, 3.6223085664, 6.4042746594, 4.3205665043, 4.5389201384, 0.8934509051, 6.8750226905, -3.4120241985, 5.0159316340, 0.1202094076, -5.6483094634, -4.1174936189, 8.3122423749, -4.0161183806, -4.5148044745, -2.0434901390, 4.6392640745, -5.9624350273, 9.3269589543, 0.6883778288, -1.8759491147, -0.4982322276, -0.3113549470, -5.4695038298, 0.6286965610, -3.2742332924, -2.3937037345, 7.5250222602, 6.2318991924, 4.2373811417, 2.8924597360, -3.7430201314, 8.3659184750, -3.1646180034, 7.4637698328, -2.5777578100, -5.5504031710,}; +float ait[] = { 8.3238664621, 0.4022249149, -5.1466998777, 4.9527412867, -0.1007085587, 7.4972895319, -2.5445148465, -3.2046299018, 2.9359964764, -6.3758588143, -0.8311685313, -8.1359001694, 9.0959947663, 6.5906471632, 3.2589324108, -2.2837811652, 6.2848307471, -2.6926895562, 9.6654803999, 2.8340560795, -0.9039621727, 2.9721743144, -1.0228673167, -8.8261810183, -5.5446504845, 0.0604681853, -4.2529445427, 7.6041334245, 5.5805624218, -0.2157354371, 
-0.9746472387, -3.5119179787, -6.0205503388, 2.1406311955, -5.9233965978, -8.9881797422, 9.3537332111, 5.7196520499, 1.0349885104, -4.4199482503, -3.8621001611, 4.7114798536, 8.0036761751, -1.2733880856, 9.0226793066, 8.1117745339, -4.1827075774, 9.0094575125, 3.5803152034, -7.9720601977, -0.8097954081, 2.7430814773, -2.3507848392, -8.0985127339, -3.7296449133, 0.4642089313, 5.8475539573, -2.4757714933, 7.3370103711, 4.6451951410, 8.8971877724, -4.2116375424, 3.4193241307, -9.4772633561, -8.8202039245, 1.3957857418, -2.1301210778, -3.1756381706, -1.8675042095, -8.5716156490, 3.1025352634, -5.3108886678, 2.6781081415, 0.1191050684, 9.6127709903, 8.2825922539, 7.0144911639, 9.7460234593, -8.0753231826, 6.7654906193, -8.0184211189, 2.5484241805, 9.0400019323, -4.3001223555, 5.3639729348, 1.0181752991, -1.5054216047, -1.7604344748, 5.0210305828, 7.8397300982, 2.1697680688, -9.7372771360, 0.0976350038, -5.0956013599, 1.1743486688, 9.0665040270, -6.2994061652, -2.6056430835, -7.0840442144, -1.3821565508, 8.0431075795, 0.1540274647, -7.4191736969, -6.4650766789, 4.0062083136, -2.2611452582, -6.3868010007, 3.0779908395, -6.7260936529, -0.2344740621, 9.7839136916, 3.4396623368, 7.9177854555, -9.0822312498, -4.5886218169, 0.4823675643, -1.3510922030, -7.1955238468, 4.7277116334, 5.4337755647, -7.0224009592, 7.3675880499, 2.6148074943, 2.2473441530, -2.6289218894, 6.4580088383, 0.1626935186, 3.6103041116, -8.8558776455, -6.0921715384, -3.5142676005, 2.6021736057, 8.2963956151, 3.8182248955, -4.5351036477, 2.4644814753, -8.6468543090, -0.3584758141, -1.8865874787, 4.5680096242, 0.4692346492, 3.1006620955, 4.2018572098, 2.8128360190, 3.9098658058, -3.9764079934, 3.2770300845, -8.9620371926, -6.4841941092, -4.3958141487, 6.5917051565, -8.5438907897, 7.6643709267, 3.4785340138, -9.1520930655, -9.8952594997, 9.7206786241, -7.6432605031, -4.9941442022, -0.0365020401, 1.4375876921, 3.0361571599, -2.9939196494, 5.1894401407, 4.3632929517, 4.5832439039, -9.7381840073, -0.4388817809, 
6.2299989825, -8.1521711941, -3.8830236692, -4.3967585744, -6.6844379135, -5.5648621091, -4.0258609905, 0.8678393731, 1.6192996376, 4.4017338540, 2.6212855753, -8.0397547550, 5.7042611521, -4.3419867227, 7.6448477346, 0.6949567681, 7.3486355063, 2.1986469572, -3.9789666286, -5.0983501495, -4.9295663062, 4.3449849268, -4.0605353985, 4.3698563053, 6.6153016881, 2.3885210222, -4.7435187276, -6.4867335249, -9.4563097559, 6.1518138128, -0.7476474259, 6.3960289146, -4.6024087232, -7.7058505782, 8.9349017065, -8.5557401869, -3.0501479629, 6.2529932676, 9.9620260762, 2.6770391657, 6.0936426337, -5.4715467777, -6.2332124576, -9.7427516907, -3.3924558337, 5.1310049537, 2.7118027322, -1.0547455632, -0.2552233772, 8.5099894125, -6.6829067551, -7.3280306106, -3.0618917580, -8.8066237074, 4.7942810231, -7.2703360438, -3.0742573933, 4.3206533310, 8.7948243618, -5.6427344295, 4.2943471727, 8.4831460094, -7.4690919030, -9.3939582075, -9.3102950158, -0.4253459181, -2.5760024574, -5.0868386487, -8.2259279133, 4.7099656812, -7.4299083181, 7.7381961646, 8.3336273930, 1.2400944515, -4.1684077219, -5.8583795088, 8.9686532025, 8.2001559976, -2.6343411019, -0.1494648957, -6.4130385134, -3.2858139486, 0.6485863271, -4.5107957096, 0.2066376057, -7.5563433453, 7.9185554658, 7.1508423518,}; +float ares = 35213.992109239305; + +float arp_2[] = { 8.43745929274069084158, -3.79090656501482037299, 6.34370892538020569873, -1.47887837599783367182, 2.67789124755077168061, -9.16837279377615210763, -7.98948525619791638519, -2.49809574153659852414, 9.27131083277144796284, -8.00042204195103145992, -5.38045522061010039749, 1.19898571605430959153, -8.16198335168825295227, -7.22622641534854182055, -4.56600881430357485868, -9.52390609803552479207, -3.12111804246965007792, -2.78173169070610803999, -1.51350621996514611567, -0.55447640502994666178, -2.00960424279231375522, -1.68913823138827723369, 2.07978884306218070321, 5.83665058577171258491, -7.26478017387455654585, 3.99251759486548962741, 
7.38286150283266806582, 4.21946100783414124180, -8.24149885481974031620, 3.25604764715986050305, 0.21021953313548102926, -2.09401869834978704432, 2.01854445924738712392, -9.85593621871356084796, 1.35242651361052246273, 0.27188463026615394824, 5.24624667465851679538, -1.74703565988227005334, 1.81563768208864395604, -1.50202535662375780134, 2.36284394003468634082, 8.55247787301424367001, 9.04710896220459659389, 0.18345145351380054421, 5.74991112178676111455, -9.11233026108099153362, -8.07779140843319254373, 9.48454356885453719883, 7.35434280339288903861, 9.62345878907374441269, 4.15953241033099097024, 5.80194889240915046003, 4.89912845661735474323, 4.52092719256254582660, -9.91032439197770109729, 7.99244808544464291344, -8.56131293103668689071, 1.27115035532738573920, 6.25101570358570768349, -5.91441193534615106131, -2.24205839788937666412, 9.79841147200573558962, 8.13261726632628878519, -7.37018783513855080258, 8.54266302174871228203, -7.40280468864210394742, 5.41846759209841977167, -5.95994298901477748132, 9.98108088980375640631, -7.01390299457630206348, -4.88920441479300027510, 5.85522004311156507583, -1.19267575360910704774, 9.27526112575892724976, -0.27333804660631599859, -9.56908614838259197199, -0.67190821378639142836, 7.84694341760398472729, -3.67158721171722390864, -0.01838444601156652425, -8.86576994104704319000, 5.44461424023196727262, -5.83450905578402068130, 9.75213737904397959255, -9.74623652018476782644, -8.06117877542999394791, -9.65572959175746525773, -4.26489449518653973570, -1.47873645641798212580, -9.82719523421518559303, 0.21554165314874573767, 7.41044643378252132493, -3.83531523772845339693, -9.81887333754515090334, -4.43666760422909423056, 5.52518427594592864693, 3.06077181816325349928, 4.87779541573586605807, -3.88561611112442140126, -2.25491127862772255952, 6.41218795816992681580, 8.91915163335891136853, 2.89437568803806755113, 1.15142033560120893299, 7.29625692895704958119, -6.29199945903546442594, 8.51306002312758991479, 
2.02260082643052108153, 0.19620686143089471898, 8.10385446297406630833, 1.04342365955007743139, -4.06640678724358828333, 8.10914796635041312811, 9.10930022681989015609, -6.64076384175658240849, 6.01813568615231986314, -0.78682313814781679184, -5.37115528469642455889, -3.55163007577543332616, 4.26242305994175119110, 1.01446877700475468487, -3.55002509051653269978, 0.11648934776100183797, -2.65796908933602971103, -3.31441031931680107903, -0.75745394372454555310, -6.74565828261024336143, -6.36116880688163188751, 3.87100188037377712647, 5.72613819932646528343, -6.81416576770232040872, -3.47893647113846071761, -8.88019324731695824937, -5.72129684082809220058, 3.71422413160721731629, 5.16910079466337535337, -1.76333304694959025483, 0.88978683100153510566, 4.44697245403173369027, -4.69883035067948817698, -5.92179392372163437841, 1.36450231865565996259, 4.79682985829470354133, 3.02398353186073443055, 6.91600836517033101813, -6.85819698764124119350, 8.74554281427864665943, 5.28037104472775453701, -4.96387725869230411746, -3.20562692661876091904, 7.15680223463682807505, 7.41600516175046209355, 1.50242565385942050682, 7.32570595833397675278, -1.63819512704125536118, -0.40230124748712192684, -3.60594923563696490021, 6.65044685399354307265, -0.24862345792605644590, -9.82641954640317294434, 9.72877933367979608192, -3.51979148568266175801, 1.28948340874663713862, -1.92593535558879835889, -1.38919106184027540962, -7.62303686760058063498, 9.25583712864444763113, -3.38110891777227973165, -9.06986561709004845966, -7.35299121912741426854, -1.67142072972155730781, -9.44049202714897006672, 7.07841105331610620510, 0.54513400554044721957, 8.34224569650051961389, 4.63719967080511885626, 5.78293239146153581487, -2.27540004519534555527, 5.18105805395101803867, -2.37234065815443884162, -3.54216555021444712281, -3.21407288949570713044, 3.28453951033595004105, -8.31270172715844424260, 8.62728478520843466981, 5.64625293939205086247, 7.66676634960801450802, -7.22232388357114629684, 
5.47383945817207795415, 7.91267494827721407091, -7.60363337705962472057, -3.71883103991214269968, 9.85216834330755375504, 6.10568373679249276620, 6.97067342292287150940, 4.65194801081626252426, -2.63898617473498298125, -1.09740590680184624262, 4.59540895158393425390, -2.59424095191993941967, -0.54944711723834238626, -4.39063001096158522785, -3.47994645225751408901, 2.82314072057588205666, 2.76261086876896300168, 6.66325582901657398338, -1.19530678098399079090, -1.85821435670871082380, -5.05957064399499945040, -0.99496318357314272873, 9.14461123416830190536, 8.30103451768925992837, -0.83929510645675264868, -3.12777410037787806374, 2.41230916468304101841, 6.26229750431260967503, -4.97070197860130669198, 7.42884809528757017461, 8.40172290662585652399, 5.69064787744867928154, 9.12468525679910413828, 6.41673754162246723354, 4.17884795262540009730, 5.45978676023996811750, -6.03988750125687090531, 1.36842434506783661163, -6.29430960707381181862, -2.28971787561085982077, -3.16479769452650039341, 7.01912679006949957738, -5.36488149109412226778, 1.34675158403361372450, 7.67412080701577181685, 9.38503308220563781106, -7.98731333189390380056, 1.98560520069355028738, 0.63636268435340426208, 4.30807923941802428658, -6.27108391892946315238, -9.41977972517009121134, 8.78153040065096490707, 9.95683441855834416856, 3.05324836997415793860, 4.19348294151376421723, -5.65073748770769057614, 5.11981625548496133149, -5.07906175696448869417, -9.89484987477634803099, 3.57468041673507208600, -6.34707486234554707494, 2.56790138258603128918, 3.16755998596420695890, -6.91248356681354536590, 9.75379602889355368234, -7.79835211233388392316, -4.40382527721216998629,}; +float aip_2[] = { -5.65350936039103135755, -7.53461986533300542845, 2.38238808823602532527, 0.65703343091317023550, -3.86853701966980878524, -5.37487171984653855361, 1.61688487157937821337, -3.26583315467565071799, 9.89255750418323742679, -1.54778555552860908051, -1.19839473905197557713, 6.45456964526858456566, 
5.86627815068747615612, -2.42300820097285818377, 0.10568894162696551575, 6.70668328703702343319, -7.51191058283233914494, -2.80680774014149747586, -9.84323171435889676673, 6.77177026528960368523, 8.75123502318218271512, 3.72524109056920238459, -8.65215435213179162588, 5.40787287301669294948, 8.77331285785154690871, -1.29982101710801600802, 7.39000463218791381337, -8.09338643257603074233, -1.28356352929903039239, -2.10724579035063186438, 3.91105750726549672436, -5.28804183699144658704, -3.68113352881895838919, -8.01847411034694879106, -8.15686525546457197322, 2.95589278491281071126, -5.00632630819475288320, -3.60879982593434345262, 7.41813581766985308263, 2.72271140261294952722, -1.12146433971588699308, -0.70213343579274400952, 9.08310297656054999038, 4.84405615374122078265, -0.89814607953952574348, -6.91794162030389614415, 8.49939764106344952665, 9.63932192649696517606, 3.49111336194552634993, -6.06538342447720246753, 5.28820992321296756700, 8.97848315052059575692, 5.64967642405270531469, -5.73310653001466441481, -1.57935332864451183355, -5.66635938418059659227, 3.02845655733756657924, -7.97584858067018398486, -0.11855199585911257998, 5.05458978324015539840, 8.04496230226541086950, -0.37888659391822443467, -5.79979572050115343984, -6.13690277997179833847, 8.10287686782804783547, -2.54128932165685839095, -2.28034108911649990858, -5.88115717220247091745, 0.95605725425229337588, -5.54183770126532238010, -6.62255511343907699029, 7.46126060104428034947, -3.52578933089151558278, -0.34325297197913506864, -2.29656251517999798750, -2.66651020995028353866, 4.37975711938535461343, -9.40132682096012928241, 2.51680992114453339070, -8.99979043907614695286, -4.03071688964711238867, 8.74368094696242437180, -7.70840735442256885790, 6.86945508348752298389, -0.29765900190507110779, -2.27674651715465969914, -2.14673941451483507592, -9.80875235760580999056, -1.90648494462334383570, -9.07159835346690535118, -4.07648428385307237676, 5.07760090791725460235, -2.12880216975946900959, 
0.11253841618262860891, 2.47883088666034190339, -4.88658454317203894135, 0.69825278678206537109, -3.95102818814893286259, 4.13020025672998514210, 9.19277235390656954905, -8.53563705294434882376, 9.37832785439148608475, 6.16743694244934204107, -9.33011831321337581358, 2.41902089531714103998, -4.91232147899358473353, -1.64423506916041084480, 6.99269913284214084115, 1.25220432561346939337, 6.86134527186928266929, -8.16911108343916048113, -1.31020661215102407482, 8.20077836852112795896, -9.85932804382389349485, 2.62836319185545264077, -6.23215575621655482053, 5.94738376141915203732, -8.92705553560861631013, 7.16122907017497922766, 2.22358397393553275379, -3.40056274117893586606, 8.11465529621898440382, 4.45410144998337287348, 5.10159784930889870225, 6.27622996857370907264, -1.44167359181930798684, -4.61857273924280331556, 0.46146116521202173999, -5.67387856597982409568, 3.30500874906882735615, 8.40687418688687415624, -2.10106504153127993106, -7.21945171305157273878, 2.38280432022062349517, 9.57287521123297935333, 2.54033985984325205720, 2.80343835143212594119, -2.14531992814503524869, -6.53220414944285732162, -3.66900026966375580884, -8.51267612504278048391, 5.20667135541658154807, 6.69744412054107840504, -2.21626286119987625511, 8.09841938357177681951, -5.48358643501108566909, 5.48294700273428503579, 3.87165222202425951536, 2.37144122186972694522, 0.08682635929894644278, -4.76223837484058609704, -9.33818557434340057455, -5.56671034982397117119, -6.07422896694584935062, -0.13562947005065240091, -1.40992588860957823726, -2.88432357463131339159, 2.25082850408499268724, 6.25559931091854437568, -7.01925707819746058647, -2.05060908010521636413, -7.50128717503440078929, -4.13439494373242411029, -7.20386088027946058787, 1.11110877146580477870, -5.12598132370935211100, -6.64405465330753663977, 1.45029877289811892638, 1.88246158214118786134, -4.41181552040718116814, -4.97838775484780882152, 4.19265867390973312467, -3.92948554642326541853, 4.59210894942650327266, 
7.42069003529477910774, -4.51950683787727847118, -2.76393019097342040880, 7.62272334684279329053, -2.42723977064160756356, 6.29926976900224033784, -0.86908731219534018919, 2.12909386431308433885, -0.90233527925219370047, 7.81014131178671178191, -4.19977393940230481206, 6.87761524219364517307, -3.22440832843055069645, 1.35081121813558269196, -5.88111844742546630016, -8.35172629708919345148, -6.77852906445066238916, -7.43946590072329705379, -1.47446644788528047343, -2.51207691137060429298, 7.81011309059402236699, -3.28835332415695624775, -2.91449953632264779912, -4.74367125573315462361, 3.08348345734415829611, 9.76264410847672436944, -1.76954041748202328677, -0.59576998792539370697, -7.90602609198301564675, 4.53034971845075951080, 1.21156021900716481809, -0.73056837803975049894, 5.78157645769298511595, -8.25246260346956539422, -7.91820082755266341223, -6.75758051948325544345, -4.06930145992641811858, -7.85680784478061156051, 0.79832027588554943520, -2.13099971822921219200, -6.67232703289103845634, -4.64818712492645058632, -7.22039650036147939716, -5.37596178885364750499, 8.56768416722904646576, -8.64743004864453723712, 1.84795398044376391056, 7.37397187918929475359, -1.33225380914748292582, -2.36550850499985543252, -8.98546562922326153000, 7.03308837583150747719, 1.58870186009972336194, 7.68509922177204174432, 0.94190213850124315798, 0.08014887710806206655, -8.62444348827414586367, 1.55743415767695303487, 5.50809819898437957875, -1.11143678380283716933, 5.63777349499060065341, -6.32662545881286231264, 9.04608920353810219694, 2.61025560733524386592, 5.51619777615333717335, -5.48399223419705261762, 8.57360604742735432637, 9.03460073955444897820, -9.29499517606604896969, -3.89201645796850392856, 0.13203197291327128937, -1.49788170838492362691, -8.14914153431171683906, -3.04584596279814601161, -4.56690202028287917102, 8.41744786474986739222, 5.07502003919835686929, -9.45422172061839205526, 5.01466134022855314356, -7.70683668825200740571, -6.32326846776050555121, 
8.05033051053458592605,}; +float art_2[] = { -8.04278908895324207151, 1.12785142657715731218, 6.86059157227089499997, 8.92967755769522852916, -0.51482705171826026458, 6.31893808805814671814, -1.73088026352827561993, -8.36713114597813500950, -4.55745186710498728644, -9.39834209487190719301, 5.01865862781949623184, -2.31813537537875813399, -1.89802105292708489515, 7.27182682535846680594, 8.42663014902932161476, 1.26640785447157178112, 4.06882177623799634603, 3.04825683132877323089, 5.76659887320094988183, -9.34030148670915849607, 6.44104029305729142152, -5.51670314441993880905, -3.49259938787497326018, 5.66472502167401970041, 7.84146266589478457831, -3.87055140444468825933, 8.41531288444148728445, 4.74331476791792994163, -8.88911155138595532321, 6.53328285727053525989, 0.48737487379482047345, -5.55828069357876319856, -0.87836580549503828763, -2.72399839210968153225, 0.14998669370485373520, -7.35849252508224260794, 0.79780214615022160274, -1.22319433313663417096, -8.35560546979123408562, 0.45796911639621029622, -0.75899359197963534029, 1.23394876601657088599, -9.24490703121305656964, -1.18859778636170254629, 7.04667316048542957674, -0.09868125325787246993, -5.67147664327546863205, 9.32674111707506270363, 1.14973127920692874682, -2.17776306255969664960, -2.23475498933185967587, -5.05181005837814112169, -2.14848235316545554952, 1.41565149630761943911, -3.94930517930389868297, 0.01858140157810872495, 7.11199858388465244730, -8.46771263124121098542, 0.42662727915782383548, 2.58342394475135961329, -4.14151014656263427582, -9.04009879181798226000, -2.88616709740585797306, 5.77264416626676535316, 4.82816862180129469095, 9.55768057368295131937, 4.14795553063710187303, -2.39965742840013085413, 9.14201251747381604673, 5.41897133127992880475, 2.55076269310611536412, -5.29501866852852565160, -0.01216834345154893526, -1.46180009792680110081, 2.67485092458743878296, -8.17062595866073593243, -7.67475142449991931670, -5.45590823253436063567, 5.43561720227924105586, 
-8.35528329989406692846, 2.16119053339798838920, 9.81426574388344974409, 2.47462411486452893428, 5.26778675313031641281, -1.85790734502383614313, -1.35176219254075746790, -8.45903154357261222174, 8.30263675163426739800, 2.94278937658216399598, 1.38424252714235684891, 7.12131758519153024167, 9.66558114473473750650, -5.34187305264095790847, 5.44576918079834548791, 3.25407850537928311496, -3.61516236712179406254, -5.54284178345941747068, -1.43195684158161462562, -6.81199900110548206555, 6.62348028457272874903, 5.27817023866659340570, 0.19173772455599369380, -0.47522689678076446285, 2.82515333966201609428, 7.17776509472852808358, -4.44229385640943874591, 7.68889578285657648848, 1.70407633102228217581, -1.72074221636134794267, -1.29432933299898778046, 4.83884307149483383625, 4.36176378814671572570, -9.48945545759084296833, -1.58796296469618347658, 6.93786980700744493333, 4.46687311593197833304, 0.77510752969318730266, 0.76045278457525178339, 4.52183966437964990348, -5.02652538286536110235, -3.24273443314588583064, 6.70544888813595818533, -3.25141161220058094727, 7.39970622405706635050, 4.89777781287984126379, -3.60639865037963858185, -9.94133206251487067107, 2.28413332516883471612, -4.49294195847568644808, 1.79700439851676918579, -6.60076853781827566081, -4.84718497022608385549, 2.95498254444445329625, 9.83101000498242783010, 4.20406888745896267778, 5.73679473019967645087, -4.96923302483140982844, -2.06898738228969136799, -2.23451037242572869701, 6.05406578017850094398, 3.39225211960647854426, 0.30391392708608400142, 2.29799067504979248611, -3.95916622694869424492, 3.66367441292338114067, 7.95394637390075587291, -9.97921924860560949355, 7.65629909749230108673, 6.56506938616354318583, 0.90798356042071937111, 2.96697381854639452570, 8.55442923674540622869, 5.86606608728752654258, 8.08948961644039599150, 9.92625080453851182938, -2.84585194693497633978, -7.69070851396261367938, -8.07230909390881556931, 3.34280240158158292729, -8.50992096493246741318, 7.00178310806557391288, 
-1.64566725531840596375, -0.74044311744808766207, 0.02253993428691813961, 6.92553770755583641971, 9.74794816636211791661, -1.88742984940143543326, 9.98046557935695943797, -8.79090108023687477612, 5.94234698676665828998, -2.37435496791257705240, -9.53175138057738990938, -6.67705811781023683693, -8.89910860284260607500, 7.34452986725120027245, -3.70719733140780149938, -8.25423842312207867167, 5.40848794646273489661, -4.09521608043228724227, 7.66994178983691909934, 1.00480214682535695658, 1.73694208718403508840, 8.99028875371183744392, 0.03479744700516107514, 4.93928033966865953630, -7.39728045502817188606, 6.74198234377155358743, 7.67800355078334462178, -0.27987886902048408899, 8.08323031204988140530, -7.47972672963337359420, -0.25408300300252406601, 6.44424115546086895279, 0.95813584559620679215, -7.71855925612579873984, -3.13239677819721329399, 9.03057034884302822775, 0.00488720759867611321, 9.83840330520076378207, -2.73831052443344713510, 9.95954595620166571734, -1.37622083348651891299, -4.80171484129668080243, -3.28886592175033065644, -2.23558561478779438403, -9.44813871449114373036, -6.51981534628976611145, -2.76948096725008863217, -7.83193872753720654600, -7.29554233722163214537, 9.45004430551233554070, -4.47071270329848680092, 8.32423003301535757714, -1.18767904871489093921, 7.69329611230538290556, 6.49206168725135768227, 1.21462262148277311269, -6.01302351581854299667, 9.53333663650502671771, -0.77923282134104887575, 8.90150599199465020206, -8.16508411869359740365, -3.82380303723143732952, -4.39150415086599021919, 4.20456138983024096945, 2.38914267345303699130, -0.00438489743831382839, 0.80952012072329182502, -5.00229663266405832900, 0.99703445541934598850, 9.15917003563760800944, -1.27713636791374085533, -8.94004113661896937515, 6.45608080690110952560, 8.38582696602515298423, -7.29105349372333844116, 5.56824703153233002695, -3.22794803005794150863, -0.38265554286023828467, 5.22580989573675758209, -8.77412643037001060975, -3.56694445227235146234, 
8.62543880171540422452, 9.69993435660552805189, 6.70671889193423709230, -3.89808864235575924795, -1.68175952123648286829, 2.96291616308029048810, -2.12944310933316849344, -2.26629290001815597577, -3.72072887922986161868, 0.92620158847197231466, -8.00860401017888889896, -0.85305760409802822153, 7.62636637056669286494, -3.16009950457701194182,}; +float ait_2[] = { -1.38642544862208083600, -3.68012923040837591770, -2.32046426258931504094, -4.31837406215113617236, 0.93101649577043410488, -5.35336892062208580256, -9.40947601267632904865, 4.43082816798664502755, 4.47741017513797245897, 8.50000482072559648827, 1.48767058316395761608, 9.09371633779299060052, -7.44368572021548047246, -5.43059767376518287563, -4.29875035704901264921, 7.75277713078927632750, -4.12308959295899768449, 9.32062849314826991076, 5.22151593549145687234, -5.38658698488573062235, -4.28250723015196399501, -3.85495386590079647249, -2.00689330550637379247, -6.00807747675003156473, -6.38135011650375894021, 7.54862925316705357659, 6.85451787270373458227, 9.54373090744682173181, 9.32543277200554499018, 4.22343239174374573963, 5.47304564157856709983, -6.40132713093814409433, -6.76091132598729060987, -7.15881341100396362265, -0.11719026412229638368, 5.86296039657631595787, -6.79004282787237301022, 1.44994267116828723374, -1.81900168366559356059, 0.45552998866489602392, -3.36358526316346484464, 5.93845194453027502846, -7.39304684195029704341, 6.63527174633508565194, -2.12723045437173752248, -6.86285808229549587622, 6.18881895235511336750, 6.22835945130804091718, 5.20162542776903258357, -5.94850094484163705033, 9.92613843950459084908, -9.59187223411606559864, -1.32430895614682242467, 9.45054396848865252423, -9.49811939580921738013, -6.92428934382954164306, 2.96938427289267536935, 5.46817787777650110570, -7.20664501563796910943, -4.50653186177245146382, -4.78019111877659064191, -4.31851516020218362257, 6.15620239765564747358, 5.95050024032967428411, 1.06149114820270717985, -6.91228899712841027281, 
-0.40489484993416802183, 9.30824181200017264359, -9.77630638597504031395, -0.52527333799283937310, 7.66107460223699376911, 8.50329845751967638989, 6.27904045180589776010, -0.99513558018209202771, 1.93249975157454123575, -8.59288575444469238107, -8.07650803051157772927, -8.57748223368255402477, -4.31648999975298330867, -8.69196294388153134491, 5.30803388284349964010, 9.31649450386909805388, 5.44409805199011032073, 5.41373534272923784272, -0.22162784029983662037, 7.46661323176995495032, -6.10445849058969436385, -0.31293064214185584149, -2.21899152058672122223, 7.64591582416979420600, -6.21421097987868265022, -5.64590881050830617482, 5.61013075041661046782, 7.55412694957528429995, -6.60515858042055548083, 5.71558975029983074023, 1.07050651330480306456, -7.25089530342225607740, 6.25616177150686780806, 8.64853648076640624254, -6.76949002074103312054, -5.72253677803034488392, -4.90355432512312727766, 2.01081456860315022084, 0.46590262174767538284, 7.47328059444058823146, 6.17912563687525562273, 8.47829978174267040458, 2.60426176318954105682, 0.93638255366501965682, 1.88403766846867526397, -0.72068739657787084241, -6.17849016189693855949, -2.34547083940113587630, 1.22040708140938214399, 0.62387754418382712629, -7.30667299716242624186, -6.49845884001454088263, 5.93637861389826682057, 7.70076418139821683440, -9.31504194018821074508, 3.77057223381074457791, -8.20011943313577162940, 6.26001309007186179656, -6.66807764512333633888, 6.64749219259805101956, -2.22628606019940722405, -7.03209267466179177575, -4.69214583472043855750, -7.08276626944011944431, 6.42208547275813756983, -2.08448021175567266283, -8.66874657563626094259, -0.46985910641734918158, -7.93119181040996323873, -8.17903341151542129239, 3.72639054967378058336, -2.87819808404945920444, -9.15201676995017621152, -8.56891422113876366495, 8.38512112202015202911, -2.78714041955275426687, -0.25464272423212186425, -4.80891014943849448571, 8.03131332188321422905, -4.15608255598436393541, 5.79180856710472014015, 
-6.86924339535374350874, 4.22514561053155901504, -5.38514144936190319868, -1.13575892643621401135, -5.47381086307812836367, 3.76299288241311913339, 8.99598761231441201858, -2.58019390797431302786, -4.03256141762696529440, 9.70501581055474815685, 0.74610420264919952160, 8.51900935114798230074, -7.76465090238649402465, -2.04849424980820149500, -9.09266307359636805074, 5.50337088216883607572, 5.01803166019883661875, 7.30177572898176308058, 5.26361784036289925837, -2.80837389161305672758, -4.98752949011971047355, -7.64409643244571945786, 4.16360490504215796648, -7.56869699746497648363, -1.42419881490274491398, -1.64240704038575380252, -6.20494451010476666397, 8.41224908333931864490, -3.06431795089268099730, 8.94134816631751405680, 7.91412362764237187207, 5.09197272837896974806, 3.99163021097724346475, 2.22272705855133700936, 7.27866435422302515690, 6.43782953182771322531, -2.07305062144505392041, -4.75924829177096775368, 3.22039189085832333603, -7.08171878985538860718, 1.91039514081695571690, 4.58773055309061206231, 9.89786714252520027912, -1.59209244520215875696, 8.00696994377743109794, -0.51166088592701086668, 7.73385021614687673264, -1.32117896496298392606, -6.62245844459322619002, 4.34540577563866925459, -8.94110549891519212906, 9.77061270573923223992, -8.60752666722604864447, 7.76139216306778578769, 2.50862707120674777173, -0.34064906155069252236, 4.23642847278797596289, 6.85668825313377894304, -3.15041522310383292904, -8.40704288375700414804, -0.73099399358191874398, 1.42650027326667228067, 9.79571532747868545243, -5.82284828139834331751, -2.26306437945561000902, -0.38485808626059814230, 3.42829540506996366389, -7.32658336920655806068, -5.53549518959959918618, 4.48756019668188166349, -0.34892311819540999807, -4.40318402960679033953, -8.39340315303571848915, 6.04529546736424094888, -6.55886408826799005567, 1.64833177707915545795, 7.52718085485077637031, -7.75060778523374693805, -0.33529697468626373791, -0.89151894567459599727, 5.21347958621116092104, 
9.17089421355154854609, 8.09923737361599904716, -7.58009895941637878991, 8.42928129869805431440, 2.65380018004430695555, 0.22970059676560516948, -8.42363752211973526585, 4.75805039561763365441, -6.69036104808416176581, 0.04157036379013234750, 7.94998201612649424419, -0.73931777740913240393, -2.64551419444840174577, -3.58812360034725941205, -2.49977462847194509266, -4.11868062956712854827, -0.18804622665149040017, 9.42238387063862958826, 7.93383651025321157135, -2.00780671689105361111, -5.47201436003079688675, 3.68427682308369242037, 8.12729387389320123702, 0.81826830291566388098, -8.48657409267646833939, -1.60697301717162943646, -1.01221418560744069737, 2.16694464388995378101,}; +float ares_2 = 33565.83373326744; + +float arp_3[] = { 3.56311459251687878691, 6.87669501490476164918, 8.04824892216409892853, 3.51112032601978363289, -1.97368256401849073711, 4.02703305240997799785, 3.02825497097968820981, 2.63390539949563162736, -7.83160221003649859028, -6.76429128251786870862, -3.60893309101216530621, -9.74643981934546665968, 1.72280892597845358694, -1.88722575932827929535, 8.97838709852246452670, -1.24025544898843520514, -7.30502787809978393341, 5.92962391449515280328, -4.42490176264096035652, -6.23569038826299326672, -3.28013331843828481738, 6.25919386765870555678, -2.18710487420635324440, -6.08960895536189994459, 1.45790935028777113303, -6.79781417170497448410, 7.55635979780016242557, 8.16200212772106681314, -0.64769757806323546845, -0.15837421562432574262, 8.44044114331519068628, -9.28929240057046357038, -4.96605806968236418442, -0.66588130555826019474, 8.80056491075757563181, -8.34109448142270792914, -8.20221090248867312766, 9.80007957323224232482, -4.08488398900010984960, 1.28346757852884785223, 9.39060691483102516486, 0.62772030075299767304, -1.02082915170314691977, 9.05841626293278423532, -5.32184613334958989128, -7.57504454082835287920, -2.51343439573671556531, 6.14681963112752782763, -5.50645668481084982204, -0.43257454682674989499, -9.27987578575188543084, 
-2.96352584080264058741, -0.97159720774843094659, 0.65039829833013840243, -0.85941100821734828230, 7.71740830745872230523, 6.22974246214566207414, 7.39576632090834351629, -0.16576257266739702345, -0.05010953647974147884, 3.55730724466563685837, 1.46478846140106000462, 0.07925704804276278992, 9.81293041112045827390, 4.69550319142971517294, -8.61753259248146896709, -5.54926949017441373257, 3.47046534196457656662, -8.85850525220049789255, -2.95010061819817259732, 5.82558610495686934883, 7.57901367583468399403, -6.45940639622849310797, -3.76507998547408284651, 1.61603417948466088205, -6.03103233392393534018, -4.53723834890511046325, 7.59992752974651253339, -6.42017110425984860456, -7.78336029420503283660, -2.64356789734231156785, 7.53185087385263329907, -6.53992516310179361483, 4.78163553556396614397, 0.62098199077994564732, -4.85549718584820588774, -8.55887437005744189378, -6.01471110874013881187, -7.88584118044930448832, 9.53832256319535787270, 0.63457428025757067758, 0.30926676793269258781, 3.87993059837500098297, 3.35048453022575465354, 6.83190620762249523068, -1.61978868785157459342, 8.52840804896203152907, 5.33896149791479324165, 0.69921672949238633521, -1.81405575280639652647, -5.41629603227566214230, -5.49911074253945741930, -0.87607200094535819801, -6.10000694565501433431, 9.38840360842628385285, -2.97841924933603952041, -6.75411037315643625334, -2.02408578607934153126, -1.20165464640175478905, 0.83117431352006043710, 6.90493710934665116952, -2.68756703858543222196, -5.64245715992813146045, 5.05510531218558512023, -4.43033564029795279282, 1.93111193539949255182, -6.83850769296360283533, 7.42592942565409330768, 2.58977195755251621279, -1.78037175970787053814, 9.74779940469786509993, 9.98205307786672335624, -7.65314046470621001106, 4.46775228602642293652, 5.61089590261347126443, 1.46331923576634181927, 3.40763329691176863889, 0.52927916573874789208, -6.58750471788093427961, -9.02722456576425713592, -0.60763357435111409188, -5.14178491686459615551, 
3.43497162182969617561, 5.23374799999741391332, -5.57449102299060683663, -7.06963983788950578457, 0.79888816538092655151, 8.22443603400709122297, 5.51151177102772571459, -7.90406394964403347103, -0.74858715771753026047, -8.77692868116277047363, 6.69540038982793817013, 5.71634904922266606775, -3.68337716080638877258, 6.37343034419597032070, -0.25519577201423970791, 6.67002457709965668187, 6.15916868130819494809, 9.94053107931699742039, 7.77435699640686195266, 7.76953517271261162591, -2.44535780339237618364, -2.74114714961039673824, 0.26760473031649212317, 5.49491558203097696378, -7.42518727047692728149, 8.82139268846452395678, -1.89943996842568019190, 1.00574011248244943317, 7.14154581243523622902, 8.34474765509500215899, -3.93695093635180182190, 3.18801696295252767754, -1.72970695774043647930, -8.05676311598719330220, 9.69237937598396825933, -6.01244773973911250664, -1.45679619437512997138, -4.82956544901299622552, -4.32728105785851369092, 1.74488662743962663626, -2.98247059444288709074, 9.80534794110738872064, -6.22099636730733873691, 9.86501502923766437902, 4.81317246855425828755, -8.04256623686154270558, 3.55069307285497259841, 0.75972149394285182211, -1.97195160568379179722, 7.22980389877809059840, 9.60681966157597955203, 6.33156815277531848096, -1.90999525046834506270, 1.90953593036048729914, 8.24545248341478043130, 9.90441966045307964350, -9.02612739357729942924, 9.92550355710624998551, -3.82172330201195897814, 0.50959340856448775980, -4.66610344944045074556, -9.68873560874004269294, -1.47092813018183221629, -9.07094062598763528626, 5.09721894030756139671, -2.18984456553846840166, 5.77790177243907976390, -5.10098049578775203372, 3.97417145556771878034, 6.86909402261634127740, 4.60618806157796356615, 2.88677395887320820123, 9.94317573991408920620, 7.84189273062447256279, 1.46717345161206758064, -7.02423291922136883159, 9.00526432045018054851, 0.56589253204460021607, -7.88026985887613218296, 2.35124227071876390482, 7.04955125718840491800, 
-7.98032202825996073159, -6.15433538570061333672, -2.39526745328913648336, 2.30865825107081334977, 9.19505269268583447229, 1.35917314265243938110, 1.49845836390053399612, -8.51414362877386210471, 6.82172450081503711772, 4.81342001759808013617, 7.65582905063017449265, 3.05288369584060603756, 8.75652496374863531514, 9.47738154949120925608, 9.28432362347395923052, 0.90673802135007264269, -2.55944892466041018508, -2.51403008785588255591, -1.18276840234485547398, 1.82247090493920183008, 7.86599495368318457622, 9.70974983110780698325, 5.29818644835075147626, -4.68890290483802196775, -2.54827940997801327683, 0.39533219647188921897, -3.92756139691443451056, -2.18178944253850026769, 7.82952447479691215904, 5.30719721784620013239, -1.07579446824950508699, -5.81536912908673997435, -8.07653239697032176991, -0.25358952777117416133, -5.55753245713567878283, 8.07243396735280782650, -9.05891236562821333678, -8.10314909846952424743, 9.30033580423992844999, 4.86259717637908650545, -6.57445294875917074506, -1.06835534638009654884, -4.27639305899190524940,}; +float aip_3[] = { -3.23290250684263646974, 9.03498252240548893610, -9.49262435882835475809, -5.06286062461963659587, 9.11661562027508054484, 3.12020403327400508431, -8.90796052974812724301, -2.46704883624293103139, -5.54729104835194952727, 9.86496398013113662273, 0.70528024480505280280, 7.79795383857151591656, -1.99217004545159070972, 1.08048947257009331224, 2.03906463316376829198, 5.63796818997950310859, 5.43869149835281007199, 6.03495885703701517855, 8.24575739560672360540, 4.18129642518703370513, -2.89424091100116420705, -6.70147665335671760545, 7.61145031867309285190, -6.94407489136886724168, 0.01728899688501250864, 1.53488724921396268996, -2.34800984783001709388, 3.13957512278810568773, -2.74974348636134280355, 1.86202648410375104504, 1.60034535397205068818, 7.68988757688509139143, -3.84946065572923501463, -7.28938026111536530038, 0.41897534323372020992, 7.27224071884252154518, -7.83737168066969758229, 
-9.68354772041077715983, -4.51377909080517003559, 4.58882198108206296183, -4.51199883369870669014, -4.14714756057910083342, -8.26998675838738961374, -1.41114894040527616426, 8.67915385536009154066, -5.41986454060657596443, -3.38483183575430057743, 7.99323643732721933475, -4.15345921138991158728, -2.42635793596354609747, 1.13265621031462693225, -9.76077846581815933291, -5.96125824542250448701, 4.14594847435560787119, 3.78639388847457958320, 3.34551689947329755626, 8.23039481205885081749, 6.28929408465090489244, -6.96656419631784373792, 6.23761822873746396567, -4.31500282377182031723, 9.03389473832473299808, 1.12577261561116692690, -1.07877344743784142622, -4.25347989376385804405, 7.61330288051513903724, 9.00550306237522590891, 9.30460820273084721066, -1.60045297138852760099, 3.24185540293925278377, 7.01135478122724720151, 5.75206890039485330135, -5.19409363487857600461, 2.28567062374207807807, 8.71177028202903258602, -7.67526929609185781800, -1.48248295567338139733, -1.78448293456024131842, -6.52243876703096958636, -1.11343374608298617545, 1.91473067005522779027, -0.99103591163158810673, 2.87610550910202888986, -7.42377579606383797284, 9.23797074194707334982, 0.52987262369649457128, -2.85975584166040075473, -5.56238080735875328742, 4.54011821426302120130, -6.75623002529207994371, -3.41945261029414915299, 2.11021314601810416889, -4.52652361536611813619, 3.59958187796987694185, -9.06868276327505817846, -0.04636432727688877264, -2.45401269203295591126, -2.36769978629793698133, 0.04885150865510290430, -1.97207179073664740088, -2.17126518631749476640, 5.40824635357168759242, -1.68028874464551236656, -3.30777948444939084993, 5.92733107754240151621, 0.77394886207832058744, -5.46566720763942903716, -4.80028110851448808205, -6.19898883100559849879, -9.54632883305107782235, -7.06128919278238953439, 0.37822694370670539854, -1.16398856836733699538, -3.15126999611416813707, -4.13489780007679552654, 2.22615182116493315334, -0.16235180819800376639, -9.71969414999387737453, 
-2.25236608398408577614, 9.03780203741545307139, -3.99426298907304477837, -6.85966594972139187547, 7.87987582063953340139, 8.92848252999750613412, -4.80064930030714087650, 4.03902467999435366153, -6.35200894735895360554, -6.91673848381585365530, -8.30359403382313132624, 7.26067102728818269952, 0.17170253738858676229, 4.42555871303523318261, 6.97676813562016207015, 0.68433561670986797765, 9.43041376073451331763, 4.47078266386699141322, 1.28449662235520989384, -3.65094522174956637173, 3.82550338697418013112, -3.19604411601917881569, -8.30997282981865836859, -3.23643393861631700759, -8.08256449473433491448, -7.04545044409723431045, 6.53283207411585209456, -8.68431132725208243528, -6.66049294417915938027, 8.39277993888919837673, -5.51795708487144587195, 5.06274956279014709537, -6.04905057031118964517, 7.89835317673068004751, -4.65258086376030988163, -2.80743328694388161182, 6.20331133338815732259, 0.41240421069617738681, -2.96632960919597010729, 7.49767440988220812415, -1.40629216106182752810, -1.79887827914224374126, -1.45136212943459241842, -1.90097885926140008905, -3.46962403687019182996, 6.02780678057579066831, -3.32680353388372473944, -1.43859466302201965959, 9.27894118897502551135, 1.68200616780895728652, 8.97979866162246409544, -8.97536553925247204688, -7.70880293524018878060, -2.58594425559216301025, 6.26634678017307678033, -4.19063647504730418802, -1.39077010971768544323, 1.62424250085350330153, -8.37888307535489751388, -6.15936170642154223742, 5.83122965913244328817, 8.34589139230354248866, -7.27547396313778449439, -2.91128234526452267517, 3.54128862123840093545, -8.96163232728181569087, 6.59088889525130028346, 1.26600855783107846264, 7.49465053214340670706, 1.45164272715643249967, -5.42570057312865827726, 0.52475075379087599003, -9.55945440818796576821, -1.23774087305292113115, -3.14795589466653868982, -1.20460128252931575332, -1.86496078205968807140, 8.86138249953289758309, -0.47271253229669518703, -1.13252602850231021137, -9.56101491026741001633, 
-3.83729645628213766884, -0.54330178886186075715, -6.55991297335562606463, -5.60158906434164105548, -9.27463368707721258488, 5.36985063708216081579, -0.09662310857370393080, -2.13992959120874015611, -0.89747229869680644754, -6.07559078785918860177, -3.22408119485389654812, 6.03430732465884389626, 7.91282309266943428838, -4.34773304127487669746, 4.57648069058548934152, -1.62598475490024085843, 1.89417123639821838310, -8.42317799033316561008, 7.30510032212941240459, -1.16179018106284814849, 7.66239620592111947417, -6.17043602773223121005, 8.79038633987697082262, 9.18381609630436557268, -0.96296413026226268528, -4.75095961764432317409, -4.31371948814707373288, -6.97535688278403398499, 1.98242202981339588064, 7.05401153084228837997, 7.11859271679097815877, -1.45069173207029677997, 2.12773386676743925250, 2.99375825008124607507, -6.74197071403517433907, -2.62244837571354416639, 7.46872365726169817890, -7.38947832116204850195, -0.08964850756678011123, -0.83433221635074872324, 9.13752849546632361921, 4.11943399864745529726, 7.69768795982767173314, -8.66837448628209017443, 4.74885184954021433157, -4.95950010762339132953, 0.10049493738109482877, -2.47557500496966564896, -8.06822419560182169107, -0.42903844702162174940, -8.69333431620459684552, 0.06414116829597560354, 8.12542640139241356678, -9.88907733808730782243, 7.71195004723674770730, -4.48771438521941057331, -2.88301630118592999708,}; +float art_3[] = { -2.26173934607462534530, -6.52139971683012298342, -3.12916654027354912415, -3.61360376215010425938, -7.75370418703271280236, 2.86093019190021280451, -4.83690738105799944435, -3.79295696212658306479, 9.05288277176741118524, -9.75304032003426790709, -7.84914647204793425317, 6.91109316873045287366, 0.67260026557401531022, -2.53282107732476013950, -1.97764113249115247584, 0.28011297594279049861, 1.81597617454252890923, 6.57752258453368909841, 8.56158007807064791450, -3.77967238441399011606, -4.04768395284304460802, 6.52043092707121374474, 4.06796222069145230193, 
-6.02008992086368976970, -6.64903391942815336080, 1.48818366096720922087, -1.39923636977079013377, -0.01347401769480960354, 0.99651997166047046051, -4.91697606016906973991, 2.05139201840071727645, 2.70047873042224395590, -2.63625722196251466301, -3.07079082099618982937, 3.28555818068083738126, -8.95134857100147485198, -0.50081678247498473411, -7.51894853092542447826, -6.88578960348241064793, -0.54504253093442045497, -1.55128637174030714618, -0.42649287539229163713, 4.33979383475623237132, -5.07956609299747796626, 7.46674537499421830944, -1.82815131069608050041, -4.37566551540675696685, -2.76614986980625410240, -6.22455560502931781741, -0.21391418349306334790, -4.50805046407880549708, 3.63099908630496415185, -4.52070412977942304167, 7.70190982096603349305, 7.61648444422666415221, -6.17862140477673804639, -7.63975014847110678318, 4.41526950687651620342, -6.87495110174963386385, 7.01396937401316833416, -3.14404637395338681927, -5.64641226429549547561, 5.06697310895740216097, -2.87725888403055307663, 7.66533235553696101761, 9.68416950985696800558, 4.69521250026197023431, 4.84292043777602287946, 3.17975269501541646378, 4.70121738430903945982, 8.55707550472692091148, -5.08644953893295781455, -3.86259298688116992082, -4.08533279073434130169, -7.25516754890608694950, -2.81632401934790621567, 7.77609033781973479904, -4.24893039599796473738, -7.68314217261673704229, 7.92462533991812279055, 6.27881228476874753142, -0.20746894021613471182, 6.25961896582596466487, -3.75443057039080585469, -5.18390293527659018480, 0.51979507671120828149, 4.39789694609627446198, 6.36191063114389265820, -4.13890816553489138130, -8.43434612705675235134, -4.72510373616794865370, 3.55971648806022145095, 8.57215789070301426023, -2.03287687696869490139, 7.90912690397671980236, -8.25303803669039837132, -0.28307187350781504165, 9.06830520947472606963, 2.34371639805787879141, -1.06647625114774058375, -2.74481363294000413333, 6.72153430291236020366, -3.97518552912759126627, 8.50109408090870033448, 
1.16858478680771149527, 5.72142000291267294187, -7.66617172889237430411, 6.03398636561944101686, 1.91937156205334247261, -3.50618865411297342405, 0.71798658301741191679, -0.53639873511545843598, 4.37332543028236209182, 5.10897327052807881387, 4.16269933548869985884, 0.05178901450715933663, 6.27135110335566636763, 1.08754690359923067433, -7.34315855998033661933, 0.97522785280155410703, 5.27922497484909669652, 2.73012102209865403779, 2.23127882914829100969, 8.66601320724850054944, 6.05614895651418549960, 8.60978031256597020615, 1.63938768200518225626, -2.65014577448202892640, -1.42874201361811614674, 2.60792179999358708642, -6.74747478262326172427, -7.31609696331035674888, -5.89726763418188859589, 3.90091686355115285778, 2.14738546310366551495, 6.96266192009210627134, 1.80504893779245279006, 3.45048686505794499624, 3.10221838489928636307, -6.49549860277670365605, 9.76043419112581389641, -4.34496369494812295642, 7.30765496866646913077, 7.35506613149961196996, 5.06672354855466622325, -4.62109036491750391207, -7.17360119092424142195, 8.66038985798728333521, -8.57896333516290177101, -7.34463139509792917181, -2.37926491596850198107, 5.43351523710477479767, -0.86862570349154211158, 9.11031497137120993557, -4.03504907784487265587, 8.91795482407443174111, 6.65960173177942138523, 4.90557977531197941801, -7.25061028788303651993, -0.90819722216796172631, 8.82016798116954348075, 6.78162026959264352399, 8.16065712263466025433, 2.92356245091674438186, -7.34648417026596511903, 6.04392810497690646798, 1.75609964122721606827, -2.67632088887284780299, 3.74787850651254750289, -0.24786025682427670347, -5.55891507362338721521, 9.91016860729120452334, -4.95549943826179450923, 8.42953884112332829659, 0.15846429303311282411, 7.12531339470243452183, -6.32419006782649795184, -2.14632669657658858853, 6.53237817908375362208, 8.70458914573806907811, 3.78818774163785931819, 0.30493347454710395539, 2.67335745064531060677, 7.67107424791642245054, -4.65540613239496803288, 0.70346544829935453436, 
7.48198216395477189167, 5.33557596548154045024, 5.23631439954514554813, -3.70185708385476530680, 2.39539258776138908047, -1.99818079186790242829, 2.16204932947321459835, -4.49635733948728244513, 2.72298464994821642904, -0.00886479648378823981, -6.10688154254459547587, -3.02431633248431097627, 5.14676400356836261096, -6.44390903606897325062, 6.97639093466405668664, -8.03325893301788163114, 1.31999999694946978934, -9.26912632867386498958, -9.40654910121871701278, 9.23716726270921739683, 4.31206126091302266445, 1.15500038088626411081, 0.17904774587640126526, 3.43068300646943669108, 2.59175760964044243906, -8.68146680965977424194, 9.33126974941225384441, 0.52444959534473056806, 9.47029976513974602881, -4.30212135180276344926, -8.08985115605770133129, 7.68950767954876823751, -2.78097595370501604606, -4.54591696781853293885, 4.80850201489145234746, -0.82189980848260724144, -4.30540697481585254991, -9.96404466981918446322, 8.45052538216423698714, 9.78443644949852142645, 0.11189889312684897504, -9.15793813743328577459, 5.99109352139011797078, 2.83330228956011609398, -8.75300287891251826977, 1.00537336851828840167, 4.10450335988593906222, 6.87595074252269711224, 8.80721112119874049995, -1.97977543113114506923, -4.04526171632668507527, -3.24718362611680788632, -0.12858589412192600321, 8.45807542008554946733, -7.78715743072523558510, 6.10442739930910605040, -3.30973275226372365410, 3.83970903907855287684, 9.68858707849233979914, -4.75651205898975693032, 5.52465453037203957365, 5.72107874060069399036, 1.88417284615005797832, 7.20114319631476718087, -2.88821803874755644870, -7.58843119770664920054, 9.86964873139684328862, -0.88739795453662395630, -9.48874367307095134549, -1.41517092053182302891,}; +float ait_3[] = { 6.66983261193634291430, 1.41844534546847533818, 0.93223455325094306545, 9.85792598519001117552, -2.54664167476351899211, -7.95204396826435555568, -1.79055291905173596945, -5.59389340732179718430, -0.00670274631485234806, 2.81195398046731881436, 
-2.51680342203577112059, 8.61941036674177851751, -9.46059931631794981399, 4.34456456452217487652, 2.82542339453918600611, 2.50391584923834642495, -4.80634873505796633708, -0.99773988223173404322, -6.62790953538595672967, 4.41935783791265457410, -2.82809114824888130357, 8.93183604946890596921, 4.56231399730619813226, -1.15642578523805283908, -1.81253038693295209782, -9.42525164413589955359, -7.45611397611343917902, -1.01514031877614030464, -4.78102054331543513399, -8.65015811271172552210, -0.24603893096856666034, -3.14767127364348375806, -0.87223630347746095026, -1.62265320493163933691, -9.46454470009047810208, 3.31970987855021171242, -6.74108112713102514135, 5.36968925807980390630, -8.20200821255118839304, -9.85655570625662846851, 0.25182924423626573684, 4.68708581212075259259, -7.36996235275393818398, 8.76922347164490645355, -5.02524041690487521805, -7.55143858612001395159, 2.82659355695375857920, 2.68894609927420447093, -4.91968925365574172304, 1.86885192550061418615, 5.50970060115263038369, -2.02531162063019021247, 0.54370533691253797315, 0.68711215268655756461, -7.36718824079711342989, -7.06577380199404192496, -4.87148903359455864859, -5.60018728172634716600, 1.34693294169751354161, -8.80268739069648908924, -8.40825542509216461440, 2.26406020169468291670, 6.31594250669931156494, 7.89785241455768272090, 3.34178488499889780883, 8.16583575630677671597, -6.53297090300205951507, -6.20306187228846894754, 4.86499599134429416836, 2.81232959686829886436, -9.54625629042503476285, -4.25977462778485538308, 5.50646424106233389750, 2.15800526807999659695, -9.81230114463244262879, 7.56167325301825599126, -5.06011517346609807078, 1.68774637345719824566, -5.96954616686939765202, 9.99298449278959211028, -1.14252334375568054270, 7.77213236243582272778, -7.68809796917703636154, 5.57556466224472302429, 9.66324421517759901690, -3.18980310099564690063, -9.84479706592263781317, 9.47123420906123314467, -7.68735089380204517084, 7.66272816663380496038, -0.22480875261101473939, 
6.12380768115122720019, 8.37402276400300138448, 2.40469215079673048763, -5.73244884291220735406, -4.24039208254333388481, -7.53469215807201386781, -1.97565073589931117226, 5.16197260188101303413, -6.39440254256873341632, 9.78218271830712637893, 1.90085736371731428562, -3.31923339333589595412, 4.07791694728823550520, -1.38130153652882547988, 7.20465835446428926048, -6.83143720015534761103, 6.45842912106476063627, -8.67056320532798707745, -8.85805259296565772331, 7.70649585160478167722, -6.14315362851711022074, -4.12360920692002075327, 7.69418152870201410565, 7.24558994375491138840, -6.25126549860222269217, 6.92335649537899300299, -0.49312932285816124534, 0.12524660101996332173, 7.63769540409974823092, 2.98089455856217711016, 7.59245888679852143355, -6.86282483816698807289, 5.99750386125127654680, -1.73495946495620678718, -7.17312076738523263231, -6.84641985217763249238, 7.22962874386114862091, 2.87675874245284290964, -1.19940933783622405429, 7.86849257082641884153, -2.79653080753953631188, -7.65478580540531972076, -0.71357879458861717126, -0.03663672584375454733, -4.41889420467155780159, -0.73816433065756115184, -7.27385741918494588276, 9.82429172980836540319, 2.33221785504778189591, -4.13342914411733275415, -5.88811797256465485617, 5.75255525197507289192, 0.19224134045353835631, -9.06308507210640890150, 9.17303594422256907137, 1.79895124297308584005, -0.38738637999366787312, 6.36771615679755953465, 7.38641685120222035721, -7.26535469869390659881, -3.84517840795792942288, 6.17832119249565892005, 1.15569663759499441369, -7.93148916732904929461, -1.53144583253345167861, 9.96706767804357340879, 8.35383517877978221122, -4.83698880399924746598, 0.79969793879922868030, -5.21786585017506965301, 5.61123091028410847514, -0.23636955124010050611, -3.91704047770627461489, 9.95769851430087982180, -5.93343381789243018432, 0.08292118718958718659, -7.02653571189426617849, -1.47419560589035825160, 0.11445401909665875451, -5.63503381168814776458, 3.54883032637714279645, 
-4.50001415155199424589, -5.03182195718429703390, 3.88414031256467140452, -2.51547200261190972981, 3.41622228922731530076, -6.51834996366621410857, -4.71426945058495405760, -1.96663927612819122714, 3.75405336212320861478, 8.03750341209058660752, -2.76417312214995103403, -1.80681362788505595063, 2.81231273904695555643, 3.24294660907506226977, 7.32835450610197725041, 9.71101805291727160352, -8.06605643297933383451, -6.15156987390721443631, 2.05265481958590179090, 1.92524585826071970018, 5.71909635943240601819, -3.81911928646012199806, -8.45408776975279607768, -0.15878704193272952239, -6.72056106795006868992, -2.14629108708720650611, -1.59385065534370973239, 0.27810168235553689442, 3.32159997646852644948, -9.38326712018172415242, -0.52078629030789258536, 8.65055270176937440851, 6.42769521984089209354, 0.84382107956881924338, -9.57155999227250120498, -2.59587879412014288505, -5.49452935806388431672, 4.02499551412729239530, 5.39513304579451080656, -5.32957916803691311713, -0.08781760250051995342, -6.63752754490054819314, -6.12294274787246806113, -1.12509574109703081035, 5.96852311742765451186, -2.44922928114834803637, 2.05019825737936756127, -8.69586063031392342282, 3.15538212477125412647, 6.18711438894351672957, 3.15347159955759970273, -1.86292188130510005806, 5.63099855933845816480, -6.20776455912784363989, 3.29116828314286280488, 5.63277286704506607862, -9.96896275064425196888, 5.01204580222828610658, 5.17087828910240787650, 4.51385452148856813892, -2.26045723560937261709, -5.61452710030120805129, -8.45304982194379661564, 5.59140762743137287316, -2.64966579683882930141, -4.82345204985235653794, -2.95756874961901949916, -3.60011491870718280950, 3.05624325817508157854, 8.24917379583467536008, 8.57693722735552199765, 1.08747156714718329340, -4.00329008272839015348, -9.32860568557988756311, 9.65862277765145904596, -9.00033270471172386351, 5.89301479452225507316, 0.00070117848786033221, 7.31497122274193145586, -7.61535455746650313813, 8.96120879387639490687, 
0.21613833112998293018, -6.44428523504676320499, -3.83949530984680720280,}; +float ares_3 = 34287.74893555418; + +float arp_4[] = { 8.11766157030820068030, -9.14104512536093771757, -6.90265577569858557183, -5.66678287717162731951, -7.61055608286747009572, -5.33211409974530425870, 8.37655115190885624088, 4.32713070395994137129, 5.57369962353053871595, -0.89383035040985703290, -6.72052423544267085731, -2.53148849681485721419, 7.85069775317086993027, -4.92080170352028289926, -2.41951534453257455226, -4.62081805199814610319, 1.14012056507129955207, -8.35701475948831529195, -0.82567762778153408476, -3.38932070793872330938, 7.58328259529883297319, -1.00233840872277468748, -9.80506234291192768637, 3.05059001198651458253, -3.02222179908578425511, 3.78918156926150828667, -3.00160183583006379138, 2.29268563366529321002, -4.98163546194561313030, 1.14632156925941863790, 8.20945597922985470518, -3.71543845119439719582, 7.48818580756599061488, 8.23836797972922241229, 2.78420798791037782394, 0.08166580714686055842, -4.95361620244965550341, 9.21581583329308529073, 7.28226185190663599656, 2.72732791871043289689, 6.76822705979645533603, 3.24408839791319181245, 8.40626709405596983515, 4.42224761775843866474, 0.58208230355212897678, -0.41887683979410539337, 7.15774427347182751191, 9.43188430510227249215, 6.42992189728116514402, 6.32566234357313561532, 0.62996520002265654625, -2.16399823783500533381, 2.94737826608665542949, -6.38708382737875002277, -3.29253322065541365049, -9.87247764429072205417, -1.58497218931541716813, 1.06312627538523685189, -5.71014703675104051683, 8.40960208098393025011, 6.64112680055328041817, 6.53255445915079135943, 6.00818873424952215601, -3.63727146066058715235, -1.09542885500989584102, -7.84192748047784160548, 0.51034568159138160581, -1.62302585897993623121, -0.90522971939573793065, 3.61378768719925425046, -0.31889559392643640479, 2.01617268949581784909, -7.72963368185723531667, -5.16915593867246236925, 0.00245668429549539269, -1.62557359280665103540, 
-4.09112976621591961646, -2.37281615628547548624, -5.16068663310965458635, -7.11487866353854236934, -4.15064859743966785288, 1.92990594993490738318, -2.60012957325858984348, 7.71462501858383120634, 6.72777530704080461987, 2.64810953800656534440, 3.24766732355117326847, -8.26850801037500815482, 1.63698301221205255729, -0.19882877415058608506, 8.30674051758786191613, 6.37031055250022859582, -5.64704316343922485544, 5.06085139335926292858, -4.80440750078746603435, -1.86997580096896776070, 1.49103503760046152138, -0.01963867524739804082, 0.20913735561526181073, 9.23607302849289979463, -6.34944521580942833339, 7.76485473842344831041, 6.91706269489783664994, 2.35668967326095568637, -4.38273104868781793186, -4.07366183380135815639, -1.30881445931132667226, -3.70446171830624493282, -7.68603164288562012985, -3.03892918529199818067, -1.02487791137802375374, -1.25012212202706685105, 7.02661695464057700633, -6.23180324804626550872, 7.23411657194380808278, -4.62920872534738236936, 5.09205012269768531041, -3.50774926470170278492, -1.32684890386563658637, -8.59909061079411962680, 8.84080613702008122345, -8.53249659518067105068, 1.95876849203605019056, -1.09190288046063166405, 2.81781383084791947624, 6.24496552988012609831, -4.47089480578357445495, -0.16143651827373162178, -2.36538062888643541015, -2.29993084863440166998, -7.64713762948324493607, 5.39755848641967261869, 9.71127152168351059913, -5.99105933877173413293, -9.05773840775855099139, -5.34708178587670168724, -1.59619114904834447088, 2.35617820506859132479, 5.76893961236121199931, -9.74114257989031884222, 6.12888339554478278615, -3.77019643787154201675, 5.40480889752446103103, -3.33352327036014361994, -6.24397848756698969908, 2.11302696264924527725, -4.66201274968073509086, 0.00243894710224346056, 4.37724569750841752125, -4.96335006499138486902, 3.21500473511076023669, 4.30118790015129448534, 3.44933134308010735936, -2.07452416088011570849, 4.14755359442132665038, -2.32481485898233586340, -4.44223113332270891362, 
4.39903610394382305060, 5.22207274924713260589, 6.77760058100482609689, 6.60485654074187777951, 2.72982497077323849055, 7.89805779912575545154, -5.66603937585937700305, 1.84883446021087394229, 1.95427747005934726587, 3.35387088264427291051, 2.96490391593051327845, 4.16732047537934136017, 3.59291857412155479778, 4.73907688211001421053, -6.41536720287131423390, 5.10122934047321940909, -0.34228360496465093377, -0.27571048721136648396, -2.17487685972826128022, 7.25770192183992080004, -2.31876092513300324782, -5.60990754022060045259, 6.01180482991306774920, -7.81252760039610016918, 1.27650279477317951660, -6.66267822254739794374, 6.98304088982209236747, 0.44411488874678006766, 8.03230756829254843865, 5.45600207405943216088, -5.13912909017661512223, -4.68432461388908194522, -1.09022694794242269722, -7.79962144339304153107, -7.14886906545105382804, 1.12640735762857246982, 4.69257460166119599876, -0.23016353623950536189, -2.13317452890337655447, 8.39380604350743553255, -8.56923687060404937199, 0.51487081164487236151, -6.30219369694011266603, 1.66046886555702322141, -1.61010754412622425491, 4.97539996040855214687, -6.66204421296942861375, -9.13335148853487766019, 9.37492566630093904223, 7.11747413358956393381, -9.20059818485612623817, -8.02466760955883096074, -6.02204784551578065077, 9.96338478368738478252, -1.02802479023298332095, 2.00286050211046529057, 2.77410328553274609931, 6.40731259701781397098, -8.14107147453577262297, -3.98223078983555645038, -1.19215225858765272449, 0.00260642884733819358, -4.13014133849641851270, 4.53607598693204394635, 6.86205146950580058274, -5.36120148514548588992, 7.79441214993383368892, 6.97660788348479954379, 1.27149942587796616067, 4.97870080112252821891, 0.95539546005926290206, 3.17801762326883618925, -6.26857224849952920920, -2.34531494725385769584, 1.66928700134732110882, -2.00570393257200763060, 6.04350656892066595560, -1.93356433506894198615, 7.28429433107694990213, -9.36762509296950085513, -3.83880190554097211475, 
4.18069849152442074569, 1.81849386080369157526, -9.75606222266313771740, -1.99729316571109904999, -6.35306652323981868591, 9.34602429288715086386, 8.06464336841212414697, 8.21734689939009399495, -9.62876888792530927219, 7.62121611481172678282, 7.78397632239070702553, -3.46883118456124961426, 9.09449502534020126632, -7.93280975952617417590, -3.07986384183021222327, 4.12311118044077318245, -8.83899350780863812815, -0.10435270339431568232,}; +float aip_4[] = { 0.36005608085944373897, -1.84058519054544689197, -1.54334976853728633728, 8.45981160371566076606, -1.66192992128007155372, -0.76367013495809388246, 1.61773630017551894866, 0.12728635534591070666, 4.14118488989373290110, -7.34372483207391457682, -6.85299020158485383547, 4.77457472821836503840, -9.16468887566496448471, -4.82827875542378048834, 1.89496271665273141593, 1.22112133576585080164, -6.16105911808483064362, -7.09641632951701417653, -5.18589232896463236955, 4.30139362758043652946, -9.72668907673423177584, -0.41191788473278201366, -2.50072446045554563909, -7.73395081441295761948, 0.81103980317619317475, -8.61555321754982905702, 5.89123506119547890592, -4.74694062352273782324, 3.73270044747164320142, 6.80979028103922701121, 7.08903092438439585976, -6.32936307529844199848, 7.52516920378315390394, -7.13569837984227284267, -3.50604800895976786990, 2.39440238629338963960, -1.34793353166329232806, -2.24238085907580142475, -6.56955289742981740631, 5.53857826269888420256, -6.56557865494451320387, 5.62546183478898775832, -3.40128535875451998294, -1.07540756539091120203, -8.85135022228536882949, -6.37093162478805208337, 4.43122565308166471709, -6.14234315044466239186, 2.54986121395921649935, 4.93001152699001288227, -6.88686708675859460271, 2.90895130824499403843, 4.94354285441010432578, -9.61971391333514880273, 3.62727985816728093482, -2.34903593186256820502, 8.63918511199526761857, 9.60868932793843555373, 3.17416017561416019532, 4.70551432144687709069, 8.47779982566133938349, 2.49959499470620016837, 
-6.33707490507784676481, -2.68384642465197487127, 0.23258992273781764482, -1.98403039349221366194, 7.44032476525396546663, 6.69989386051386759391, 6.05472260370257231443, -3.81776526801661475474, -9.54624888882967859161, -4.17556699374023398974, -2.82453533722138594442, -3.19435693472746962129, 9.56556016461136238149, -0.86298547335639597122, 1.36436683654182111525, 3.15192403146507693634, 8.39750714032325618064, 6.58483967684127335929, 1.72523484968863982658, -8.32069798217954748054, 5.97607659557367298930, 4.32167097794106425113, -1.53519461071690699328, -6.19774411829041582678, 7.52151798977940799773, 8.07695456722575499953, 5.31090656908387792612, -3.01437298673365816626, 3.15422710914781490033, 7.68018196325259339119, -5.28245853204072801645, 7.07931160730806752213, -0.52528396095050844394, 9.99413745490105043245, 7.25381122445896409090, -9.67723215817859383492, 0.34043660433807865218, -7.88800878762083712559, -2.71196269844180104513, -4.06828163053811753258, -0.30426784885628954669, -6.39249600966606656982, 0.62142175229795881819, -3.65881516016371755029, -8.62333227163078674948, 9.99119436430123997184, -6.79491883690712050736, 7.95268486023081777603, -3.90014419663155198492, 9.49144774312477679246, 3.69757521382196685522, 1.97850436825261510876, -9.36844792473268128674, -0.94343942316681150828, 2.22033381821190367589, 3.93353569917168144343, 7.59661083350388466329, -6.23350137720206021186, -2.89228482103676576287, 4.47019058791796375374, 2.11159749178620970156, -0.12979281714996027119, 9.34745072317588920896, -5.72234699163898952179, -8.05145570790722864274, 6.17398561353560637599, -1.08984472925497755114, 3.68478874098729392017, -2.25181752692373393643, 7.13562514607841080760, -9.13980705148340000221, 2.52479202164406224540, 0.05825763535818495598, -9.23578955550128100072, 4.10382266225822966987, 3.60306675350878613528, -9.04675182042590186882, -1.04788429918021108733, -1.58909917004871203972, 4.86907996182171132205, 3.06905078137757314494, 
-4.31729509656636256665, 7.63254855033278900578, -2.79237624702665598875, 3.17285276924568471202, 8.99120438997692872363, 2.20962431524237246094, -9.44099011214018091209, -4.00516630853206834217, -8.39424141035571835801, -4.13031182308306377138, 0.18280922959769796421, -5.04889087201305386543, -9.96772455643478139109, -6.91629478020100485480, -7.49883781048546183001, -4.73930162482209205166, -1.69446506614803382718, -8.14589680779659097709, 3.27802011697104234145, 6.35619475494854313524, 9.87790084886017183408, 4.30553668450914273080, 1.59427159068167156875, 6.70477055964105161934, 1.93140768102730397970, 7.21166448866209464086, 3.27331850626667986148, 7.87013351184912579583, -1.16523599626477114555, 0.75201363045834135335, -4.14558192340723152824, 3.43116502201806028438, 7.40097748303507785295, -9.93490763323127090700, -3.70057658497259289732, -5.29832711809347500775, 6.19796228132330639937, 1.47100318907325089413, 5.80065386996444765089, -8.15984012074006415105, 6.01626074245192654644, 7.45865770824512708259, -5.74719906690866544352, -0.68307982660515342843, 5.56257415770011753864, -3.56227355517090504833, 0.93506995763555700307, 7.09267731127392764279, -8.75254433611122450998, 4.54191313658706086187, 6.92909982308644956106, 3.75793378886130291505, 7.74753910473810236681, -3.87452959913495220690, 8.26707933477283418711, 3.82445718056313666011, -1.36192103066162850666, -5.20962908195985008319, 2.73916406407691326308, -1.28225826328362835227, 8.19499591911803193511, -4.15572756101097517956, 5.03454221238917298820, 5.82065269078085378851, 7.65017221932906466009, 4.86257827161603195520, 7.91220868744265359851, 9.03046945179348270472, 5.89109307303686335899, -9.07475219329579907424, -9.59059442891786773089, -1.58589656753354724117, -8.91259077183221570806, -8.23702701443570362017, -1.03627966402999938111, 9.65735753471810909332, -7.88873983599478911799, -5.47356016849050952544, -6.65858209712574034000, 0.84417554116466497760, 9.62796554208492239013, 
-2.27354609917787264806, -2.23558524597440921866, 8.86815100317313920186, 2.92360843918881663228, -9.24017597009659397145, -9.47787437810828592433, 1.82761491457741342970, -1.98671232580490553232, 8.78568777176227300174, 8.52785854392964637327, 9.45088564076224812993, -6.67731879508535186574, -5.40057640275054140488, -8.46861594271484818819, 9.86654753324809874471, 3.90160046100172586137, 6.65491264764387580044, -6.15144530387968302421, 9.04937528093356746695, -2.24153454440960242522, 0.73869576362027800087, -6.24839078689237048536, 3.67949873393561510682, -5.66251748048170799876, 4.57236769601672321528, 1.93138257810025493200, 3.57011988145648651027, 0.32533609204444147167, -2.85089374265722561574, 3.88703689294542620303, 0.68620849727526511685, -3.05362651241587457207,}; +float art_4[] = { 1.95836110922376249732, -3.77006226572133673614, -2.51122404320560477231, -5.16519595576802714731, -3.33174518346235970512, -7.43857218369669048741, -9.79013024365418615957, 9.41049576815089849902, 9.95562291450405112414, 2.50516349390529846630, 8.48077724083075779049, -7.42862709586415537188, 1.22678643247851937303, -5.32287443212095645606, 2.05651065291495172005, 1.79169686742965517112, -2.47633672000034010807, -0.74687787604712596590, 9.44003927771909800981, 1.28083755020745826414, -2.32861916829224746550, 5.71210462836381616114, -7.62165088978809635023, 1.27891125941524919085, -4.42322046430601023559, -7.56295508076929401398, -5.42853900102414144158, -5.15750430826499695058, -1.22083696940027408573, 3.16660532186949517097, -4.59211863318361324104, -7.94033523507815974796, 4.69743773437869904797, 9.68102581255802263627, 5.88960342808633541267, 7.23787413916460309338, -9.39804512548981385578, 8.93958903490516831880, -4.55433713835908982048, 0.86386566709249201779, 6.82422776956039456309, -1.75062814643901454303, 6.44988706819484036714, 2.04934174366704979775, -1.88447132209377343770, 7.97620588798522334173, -0.04014259240437034748, -9.02609267463702558132, 
-0.28112966317284815432, 4.78857607021775777412, 6.38190487043749499207, -8.29257285030534063708, -8.34607578002511374393, -0.65023535277598476512, -7.97207013352629889624, 7.12159832030788919610, 5.41741408399136403773, -6.50856542451311970154, -6.31385786810475746478, 4.54065304247421508421, 4.06511671897879445225, -5.97790784750955417337, 2.13220215490281184145, 9.74770241549730442898, -8.31551432516044641829, 6.29979928504761232944, -0.49464334215566196917, 8.06570497621529014509, -4.81490862281240872278, 3.61166894453807962861, -6.40489869070901551851, 9.35667946608939615771, -9.32174522015042583689, -2.97687481322051361587, 2.51643244447085834281, 4.37407497816530010937, 9.76731896411543232261, 9.25835948819729281922, -0.10118687994136621455, 4.89019068888778107862, 9.22555006915842312765, -6.61305731484587155933, 3.73471075618056325141, -8.60732699477039275848, 5.48790353132365638089, -2.78435689580313905367, -0.67946411929398031759, -2.61734667653182651037, 0.06382270434102466083, 2.52523751692426756676, 1.61980018555763116694, -3.91380648122047425375, 4.13400124233531052198, -1.18988146037823483425, 0.88766169480585865870, 3.75431130537385193691, -4.83933083170373201654, -3.95794050361581994935, -6.61997373326611970867, -4.24074497693908369911, -1.90912514566224267298, 3.66491071991310413125, 6.32552933386770632751, 2.34756376181951154081, -6.20451498543254054141, -0.76568881467707861077, 3.72903735226581645179, 3.76840720023366770874, -5.49944220718731102693, 1.47856929533973868729, 3.57067244762020408189, -0.54888015050475402745, 4.43334007529933948888, -8.80393887798237884112, 1.14463229450155168365, -6.65615416860429576218, -8.36476327262880303692, -9.74885439340230774974, 1.28325342112469087397, 8.32125359276396991959, 3.95139487289847757268, 2.08365244886854661388, -0.07934126790488171821, 9.65497616917636491962, 0.28995635694721677567, -5.61395907810010807992, 4.74445169613737505188, 5.06636444853366718633, -3.03980970203531164486, 
5.96661990820588883366, 3.67294794331757934458, 1.36849594224718096314, -0.85976170553402653240, -7.88041055377669330539, 4.12197398618885912924, -5.69581212321479846139, -1.33464340388236912816, 5.62542763160464254213, -3.43375582182370564510, -2.56415956238543785162, -4.52332325095310139318, -9.92988296364925204784, 3.84565387858993545933, -2.83750699451218224567, 3.01199369480792711329, -8.53124401954756095279, -5.87584784789346503686, -8.75810404562704469811, 3.71559450922317324739, -8.46005114695926252466, -0.57146068233822688853, 3.79805140337388813521, 8.09442167125382638915, 5.07608414466839263923, 4.88262291021534622359, 0.99603261860280412066, 0.53571560151153718721, -3.66878314846516673242, -3.33676386065415009341, -3.01887333543622737864, 1.63222654477866413458, 0.92987944652044340899, -8.04660282674206861486, -0.33570256706797962920, 7.49309915517732605394, -8.12198847568255111184, 2.83281587978109072878, -3.08787163616826632051, 1.31079447100370671819, -4.43079157420822156155, 7.09131526483023577612, -5.34282106823877711577, -7.43837294316231201208, -2.52206092473738596738, -8.68493714722106702197, -8.71915161226548107720, 5.64867001639629151555, -3.59819839482218561955, -8.73282466468600127030, 1.10521875197960639525, -0.57588137047378928912, 9.08601290215446155685, -7.38666076906232937915, 7.01158428023863677936, 4.32842017711110038647, 2.78577413858272748826, 8.04172345404710142702, 4.96478390700154825765, -8.46565517550279977854, 2.84901412939649922862, 7.96123964953902074626, 9.60527394932342559741, 4.09780547128232264242, -1.08651943107139992151, 1.24450635952903887471, -4.18090943947998727737, 7.68084016792845858390, 1.23194254338790187830, 2.86579814666877652485, 1.37159162372222809267, -5.57380698944451857813, 1.06563407099916851450, -5.35090652014018619553, -8.29079677932186775990, -2.31530333888983363977, 9.35730382419434647545, 0.32516372745656596521, -1.46780212218396144408, -1.15777547130628022387, -9.37023930895917445127, 
7.49592376649046343573, -2.84685310636668464923, -4.81915713350234753420, 3.47093051881410374904, 1.10224610799734179523, 2.90455841715838403161, -0.21924509515029733109, 4.76025257530385914606, -3.82841713773366798534, 3.64513484454086444941, 4.46909480126502067776, 0.81501309520322706703, -3.72314356383118116867, -7.95417348040380112195, 5.25268134801702579750, 2.38616876616097250974, -5.83452873781371561535, -9.29773012287215294691, -1.72958767415976488735, -5.30207011134402161900, 5.29063531387901520020, -9.95729006732525512291, -4.17353350136118184111, 0.53119350910771956364, -0.16374718974662627602, 4.44086694412100158047, 1.59740192728590457705, 5.88337780735157522827, 7.75166702888786218750, 6.73368424118300978876, -8.41914348053775540848, 5.76787998572451598989, -5.39152297033026695061, -9.55538626658133871672, -6.05400929098724382982, -2.55965026034629516971, 7.64600553386480541462, -6.29985305481029556063, 9.63860101929107315755, 4.29781109770832259187, -7.69497130222854153203, 2.28228634802603203013, -3.63914349958789351547, 0.06391588512180135240, -2.74886222051597961524, -0.13932203852785818299,}; +float ait_4[] = { -7.71282859222158556634, -2.86120015976267083602, 1.71929414606543495836, 1.34881251083015563097, 4.57384679483587142101, -7.60929279016787418755, 1.11992951621317793354, -2.05870998899363932821, -0.36915358730573188950, -8.76153959437855789361, 5.35521970879214137540, 3.72483743474393769191, 8.96165447956780525374, -4.61780539727052641297, -8.83253309607612457910, -0.68151666439243818729, -3.47297267069656800942, -7.09343677317774368163, -5.05618646486787604033, -4.44661994040182140253, 2.18980205675395112053, 6.82695323359659056450, -5.85505274029015687631, 5.08320122959556464082, 4.87563966113659752466, -0.89848125030680670022, -2.13378947777419902820, -7.00263065543876450647, 5.81238750380759228165, 0.87541529173171639400, 1.96602137894582540412, 1.07216793410526634034, 7.71737556797603829750, -1.82781212815955029782, 
-9.06771755079912722408, -9.80382909101951405262, -0.79979969401723849387, -3.01009813414874205506, 3.47783820698870194121, 5.85211877717495276841, -6.45376079625405107976, 5.89015430483364532677, 6.94341222723777917736, -0.78239013493125497689, 1.84879709554815363504, 7.72403713066761454797, -4.22943670272774685515, 6.87571703174431902994, -0.74301474664837030559, -8.46708744242720001694, 2.26577266118891884616, 1.09249492533801273453, 4.49851746110999428652, 5.95100263876330259905, 8.37405065298545281394, 9.54163405067479430954, 9.13586494501271673130, 6.78395168886129695807, -2.55643093259893205982, -4.05021978110301894560, -6.08957975156369624159, -8.08377501195474401641, -3.98349378351249505670, -7.33058986750386765152, -2.73343838938552075035, -6.60003654050119337171, -2.63289125490874198476, 1.65157662060050469677, -3.82352432453241419807, 0.50462001101771392086, -4.28848788174121686723, -4.24446965363369344004, 3.68808542908215741818, -5.44749968122742345145, -2.93744968479581380905, 9.49014663877053976648, -9.01205997984526874234, 2.62597702825831191831, 7.54269309665439635637, 2.37288280729393363799, 0.90489440233616669218, -7.65288508644619724919, -5.41044806992433180426, 1.83518392113916029018, -2.85551873676372203192, -8.54164892236218342703, 1.18250194089258897634, -1.04594417158672747803, 7.02157742763372283434, 6.32705390280793977809, 9.45617934629450118678, 9.61302411761173303262, -4.27656767528856107674, -9.29234923232258935855, -6.12992704595691151326, 6.70729760287360576854, -2.89415602035164987171, 5.58289768769964567241, 5.34595538173473983079, -4.10881931269197586687, 8.12426054777853323685, 1.75570357338516913615, -8.55476765632488245217, -5.70249422437874287084, -7.55585140366916263588, 6.21324835199248326489, 8.90163802945075488537, -6.85088768776788192838, -7.68760842572191727129, 5.49085544739036279793, -9.85189087144574315857, 6.88839166326917506922, -8.39219958380896358108, -3.48291201495260338561, -9.72271089116828868271, 
0.28012555354196777557, 1.71503131741599190718, 7.51444026527896724588, 8.05184265259315523622, -2.34898392188637838984, -0.55906264560530338770, 5.82926975361793253683, 1.15026780053997690345, 9.50625190181394685851, -6.68479662299395549496, -3.59313293876660289072, 6.70084100710553443037, 4.93748316832971667623, 7.83791876060525538605, -1.07852033367626987115, -5.55354863256491348977, 2.18341760246355320874, -3.32642988075765799749, -9.44503705748988942048, -3.81137502452392862295, 2.75216045480250670607, 7.83099146430919290651, 4.00612099071793181793, 0.08006482851461882433, -7.12934189263592799080, 1.59426555545538306546, 6.01916527569160564326, 4.02203043163655671322, -7.30632628819011475940, -6.97696716663366078848, -8.91436793504404079158, -0.89800177060680042018, -0.66353875547341623076, 7.42997994110930193301, 5.25820241343956595870, -8.73371592574450872348, 4.86075339817850782254, -2.27291447051597828022, -8.77852797975220688897, 0.21418976239716513987, 0.44227296247042247046, 2.18256507497707019638, 8.81769454770553195999, -9.67634030736409123108, 0.29199017429642104560, 8.84827893031620149600, 0.37855404084867316783, 9.12033953254081808382, 8.32816095891589824873, -6.43291470900524231524, -5.77976107041776820950, 1.92488010796775022015, -0.02135942463838347294, 4.62478546822055491816, -8.57703419201005523576, -9.12701884386337525257, 9.66693316228212040642, -0.25311355310446082001, 2.17565642318960428270, -9.32891147300150258559, 2.75270520090264092516, 3.00375270830684470980, -9.64452986460367966970, 3.73716421321408986955, -8.45569180138461540253, -0.77629405605134316204, -0.56061973179970081560, 3.26590485242656392018, -5.71763907081536082444, -5.51305755756865067951, -5.28298112945860598444, 2.77175874545014799821, -0.42262030268383199427, 2.41718847306423256782, 7.38284793084340051905, 9.92231904187002911044, 9.66616827869852457411, -8.58635354892870417132, 4.38627015240323991918, -4.91727002383891775850, -8.50416777963621406400, 
-1.77998537655554223136, 5.61552551671781863263, -3.07156783315007508861, -4.06377669294194276972, -4.62048272276037685913, -6.88206639058221991689, -0.40841420732931155158, -6.57189184465070752594, 2.27497037776903354711, -4.70435018223292189532, -6.49062649315617257173, -6.30557890874673887538, -5.35737987787830771680, -7.15899078883363237935, 0.01559992576471458392, -1.55205829762820890494, -4.16105582202042523221, 0.44770799367697122761, 3.86064173709948477153, 6.10872468415169933564, -1.56527073658445559090, -4.96336256124930930156, 9.59465451278959591264, -9.45749532421970684481, -9.97262909211887915717, 8.95254633687546430565, -8.17860022187901236634, -0.42797946860191338203, -9.11090943175364031958, 0.24771830590493415514, -5.04842989594021407385, 1.87770199828116091112, -0.57771620812879831419, -6.80220098384211091513, -4.56461108442079144254, 8.12506014842676194121, -7.14326254673013583130, -7.52033057218491052254, 5.32715680026749183185, 8.06038776292131586843, 6.83143798180450900759, 9.77280653584634251274, 9.80718368773837312347, -1.56455282495128500386, -4.87589485291680002632, 5.69396338291105763574, 4.43099822273358512348, 3.93924665682038011028, -0.63242777430591701204, -6.87835506461718537707, -2.97626924578852314340, 9.30703996557977930593, 9.72675702355411786471, -4.84259827980241297496, 5.93611416845696027167, -7.52077802244371262930, 8.24895641043980276663, -0.63955454977005210537, 6.94494915327505424330, -1.95252461537893928778,}; +float ares_4 = 31258.855438601084; + +#endif // TESTDATA_LEARN_H \ No newline at end of file diff --git a/sw/applications/l_loss/utils.h b/sw/applications/l_loss/utils.h new file mode 100644 index 000000000..0c5db6678 --- /dev/null +++ b/sw/applications/l_loss/utils.h @@ -0,0 +1,162 @@ +#ifndef UTILS_H +#define UTILS_H + +#include +#include +#include + +// Define SIMULATION if you want to disable printing +// #define SIMULATION +#define TARGET + +// Scale for printing floats +#define SCL 100 + +// Asserts should 
FAIL or only print +// #define ASSERT_FAIL + +#ifdef SIMULATION +#pragma message ("SIMULATION environment") +#endif +#ifdef TARGET +#pragma message ("TARGET environment") +#endif + +// Enable or disable printing +#ifndef SIMULATION +#define PRINTF(...) printf(__VA_ARGS__) +#else +#define PRINTF(...) +#endif + +static float maxdiff = 0; +static int32_t maxdiffxp = 0; + +// Assert functions, always print if failing +void assert_closef_si(float a, float b, float prec, int idx) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiff) { + maxdiff = diff; + if (maxdiff > 0.0001) +#if defined(SIMULATION) || defined(TARGET) + PRINTF("Max diff float (scaled x10^6): %d\n", (int)(maxdiff * SCL)); +#else + PRINTF("Max diff float: %f\n", maxdiff); +#endif + } + if (diff > prec) { +#if defined(SIMULATION) || defined(TARGET) + PRINTF("AF (scaled x10^6) %d %d %d %d\n", __LINE__, (int)(a*SCL), (int)(b*SCL), idx); +#else + PRINTF("AF %d %f %f %d\n", __LINE__, a, b, idx); +#endif +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closei32_si(int32_t a, int32_t b, int32_t prec, int idx) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiffxp) { + maxdiffxp = diff; + if (maxdiffxp > 1) + PRINTF("Max diff fxp: %d\n", maxdiffxp); + } + if (diff > prec) { + printf("AI %d %d %d %d\n", __LINE__, a, b, idx); +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closef_s(float a, float b, float prec) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiff) { + maxdiff = diff; + if (maxdiff > 0.0001) +#if defined(SIMULATION) || defined(TARGET) + PRINTF("Max diff float (scaled x10^6): %d\n", (int)(maxdiff * SCL)); +#else + PRINTF("Max diff float: %f\n", maxdiff); +#endif + } + if (diff > prec) { +#if defined(SIMULATION) || defined(TARGET) + PRINTF("AF (scaled x10^6) %d %d %d\n", __LINE__, (int)(a*SCL), (int)(b*SCL)); +#else + PRINTF("AF %d %f %f %d\n", __LINE__, a, b); +#endif +#ifdef ASSERT_FAIL 
+ exit(EXIT_FAILURE); +#endif + } +} + +void assert_closei32_s(int32_t a, int32_t b, int32_t prec) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiffxp) { + maxdiffxp = diff; + if (maxdiffxp > 1) + PRINTF("Max diff fxp: %d\n", maxdiffxp); + } + if (diff > prec) { + printf("AI %d %d %d\n", __LINE__, a, b); +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closef(float a, float b, float prec) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { +#if defined(SIMULATION) || defined(TARGET) + PRINTF("AF (scaled x10^6) %d %d %d\n", __LINE__, (int)(a*SCL), (int)(b*SCL)); +#else + PRINTF("AF %d %f %f %d\n", __LINE__, a, b); +#endif +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closei32(int32_t a, int32_t b, int32_t prec) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { + printf("AI %d %d %d\n", __LINE__, a, b); +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +// Vector export for plots +#if defined(SIMULATION) || defined(TARGET) +#define VECTOR_EXPORT(...) 
+#else +void vectorExport(float* a, int size, char filename[]) { + FILE *filePointer; + filePointer = fopen(filename, "w"); + if (filePointer == NULL) { + printf("Failed to create file.\n"); + return; + } + for (int i=0; i Date: Tue, 14 May 2024 17:25:18 +0200 Subject: [PATCH 20/27] add fft test --- sw/applications/l_fft/fxp32.c | 56 +++ sw/applications/l_fft/fxp32.h | 26 ++ sw/applications/l_fft/main.c | 88 ++++ sw/applications/l_fft/sylt-fft/LICENSE | 48 +++ sw/applications/l_fft/sylt-fft/README.md | 59 +++ sw/applications/l_fft/sylt-fft/config.h | 88 ++++ sw/applications/l_fft/sylt-fft/fft.h | 423 ++++++++++++++++++++ sw/applications/l_fft/sylt-fft/fpmath.h | 139 +++++++ sw/applications/l_fft/sylt-fft/intrinsics.h | 153 +++++++ sw/applications/l_fft/sylt-fft/main.c | 74 ++++ sw/applications/l_fft/testdata_fft.h | 19 + sw/applications/l_fft/utils.h | 162 ++++++++ 12 files changed, 1335 insertions(+) create mode 100644 sw/applications/l_fft/fxp32.c create mode 100644 sw/applications/l_fft/fxp32.h create mode 100644 sw/applications/l_fft/main.c create mode 100644 sw/applications/l_fft/sylt-fft/LICENSE create mode 100644 sw/applications/l_fft/sylt-fft/README.md create mode 100644 sw/applications/l_fft/sylt-fft/config.h create mode 100644 sw/applications/l_fft/sylt-fft/fft.h create mode 100644 sw/applications/l_fft/sylt-fft/fpmath.h create mode 100644 sw/applications/l_fft/sylt-fft/intrinsics.h create mode 100644 sw/applications/l_fft/sylt-fft/main.c create mode 100644 sw/applications/l_fft/testdata_fft.h create mode 100644 sw/applications/l_fft/utils.h diff --git a/sw/applications/l_fft/fxp32.c b/sw/applications/l_fft/fxp32.c new file mode 100644 index 000000000..2755f99a9 --- /dev/null +++ b/sw/applications/l_fft/fxp32.c @@ -0,0 +1,56 @@ +#include "fxp32.h" + +fxp32 fxp32_fromFloat(float f) { + return (fxp32)(f * (1 << FRACTIONAL_BITS)); +} + +float fxp32_toFloat(fxp32 x) { + return (float)x / (1 << FRACTIONAL_BITS); +} + +float fxp32_fxpMulToFloat(fxpMul x) { + 
return (float)x / ((fxpMul)1 << FRACTIONAL_BITS); +} + +fxp32 fxp32_fromInt(int i) { + return i << FRACTIONAL_BITS; +} + +fxp32 fxp32_mul(fxp32 a, fxp32 b) { + return (fxp32)(((fxpMul)a * (fxpMul)b) >> FRACTIONAL_BITS); +} + +fxpMul fxp32_mul64(fxp32 a, fxp32 b) { + return ((fxpMul)a * (fxpMul)b) >> FRACTIONAL_BITS; +} + +fxpMul fxp32_pow2(fxp32 a) { + return ((fxpMul)a * (fxpMul)a) >> FRACTIONAL_BITS; +} + +fxp32 fxp32_div(fxp32 a, fxp32 b) { + return (fxp32)(((fxpMul)a << FRACTIONAL_BITS) / b); +} + +bool fxp32_closefxp(fxp32 a, fxp32 b, fxp32 prec) { + fxp32 diff = a - b; + if (diff < 0) + diff = -diff; + return diff <= prec; +} + +fxp32 fxp32_sqrt(fxp32 a) { + fxp32 guess = a >> 1; + fxp32 eps = fxp32_fromFloat(0.0001); + while (fxp32_closefxp(guess, fxp32_div(a, guess), eps) == false) { + guess = (guess + fxp32_div(a, guess)) >> 1; + } + return guess; +} + +bool fxp32_close(fxp32 a, fxp32 b, float prec) { + fxp32 diff = a - b; + if (diff < 0) + diff = -diff; + return diff <= fxp32_fromFloat(prec); +} diff --git a/sw/applications/l_fft/fxp32.h b/sw/applications/l_fft/fxp32.h new file mode 100644 index 000000000..878d82855 --- /dev/null +++ b/sw/applications/l_fft/fxp32.h @@ -0,0 +1,26 @@ +#ifndef FXP_H +#define FXP_H + +#include +#include + +#define FRACTIONAL_BITS 23 +typedef int32_t fxp32; +typedef int64_t fxpMul; + +fxp32 fxp32_fromFloat(float f); +float fxp32_toFloat(fxp32 x); +float fxp32_fxpMulToFloat(fxpMul x); +fxp32 fxp32_fromInt(int i); + +fxp32 fxp32_mul(fxp32 a, fxp32 b); +fxpMul fxp32_mul64(fxp32 a, fxp32 b); +fxpMul fxp32_pow2(fxp32 a); + +fxp32 fxp32_div(fxp32 a, fxp32 b); + +fxp32 fxp32_sqrt(fxp32 a); + +bool fxp32_close(fxp32 a, fxp32 b, float prec); + +#endif // FXP_H \ No newline at end of file diff --git a/sw/applications/l_fft/main.c b/sw/applications/l_fft/main.c new file mode 100644 index 000000000..cada5b16e --- /dev/null +++ b/sw/applications/l_fft/main.c @@ -0,0 +1,88 @@ +#include "utils.h" +#include "fxp32.h" +#include 
"sylt-fft/fft.h" +#include +#include +#include +#include + +#include "testdata_fft.h" + +#define COMP_PREC 0.000001f + + +fft_complex_t arr[LEN]; +int len = LEN; + +void fill_arr_real(fft_complex_t* arr, int n, float* data_r) { + for (int i = 0; i < n; i++) { + arr[i].r = fxp32_fromFloat(data_r[i]); + arr[i].i = 0; + } +} + +void print_arr(fft_complex_t* arr, int n) { + for (int i = 0; i < n; i++) { + PRINTF("i = %d, r = %f, i = %f\n", i, fxp32_toFloat(arr[i].r), fxp32_toFloat(arr[i].i)); + } +} + +void analyze_arr(fft_complex_t* arr, int n) { + int nonzeroCount = 0; + PRINTF("Nonzero indexes: "); + for (int i = 0; i < n; i++) { + if (fxp32_toFloat(arr[i].r) > COMP_PREC || fxp32_toFloat(arr[i].r) < -COMP_PREC || + fxp32_toFloat(arr[i].i) > COMP_PREC || fxp32_toFloat(arr[i].i) < -COMP_PREC) { + nonzeroCount++; + PRINTF("\n%d (r = %d, i = %d fxp32 values)", i, arr[i].r, arr[i].i); + } + } + PRINTF("\n"); + PRINTF("Nonzero count: %d\n", nonzeroCount); +} + +void test_fft_a1() { + fill_arr_real(arr, len, a1); + fft_fft(arr, bits); + analyze_arr(arr, len); +} + +void test_fft_a2() { + fill_arr_real(arr, len, a2); + fft_fft(arr, bits); + analyze_arr(arr, len); +} + +void test_fft_a3() { + fill_arr_real(arr, len, a3); + fft_fft(arr, bits); + analyze_arr(arr, len); +} + +// TODO: to have exactly the python result, we must multiply the result by len +void test_fft_a4() { + fill_arr_real(arr, len, a4); + fft_fft(arr, bits); + for (int i = 0; i < len; i++) { + assert_closef_si(fxp32_toFloat(arr[i].r), A4_real[i], COMP_PREC, i); + assert_closef_si(fxp32_toFloat(arr[i].i), A4_imag[i], COMP_PREC, i); + } +} + +int main() { + PRINTF("====== Test FFT ==========\n"); + PRINTF("====== Test A1 ===========\n"); + test_fft_a1(); + PRINTF("====== Test passed =======\n"); + PRINTF("====== Test A2 ===========\n"); + test_fft_a2(); + PRINTF("====== Test passed =======\n"); + PRINTF("====== Test A3 ===========\n"); + test_fft_a3(); + PRINTF("====== Test passed =======\n"); + PRINTF("====== 
Test A3 ===========\n"); + test_fft_a4(); + PRINTF("====== Test passed =======\n"); + PRINTF("====== Test FFT end ======\n\n"); + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/sw/applications/l_fft/sylt-fft/LICENSE b/sw/applications/l_fft/sylt-fft/LICENSE new file mode 100644 index 000000000..f1c15f58f --- /dev/null +++ b/sw/applications/l_fft/sylt-fft/LICENSE @@ -0,0 +1,48 @@ +MHG (MORAL HIGH GROUND) LICENSE + +This software is released under the UNLICENSE license under the single +condition that good moral standards are maintained in its use. + +The term "moral standards" is chosen because it is impossible to define +legally as it varies with cultural and individual values, principles and +ideas. Hence, the following may or may not apply. + +If you find this software useful - use it in any way you like, but +consider dropping the author a "thank you" message. + +If you want to use this software to earn money - please do, but +consider mentioning the authors name or making a donation. + +If you use this software - consider letting the author know. Simply +knowing it is being put to good use is often rewarding. + +You should consider maintaining open source licensing for software +that uses or is derived from this software. + + +UNLICENSE + +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. 
We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to \ No newline at end of file diff --git a/sw/applications/l_fft/sylt-fft/README.md b/sw/applications/l_fft/sylt-fft/README.md new file mode 100644 index 000000000..530a2f314 --- /dev/null +++ b/sw/applications/l_fft/sylt-fft/README.md @@ -0,0 +1,59 @@ +SYLT-FFT +======== +DEVSOUND (I)FFT(R) LIBRARY +------------------------------------- +And some other funky fixed-point maths like gray-coding and pow(2, f) + +**Optimized (C-level) for Keil C Compiler and GCC on Cortex-M4.** + +**Authors:** +* D. Taylor 2014 (gmail: senseitg) + +**License:** +* MHG (GPL compatible) - see LICENSE + +**Features:** +* FFT (Fast Fourier Transform) and IFFT (Inverse FFT) +* Fixed-point 32-bit, Radix-2 +* Complex or real (with slight conversion overhead) data +* No plan construction required before (I)FFT +* No reliance on other libraries (including libm if using precomputed tables) + +**Options (config.h):** +* DIT (decimation-in-time) or DIF (decimation-in-frequency) +* Rounding on divide (-speed, +accuracy) +* Saturating math (-speed, +stability) +* Table size vs. max. 
FFT length + +**Resource requirements:** +* Minimal memory requirements (in-place) +* Minimal stack use (non-recursive) +* Minimal twiddle tables (512 bytes for max N=512 FFT) + +**Notes:** +* Designed for optimal performance, not optimal accuracy + +**Caveats:** +* Care must be taken with input data to ensure no overflows +* Requires C99 (-std=c99 for GCC) + +**Performance:** +* Comparing against: CMSIS DSP arm_cortexM4I_math.lib(1.4.2) +* Platform: Freescale Kinetis K20 (Cortex-M4/ARMv7E-M) +* KEIL = Keil C Compiler 5.01 -O3 +* GCC = GNU Tools for ARM Embedded Processors 4.8.4 -O3 + +``` +Comparisons are of speed, +N% = faster than CMSIS, -N% = slower. +Please verify and do additional tests to add to the list. + + CMSIS-DSP SYLT-FFT N KEIL GCC +* arm_cfft_radix2_q31 fft_inverse 256 +25.6% +15.1% +``` + +**Thanks to:** +* [Wikipedia](http://www.wikipedia.org/) - for existing and taking donations +* [KATJA](http://www.katjaas.nl/) - for intelligible merge/split spectra algorithm +* [XCORE](https://github.com/xcore/) - for intelligible (I)FFT algorithm +* [CMLAB](http://www.cmlab.csie.ntu.edu.tw/cml/dsp/training/coding/transform/fft.html) - for intelligible FFT algorithm breakdown +* [BEVAN](http://web.ece.ucdavis.edu/~bbaas/281/slides/Handout.fft2.pdf) - for intelligible DIT vs. DIF, radix-2 vs. 4 overview diff --git a/sw/applications/l_fft/sylt-fft/config.h b/sw/applications/l_fft/sylt-fft/config.h new file mode 100644 index 000000000..5705d19dc --- /dev/null +++ b/sw/applications/l_fft/sylt-fft/config.h @@ -0,0 +1,88 @@ +// CONFIGURATION FILE +// D. 
TAYLOR 2014 + +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +#include "stdint.h" +#include "stdbool.h" + +#ifndef __INLINE +#if defined(__GNUC__) +#define __INLINE __attribute__((always_inline)) inline +#else +#define __INLINE __inline +#endif +#endif + + +/* == MATH CONFIGURE =============================================== */ + +#define FPOW2_FBITS 23 // Number of fractional bits (1...28) NOTE: changed +#define FPOW2_LIMIT 8 // Limit accuracy to n fractional bits (1...FPOW2_FBITS-1) + +#define SINE_BITS 7 // Sine quality (2..14) vs. memory tradeoff +#define SINE_USE_TABLE 1 // Use pre-computed ROM table (vs. generate in RAM) +#define SINE_PRINTOUT 0 // Write sine table to screen (PC only) + +/* == FFT CONFIGURE =============================================== */ + +// Maximum FFT size: 4 << SINE_BITS (complex data points) +// Memory used by sine table: 4 << SINE_BITS (bytes) +// FFT is faster when SINE_USE_TABLE is 0 (located in RAM) + +#define FFT_DIT // Operation mode, FFT_DIT or FFT_DIF (slower) +#define FFT_ROUNDING 0 // Perform rounding when dividing (slower) +#define FFT_SATURATE 0 // Use saturating math where possible (slower) + +/* == WAVETABLE CONFIGURE ========================================== */ + +/* == GLOBAL DATA CONFIGURE ======================================== */ + +// PI +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +// LUT for sine wave, first quadrant only +#if SINE_USE_TABLE +// == PLACE GENERATED SINE TABLE HERE ======================== // +// ROM +#if SINE_BITS != 7 +#error "sinetable[] size does not match SINE_BITS" +#endif +const int32_t sinetable[] = { + 0x00000000, 0x01921d1f, 0x03242abe, 0x04b6195d, 0x0647d97c, 0x07d95b9e, 0x096a9049, 0x0afb6805, + 0x0c8bd35e, 0x0e1bc2e3, 0x0fab272b, 0x1139f0ce, 0x12c8106e, 0x145576b1, 0x15e21444, 0x176dd9de, + 0x18f8b83c, 0x1a82a025, 0x1c0b826a, 0x1d934fe5, 0x1f19f97b, 0x209f701c, 0x2223a4c5, 0x23a6887e, + 0x25280c5d, 0x26a82185, 0x2826b928, 0x29a3c484, 0x2b1f34eb, 0x2c98fbba, 
0x2e110a61, 0x2f875262, + 0x30fbc54d, 0x326e54c7, 0x33def287, 0x354d9056, 0x36ba2013, 0x382493b0, 0x398cdd32, 0x3af2eeb7, + 0x3c56ba70, 0x3db832a5, 0x3f1749b7, 0x4073f21d, 0x41ce1e64, 0x4325c135, 0x447acd50, 0x45cd358f, + 0x471cece6, 0x4869e664, 0x49b41533, 0x4afb6c97, 0x4c3fdff3, 0x4d8162c4, 0x4ebfe8a4, 0x4ffb654d, + 0x5133cc94, 0x5269126e, 0x539b2aef, 0x54ca0a4a, 0x55f5a4d2, 0x571deef9, 0x5842dd54, 0x59646497, + 0x5a827999, 0x5b9d1153, 0x5cb420df, 0x5dc79d7c, 0x5ed77c89, 0x5fe3b38d, 0x60ec382f, 0x61f1003e, + 0x62f201ac, 0x63ef328f, 0x64e88926, 0x65ddfbd3, 0x66cf811f, 0x67bd0fbc, 0x68a69e81, 0x698c246c, + 0x6a6d98a4, 0x6b4af278, 0x6c242960, 0x6cf934fb, 0x6dca0d14, 0x6e96a99c, 0x6f5f02b1, 0x70231099, + 0x70e2cbc6, 0x719e2cd2, 0x72552c84, 0x7307c3cf, 0x73b5ebd0, 0x745f9dd0, 0x7504d345, 0x75a585cf, + 0x7641af3c, 0x76d94988, 0x776c4edb, 0x77fab988, 0x78848413, 0x7909a92c, 0x798a23b1, 0x7a05eead, + 0x7a7d055b, 0x7aef6323, 0x7b5d039d, 0x7bc5e28f, 0x7c29fbee, 0x7c894bdd, 0x7ce3ceb1, 0x7d3980ec, + 0x7d8a5f3f, 0x7dd6668e, 0x7e1d93e9, 0x7e5fe493, 0x7e9d55fc, 0x7ed5e5c6, 0x7f0991c3, 0x7f3857f5, + 0x7f62368f, 0x7f872bf2, 0x7fa736b4, 0x7fc25596, 0x7fd8878d, 0x7fe9cbbf, 0x7ff62182, 0x7ffd885a, + 0x7fffffff, // <= space potato! +}; // <= sad monkey? 
+// == END OF GENERATED SINE TABLE ============================ // +#else +// RAM +int32_t sinetable[(1 << SINE_BITS) + 1]; +#endif + +// LUT for pow(2, fixedpoint) +// Only need to define up to FPOW2_LIMIT +const int32_t fpow2table[] = { + 0x6a09e668, 0x306fe0a3, 0x172b83c8, 0x0b5586d0, 0x059b0d31, 0x02c9a3e7, 0x0163daa0, 0x00b1afa6, +/*0x0058c86e, 0x002c605e, 0x00162f39, 0x000b175f, 0x00058ba0, 0x0002c5cc, 0x000162e5, 0x0000b172, + 0x000058b9, 0x00002c5d, 0x0000162e, 0x00000b17, 0x0000058c, 0x000002c6, 0x00000163, 0x000000b1, + 0x00000059, 0x0000002c, 0x00000016, 0x0000000b, 0x00000006, 0x00000003, 0x00000001, 0x00000001,*/ +}; + +#endif diff --git a/sw/applications/l_fft/sylt-fft/fft.h b/sw/applications/l_fft/sylt-fft/fft.h new file mode 100644 index 000000000..d6fb3ba76 --- /dev/null +++ b/sw/applications/l_fft/sylt-fft/fft.h @@ -0,0 +1,423 @@ +// (I)FFT(R) +// D. TAYLOR 2014 + +#ifndef __FFT_H__ +#define __FFT_H__ + +#include "config.h" +#include "intrinsics.h" +#include "fpmath.h" + +/* == DECLARATIONS ================================================ */ + +// Fixed-point data type +typedef int32_t fft_t; + +// Complex number type +typedef struct { + fft_t r, i; +} fft_complex_t; + +// Readability macros +#define FFT_QCOS(K, SH) sinetable[(1 << SINE_BITS) - (K << SH)] +#define FFT_QSIN(K, SH) sinetable[K << SH] + +#if !((defined FFT_DIT) | (defined FFT_DIF)) +#error "Must define FFT_DIT or FFT_DIF" +#endif + + +/* == CODING STYLE DEFINITIONS ==================================== */ + +// GCC/ARMCC require different coding styles for optimal performance. +// These defines unify the different styles into one syntax. 
+ +// # Optimal performance on ARMCC (Keil) # +#if defined(__ARMCC_VERSION) +// Declare complex, assign complex +#define FFT_DECLC(VAR, ASG) fft_complex_t VAR = ASG; +// Declare complex, assign real, imaginary +#define FFT_DECLR(VAR, R, I) fft_complex_t VAR = (fft_complex_t){ .r = R, .i = I }; +// Assign real, imaginary +#define FFT_ASSGN(VAR, R, I) VAR = (fft_complex_t){ .r = R, .i = I }; +// Access real, imaginary +#define FFT(VAR, SUB) VAR.SUB +#endif + +// # Optimal performance on GCC # +#if defined(__GNUC__) +// Declare complex, assign complex +#define FFT_DECLC(VAR, ASG) fft_t VAR##r = ASG.r, VAR##i = ASG.i; +// Declare complex, assign real, imaginary +#define FFT_DECLR(VAR, R, I) fft_t VAR##r = R, VAR##i = I; +// Assign real, imaginary +#define FFT_ASSGN(VAR, R, I) VAR.r = R; VAR.i = I; +// Access real, imaginary +#define FFT(VAR, SUB) VAR##SUB +#endif + +#if FFT_SATURATE +#define FFT_A(A,B) qadd(A, B) // A + B (saturating) +#define FFT_S(A,B) qsub(A, B) // A - B (saturating) +#define FFT_M2(W) qadd(W, W) // W * 2 (saturating) +#else +#define FFT_A(A,B) ((A) + (B)) // A + B +#define FFT_S(A,B) ((A) - (B)) // A - B +#define FFT_M2(W) ((W) << 1) // W * 2 +#endif +#define FFT_M(A,B) smmulr(A, B) // A * B +#define FFT_MA(A,B,C) smmlar(A, B, C) // C + (A * B) +#define FFT_MS(A,B,C) smmlsr(A, B, C) // C - (A * B) +#if FFT_ROUNDING +#define FFT_D2(W) (((W) + 1) >> 1) // W / 2 (rounded) +#else +#define FFT_D2(W) ((W) >> 1) // W / 2 +#endif + +/* == FORWARD AND INVERSE FFT ===================================== */ + +// Forward FFT transform +// Permutation must be performed prior to (DIT)/after (DIF) call +void fft_forward(fft_complex_t data[], unsigned bits) { + unsigned size = 1 << bits; +#ifdef FFT_DIT + unsigned shift = SINE_BITS + 1; + for(unsigned stride = 2 ; stride <= size; stride <<= 1, shift--) { +#else//FFT_DIF + unsigned shift = SINE_BITS - (bits - 2); + for(unsigned stride = size; stride >= 2; stride >>= 1, shift++) { +#endif + // Twiddle and combine for 
k = 0, having trivial (0 and 1) twiddle factors + for(unsigned a = 0; a < size; a += stride) { + unsigned b = a + (stride >> 1); +/* + FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); + // # Radix-2 DIT/DIF trivial butterfly # + FFT_ASSGN(data[a], FFT_D2(FFT_ADD(FFT(A,r), FFT(B,r))), FFT_D2(FFT_ADD(FFT(A,i), FFT(B,i)))); + FFT_ASSGN(data[b], FFT_D2(FFT_SUB(FFT(A,r), FFT(B,r))), FFT_D2(FFT_SUB(FFT(A,i), FFT(B,i)))); +*/ + // Special case: GCC optimizes ARMCC style better here + fft_complex_t A = data[a], B = data[b]; + // # Radix-2 DIT/DIF trivial butterfly # + data[a] = (fft_complex_t){ .r = FFT_D2(FFT_A(A.r, B.r)), .i = FFT_D2(FFT_A(A.i, B.i)) }; + data[b] = (fft_complex_t){ .r = FFT_D2(FFT_S(A.r, B.r)), .i = FFT_D2(FFT_S(A.i, B.i)) }; + } + if(!(stride & 2)) { + for(unsigned a = (stride >> 2); a < (stride >> 2) + size; a += stride) { + unsigned b = a + (stride >> 1); + FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); +#ifdef FFT_DIT + // # Radix-2 DIT trivial butterfly # + FFT_ASSGN(data[a], FFT_D2(FFT_A(FFT(A,r), FFT(B,i))), FFT_D2(FFT_S(FFT(A,i), FFT(B,r)))); + FFT_ASSGN(data[b], FFT_D2(FFT_S(FFT(A,r), FFT(B,i))), FFT_D2(FFT_A(FFT(A,i), FFT(B,r)))); +#else//FFT_DIF + // # Radix-2 DIF trivial butterfly # + FFT_ASSGN(data[a], FFT_D2(FFT_A(FFT(A,r), FFT(B,r))), FFT_D2(FFT_A(FFT(A,i), FFT(B,i)))); + FFT_ASSGN(data[b], FFT_D2(FFT_S(FFT(A,i), FFT(B,i))), FFT_D2(FFT_S(FFT(B,r), FFT(A,r)))); +#endif + } + } + // Twiddle and combine + for(unsigned k = 1; k < (stride >> 2); k++) { + FFT_DECLR(W, FFT_QCOS(k, shift), FFT_QSIN(k, shift)); + for(unsigned a = k, b; a < size; a += (stride >> 2) + (stride >> 1)) { + b = a + (stride >> 1); + { // These two blocks prevent the compiler from confusing... 
+ FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); +#ifdef FFT_DIT + // # Radix-2 DIT butterfly # + FFT_DECLR(BW, FFT_MA(FFT(B,i), FFT(W,i), FFT_M(FFT(B,r), FFT(W,r))), + FFT_MS(FFT(B,r), FFT(W,i), FFT_M(FFT(B,i), FFT(W,r)))); + FFT_ASSGN(data[a], FFT_A(FFT_D2(FFT(A,r)), FFT(BW,r)), FFT_A(FFT_D2(FFT(A,i)), FFT(BW,i))); + FFT_ASSGN(data[b], FFT_S(FFT_D2(FFT(A,r)), FFT(BW,r)), FFT_S(FFT_D2(FFT(A,i)), FFT(BW,i))); +#else//FFT_DIF + // # Radix-2 DIF butterfly # + FFT_ASSGN(data[a], FFT_D2(FFT_A(FFT(A,r), FFT(B,r))), FFT_D2(FFT_A(FFT(A,i), FFT(B,i)))); + FFT_DECLR(D, FFT_S(FFT(A,r), FFT(B,r)), FFT_S(FFT(A,i), FFT(B,i))); + FFT_ASSGN(data[b], FFT_MA(FFT(D,r), FFT(W,r), FFT_M(FFT(D,i), FFT(W,i))), + FFT_MS(FFT(D,r), FFT(W,i), FFT_M(FFT(D,i), FFT(W,r)))); +#endif + } + a += (stride >> 2); b += (stride >> 2); + { // ...register use resulting in more efficient code + FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); +#ifdef FFT_DIT + // # Radix-2 DIT butterfly # + FFT_DECLR(BW, FFT_MS(FFT(B,r), FFT(W,i), FFT_M(FFT(B,i), FFT(W,r))), + FFT_MA(FFT(B,i), FFT(W,i), FFT_M(FFT(B,r), FFT(W,r)))); + FFT_ASSGN(data[a], FFT_A(FFT_D2(FFT(A,r)), FFT(BW,r)), FFT_S(FFT_D2(FFT(A,i)), FFT(BW,i))); + FFT_ASSGN(data[b], FFT_S(FFT_D2(FFT(A,r)), FFT(BW,r)), FFT_A(FFT_D2(FFT(A,i)), FFT(BW,i))); +#else//FFT_DIF + // # Radix-2 DIF butterfly # + FFT_ASSGN(data[a], FFT_D2(FFT_A(FFT(A,r), FFT(B,r))), FFT_D2(FFT_A(FFT(A,i), FFT(B,i)))); + FFT_DECLR(D, FFT_S(FFT(B,r), FFT(A,r)), FFT_S(FFT(B,i), FFT(A,i))); + FFT_ASSGN(data[b], FFT_MS(FFT(D,i), FFT(W,r), FFT_M(FFT(D,r), FFT(W,i))), + FFT_MA(FFT(D,i), FFT(W,i), FFT_M(FFT(D,r), FFT(W,r)))); +#endif + } + } + } + } +} + +// Inverse FFT transform +// Permutation must be performed prior to (DIT)/after (DIF) call +void fft_inverse(fft_complex_t data[], unsigned bits) { + unsigned size = 1 << bits; +#ifdef FFT_DIT + unsigned shift = SINE_BITS + 1; + for(unsigned stride = 2 ; stride <= size; stride <<= 1, shift--) { +#else//FFT_DIF + unsigned shift = SINE_BITS - (bits 
- 2); + for(unsigned stride = size; stride >= 2; stride >>= 1, shift++) { +#endif + // Twiddle and combine for k = 0, having trivial (0 and 1) twiddle factors + for(unsigned a = 0; a < size; a += stride) { + unsigned b = a + (stride >> 1); +/* + FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); + // # Radix-2 DIT/DIF trivial butterfly # + FFT_ASSGN(data[a], FFT_A(FFT(A,r), FFT(B,r)), FFT_A(FFT(A,i), FFT(B,i))); + FFT_ASSGN(data[b], FFT_S(FFT(A,r), FFT(B,r)), FFT_S(FFT(A,i), FFT(B,i))); +*/ + // Special case: GCC optimizes ARMCC style better here + fft_complex_t A = data[a], B = data[b]; + // # Radix-2 DIT/DIF trivial butterfly # + data[a] = (fft_complex_t){ .r = FFT_A(A.r, B.r), .i = FFT_A(A.i, B.i) }; + data[b] = (fft_complex_t){ .r = FFT_S(A.r, B.r), .i = FFT_S(A.i, B.i) }; + } + if(!(stride & 2)) { + for(unsigned a = (stride >> 2); a < size; a += stride) { + unsigned b = a + (stride >> 1); + FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); +#ifdef FFT_DIT + // # Radix-2 DIT trivial butterfly # + FFT_ASSGN(data[a], FFT_S(FFT(A,r), FFT(B,i)), FFT_A(FFT(A,i), FFT(B,r))); + FFT_ASSGN(data[b], FFT_A(FFT(A,r), FFT(B,i)), FFT_S(FFT(A,i), FFT(B,r))); +#else//FFT_DIF + // # Radix-2 DIF trivial butterfly # + FFT_ASSGN(data[a], FFT_A(FFT(A,r), FFT(B,r)), FFT_A(FFT(A,i), FFT(B,i))); + FFT_ASSGN(data[b], FFT_S(FFT(B,i), FFT(A,i)), FFT_S(FFT(A,r), FFT(B,r))); +#endif + } + } + // Twiddle and combine + for(unsigned k = 1; k < (stride >> 2); k++) { + FFT_DECLR(W, FFT_QCOS(k, shift), FFT_QSIN(k, shift)); + for(unsigned a = k, b; a < size; a += (stride >> 2) + (stride >> 1)) { + b = a + (stride >> 1); + { // These two blocks prevent the compiler from confusing... 
+ FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); +#ifdef FFT_DIT + // # Radix-2 DIT butterfly # + FFT_DECLR(BW, FFT_MS(FFT(B,r), FFT(W,r), FFT_M(FFT(B,i), FFT(W,i))), + FFT_MA(FFT(B,i), FFT(W,r), FFT_M(FFT(B,r), FFT(W,i)))); + FFT_ASSGN(data[a], FFT_S(FFT(A,r), FFT_M2(FFT(BW,r))), FFT_A(FFT(A,i), FFT_M2(FFT(BW,i)))); + FFT_ASSGN(data[b], FFT_A(FFT(A,r), FFT_M2(FFT(BW,r))), FFT_S(FFT(A,i), FFT_M2(FFT(BW,i)))); +#else//FFT_DIF + // # Radix-2 DIF butterfly # + FFT_ASSGN(data[a], FFT_A(FFT(A,r), FFT(B,r)), FFT_A(FFT(A,i), FFT(B,i))); + FFT_DECLR(D, FFT_S(FFT(A,r), FFT(B,r)), FFT_S(FFT(A,i), FFT(B,i))); + FFT_ASSGN(data[b], FFT_M2(FFT_MS(FFT(D,i), FFT(W,i), FFT_M(FFT(D,r), FFT(W,r)))), + FFT_M2(FFT_MA(FFT(D,i), FFT(W,r), FFT_M(FFT(D,r), FFT(W,i))))); +#endif + } + a += (stride >> 2); b += (stride >> 2); + { // ...register use resulting in more efficient code + FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); +#ifdef FFT_DIT + // # Radix-2 DIT butterfly # + FFT_DECLR(BW, FFT_MA(FFT(B,i), FFT(W,r), FFT_M(FFT(B,r), FFT(W,i))), + FFT_MS(FFT(B,r), FFT(W,r), FFT_M(FFT(B,i), FFT(W,i)))); + FFT_ASSGN(data[a], FFT_S(FFT(A,r), FFT_M2(FFT(BW,r))), FFT_S(FFT(A,i), FFT_M2(FFT(BW,i)))); + FFT_ASSGN(data[b], FFT_A(FFT(A,r), FFT_M2(FFT(BW,r))), FFT_A(FFT(A,i), FFT_M2(FFT(BW,i)))); +#else//FFT_DIF + // # Radix-2 DIF butterfly # + FFT_ASSGN(data[a], FFT_A(FFT(A,r), FFT(B,r)), FFT_A(FFT(A,i), FFT(B,i))); + FFT_DECLR(D, FFT_S(FFT(A,r), FFT(B,r)), FFT_S(FFT(B,i), FFT(A,i))); + FFT_ASSGN(data[b], FFT_M2(FFT_MS(FFT(D,r), FFT(W,i), FFT_M(FFT(D,i), FFT(W,r)))), + FFT_M2(FFT_MA(FFT(D,i), FFT(W,i), FFT_M(FFT(D,r), FFT(W,r))))); +#endif + } + } + } + } +} + + +/* == DATA SET PROCESSING AND MANIPULATION ======================== */ + +// Process complex data to produce real-only output +// This allows us to output N*2 point of real data using a N point complex (I)FFT +// Even/odd real data will be found in the real/imaginary parts of every output bin upon completion +void fft_convert(fft_complex_t 
data[], unsigned bits, bool permutated, bool invert) { + unsigned size = 1 << --bits; + unsigned shift = SINE_BITS - bits++; + unsigned n, z, nc, zc; + fft_t rsum, rdif, isum, idif; + fft_t itwiddled, rtwiddled; + for(nc = zc = size; nc; nc--, zc++) { + if(permutated) { + n = RBITS(nc, bits); z = RBITS(zc, bits); + } else { + n = nc; z = zc; + } + rsum = data[n].r + data[z].r; isum = data[n].i + data[z].i; + rdif = data[n].r - data[z].r; idif = data[n].i - data[z].i; + fft_t r = FFT_QCOS(nc, shift); fft_t i = -FFT_QSIN(nc, shift); + if(invert) r = -r; + rtwiddled = FFT_MA(r, isum, FFT_M(i, rdif)) << 1; + itwiddled = FFT_MS(r, rdif, FFT_M(i, isum)) << 1; + data[n].r = rsum + rtwiddled; data[n].i = itwiddled + idif; + data[z].r = rsum - rtwiddled; data[z].i = itwiddled - idif; + } + fft_t data_0_tr = data[0].r; + data[0].r = (data[0].r + data[0].i); data[0].i = (data_0_tr - data[0].i); + if(!invert) { data[0].r <<= 1; data[0].i <<= 1; } +} + +// Perform bit-reversal permutation on data set +// (Reverses address bits for all data points) +void fft_permutate(fft_complex_t data[], unsigned bits) { + unsigned size = 1 << bits; + unsigned shift = 32 - bits; + for(unsigned i = 1; i < size - 1; i++) { + unsigned z = rbit(i) >> shift; + if(z > i) { + fft_t + t = data[i].r; data[i].r = data[z].r; data[z].r = t; + t = data[i].i; data[i].i = data[z].i; data[z].i = t; + } + } +} + + +/* == "HIGH"-LEVEL FUNCTIONS ====================================== */ + +// Perform forward FFT (including permutation) +__INLINE +void fft_fft(fft_complex_t *complex, unsigned bits) { +#ifdef FFT_DIT + fft_permutate(complex, bits); +#endif + fft_forward(complex, bits); +#ifdef FFT_DIF + fft_permutate(complex, bits); +#endif +} + +// Perform inverse FFT (including permutation) +__INLINE +void fft_ifft(fft_complex_t *complex, unsigned bits) { +#ifdef FFT_DIT + fft_permutate(complex, bits); +#endif + fft_inverse(complex, bits); +#ifdef FFT_DIF + fft_permutate(complex, bits); +#endif +} + +// Perform 
forward FFT (including permutation, real output conversion) +__INLINE +void fft_fftr(fft_complex_t *complex, unsigned bits) { + fft_fft(complex, bits); + fft_convert(complex, bits, false, false); +} + +// Perform inverse FFT (including permutation, real input conversion) +__INLINE +void fft_ifftr(fft_complex_t *complex, unsigned bits) { + fft_convert(complex, bits, false, true); + fft_ifft(complex, bits); +} + + +/* == DATA SET CONSTRUCTION ======================================= */ + +// Magnitude and phase => complex FFT bin [index] +// A data set built with this method does not require fft_permutate before DIT IFFT +__INLINE +void fft_phase_magnitude(fft_complex_t complex[], unsigned bits, unsigned index, int32_t mag, uint32_t pha) { +#ifdef FFT_DIT + unsigned n = RBITS(index, bits); +#else//FFT_DIF + unsigned n = index; +#endif + complex[n].r = FFT_M(mag, sine(pha)); + complex[n].i = FFT_M(mag, cosine(pha)); +} + +// Magnitude, phase:0 => complex FFT bin [index] +// A data set built with this method does not require fft_permutate before DIT IFFT +__INLINE +void fft_magnitude(fft_complex_t complex[], unsigned bits, unsigned index, int32_t mag) { +#ifdef FFT_DIT + unsigned n = RBITS(index, bits); +#else//FFT_DIF + unsigned n = index; +#endif + complex[n].r = 0; complex[n].i = mag; +} + +// REAL Symmetric DC offset => complex FFT bin [0] (DC) +// A data set built with this method does not require fft_permutate before DIT IFFT +// A data set built with this method does not require fft_convert before IFFT +__INLINE +void fft_real_dc(fft_complex_t data[], fft_t r, fft_t i) { + data[0].r = r + i; + data[0].i = r - i; +} + +// REAL Symmetric magnitude and phase => complex FFT bins [index], [size-index] +// A data set built with this method does not require fft_permutate before DIT IFFT +// A data set built with this method does not require fft_convert before IFFT +void fft_real_phase_magnitude(fft_complex_t complex[], unsigned bits, unsigned index, int32_t mag_lo, 
int32_t pha_lo, int32_t mag_hi, int32_t pha_hi) { + unsigned size = 1 << bits; + unsigned shift = SINE_BITS - (bits - 1); +#ifdef FFT_DIT + unsigned n = RBITS(index, bits); + unsigned z = RBITS(size - index, bits); +#else//FFT_DIF + unsigned n = index; + unsigned z = size - index; +#endif + fft_t rsum, rdif, isum, idif, r, i; + fft_t itwiddled, rtwiddled; + r = FFT_M(mag_lo, sine(pha_lo)); + i = FFT_M(mag_hi, sine(pha_hi)); + rsum = r + i; rdif = r - i; + r = FFT_M(mag_lo, cosine(pha_lo)); + i = FFT_M(mag_hi, cosine(pha_hi)); + isum = r + i; idif = r - i; + r = -FFT_QCOS(index, shift); i = -FFT_QSIN(index, shift); + rtwiddled = FFT_MA(r, isum, FFT_M(i, rdif)) << 1; + itwiddled = FFT_MS(r, rdif, FFT_M(i, isum)) << 1; + complex[n].r = rsum + rtwiddled; complex[n].i = itwiddled + idif; + complex[z].r = rsum - rtwiddled; complex[z].i = itwiddled - idif; +} + +// REAL Symmetric magnitude, phase:0 => complex FFT bins [index], [size-index] +// This method works with permutated (bit-reversed) addressing +// A data set built with this method does not require fft_permutate before DIT IFFT +// A data set built with this method does not require fft_convert before IFFT +void fft_real_magnitude(fft_complex_t complex[], unsigned bits, unsigned index, int32_t mag_lo, int32_t mag_hi) { + unsigned shift = SINE_BITS - bits + 1; +#ifdef FFT_DIT + unsigned n = RBITS(index, bits); + unsigned z = RBITS((1 << bits) - index, bits); +#else//FFT_DIF + unsigned n = index; + unsigned z = (1 << bits) - index; +#endif + fft_t isum, idif, r, i; + fft_t itwiddled, rtwiddled; + isum = (mag_lo + mag_hi); idif = mag_lo - mag_hi; + r = FFT_QCOS(index, shift); i = FFT_QSIN(index, shift); + rtwiddled = FFT_M(r, isum) << 1; + itwiddled = FFT_M(i, isum) << 1; + complex[n].r = -rtwiddled; complex[n].i = (idif - itwiddled); + complex[z].r = rtwiddled; complex[z].i = -(idif + itwiddled); +} + +#endif diff --git a/sw/applications/l_fft/sylt-fft/fpmath.h b/sw/applications/l_fft/sylt-fft/fpmath.h new file mode 
100644 index 000000000..9385d3c07 --- /dev/null +++ b/sw/applications/l_fft/sylt-fft/fpmath.h @@ -0,0 +1,139 @@ +// FIXED POINT MATHS +// D. TAYLOR 2014 + +#ifndef __FPMATH_H__ +#define __FPMATH_H__ + +#include "config.h" +#include "intrinsics.h" +#include "fpmath.h" + +#define SINE_SIZE (1 << SINE_BITS) // Sine table size +#define SINE_FBITS (32 - 2 - SINE_BITS) // Fractional bits +#define SINE_FMASK ((1 << SINE_FBITS) - 1) // Fraction mask + +// Linear/box interpolation (30 bit precision) +// y1 is first point, y2 second +// mu is interpolation point 00000000-FFFFFFFF +// floating-point equivalent return (y2 - y1) * mu + y1; (mu taken as a fraction in [0, 1)) +__INLINE +int32_t linear(int32_t y1, int32_t y2, uint32_t mu) { + return smmlar((y2 >> 1) - (y1 >> 1), mu >> 1, y1 >> 2) << 2; +} + +// Cubic interpolation +// y0...y3 need to be externally limited in range to prevent overflow +// y0...y3 are control points, interpolation is performed between y1 and y2 +// mu is interpolation point 00000000-FFFFFFFF +// floating-point equivalent return y1+mu/2*(y2-y0+mu*(2*y0-5*y1+4*y2-y3+mu*(3*(y1-y2)+y3-y0))) +__INLINE +int32_t cubic(int32_t y0, int32_t y1, int32_t y2, int32_t y3, uint32_t mu) { + mu >>= 1; + int32_t a = (3 * (y1 - y2) - y0 + y3); + int32_t b = 2 * y2 + y0 - (5 * y1 + y3) / 2; + int32_t c = (y2 - y0) / 2; + return smmlar(smmlar(smmlar(a, mu, b) << 1, mu, c) << 1, mu, y1); +} + +// Generate first quadrant (0 to PI/2) of sine wave +// Output table is in Q31 format, with 1 limited to 0x7FFFFFFF +void sine_init() { +#if !SINE_USE_TABLE + unsigned int n; +#if SINE_PRINTOUT + printf("// ROM\n"); + printf("#if SINE_BITS != 7\n"); + printf("#error \"sinetable[] size does not match SINE_BITS\"\n"); + printf("#endif\n"); + printf("const int32_t sinetable[] = {"); +#endif + for(n = 0; n <= SINE_SIZE; n++) { + uint64_t v = (sin(((double)n * M_PI) / (double)(SINE_SIZE * 2)) * 2147483648.0); + sinetable[n] = v > 2147483647 ?
2147483647 : v; +#if SINE_PRINTOUT + // Print table + if((n & 7) == 0) printf("\n "); + printf("0x%08x, ", sinetable[n]); +#endif + } +#if SINE_PRINTOUT + printf("// <= space potato!\n}; // <= sad monkey?\n"); +#endif +#endif +} + +// Sin by table lookup with interpolation +// pos = 00000000 to FFFFFFFF, corresponding to 0-2PI(less one) +int32_t sine(uint32_t pos) { + uint32_t fraction = (pos & SINE_FMASK) << (2 + SINE_BITS); + uint32_t index = (pos & 0x40000000) ? (0x40000000 + SINE_FMASK - (pos & 0x3FFFFFFF)) : (pos & 0x3FFFFFFF); + uint32_t indexa = index >> SINE_FBITS; + uint32_t indexb = pos & 0x40000000 ? indexa - 1 : indexa + 1; + int32_t sample = linear(sinetable[indexa], sinetable[indexb], fraction); + return pos & 0x80000000 ? -sample : sample; +} + +// Cos by table lookup with interpolation +// See sine +__INLINE +int32_t cosine(uint32_t pos) { + return sine(pos + 0x40000000); +} + +// Fast sin by table lookup +// Same as sine, but no interpolation +__INLINE +int32_t fastsin(uint32_t pos) { + uint32_t index = (pos & 0x40000000) ? 0x40000000 - (pos & 0x3fffffff) : (pos & 0x3fffffff); + int32_t sample = sinetable[index >> SINE_FBITS]; + return (pos & 0x80000000 ? 
-sample : sample); +} + +// Fast cos by table lookup +// See fastsin +__INLINE +int32_t fastcos(uint32_t pos) { + return fastsin(pos + 0x40000000); +} + +// Fixed point pow(2, e) +uint64_t fpow2(uint32_t e) { + uint32_t ipart = e >> FPOW2_FBITS; +#ifdef FPOW2_LIMIT + uint32_t fpart = (e >> (FPOW2_FBITS - FPOW2_LIMIT)) << (32 - FPOW2_LIMIT); +#else + uint32_t fpart = e << (32 - FPOW2_FBITS); +#endif + uint64_t final = 0x100000000; + if(fpart) { + uint32_t bit = clz(fpart); + uint32_t fcalc = fpow2table[bit++] >> 1; + fpart <<= bit; + while(fpart) { + uint32_t lzc = clz(fpart); + bit += lzc++; + int32_t fmul = fpow2table[bit++]; + fcalc += smmlar(fcalc, fmul, fmul >> 1); + fpart <<= lzc; + } + final += (uint64_t)fcalc << 1; + } + return final << ipart; +} + +// Convert binary to gray-code +unsigned bin2gray(unsigned bits) { + return (bits >> 1) ^ bits; +} + +// Convert gray-code to binary +unsigned gray2bin(unsigned bits) { + bits ^= bits >> 16; + bits ^= bits >> 8; + bits ^= bits >> 4; + bits ^= bits >> 2; + bits ^= bits >> 1; + return bits; +} + +#endif diff --git a/sw/applications/l_fft/sylt-fft/intrinsics.h b/sw/applications/l_fft/sylt-fft/intrinsics.h new file mode 100644 index 000000000..e2614baeb --- /dev/null +++ b/sw/applications/l_fft/sylt-fft/intrinsics.h @@ -0,0 +1,153 @@ +// INTRINSICS +// D. 
TAYLOR 2014 + +#ifndef __INTRINSICS_H__ +#define __INTRINSICS_H__ +#include "config.h" + +// issue warnings when not using full hardware acceleration + +#if defined(__ARMCC_VERSION) || (defined(__GNUC__) && defined(__arm__)) +#if (__CORTEX_M < 0x03) +#warning "Cortex-M core < M3 detected; hardware acceleration for math operations not supported" +#elif (__CORTEX_M < 0x04) +#warning "Cortex-M core < M4 detected; partial hardware acceleration for math operations supported" +#endif +#endif + +// reverse bits (ARM: RBIT) +__INLINE +uint32_t rbit(uint32_t x) { + uint32_t result; +#if defined(__ARMCC_VERSION) && ((__CORTEX_M >= 0x03) || (__CORTEX_SC >= 300)) + __asm{ rbit result, x } +#elif defined(__GNUC__) && defined(__arm__) && ((__CORTEX_M >= 0x03) || (__CORTEX_SC >= 300)) + __asm("rbit %0, %1":"=r"(result):"r"(x)); +#else + x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1)); + x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2)); + x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4)); + x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8)); + result = (x >> 16) | (x << 16); +#endif + return result; +} + +#define RBITS(W, BITS) (rbit(W) >> (32 - (BITS))) + +// count leading zeroes (ARM: CLZ) +__INLINE +uint32_t clz(uint32_t x) { + uint32_t result; +#if defined(__ARMCC_VERSION) && ((__CORTEX_M >= 0x03) || (__CORTEX_SC >= 300)) + __asm{ clz result, x } +#elif defined(__GNUC__) && defined(__arm__) && ((__CORTEX_M >= 0x03) || (__CORTEX_SC >= 300)) + __asm("clz %0, %1":"=r"(result):"r"(x)); +#else + x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; + x -= 0x55555555 & (x >> 1); + x = (0x33333333 & x) + (0x33333333 & (x >> 2)); + result = 32 - ((0x01010101 * (0x0F0F0F0F & (x + (x >> 4)))) >> 24); +#endif + return result; +} + +// 32-bit signed multiply -> 32-bit result, add 32-bit (ARM: SMMLAR) +// floating point equivalent: return c + a * b +__INLINE +int32_t smmlar(int32_t a, int32_t b, int32_t c) { + int32_t result; +#if 
defined(__ARMCC_VERSION) && (__CORTEX_M >= 0x04U) + __asm{ smmlar result, a, b, c } +#elif defined(__GNUC__) && defined(__arm__) && (__CORTEX_M >= 0x04U) + __asm("smmlar %0, %1, %2, %3":"=r"(result):"r"(a),"r"(b),"r"(c)); +#else + result = c + ((((int64_t)a * b) + 0x80000000) >> 32); +#endif + return result; +} + +// 32-bit signed multiply -> 32-bit result, subtract 32-bit (ARM: SMMLSR) +// floating point equivalent: return c - a * b +__INLINE +int32_t smmlsr(int32_t a, int32_t b, int32_t c) { + int32_t result; +#if defined(__ARMCC_VERSION) && (__CORTEX_M >= 0x04U) + __asm{ smmlsr result, a, b, c } +#elif defined(__GNUC__) && defined(__arm__) && (__CORTEX_M >= 0x04U) + __asm("smmlsr %0, %1, %2, %3":"=r"(result):"r"(a),"r"(b),"r"(c)); +#else + result = c - ((((int64_t)a * b) + 0x80000000) >> 32); +#endif + return result; +} + +// 32-bit signed multiply -> 32-bit result (ARM: SMMULR) +// floating point equivalent: return a * b +__INLINE +int32_t smmulr(int32_t a, int32_t b) { + int32_t result; +#if defined(__ARMCC_VERSION) && (__CORTEX_M >= 0x04U) + __asm{ smmulr result, a, b } +#elif defined(__GNUC__) && defined(__arm__) && (__CORTEX_M >= 0x04U) + __asm("smmulr %0, %1, %2":"=r"(result):"r"(a),"r"(b)); +#else + result = ((((int64_t)a * b) + 0x80000000) >> 32); +#endif + return result; +} + +// saturating add (ARM: qadd; the GCC inline-asm path is taken only on ARM targets, any other compiler/target uses the portable 64-bit clamp) +// floating point equivalent: return max(min(a + b, 1), -1) +__INLINE +int32_t qadd(int32_t a, int32_t b) { + int32_t result; +#if defined(__ARMCC_VERSION) + __asm{ qadd result, a, b } +#elif defined(__GNUC__) && defined(__arm__) + __asm("qadd %0, %1, %2":"=r"(result):"r"(a),"r"(b)); +#else + int64_t c = (int64_t)a + b; + if(c > 2147483647) c = 2147483647; + if(c < -2147483648) c = -2147483648; + result = c; +#endif + return result; +} + +// saturating subtract (ARM: qsub) +// floating point equivalent: return max(min(a - b, 1), -1) +__INLINE +int32_t qsub(int32_t a, int32_t b) { + uint32_t result; +#if defined(__ARMCC_VERSION) + __asm{ qsub result, a, b } +#elif
defined(__GNUC__) && defined(__arm__) + __asm("qsub %0, %1, %2":"=r"(result):"r"(a),"r"(b)); +#else + int64_t c = (int64_t)a - b; + if(c > 2147483647) c = 2147483647; + if(c < -2147483648) c = -2147483648; + result = c; +#endif + return result; +} + +// 32-bit arithmetic shift right with rounding (ARM: ASRS + ADC) +// floating point equivalent: return v / pow(2, s) +__INLINE +int32_t asrr(int32_t v, int32_t s) { + int32_t result; +#if defined(__ARMCC_VERSION) + __asm{ asrs result, v, s }; + __asm{ adc result, result }; +#elif defined(__GNUC__) && defined(__arm__) + __asm("asrs %0, %1, %2":"=r"(result):"r"(v),"r"(s):"cc"); + __asm("adc %0, %1, #0":"=r"(result):"r"(result)); +#else + result = (v + (1 << (s - 1))) >> s; +#endif + return result; +} + +#endif diff --git a/sw/applications/l_fft/sylt-fft/main.c b/sw/applications/l_fft/sylt-fft/main.c new file mode 100644 index 000000000..cf59fcadd --- /dev/null +++ b/sw/applications/l_fft/sylt-fft/main.c @@ -0,0 +1,74 @@ +// BENCHMARKING FOR FRDM-K20D50M +// D. 
TAYLOR 2014 + +// * Do not build with operating system +// * Uses FTM0 as core clock cycle counter +// * Requires C99 standard + +#define BENCH_RUNS 100 // Not really necessary without OS + +#include + +#include +#include + +#include "config.h" +#include "intrinsics.h" +#include "fpmath.h" +#include "fft.h" + +static volatile unsigned count_hi; // FTM0 high counter +static volatile unsigned count; // Performance counter + +// FFT data structure +fft_complex_t complex[256]; + +// This function contains code to benchmark +static void benchmark(void) { + fft_inverse(complex, 8); +} + +// Initialize FTM0 +static void bench_init(void) { + SIM->SCGC6 |= SIM_SCGC6_FTM0_MASK; + FTM0->MOD = 0xFFFF; + NVIC_EnableIRQ(FTM0_IRQn); +} + +// Resets cycle counts, begins benchmarking +__INLINE +static void bench_begin(void) { + FTM0->SC = 0; + count_hi = 0; + FTM0->CNT = 0; + FTM0->SC = FTM_SC_CLKS(1) | FTM_SC_TOIE_MASK; +} + +// Ends benchmarking, returns cycle count +__INLINE +static unsigned bench_end(void) { + FTM0->SC = 0; + return (FTM0->CNT | (count_hi << 16)) - 2; +} + +// FTM0 overflow counter +void FTM0_IRQHandler(void) { + FTM0->SC &= ~FTM_SC_TOF_MASK; + count_hi++; +} + +int main() { + bench_init(); + while(1) { + // Perform benchmark + unsigned ack = 0; + for(unsigned n = 0; n < BENCH_RUNS; n++) { + bench_begin(); + benchmark(); + ack += bench_end(); + } + count = ack / BENCH_RUNS; + // Count is reported here - use a breakpoint or add communication code + count = count; + } +} diff --git a/sw/applications/l_fft/testdata_fft.h b/sw/applications/l_fft/testdata_fft.h new file mode 100644 index 000000000..09f47c82a --- /dev/null +++ b/sw/applications/l_fft/testdata_fft.h @@ -0,0 +1,19 @@ +#ifndef TESTDATA_FFT_H +#define TESTDATA_FFT_H + +#pragma GCC diagnostic ignored "-Wunused-variable" + +#define LEN 128 + +int bits = 7; + +static float a1[] = { -0.0000000000, -0.1467304745, -0.2902846773, -0.4275550934, -0.5555702330, -0.6715589548, -0.7730104534, -0.8577286100, 
-0.9238795325, -0.9700312532, -0.9951847267, -0.9987954562, -0.9807852804, -0.9415440652, -0.8819212643, -0.8032075315, -0.7071067812, -0.5956993045, -0.4713967368, -0.3368898534, -0.1950903220, -0.0490676743, 0.0980171403, 0.2429801799, 0.3826834324, 0.5141027442, 0.6343932842, 0.7409511254, 0.8314696123, 0.9039892931, 0.9569403357, 0.9891765100, 1.0000000000, 0.9891765100, 0.9569403357, 0.9039892931, 0.8314696123, 0.7409511254, 0.6343932842, 0.5141027442, 0.3826834324, 0.2429801799, 0.0980171403, -0.0490676743, -0.1950903220, -0.3368898534, -0.4713967368, -0.5956993045, -0.7071067812, -0.8032075315, -0.8819212643, -0.9415440652, -0.9807852804, -0.9987954562, -0.9951847267, -0.9700312532, -0.9238795325, -0.8577286100, -0.7730104534, -0.6715589548, -0.5555702330, -0.4275550934, -0.2902846773, -0.1467304745, -0.0000000000, 0.1467304745, 0.2902846773, 0.4275550934, 0.5555702330, 0.6715589548, 0.7730104534, 0.8577286100, 0.9238795325, 0.9700312532, 0.9951847267, 0.9987954562, 0.9807852804, 0.9415440652, 0.8819212643, 0.8032075315, 0.7071067812, 0.5956993045, 0.4713967368, 0.3368898534, 0.1950903220, 0.0490676743, -0.0980171403, -0.2429801799, -0.3826834324, -0.5141027442, -0.6343932842, -0.7409511254, -0.8314696123, -0.9039892931, -0.9569403357, -0.9891765100, -1.0000000000, -0.9891765100, -0.9569403357, -0.9039892931, -0.8314696123, -0.7409511254, -0.6343932842, -0.5141027442, -0.3826834324, -0.2429801799, -0.0980171403, 0.0490676743, 0.1950903220, 0.3368898534, 0.4713967368, 0.5956993045, 0.7071067812, 0.8032075315, 0.8819212643, 0.9415440652, 0.9807852804, 0.9987954562, 0.9951847267, 0.9700312532, 0.9238795325, 0.8577286100, 0.7730104534, 0.6715589548, 0.5555702330, 0.4275550934, 0.2902846773, 0.1467304745,}; +static float a2[] = { -1.0000000000, -0.9891765100, -0.9569403357, -0.9039892931, -0.8314696123, -0.7409511254, -0.6343932842, -0.5141027442, -0.3826834324, -0.2429801799, -0.0980171403, 0.0490676743, 0.1950903220, 0.3368898534, 0.4713967368, 0.5956993045, 
0.7071067812, 0.8032075315, 0.8819212643, 0.9415440652, 0.9807852804, 0.9987954562, 0.9951847267, 0.9700312532, 0.9238795325, 0.8577286100, 0.7730104534, 0.6715589548, 0.5555702330, 0.4275550934, 0.2902846773, 0.1467304745, 0.0000000000, -0.1467304745, -0.2902846773, -0.4275550934, -0.5555702330, -0.6715589548, -0.7730104534, -0.8577286100, -0.9238795325, -0.9700312532, -0.9951847267, -0.9987954562, -0.9807852804, -0.9415440652, -0.8819212643, -0.8032075315, -0.7071067812, -0.5956993045, -0.4713967368, -0.3368898534, -0.1950903220, -0.0490676743, 0.0980171403, 0.2429801799, 0.3826834324, 0.5141027442, 0.6343932842, 0.7409511254, 0.8314696123, 0.9039892931, 0.9569403357, 0.9891765100, 1.0000000000, 0.9891765100, 0.9569403357, 0.9039892931, 0.8314696123, 0.7409511254, 0.6343932842, 0.5141027442, 0.3826834324, 0.2429801799, 0.0980171403, -0.0490676743, -0.1950903220, -0.3368898534, -0.4713967368, -0.5956993045, -0.7071067812, -0.8032075315, -0.8819212643, -0.9415440652, -0.9807852804, -0.9987954562, -0.9951847267, -0.9700312532, -0.9238795325, -0.8577286100, -0.7730104534, -0.6715589548, -0.5555702330, -0.4275550934, -0.2902846773, -0.1467304745, -0.0000000000, 0.1467304745, 0.2902846773, 0.4275550934, 0.5555702330, 0.6715589548, 0.7730104534, 0.8577286100, 0.9238795325, 0.9700312532, 0.9951847267, 0.9987954562, 0.9807852804, 0.9415440652, 0.8819212643, 0.8032075315, 0.7071067812, 0.5956993045, 0.4713967368, 0.3368898534, 0.1950903220, 0.0490676743, -0.0980171403, -0.2429801799, -0.3826834324, -0.5141027442, -0.6343932842, -0.7409511254, -0.8314696123, -0.9039892931, -0.9569403357, -0.9891765100,}; +static float a3[] = { -1.0000000000, -0.4336062769, -0.0330608032, 0.0767959873, -0.1243628311, -0.5458608033, -1.0170767165, -1.3455723565, -1.3826834324, -1.0744497922, -0.4807005727, 0.2441579963, 0.9021971032, 1.3176751338, 1.3952762693, 1.1512695375, 0.7071067812, 0.2476372985, -0.0419582682, -0.0392412152, 0.2736784992, 0.8037051342, 1.3778681590, 1.8015008655, 
1.9238795325, 1.6891982223, 1.1556938857, 0.4764686328, -0.1515365482, -0.5532301870, -0.6335948553, -0.4088397586, -0.0000000000, 0.4088397586, 0.6335948553, 0.5532301870, 0.1515365482, -0.4764686328, -1.1556938857, -1.6891982223, -1.9238795325, -1.8015008655, -1.3778681590, -0.8037051342, -0.2736784992, 0.0392412152, 0.0419582682, -0.2476372985, -0.7071067812, -1.1512695375, -1.3952762693, -1.3176751338, -0.9021971032, -0.2441579963, 0.4807005727, 1.0744497922, 1.3826834324, 1.3455723565, 1.0170767165, 0.5458608033, 0.1243628311, -0.0767959873, 0.0330608032, 0.4336062769, 1.0000000000, 1.5447467430, 1.8808198682, 1.8847745735, 1.5385763935, 0.9360414474, 0.2517098518, -0.3173668681, -0.6173165676, -0.5884894324, -0.2846662920, 0.1460226477, 0.5120164592, 0.6438954270, 0.4524827957, -0.0401290715, -0.7071067812, -1.3587777645, -1.8058007969, -1.9223293456, -1.6878920616, -1.1938857782, -0.6125012943, -0.1385616409, 0.0761204675, -0.0262589977, -0.3903270210, -0.8666492769, -1.2626770142, -1.4083403738, -1.2141642098, -0.7023007075, -0.0000000000, 0.7023007075, 1.2141642098, 1.4083403738, 1.2626770142, 0.8666492769, 0.3903270210, 0.0262589977, -0.0761204675, 0.1385616409, 0.6125012943, 1.1938857782, 1.6878920616, 1.9223293456, 1.8058007969, 1.3587777645, 0.7071067812, 0.0401290715, -0.4524827957, -0.6438954270, -0.5120164592, -0.1460226477, 0.2846662920, 0.5884894324, 0.6173165676, 0.3173668681, -0.2517098518, -0.9360414474, -1.5385763935, -1.8847745735, -1.8808198682, -1.5447467430,}; +static float a4[] = { -4.8874323843, -4.8561747173, -6.4561818305, 8.0769690972, 7.0231996705, -4.4569316779, -1.7365948474, 5.6062251849, -2.9474887269, 1.9529046064, 6.5312366380, -6.7254081203, -7.6855661685, 6.5268319940, 3.1861703180, 5.6702370028, -3.1679470021, -5.2250147764, 9.0295227911, -8.7399827776, 7.7837296560, 1.1239502924, 7.2300715769, -2.6512728062, -5.5532341144, 3.1003886174, 6.9735631179, -5.2173233391, -3.2449424536, -3.0825701569, 5.2340149330, -5.9022812596, 
6.8880465829, -3.0913196620, 1.6671344908, -4.7086959417, 6.0032137891, 6.7717116285, -2.1942547405, 7.1993068277, -6.3970823132, -3.8501407913, -6.5603135366, 1.6699374556, -5.0078541517, -1.1044495730, 4.8540806880, 4.8959880214, 2.1451992407, 5.8228221278, 1.3967127422, 6.6548112277, -1.2023970899, 6.7541007463, 8.0561140848, 8.9040310811, 8.6226488475, -7.8329008779, -9.7879076322, 3.1578079796, 0.9912487182, -7.4516582134, 5.3187283979, -5.4036219956, 0.6471514397, -6.4427821664, 0.3882358965, 9.5290790737, 9.0955798631, 6.4791815797, -3.7360650211, 1.0342502192, -9.7562491615, 2.0419815961, -0.9998590204, 0.5886069063, -3.7041673250, 1.9428682290, 8.0604512475, -0.7203042889, -7.5770402197, -2.9475955020, 5.6968165948, 9.9830928082, 0.9770267277, -9.3850806364, 3.0318132262, -8.4305628254, 2.2655326275, 5.8825397405, -2.3154447377, 9.0202998173, 1.5207013315, -0.0083182619, -1.6471666789, -5.4101669404, -5.3249658616, 4.3072108319, -6.1249525109, 7.7484185065, -9.7996896016, 3.2995041032, 6.0749134107, 7.8103357741, -9.8404434759, 1.0987222049, -5.1286840706, 0.4845644400, -2.2428896391, -6.2540971281, -9.8970526600, 3.3331049444, 8.0520028769, -0.5140610197, -2.5144701654, 8.6883672014, 0.3925111778, 1.1116015689, -5.9891809311, 3.3369563218, -1.6594849745, -6.8284119014, 5.7585432568, 1.1917466443, 1.5817227628, 3.0971667002, 5.9001408400, -0.2029466459,}; + +static float A4_abs[] = { 0.4042524993, 0.3072352111, 0.1926927716, 0.2367857248, 0.3505086005, 0.1959037036, 0.3467378020, 0.4157987833, 0.7142020464, 0.3407714367, 0.3771545887, 0.6797115803, 0.6267043948, 0.3748867512, 0.1407444775, 0.3464486301, 0.5600517988, 0.6639541984, 0.6884431243, 0.3744245172, 0.1671014577, 0.0941833705, 0.9533674717, 0.3564589024, 0.3598238528, 0.3316468894, 0.7458813787, 0.8661624193, 0.3036180139, 0.0790395811, 0.5765622258, 0.2718375325, 0.7123475671, 0.3193224370, 0.9124553800, 0.2214806974, 0.2330931574, 0.4481957853, 0.1595790237, 0.8469842076, 0.3996376395, 
0.1589822918, 0.3486900330, 0.1555739939, 0.6478004456, 0.3620439768, 0.1259486675, 0.7398855686, 0.2649413049, 0.6664981246, 0.4366811216, 0.3362109363, 0.3404149711, 0.5417450666, 0.6636619568, 0.4955887496, 0.5029658079, 0.2083842605, 0.4214411974, 0.4761011600, 0.9036493897, 0.3872455657, 0.6043589115, 0.7038301826, 0.3528341353, 0.7038301826, 0.6043589115, 0.3872455359, 0.9036493897, 0.4761011899, 0.4214412570, 0.2083842605, 0.5029658079, 0.4955887496, 0.6636619568, 0.5417450666, 0.3404149711, 0.3362109363, 0.4366810918, 0.6664980054, 0.2649413049, 0.7398855686, 0.1259486675, 0.3620439470, 0.6478003860, 0.1555739939, 0.3486900330, 0.1589823365, 0.3996376693, 0.8469842076, 0.1595790237, 0.4481958151, 0.2330931574, 0.2214806974, 0.9124553204, 0.3193223774, 0.7123475671, 0.2718375623, 0.5765622258, 0.0790395588, 0.3036180139, 0.8661624193, 0.7458813787, 0.3316468894, 0.3598238826, 0.3564588726, 0.9533675909, 0.0941833630, 0.1671014279, 0.3744245470, 0.6884431243, 0.6639540792, 0.5600517988, 0.3464486599, 0.1407444626, 0.3748867512, 0.6267043948, 0.6797115803, 0.3771546483, 0.3407714665, 0.7142020464, 0.4157987833, 0.3467378020, 0.1959037036, 0.3505086005, 0.2367857248, 0.1926927865, 0.3072352111,}; +static float A4_real[] = { 0.4042524993, -0.1761741340, 0.1884478331, 0.1338798553, -0.2058558464, 0.1518667787, -0.2994182706, 0.1278990954, 0.0683841407, -0.2599084675, 0.3770120144, 0.1078646928, -0.6265158653, -0.3417697549, -0.0240193158, -0.3266579807, -0.4909971952, 0.6041150689, -0.5127196312, 0.0818721652, -0.1553470343, -0.0487404540, -0.9369968176, -0.0969600454, -0.1454314291, 0.3228012025, 0.1654728800, -0.8333289623, 0.2672380805, -0.0766930804, 0.5634633899, 0.2183519155, -0.4321054220, -0.3172687590, -0.1719033718, 0.0410648696, 0.2299930602, -0.2477889061, -0.0568154603, 0.7838145494, 0.2811759114, 0.1454769373, -0.0107103586, 0.0660273060, -0.1516663730, 0.3232983947, 0.0214029551, -0.6719067693, 0.0070563257, 0.2811298966, 0.1034892723, 
-0.2204733193, 0.3397344053, -0.3810022175, -0.6006278992, -0.4101699591, 0.4846468866, -0.0330368355, 0.3901725411, -0.1839402318, 0.1693311334, -0.1757652909, 0.0783300847, 0.0284763128, -0.3528341353, 0.0284763351, 0.0783300996, -0.1757652611, 0.1693311334, -0.1839402169, 0.3901726007, -0.0330368727, 0.4846468866, -0.4101699889, -0.6006278992, -0.3810022473, 0.3397344053, -0.2204733044, 0.1034892797, 0.2811299264, 0.0070563257, -0.6719067693, 0.0214029253, 0.3232983649, -0.1516663581, 0.0660273209, -0.0107104480, 0.1454769671, 0.2811759412, 0.7838145494, -0.0568154603, -0.2477888763, 0.2299930602, 0.0410648733, -0.1719033718, -0.3172687292, -0.4321054220, 0.2183519304, 0.5634633899, -0.0766930580, 0.2672380805, -0.8333289623, 0.1654728800, 0.3228012025, -0.1454314291, -0.0969600007, -0.9369969368, -0.0487404391, -0.1553470194, 0.0818721056, -0.5127196312, 0.6041149497, -0.4909971952, -0.3266580105, -0.0240193084, -0.3417697251, -0.6265158653, 0.1078647077, 0.3770120740, -0.2599084973, 0.0683841407, 0.1278991103, -0.2994182706, 0.1518667936, -0.2058558464, 0.1338799149, 0.1884478480, -0.1761741489,}; +static float A4_imag[] = { 0.0000000000, -0.2517064810, 0.0402233899, -0.1953040361, 0.2836893797, 0.1237527430, -0.1748593748, 0.3956393003, -0.7109206915, 0.2203922868, 0.0103691965, 0.6710983515, -0.0153686758, 0.1540568620, -0.1386797726, 0.1154176295, 0.2694062889, -0.2754634619, 0.4594262540, -0.3653637767, 0.0615645647, -0.0805907920, -0.1759159863, 0.3430184722, 0.3291244805, 0.0760857239, 0.7272948027, 0.2362207174, -0.1441100836, 0.0191161335, -0.1222006679, -0.1619200110, 0.5663248897, 0.0361571088, 0.8961161375, 0.2176404893, 0.0378895774, -0.3734703958, -0.1491223276, -0.3209627271, -0.2839900255, 0.0641235411, -0.3485254943, -0.1408675313, -0.6297957301, -0.1629540026, -0.1241168082, 0.3097932637, -0.2648473084, 0.6043059230, 0.4242409468, -0.2538293302, -0.0215147994, -0.3851298392, -0.2823000550, 0.2781524956, -0.1345065236, -0.2057487965, 
-0.1593050361, 0.4391335845, -0.8876425028, -0.3450589478, 0.5992612839, 0.7032539248, 0.0000000000, -0.7032539248, -0.5992612839, 0.3450589776, 0.8876425028, -0.4391336143, 0.1593050361, 0.2057487965, 0.1345065534, -0.2781524658, 0.2823000550, 0.3851298392, 0.0215148088, 0.2538293302, -0.4242409170, -0.6043058634, 0.2648473084, -0.3097932935, 0.1241168082, 0.1629539728, 0.6297956705, 0.1408675313, 0.3485254943, -0.0641235560, 0.2839900553, 0.3209627569, 0.1491223276, 0.3734704256, -0.0378895663, -0.2176404595, -0.8961160779, -0.0361571386, -0.5663248897, 0.1619200408, 0.1222006381, -0.0191161409, 0.1441100836, -0.2362206876, -0.7272948027, -0.0760857165, -0.3291245103, -0.3430185020, 0.1759159863, 0.0805907845, -0.0615645051, 0.3653638065, -0.4594262540, 0.2754634321, -0.2694062889, -0.1154175997, 0.1386797577, -0.1540568769, 0.0153686665, -0.6710983515, -0.0103691965, -0.2203922868, 0.7109206915, -0.3956393003, 0.1748593748, -0.1237527281, -0.2836893499, 0.1953040063, -0.0402233899, 0.2517064810,}; + +#endif // TESTDATA_FFT_H \ No newline at end of file diff --git a/sw/applications/l_fft/utils.h b/sw/applications/l_fft/utils.h new file mode 100644 index 000000000..0c5db6678 --- /dev/null +++ b/sw/applications/l_fft/utils.h @@ -0,0 +1,162 @@ +#ifndef UTILS_H +#define UTILS_H + +#include +#include +#include + +// Define SIMULATION if you want to disable printing +// #define SIMULATION +#define TARGET + +// Scale for printing floats +#define SCL 100 + +// Asserts should FAIL or only print +// #define ASSERT_FAIL + +#ifdef SIMULATION +#pragma message ("SIMULATION environment") +#endif +#ifdef TARGET +#pragma message ("TARGET environment") +#endif + +// Enable or disable printing +#ifndef SIMULATION +#define PRINTF(...) printf(__VA_ARGS__) +#else +#define PRINTF(...) 
+#endif + +static float maxdiff = 0; +static int32_t maxdiffxp = 0; + +// Assert functions, always print if failing +void assert_closef_si(float a, float b, float prec, int idx) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiff) { + maxdiff = diff; + if (maxdiff > 0.0001) +#if defined(SIMULATION) || defined(TARGET) + PRINTF("Max diff float (scaled x10^6): %d\n", (int)(maxdiff * SCL)); +#else + PRINTF("Max diff float: %f\n", maxdiff); +#endif + } + if (diff > prec) { +#if defined(SIMULATION) || defined(TARGET) + PRINTF("AF (scaled x10^6) %d %d %d %d\n", __LINE__, (int)(a*SCL), (int)(b*SCL), idx); +#else + PRINTF("AF %d %f %f %d\n", __LINE__, a, b, idx); +#endif +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closei32_si(int32_t a, int32_t b, int32_t prec, int idx) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiffxp) { + maxdiffxp = diff; + if (maxdiffxp > 1) + PRINTF("Max diff fxp: %d\n", maxdiffxp); + } + if (diff > prec) { + printf("AI %d %d %d %d\n", __LINE__, a, b, idx); +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closef_s(float a, float b, float prec) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiff) { + maxdiff = diff; + if (maxdiff > 0.0001) +#if defined(SIMULATION) || defined(TARGET) + PRINTF("Max diff float (scaled x10^6): %d\n", (int)(maxdiff * SCL)); +#else + PRINTF("Max diff float: %f\n", maxdiff); +#endif + } + if (diff > prec) { +#if defined(SIMULATION) || defined(TARGET) + PRINTF("AF (scaled x10^6) %d %d %d\n", __LINE__, (int)(a*SCL), (int)(b*SCL)); +#else + PRINTF("AF %d %f %f %d\n", __LINE__, a, b); +#endif +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closei32_s(int32_t a, int32_t b, int32_t prec) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiffxp) { + maxdiffxp = diff; + if (maxdiffxp > 1) + PRINTF("Max diff fxp: %d\n", maxdiffxp); + } + if (diff > prec) { + 
printf("AI %d %d %d\n", __LINE__, a, b); +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closef(float a, float b, float prec) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { +#if defined(SIMULATION) || defined(TARGET) + PRINTF("AF (scaled x10^6) %d %d %d\n", __LINE__, (int)(a*SCL), (int)(b*SCL)); +#else + PRINTF("AF %d %f %f %d\n", __LINE__, a, b); +#endif +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closei32(int32_t a, int32_t b, int32_t prec) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { + printf("AI %d %d %d\n", __LINE__, a, b); +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +// Vector export for plots +#if defined(SIMULATION) || defined(TARGET) +#define VECTOR_EXPORT(...) +#else +void vectorExport(float* a, int size, char filename[]) { + FILE *filePointer; + filePointer = fopen(filename, "w"); + if (filePointer == NULL) { + printf("Failed to create file.\n"); + return; + } + for (int i=0; i Date: Thu, 16 May 2024 12:34:47 +0200 Subject: [PATCH 21/27] add malloc problem example --- sw/applications/l_malloc/main.c | 50 +++++++++++++++++++++++++++++ sw/applications/l_malloc/problem.md | 20 ++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 sw/applications/l_malloc/main.c create mode 100644 sw/applications/l_malloc/problem.md diff --git a/sw/applications/l_malloc/main.c b/sw/applications/l_malloc/main.c new file mode 100644 index 000000000..11fd984d9 --- /dev/null +++ b/sw/applications/l_malloc/main.c @@ -0,0 +1,50 @@ +#include +#include + +void* my_calloc(size_t num, size_t size) { + void *ptr = calloc(num, size); + if (ptr != NULL) { + printf("Allocated memory range: [%p - %p]\n", ptr, (char*)ptr + num * size - 1); + } else { + printf("Failed to allocate memory\n"); + } + return ptr; +} + +void* my_malloc(size_t size) { + void *ptr = malloc(size); + if (ptr != NULL) { + printf("Allocated memory range: [%p - %p]\n", ptr, 
(char*)ptr + size - 1); + } else { + printf("Failed to allocate memory\n"); + } + return ptr; +} + +int* alloc_array_ret() { + printf("allocate array and return\n"); + int* myarr = (int*)my_malloc(5 * sizeof(int)); + + for (int i = 0; i < 5; ++i) { + myarr[i] = i; + } + + return myarr; +} + +void alloc_array() { + printf("allocate arrays\n"); + int* arr = (int*)my_malloc(10 * sizeof(int)); + int* arr2 = (int*)my_malloc(5 * sizeof(int)); + int* arr3 = (int*)my_malloc(15 * sizeof(int)); + + free(arr3); + free(arr2); + free(arr); +} + +int main(int argc, char *argv[]) { + int* ret = alloc_array_ret(); + alloc_array(); + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/sw/applications/l_malloc/problem.md b/sw/applications/l_malloc/problem.md new file mode 100644 index 000000000..b64e04803 --- /dev/null +++ b/sw/applications/l_malloc/problem.md @@ -0,0 +1,20 @@ +So this simple program using dynamic memory allocation does give the following result: +```bash +Allocated memory range: [0xd590 - 0xd5a3] +allocate arrays +Allocated memory range: [0xd218 - 0xd23f] +Allocated memory range: [0xd218 - 0xd22b] +Allocated memory range: [0xd218 - 0xd253] +``` +So after the second allocation all the new arrays are allocated at the same address which then obviously leads to memory corruption + +If I coment out the call to the first function I get this output: +```bash +allocate arrays +Allocated memory range: [0xd590 - 0xd5b7] +Allocated memory range: [0xd218 - 0xd22b] +Allocated memory range: [0xd218 - 0xd253] +``` +The issue persists... + +I don't really know how this could occur and how to fix it. 
\ No newline at end of file From 0c799eb714455d088514c1d8516ae8d07ba252a9 Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Thu, 16 May 2024 13:56:17 +0200 Subject: [PATCH 22/27] add another test --- sw/applications/l_malloc/main.c | 39 ++++++++++++++++++++++++++++++++ sw/device/lib/runtime/syscalls.c | 8 +++++-- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/sw/applications/l_malloc/main.c b/sw/applications/l_malloc/main.c index 11fd984d9..19fc12ddf 100644 --- a/sw/applications/l_malloc/main.c +++ b/sw/applications/l_malloc/main.c @@ -1,5 +1,9 @@ #include #include +#include + +extern char __heap_start[]; +extern char __heap_end[]; void* my_calloc(size_t num, size_t size) { void *ptr = calloc(num, size); @@ -21,6 +25,7 @@ void* my_malloc(size_t size) { return ptr; } +// this works now int* alloc_array_ret() { printf("allocate array and return\n"); int* myarr = (int*)my_malloc(5 * sizeof(int)); @@ -32,6 +37,7 @@ int* alloc_array_ret() { return myarr; } +// this works now void alloc_array() { printf("allocate arrays\n"); int* arr = (int*)my_malloc(10 * sizeof(int)); @@ -43,8 +49,41 @@ void alloc_array() { free(arr); } +// this seems to work fine +void test_malloc_free() { + for (int i = 0; i < 10; ++i) { + int* arr = (int*)my_malloc(5 * sizeof(int)); + int* arr2 = (int*)my_malloc(5 * sizeof(int)); + free(arr); + free(arr2); + } +} + +// FIXME: this is weird, why is NULL 0x8 suddenly??? 
+void test_extensive() { + int heep_size_bytes = __heap_end - __heap_start; + printf("heep_size_bytes: %d\n", heep_size_bytes); + char* ptr = __heap_start; + printf("heep start: %p\n", ptr); + while (ptr < __heap_end) { + char* old_ptr = ptr; + ptr = (char*)malloc(1); + printf("ptr: %p\n", ptr); + if ((ptr == 0x8) || (ptr == NULL)) { + printf("Failed to allocate memory\n"); + break; + } + printf("offset: %d\n", ptr - old_ptr); + } +} + + int main(int argc, char *argv[]) { + printf("heep_start: %p\n", __heap_start); + printf("heep_end: %p\n", __heap_end); int* ret = alloc_array_ret(); alloc_array(); + test_malloc_free(); + test_extensive(); return EXIT_SUCCESS; } \ No newline at end of file diff --git a/sw/device/lib/runtime/syscalls.c b/sw/device/lib/runtime/syscalls.c index a9a7fcc20..41864c422 100644 --- a/sw/device/lib/runtime/syscalls.c +++ b/sw/device/lib/runtime/syscalls.c @@ -282,10 +282,14 @@ void *_sbrk(ptrdiff_t incr) return NULL; } - if ((brk += incr) < __heap_end) { + if ((brk + incr) < __heap_end && (brk + incr) >= old_brk) { + /* Here: ^ imo we could put <= as well, depends on the + definition of the heep region...*/ brk += incr; } else { + /* Here we need to return NULL, else the heap will just overflow*/ brk = __heap_end; + return NULL; } - return old_brk; + return (void*)old_brk; } From fa2346f5e62c3f7d307fb0e4aee4d437107e09f3 Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Thu, 16 May 2024 13:57:14 +0200 Subject: [PATCH 23/27] add my implementation --- sw/applications/l_malloc/main.c | 39 ++++++++++++++++++++++++++++++++ sw/device/lib/runtime/syscalls.c | 8 +++++-- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/sw/applications/l_malloc/main.c b/sw/applications/l_malloc/main.c index 11fd984d9..19fc12ddf 100644 --- a/sw/applications/l_malloc/main.c +++ b/sw/applications/l_malloc/main.c @@ -1,5 +1,9 @@ #include #include +#include + +extern char __heap_start[]; +extern char __heap_end[]; void* my_calloc(size_t num, size_t size) { 
void *ptr = calloc(num, size); @@ -21,6 +25,7 @@ void* my_malloc(size_t size) { return ptr; } +// this works now int* alloc_array_ret() { printf("allocate array and return\n"); int* myarr = (int*)my_malloc(5 * sizeof(int)); @@ -32,6 +37,7 @@ int* alloc_array_ret() { return myarr; } +// this works now void alloc_array() { printf("allocate arrays\n"); int* arr = (int*)my_malloc(10 * sizeof(int)); @@ -43,8 +49,41 @@ void alloc_array() { free(arr); } +// this seems to work fine +void test_malloc_free() { + for (int i = 0; i < 10; ++i) { + int* arr = (int*)my_malloc(5 * sizeof(int)); + int* arr2 = (int*)my_malloc(5 * sizeof(int)); + free(arr); + free(arr2); + } +} + +// FIXME: this is weird, why is NULL 0x8 suddenly??? +void test_extensive() { + int heep_size_bytes = __heap_end - __heap_start; + printf("heep_size_bytes: %d\n", heep_size_bytes); + char* ptr = __heap_start; + printf("heep start: %p\n", ptr); + while (ptr < __heap_end) { + char* old_ptr = ptr; + ptr = (char*)malloc(1); + printf("ptr: %p\n", ptr); + if ((ptr == 0x8) || (ptr == NULL)) { + printf("Failed to allocate memory\n"); + break; + } + printf("offset: %d\n", ptr - old_ptr); + } +} + + int main(int argc, char *argv[]) { + printf("heep_start: %p\n", __heap_start); + printf("heep_end: %p\n", __heap_end); int* ret = alloc_array_ret(); alloc_array(); + test_malloc_free(); + test_extensive(); return EXIT_SUCCESS; } \ No newline at end of file diff --git a/sw/device/lib/runtime/syscalls.c b/sw/device/lib/runtime/syscalls.c index a9a7fcc20..41864c422 100644 --- a/sw/device/lib/runtime/syscalls.c +++ b/sw/device/lib/runtime/syscalls.c @@ -282,10 +282,14 @@ void *_sbrk(ptrdiff_t incr) return NULL; } - if ((brk += incr) < __heap_end) { + if ((brk + incr) < __heap_end && (brk + incr) >= old_brk) { + /* Here: ^ imo we could put <= as well, depends on the + definition of the heep region...*/ brk += incr; } else { + /* Here we need to return NULL, else the heap will just overflow*/ brk = __heap_end; + return NULL; } 
- return old_brk; + return (void*)old_brk; } From 75047cc31d52fa3da3d7fd6dbb8cbc9b2132c846 Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Thu, 16 May 2024 14:28:28 +0200 Subject: [PATCH 24/27] Revert "add another test" This reverts commit 0c799eb714455d088514c1d8516ae8d07ba252a9. --- sw/applications/l_malloc/main.c | 39 -------------------------------- sw/device/lib/runtime/syscalls.c | 8 ++----- 2 files changed, 2 insertions(+), 45 deletions(-) diff --git a/sw/applications/l_malloc/main.c b/sw/applications/l_malloc/main.c index 19fc12ddf..11fd984d9 100644 --- a/sw/applications/l_malloc/main.c +++ b/sw/applications/l_malloc/main.c @@ -1,9 +1,5 @@ #include #include -#include - -extern char __heap_start[]; -extern char __heap_end[]; void* my_calloc(size_t num, size_t size) { void *ptr = calloc(num, size); @@ -25,7 +21,6 @@ void* my_malloc(size_t size) { return ptr; } -// this works now int* alloc_array_ret() { printf("allocate array and return\n"); int* myarr = (int*)my_malloc(5 * sizeof(int)); @@ -37,7 +32,6 @@ int* alloc_array_ret() { return myarr; } -// this works now void alloc_array() { printf("allocate arrays\n"); int* arr = (int*)my_malloc(10 * sizeof(int)); @@ -49,41 +43,8 @@ void alloc_array() { free(arr); } -// this seems to work fine -void test_malloc_free() { - for (int i = 0; i < 10; ++i) { - int* arr = (int*)my_malloc(5 * sizeof(int)); - int* arr2 = (int*)my_malloc(5 * sizeof(int)); - free(arr); - free(arr2); - } -} - -// FIXME: this is weird, why is NULL 0x8 suddenly??? 
-void test_extensive() { - int heep_size_bytes = __heap_end - __heap_start; - printf("heep_size_bytes: %d\n", heep_size_bytes); - char* ptr = __heap_start; - printf("heep start: %p\n", ptr); - while (ptr < __heap_end) { - char* old_ptr = ptr; - ptr = (char*)malloc(1); - printf("ptr: %p\n", ptr); - if ((ptr == 0x8) || (ptr == NULL)) { - printf("Failed to allocate memory\n"); - break; - } - printf("offset: %d\n", ptr - old_ptr); - } -} - - int main(int argc, char *argv[]) { - printf("heep_start: %p\n", __heap_start); - printf("heep_end: %p\n", __heap_end); int* ret = alloc_array_ret(); alloc_array(); - test_malloc_free(); - test_extensive(); return EXIT_SUCCESS; } \ No newline at end of file diff --git a/sw/device/lib/runtime/syscalls.c b/sw/device/lib/runtime/syscalls.c index 41864c422..a9a7fcc20 100644 --- a/sw/device/lib/runtime/syscalls.c +++ b/sw/device/lib/runtime/syscalls.c @@ -282,14 +282,10 @@ void *_sbrk(ptrdiff_t incr) return NULL; } - if ((brk + incr) < __heap_end && (brk + incr) >= old_brk) { - /* Here: ^ imo we could put <= as well, depends on the - definition of the heep region...*/ + if ((brk += incr) < __heap_end) { brk += incr; } else { - /* Here we need to return NULL, else the heap will just overflow*/ brk = __heap_end; - return NULL; } - return (void*)old_brk; + return old_brk; } From 68367dba70c275b81f35e559dd6a78a8ad399db5 Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Sat, 1 Jun 2024 14:08:46 +0200 Subject: [PATCH 25/27] add train example, memory overflows --- sw/applications/l_train/cnn.c | 254 +++++++++++ sw/applications/l_train/cnn.h | 142 ++++++ sw/applications/l_train/config-cnn.h | 32 ++ sw/applications/l_train/conv2dlayer.c | 150 +++++++ sw/applications/l_train/conv2dlayer.h | 124 +++++ sw/applications/l_train/fxp32.c | 66 +++ sw/applications/l_train/fxp32.h | 98 ++++ sw/applications/l_train/initial_weights_0.h | 10 + sw/applications/l_train/main.c | 74 +++ sw/applications/l_train/sample_0_normalized.h | 10 + 
sw/applications/l_train/sylt-fft/LICENSE | 48 ++ sw/applications/l_train/sylt-fft/README.md | 59 +++ sw/applications/l_train/sylt-fft/config.h | 88 ++++ sw/applications/l_train/sylt-fft/fft.h | 423 ++++++++++++++++++ sw/applications/l_train/sylt-fft/fpmath.h | 139 ++++++ sw/applications/l_train/sylt-fft/intrinsics.h | 153 +++++++ sw/applications/l_train/sylt-fft/main.c | 74 +++ sw/applications/l_train/utils.h | 228 ++++++++++ 18 files changed, 2172 insertions(+) create mode 100644 sw/applications/l_train/cnn.c create mode 100644 sw/applications/l_train/cnn.h create mode 100644 sw/applications/l_train/config-cnn.h create mode 100644 sw/applications/l_train/conv2dlayer.c create mode 100644 sw/applications/l_train/conv2dlayer.h create mode 100644 sw/applications/l_train/fxp32.c create mode 100644 sw/applications/l_train/fxp32.h create mode 100644 sw/applications/l_train/initial_weights_0.h create mode 100644 sw/applications/l_train/main.c create mode 100644 sw/applications/l_train/sample_0_normalized.h create mode 100644 sw/applications/l_train/sylt-fft/LICENSE create mode 100644 sw/applications/l_train/sylt-fft/README.md create mode 100644 sw/applications/l_train/sylt-fft/config.h create mode 100644 sw/applications/l_train/sylt-fft/fft.h create mode 100644 sw/applications/l_train/sylt-fft/fpmath.h create mode 100644 sw/applications/l_train/sylt-fft/intrinsics.h create mode 100644 sw/applications/l_train/sylt-fft/main.c create mode 100644 sw/applications/l_train/utils.h diff --git a/sw/applications/l_train/cnn.c b/sw/applications/l_train/cnn.c new file mode 100644 index 000000000..3942d87a2 --- /dev/null +++ b/sw/applications/l_train/cnn.c @@ -0,0 +1,254 @@ +/*!*********************************************************************************** + * @file cnn.c + * @author Linus Crugnola + * @date 07.06.2024 + * + * Description: + * This file contains the implementation of a CNN model with two convolutional layers. 
+ * + *************************************************************************************/ + +// Include order matters here for the sytl-fft library +// clang-format off +#include "sylt-fft/fft.h" + +#include "cnn.h" + +#include +#include +// clang-format on + +// Private variables of the CNN +static complex_t ppgfft[256]; +static complex_t outputfft[256]; + +static float output[256]; +static float layer1Output[3 * 256]; + +static complex_t gradGToW2[256]; +static float gradLToM[3 * 256]; +static float w1Velocity[3 * 21] = {0.0f}; +static float w2Velocity[3] = {0.0f}; + +// #ifndef DYN_ALLOCATION +// static fxp32 layer1OutputFxp[3 * 256]; +// #endif + +#ifdef TRACK_LOSS +static float absV[256]; +#endif + +#ifdef DYN_ALLOCATION +CnnHandle Cnn_create(Dim2D inputDim, Dim2D layer1Dim, Dim2D layer2Dim, Dim2D outputDim, Conv2DPadding layer1Pad, + Conv2DPadding layer2Pad, uint8_t fftBits, float learningRate, float momentum) { + CnnHandle self = (CnnHandle)malloc(sizeof(Cnn)); + // Create layers + self->layer1 = Conv2DLayer_create(layer1Dim, layer1Pad); + self->layer2 = Conv2DLayer_create(layer2Dim, layer2Pad); + + self->inputDim = inputDim; + self->outputDim = outputDim; + self->fftBits = fftBits; + self->learningRate = learningRate; + self->momentum = momentum; + + return self; +} + +void Cnn_destroy(CnnHandle self) { + Conv2DLayer_destroy(self->layer1); + Conv2DLayer_destroy(self->layer2); + free(self); +} +#endif + +void Cnn_forwardFxp(CnnHandle self, fxp32* input, fxp32* output) { +#ifdef DYN_ALLOCATION + fxp32* layer1Output = (fxp32*)calloc(self->inputDim.x * self->inputDim.y, sizeof(fxp32)); + Conv2DLayer_forwardFxp(self->layer1, self->inputDim, input, layer1Output); + Conv2DLayer_forwardFxp(self->layer2, self->inputDim, layer1Output, output); + free(layer1Output); +#else + // Conv2DLayer_forwardFxp(self->layer1, self->inputDim, input, layer1OutputFxp); + // Conv2DLayer_forwardFxp(self->layer2, self->inputDim, layer1OutputFxp, output); +#endif +} + +void 
Cnn_forwardFloat(CnnHandle self, float* input, float* output) { + Conv2DLayer_forwardFloat(self->layer1, self->inputDim, input, layer1Output); + Conv2DLayer_forwardFloat(self->layer2, self->inputDim, layer1Output, output); +} + +void Cnn_predictFxp(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output) { + Cnn_forwardFxp(self, acc, output); + for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { + output[i] = ppg[i] - output[i]; + } +} + +void Cnn_predictFloat(CnnHandle self, float* acc, float* ppg, float* output) { + Cnn_forwardFloat(self, acc, output); + for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { + output[i] = ppg[i] - output[i]; + } +} + +float Cnn_sampleLoss(CnnHandle self, complex_t* ypredfft, complex_t* ytruefft) { + float loss = 0.0f; + // NOTE: could be optimized by reusing .r and .i for the abs +#ifdef DYN_ALLOCATION + fxpMul* absV = (fxpMul*)calloc(self->outputDim.y * self->outputDim.x, sizeof(fxpMul)); +#elif !defined(TRACK_LOSS) + fxpMul absV[self->outputDim.y * self->outputDim.x]; +#endif + float ytr, ypr, yti, ypi; + for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { + ytr = fxp32_toFloat(ytruefft[i].r); + ypr = fxp32_toFloat(ypredfft[i].r); + yti = fxp32_toFloat(ytruefft[i].i); + ypi = fxp32_toFloat(ypredfft[i].i); + absV[i] = (ytr - ypr) * (ytr - ypr) + (yti - ypi) * (yti - ypi); + } + for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { + loss += absV[i]; + } + return loss; +} + +void fft(complex_t* cpx, int8_t bits) { + fft_fft((fft_complex_t*)cpx, bits); + for (int i = 0; i < (1 << bits); i++) { + cpx[i].r <<= bits; + cpx[i].i <<= bits; + } +} + +void arrToComplex(float* arr, complex_t* cpx, int size, int offset) { + for (int i = 0; i < size; i++) { + cpx[i].r = fxp32_fromFloat(arr[i + offset]); + cpx[i].i = 0; + } +} + +void arrOnesComplex(complex_t* cpx, int size) { + for (int i = 0; i < size; i++) { + cpx[i].r = fxp32_fromFloat(1.0f); + cpx[i].i = 0; + } +} + +void 
Cnn_sgdStep(CnnHandle self, float* acc, float* ppg) { + // Calculate the intermediate function g_i and store its conj in outputfft + for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { + outputfft[i].r = ppgfft[i].r - outputfft[i].r; + outputfft[i].i = outputfft[i].i - ppgfft[i].i; + } + // Calculate the gradient w.r to w_2 + float dL_dw2[self->layer2->dim.x * self->layer2->dim.y]; + for (int i = 0; i < self->layer2->dim.x * self->layer2->dim.y; ++i) { + // Calculate the dg_i/dw_2,i + arrToComplex(layer1Output, gradGToW2, self->inputDim.y, i * self->inputDim.y); + fft(gradGToW2, self->fftBits); + // calculate the product and create sum + dL_dw2[i] = 0.0f; + for (int j = 0; j < self->inputDim.y; ++j) { + dL_dw2[i] += 2 * (fxp32_toFloat(outputfft[j].r) * fxp32_toFloat(gradGToW2[j].r) - + fxp32_toFloat(outputfft[j].i) * fxp32_toFloat(gradGToW2[j].i)); + } + } + // Subtract update velocity and second layer weights + for (int i = 0; i < self->layer2->dim.x * self->layer2->dim.y; ++i) { + w2Velocity[i] = self->momentum * w2Velocity[i] - dL_dw2[i] * self->learningRate; + self->layer2->weightsFloat[i] += w2Velocity[i]; + } +#ifdef LOG_GRADIENTS + for (int i = 0; i < self->layer2->dim.x * self->layer2->dim.y; ++i) { + printf("dL_dw2[%d]: %f\n", i, w2Velocity[i]); + } +#endif + + // get ppg-output in time domain and store in output + for (int i = 0; i < self->outputDim.y * self->outputDim.x; i++) { + output[i] = ppg[i] - output[i]; + } + + // fill the values in the dL/dm matrix + for (int i = 0; i < self->inputDim.x; i++) { + float factor = -512 * self->layer2->weightsFloat[i]; + for (int j = 0; j < self->inputDim.y; j++) { + gradLToM[i * self->inputDim.y + j] = output[j] * factor; + } + } + + // Calculate the gradient from m to w_1 + float dL_dw1[self->layer1->dim.x * self->layer1->dim.y]; + for (int i = 0; i < self->layer1->dim.x * self->layer1->dim.y; ++i) { + dL_dw1[i] = 0.0f; + } + int cx = self->layer1->dim.x / 2; + int cy = self->layer1->dim.y / 2; + for 
(int i = 0; i < self->inputDim.x; ++i) { + for (int j = 0; j < self->inputDim.y; ++j) { + // For every item in dL/dm add the contribution to every weight + for (int m = 0; m < self->layer1->dim.x; ++m) { + for (int n = 0; n < self->layer1->dim.y; ++n) { + float curAcc = 0.0f; + if (m + i - cx >= 0 && m + i - cx < self->inputDim.x && n + j - cy >= 0 && + n + j - cy < self->inputDim.y) { + curAcc = acc[(m + i - cx) * self->inputDim.y + n + j - cy]; + } + dL_dw1[m * self->layer1->dim.y + n] += gradLToM[i * self->inputDim.y + j] * curAcc; + } + } + } + } + // Update the velocity and first layer weights + for (int i = 0; i < self->layer1->dim.x * self->layer1->dim.y; ++i) { + w1Velocity[i] = self->momentum * w1Velocity[i] - dL_dw1[i] * self->learningRate; + self->layer1->weightsFloat[i] += w1Velocity[i]; + } +#ifdef LOG_GRADIENTS + for (int i = 0; i < self->layer1->dim.x * self->layer1->dim.y; ++i) { + printf("dL_dw1[%d]: %f\n", i, w1Velocity[i]); + } +#endif +} + +// TODO: Implement batch processing for more efficient training +void Cnn_train(CnnHandle self, float* acc, float* ppg, int nEpochs, bool logAllLosses) { + // FFT of the ppg + arrToComplex(ppg, ppgfft, self->outputDim.y * self->outputDim.x, 0); + fft(ppgfft, self->fftBits); + // array to track loss +#ifdef TRACK_LOSS + // float lossArr[nEpochs]; +#endif + for (int i = 0; i < nEpochs; ++i) { + Cnn_forwardFloat(self, acc, output); + arrToComplex(output, outputfft, self->outputDim.y * self->outputDim.x, 0); + fft(outputfft, self->fftBits); + if (i == 0 || i == nEpochs - 1) { + printf("Loss epoch %d: %d\n", i, (int)(Cnn_sampleLoss(self, outputfft, ppgfft))); + } +#ifdef TRACK_LOSS + lossArr[i] = Cnn_sampleLoss(self, outputfft, ppgfft); +#endif + Cnn_sgdStep(self, acc, ppg); + } +#ifdef TRACK_LOSS + if (logAllLosses) { + for (int i = 0; i < nEpochs; ++i) { + printf("Loss epoch %d: %f\n", i, lossArr[i]); + } + } + else { + printf("First loss: %f\n", lossArr[0]); + printf("Final loss: %f\n", lossArr[nEpochs - 1]); + 
} +#endif +} + +void Cnn_freezeModel(CnnHandle self) { + Conv2DLayer_transformWeightsToFxp(self->layer1); + Conv2DLayer_transformWeightsToFxp(self->layer2); +} \ No newline at end of file diff --git a/sw/applications/l_train/cnn.h b/sw/applications/l_train/cnn.h new file mode 100644 index 000000000..751b6542c --- /dev/null +++ b/sw/applications/l_train/cnn.h @@ -0,0 +1,142 @@ +/*!*********************************************************************************** + * @file cnn.h + * @author Linus Crugnola + * @date 07.06.2024 + * + * Description: + * This file contains the definition of a CNN model with two convolutional layers. + * + *************************************************************************************/ + +#ifndef CNN_H +#define CNN_H + +#include "config-cnn.h" + +#include "conv2dlayer.h" +#include "fxp32.h" + +/** + * @brief a two layer cnn model + * + * @param layer1 the first convolutional layer + * @param layer2 the second convolutional layer + * @param inputDim the input dimension + * @param outputDim the output dimension + * @param fftBits the number of bits for the fft (i.e. 
the log2 of the outputDim.y + * @param learningRate the learning rate for the model + * @param momentum the momentum for the model + */ +typedef struct __Cnn { + Conv2DLayerHandle layer1; + Conv2DLayerHandle layer2; + Dim2D inputDim; + Dim2D outputDim; + uint8_t fftBits; + float learningRate; + float momentum; +} Cnn; + +typedef struct __Cnn* CnnHandle; + +/** + * @brief a complex fixed point number + * + * @param r the real part of the number + * @param i the imaginary part of the + * + * @note used for compatibility with the sylt-fft library + */ +typedef struct __complex_t { + int32_t r; + int32_t i; +} complex_t; + +/** + * @brief constructor and destructor for the model in case of dynamic allocation + */ +#ifdef DYN_ALLOCATION +/** + * @brief create a new model and corresponding layers + * + * @param inputDim the input dimension + * @param layer1Dim the dimension of the first layer + * @param layer2Dim the dimension of the second layer + * @param outputDim the output dimension + * @param layer1Pad the padding of the first layer + * @param layer2Pad the padding of the second layer + * @param fftBits the number of bits for the fft + * @param learningRate the learning rate for the model + * @param momentum the momentum for the model + * + * @return the new model + */ +CnnHandle Cnn_create(Dim2D inputDim, Dim2D layer1Dim, Dim2D layer2Dim, Dim2D outputDim, Conv2DPadding layer1Pad, + Conv2DPadding layer2Pad, uint8_t fftBits, float learningRate, float momentum); + +/** + * @brief destroy the model + * + * @param self the model + */ +void Cnn_destroy(CnnHandle self); +#endif + +/** + * @brief forward pass of the model for float and fixed point data + * + * @param self the model + * @param input the input data, expected size: self->inputDim.x * self->inputDim.y + */ +void Cnn_forwardFxp(CnnHandle self, fxp32* input, fxp32* output); +void Cnn_forwardFloat(CnnHandle self, float* input, float* output); + +/** + * @brief predict the output of the model for float and fixed 
point data + * + * @param self the model + * @param acc the accelerometer data, expected size: self->inputDim.x * self->inputDim.y + * @param ppg the ppg data, expected size: self->outputDim.x * self->outputDim.y + * @param output the output data, expected size: self->outputDim.x * self->outputDim.y + */ +void Cnn_predictFxp(CnnHandle self, fxp32* acc, fxp32* ppg, fxp32* output); +void Cnn_predictFloat(CnnHandle self, float* acc, float* ppg, float* output); + +/** + * @brief train the model + * + * @param self the model + * @param acc the accelerometer data, expected size: self->inputDim.x * self->inputDim.y + * @param ppg the ppg data, expected size: self->outputDim.x * self->outputDim.y + * @param nEpochs the number of epochs to train the model + * @param logAllLosses whether to log all the losses or just the first and last one + * + * @note the model is trained using SGD with momentum + * @note for now the model is trained using only one sample of acc and ppg data + */ +void Cnn_train(CnnHandle self, float* acc, float* ppg, int nEpochs, bool logAllLosses); + +/** + * @brief freeze the model, converts all weights to fixed point in order to + * use the Cnn_forwardFxp function for faster forward passing + * + * @param self the model + */ +void Cnn_freezeModel(CnnHandle self); + +/** + * @brief calculate the loss of the model for a sample + * + * @remark exported for testability, function used in Cnn_train + * + * @param self the model + * @param ypredfft the fft of the predicted output of the model + * expected size: self->outputDim.x * self->outputDim.y + * @param ytruefft the fft of the true output of the model + * expected size: self->outputDim.x * self->outputDim.y + * + * @return the loss of the model + */ +float Cnn_sampleLoss(CnnHandle self, complex_t* ypredfft, complex_t* ytruefft); + +#endif // CNN_H \ No newline at end of file diff --git a/sw/applications/l_train/config-cnn.h b/sw/applications/l_train/config-cnn.h new file mode 100644 index 
000000000..451fa3056 --- /dev/null +++ b/sw/applications/l_train/config-cnn.h @@ -0,0 +1,32 @@ +/*!*********************************************************************************** + * @file config-cnn.h + * @author Linus Crugnola + * @date 07.06.2024 + * + * Description: + * This file contains general configuration for the CNN model. + * + *************************************************************************************/ + +#ifndef CONFIG_CNN_H +#define CONFIG_CNN_H + +/** + * @def DYN_ALLOCATION + * @brief define this macro to enable dynamic allocation of the model + */ +// #define DYN_ALLOCATION + +/** + * @def TRACK_LOSS + * @brief define this macro to enable tracking of the loss + */ +// #define TRACK_LOSS + +/** + * @def LOG_GRADIENTS + * @brief define this macro to enable logging of the gradients + */ +// #define LOG_GRADIENTS + +#endif // CONFIG_CNN_H \ No newline at end of file diff --git a/sw/applications/l_train/conv2dlayer.c b/sw/applications/l_train/conv2dlayer.c new file mode 100644 index 000000000..63d4a8670 --- /dev/null +++ b/sw/applications/l_train/conv2dlayer.c @@ -0,0 +1,150 @@ +/*!*********************************************************************************** + * @file conv2dlayer.c + * @author Linus Crugnola + * @date 07.06.2024 + * + * Description: + * This file contains the implementation of a two dimensional convolutional layer. 
+ * + *************************************************************************************/ + +#include "conv2dlayer.h" + +#include +#include +#include + +#ifdef DYN_ALLOCATION +Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, Conv2DPadding padding) { /* NOTE(review): the malloc/calloc results below are dereferenced and stored without a NULL check */ + Conv2DLayerHandle self = (Conv2DLayerHandle)malloc(sizeof(Conv2DLayer)); + self->dim = dim; + self->padding = padding; + self->weightsFxp = (fxp32*)calloc(dim.x * dim.y, sizeof(fxp32)); + self->weightsFloat = (float*)calloc(dim.x * dim.y, sizeof(float)); + return self; +} + +void Conv2DLayer_destroy(Conv2DLayerHandle self) { /* releases both weight buffers and then the layer itself */ + free(self->weightsFxp); + free(self->weightsFloat); + free(self); +} +#endif + +void Conv2DLayer_setWeightsFxp(Conv2DLayerHandle self, fxp32* weights) { /* copies dim.x * dim.y values into the existing weightsFxp buffer */ + memcpy(self->weightsFxp, weights, self->dim.x * self->dim.y * sizeof(fxp32)); +} + +void Conv2DLayer_setWeightsFloat(Conv2DLayerHandle self, float* weights) { /* copies dim.x * dim.y values into the existing weightsFloat buffer */ + memcpy(self->weightsFloat, weights, self->dim.x * self->dim.y * sizeof(float)); +} + +void Conv2DLayer_transformWeightsToFxp(Conv2DLayerHandle self) { /* element-wise fxp32_fromFloat of weightsFloat into weightsFxp */ + for (int i = 0; i < self->dim.x * self->dim.y; ++i) { + self->weightsFxp[i] = fxp32_fromFloat(self->weightsFloat[i]); + } +} + +void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, int kerx, int kery, bool valid) { /* Direct 2D sliding-window sum: output(i, j) accumulates kernel[(m - i + cx) * kery + (n - j + cy)] times input[m * iny + n] (the kernel is not flipped). Taps outside the input are skipped, which acts as zero padding. valid == true: only positions where the kernel fits entirely are computed and stored with row stride (iny - kery + 1); valid == false: the output has the input's inx x iny shape. */ + // kernel must have odd dimensions so that it has a center element (cx, cy) + if (kerx % 2 != 1 || kery % 2 != 1) { + printf("Kernel size must be odd\n"); + exit(EXIT_FAILURE); + } + int cx = kerx / 2; + int cy = kery / 2; + + int mMax, mMin; + int nMax, nMin; + fxp32 sum, w, in; + + int iMin = 0; + int jMin = 0; + int iMax = inx; + int jMax = iny; + + if (valid) { + iMin = cx; + jMin = cy; + iMax = inx - cx; + jMax = iny - cy; + } + + for (int i = iMin; i < iMax; ++i) { + for (int j = jMin; j < jMax; ++j) { + mMin = i - cx >= 0 ? i - cx : 0; + mMax = i + cx + 1 < inx ? i + cx + 1 : inx; + nMin = j - cy >= 0 ? j - cy : 0; + nMax = j + cy + 1 < iny ? 
j + cy + 1 : iny; + sum = 0; + for (int m = mMin; m < mMax; ++m) { + for (int n = nMin; n < nMax; ++n) { + in = input[m * iny + n]; + w = kernel[(m - i + cx) * kery + (n - j + cy)]; + sum += fxp32_mul(w, in); + } + } + if (valid) + output[(i - cx) * (iny - kery + 1) + (j - cy)] = sum; + else + output[i * iny + j] = sum; + } + } +} + +void convolve2DFloat(float* input, float* output, float* kernel, int inx, int iny, int kerx, int kery, bool valid) { /* float twin of convolve2DFxp: identical loop structure and indexing, accumulating w * in instead of fxp32_mul(w, in) */ + // kernel must have odd dimensions so that it has a center element (cx, cy) + if (kerx % 2 != 1 || kery % 2 != 1) { + printf("Kernel size must be odd\n"); + exit(EXIT_FAILURE); + } + int cx = kerx / 2; + int cy = kery / 2; + + int mMax, mMin; + int nMax, nMin; + float sum, w, in; + + int iMin = 0; + int jMin = 0; + int iMax = inx; + int jMax = iny; + + if (valid) { + iMin = cx; + jMin = cy; + iMax = inx - cx; + jMax = iny - cy; + } + + for (int i = iMin; i < iMax; ++i) { + for (int j = jMin; j < jMax; ++j) { + mMin = i - cx >= 0 ? i - cx : 0; + mMax = i + cx + 1 < inx ? i + cx + 1 : inx; + nMin = j - cy >= 0 ? j - cy : 0; + nMax = j + cy + 1 < iny ? 
j + cy + 1 : iny; + sum = .0f; + for (int m = mMin; m < mMax; ++m) { + for (int n = nMin; n < nMax; ++n) { + in = input[m * iny + n]; + w = kernel[(m - i + cx) * kery + (n - j + cy)]; + sum += w * in; + } + } + if (valid) + output[(i - cx) * (iny - kery + 1) + (j - cy)] = sum; + else + output[i * iny + j] = sum; + } + } +} + +void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output) { /* runs convolve2DFxp with the layer's fixed point weights; valid is true only for VALID padding */ + convolve2DFxp(input, output, self->weightsFxp, inputDim.x, inputDim.y, self->dim.x, self->dim.y, + self->padding == VALID); +} + +void Conv2DLayer_forwardFloat(Conv2DLayerHandle self, Dim2D inputDim, float* input, float* output) { /* runs convolve2DFloat with the layer's float weights; valid is true only for VALID padding */ + convolve2DFloat(input, output, self->weightsFloat, inputDim.x, inputDim.y, self->dim.x, self->dim.y, + self->padding == VALID); +} \ No newline at end of file diff --git a/sw/applications/l_train/conv2dlayer.h b/sw/applications/l_train/conv2dlayer.h new file mode 100644 index 000000000..ea4e1b7d1 --- /dev/null +++ b/sw/applications/l_train/conv2dlayer.h @@ -0,0 +1,124 @@ +/*!*********************************************************************************** + * @file conv2dlayer.h + * @author Linus Crugnola + * @date 07.06.2024 + * + * Description: + * This file contains the definition of a two dimensional convolutional layer. 
+ * + *************************************************************************************/ + +#ifndef CONV2DLAYER_H +#define CONV2DLAYER_H + +#include "config-cnn.h" +#include +#include + +#include "fxp32.h" + +/** + * @brief a 2D dimension + * + * @param x the rows of a matrix + * @param y the columns of a matrix + */ +typedef struct __Dim2D { + uint16_t x; + uint16_t y; +} Dim2D; + +/** + * @brief a padding type for the convolutional layer + * + * VALID: the output only covers positions where the kernel fits entirely inside the input + * SAME: the output has the same size as the input; kernel taps outside the input are skipped + */ +typedef enum __Conv2DPadding { + VALID, + SAME +} Conv2DPadding; + +/** + * @brief a convolutional layer + * + * @param dim the dimension of the layer's kernel + * @param padding the padding type + * @param weightsFxp the weights of the layer in fixed point + * @param weightsFloat the weights of the layer in float + */ +typedef struct __Conv2DLayer { + Dim2D dim; + Conv2DPadding padding; + fxp32* weightsFxp; + float* weightsFloat; +} Conv2DLayer; + +typedef struct __Conv2DLayer* Conv2DLayerHandle; + +/** + * @brief constructor and destructor for the layer in case of dynamic allocation + */ +#ifdef DYN_ALLOCATION +/** + * @brief create a new convolutional layer + * + * @param dim the dimension of the layer's kernel + * @param padding the padding type + * + * @return the new layer with both weight buffers zero-initialized; release it with Conv2DLayer_destroy + */ +Conv2DLayerHandle Conv2DLayer_create(Dim2D dim, Conv2DPadding padding); + +/** + * @brief destroy the convolutional layer + * + * @param self the layer to destroy + */ +void Conv2DLayer_destroy(Conv2DLayerHandle self); +#endif + +/** + * @brief set the weights of the layer (the values are copied into the layer's existing weight buffer) + * + * @param self the layer + * @param weights the weights to set, expected size: self->dim.x * self->dim.y + */ +void Conv2DLayer_setWeightsFxp(Conv2DLayerHandle self, fxp32* weights); +void Conv2DLayer_setWeightsFloat(Conv2DLayerHandle self, float* weights); + +/** + * @brief transform the weights of the layer to fixed point (weightsFloat is converted element-wise into weightsFxp) + * + * @param self the layer + */ +void Conv2DLayer_transformWeightsToFxp(Conv2DLayerHandle self); + +/** + * @brief forward pass of the layer + * + * 
@param self the layer + * @param inputDim the input dimension + * @param input the input data, expected size: inputDim.x * inputDim.y + * @param output the output data, size is expected to match padding and inputDim + */ +void Conv2DLayer_forwardFxp(Conv2DLayerHandle self, Dim2D inputDim, fxp32* input, fxp32* output); +void Conv2DLayer_forwardFloat(Conv2DLayerHandle self, Dim2D inputDim, float* input, float* output); + +/** + * @brief 2D convolution of fixed point and float data + * + * @remark exported for testability, functions used in the forward pass of the model above + * + * @param input the input data, inx * iny elements stored row by row + * @param output the output data: inx * iny elements if valid is false, (inx - kerx + 1) * (iny - kery + 1) elements if valid is true + * @param kernel the kernel, kerx * kery elements stored row by row + * @param inx the rows of the input data + * @param iny the columns of the input data + * @param kerx the rows of the kernel + * @param kery the columns of the kernel + * @param valid true for VALID padding, false for SAME padding (kernel taps outside the input are skipped) + * + * @note if a kernel dimension is not odd, an error message is printed and the process terminates via exit(EXIT_FAILURE) + */ +void convolve2DFxp(fxp32* input, fxp32* output, fxp32* kernel, int inx, int iny, int kerx, int kery, bool valid); +void convolve2DFloat(float* input, float* output, float* kernel, int inx, int iny, int kerx, int kery, bool valid); + +#endif // CONV2DLAYER_H \ No newline at end of file diff --git a/sw/applications/l_train/fxp32.c b/sw/applications/l_train/fxp32.c new file mode 100644 index 000000000..90a9a0cc6 --- /dev/null +++ b/sw/applications/l_train/fxp32.c @@ -0,0 +1,66 @@ +/*!*********************************************************************************** + * @file fxp32.c + * @author Linus Crugnola + * @date 07.06.2024 + * + * Description: + * This file contains implementations for fixed point arithmetic with 32 bits. 
+ * + *************************************************************************************/ + +#include "fxp32.h" + +fxp32 fxp32_fromFloat(float n) { /* scales by 2^FRACTIONAL_BITS; the cast truncates toward zero, there is no rounding or saturation */ + return (fxp32)(n * (1 << FRACTIONAL_BITS)); +} + +float fxp32_toFloat(fxp32 x) { + return (float)x / (1 << FRACTIONAL_BITS); +} + +float fxp32_fxpMulToFloat(fxpMul x) { + return (float)x / ((fxpMul)1 << FRACTIONAL_BITS); +} + +fxp32 fxp32_fromInt(int n) { /* NOTE(review): left-shifting a negative int is undefined behaviour in C, and large magnitudes overflow the 32 bit result */ + return n << FRACTIONAL_BITS; +} + +fxp32 fxp32_mul(fxp32 a, fxp32 b) { /* 64 bit intermediate product, shifted back and truncated to 32 bits without saturation */ + return (fxp32)(((fxpMul)a * (fxpMul)b) >> FRACTIONAL_BITS); +} + +fxpMul fxp32_mul64(fxp32 a, fxp32 b) { /* same as fxp32_mul but keeps the 64 bit result */ + return ((fxpMul)a * (fxpMul)b) >> FRACTIONAL_BITS; +} + +fxpMul fxp32_pow2(fxp32 a) { + return ((fxpMul)a * (fxpMul)a) >> FRACTIONAL_BITS; +} + +fxp32 fxp32_div(fxp32 a, fxp32 b) { /* NOTE(review): b == 0 is not checked (integer division by zero), and a negative dividend is left-shifted */ + return (fxp32)(((fxpMul)a << FRACTIONAL_BITS) / b); +} + +bool fxp32_closefxp(fxp32 a, fxp32 b, fxp32 prec) { /* true if |a - b| <= prec, with prec already in fixed point; NOTE(review): not declared in fxp32.h, so it could be static */ + fxp32 diff = a - b; + if (diff < 0) + diff = -diff; + return diff <= prec; +} + +fxp32 fxp32_sqrt(fxp32 a) { /* Newton (Babylonian) iteration starting at a / 2, repeated until guess and a / guess differ by at most eps. NOTE(review): for raw inputs 0 and 1 the initial guess is 0, so fxp32_div divides by zero; negative inputs are not rejected. */ + fxp32 guess = a >> 1; + fxp32 eps = fxp32_fromFloat(0.0001); + while (fxp32_closefxp(guess, fxp32_div(a, guess), eps) == false) { + guess = (guess + fxp32_div(a, guess)) >> 1; + } + return guess; +} + +bool fxp32_close(fxp32 a, fxp32 b, float prec) { /* true if |a - b| <= fxp32_fromFloat(prec) */ + fxp32 diff = a - b; + if (diff < 0) + diff = -diff; + return diff <= fxp32_fromFloat(prec); +} diff --git a/sw/applications/l_train/fxp32.h b/sw/applications/l_train/fxp32.h new file mode 100644 index 000000000..e986fed13 --- /dev/null +++ b/sw/applications/l_train/fxp32.h @@ -0,0 +1,98 @@ +/*!*********************************************************************************** + * @file fxp32.h + * @author Linus Crugnola + * @date 07.06.2024 + * + * Description: + * This file contains definitions for fixed point arithmetic with 32 bits. 
+ * + *************************************************************************************/ + +#ifndef FXP_H +#define FXP_H + +#include +#include + +/** + * @brief the number of bits used for the fractional part + * + * @note with int32_t storage this leaves 1 sign bit and 8 integer bits, i.e. a representable range of [-256, 256) + */ +#define FRACTIONAL_BITS 23 + +/** + * @brief 32 bit and extended 64 bit fixed point number types + */ +typedef int32_t fxp32; +typedef int64_t fxpMul; + +/** + * @brief convert a float/integer to a fixed point number + * + * @param n the number to convert + * + * @return the fixed point number + */ +fxp32 fxp32_fromFloat(float n); +fxp32 fxp32_fromInt(int n); + +/** + * @brief convert a fixed point number to a float + * + * @param x the fixed point number to convert + * + * @return the float + */ +float fxp32_toFloat(fxp32 x); +float fxp32_fxpMulToFloat(fxpMul x); + +/** + * @brief multiply two fixed point numbers + * + * @param a the first number + * @param b the second number + * + * @return the result of the multiplication in 32 or 64 bit format + */ +fxp32 fxp32_mul(fxp32 a, fxp32 b); +fxpMul fxp32_mul64(fxp32 a, fxp32 b); + +/** + * @brief divide two fixed point numbers + * + * @param a the dividend + * @param b the divisor, must not be 0 (not checked by the implementation) + * + * @return the result of the division in 32 bit format + */ +fxp32 fxp32_div(fxp32 a, fxp32 b); + +/** + * @brief compute the square of a fixed point number + * + * @param a the number to square + * + * @return the square of the number in 64 bit format + */ +fxpMul fxp32_pow2(fxp32 a); + +/** + * @brief compute the square root of a fixed point number + * + * @param a the number to compute the square root of + * + * @return the square root of the number in 32 bit format + * + * @note the implementation iterates from a / 2 and divides by the current guess, so raw inputs 0 and 1 divide by zero; negative inputs are not rejected + */ +fxp32 fxp32_sqrt(fxp32 a); + +/** + * @brief check if two fixed point numbers are close + * + * @param a the first number + * @param b the second number + * @param prec the precision, given as a float and converted to fixed point + * + * @return true if the absolute difference of the numbers is less than or equal to the precision + */ +bool fxp32_close(fxp32 a, fxp32 b, float prec); + +#endif // FXP_H \ No newline at 
end of file diff --git a/sw/applications/l_train/initial_weights_0.h b/sw/applications/l_train/initial_weights_0.h new file mode 100644 index 000000000..5c8770b72 --- /dev/null +++ b/sw/applications/l_train/initial_weights_0.h @@ -0,0 +1,10 @@ +#ifndef INITIAL_WEIGHTS_0_H +#define INITIAL_WEIGHTS_0_H + +#pragma GCC diagnostic ignored "-Wunused-variable" + +static float weights1[] = { 0.00074f, 0.01377f, 0.17394f, -0.15696f, 0.20025f, 0.07754f, -0.18016f, 0.13963f, -0.11725f, -0.15563f, 0.14295f, -0.21137f, 0.16222f, 0.15923f, 0.09191f, 0.11909f, -0.08510f, 0.10470f, -0.17272f, -0.15406f, 0.01272f, -0.02182f, 0.04244f, -0.09101f, 0.09821f, -0.11938f, -0.16599f, 0.06695f, 0.01941f, 0.18315f, 0.16964f, -0.09757f, 0.08488f, -0.13062f, 0.16665f, 0.05306f, -0.10774f, -0.15542f, 0.21532f, 0.20324f, -0.19912f, 0.15838f, 0.03006f, 0.13590f, 0.21169f, 0.12329f, -0.19731f, -0.21282f, -0.05060f, 0.10830f, 0.02311f, -0.09022f, -0.11763f, 0.01359f, -0.21796f, 0.02539f, 0.03854f, -0.09332f, 0.11217f, 0.03479f, 0.07247f, -0.07976f, -0.16356f,}; + +static float weights2[] = { -0.44829f, -0.64195f, 0.45731f,}; + +#endif // INITIAL_WEIGHTS_0_H diff --git a/sw/applications/l_train/main.c b/sw/applications/l_train/main.c new file mode 100644 index 000000000..7865dc611 --- /dev/null +++ b/sw/applications/l_train/main.c @@ -0,0 +1,74 @@ +#include "fxp32.h" +#include "cnn.h" +#include "conv2dlayer.h" +#include "utils.h" +#include +#include +#include +#include + +#define COMP_PREC 0.0001f +#define COMP_PREC_I32 512 + +void floatToFxpVector(float* a, fxp32* b, int size) { /* element-wise fxp32_fromFloat of a[0..size) into b */ + for (int i = 0; i < size; ++i) { + b[i] = fxp32_fromFloat(a[i]); + } +} + +void FxpToFloatVector(fxp32* a, float* b, int size) { /* element-wise fxp32_toFloat of a[0..size) into b */ + for (int i = 0; i < size; ++i) { + b[i] = fxp32_toFloat(a[i]); + } +} + +void compareVectorsFloat(float* a, float* b, int size, float prec) { /* checks every pair a[i], b[i] with assert_closef_si, which is defined outside this file (presumably utils.h - TODO confirm) */ + for (int i = 0; i < size; ++i) { + assert_closef_si(a[i], b[i], prec, i); + } +} + +void compareVectorsFxp(fxp32* a, fxp32* b, int size, float prec) 
{ /* checks every pair a[i], b[i] with assert_closei32_si, which is defined outside this file (presumably utils.h - TODO confirm) */ + for (int i = 0; i < size; ++i) { + assert_closei32_si(a[i], b[i], prec, i); + } +} + +#include "sample_0_normalized.h" +#include "initial_weights_0.h" + +void test_lossTrack1() { + + // Create the CNN on the stack: layer1 is a 3x21 kernel with SAME padding, layer2 a 3x1 kernel with VALID padding, mapping a 3x256 input to a 1x256 output. NOTE(review): weightsFxp is never set for either layer. + Conv2DLayer layer1; + layer1.dim = (Dim2D){3u, 21u}; + layer1.padding = SAME; + layer1.weightsFloat = weights1; + + Conv2DLayer layer2; + layer2.dim = (Dim2D){3u, 1u}; + + layer2.padding = VALID; + layer2.weightsFloat = weights2; + + Cnn cnn; + cnn.layer1 = &layer1; + cnn.layer2 = &layer2; + cnn.inputDim = (Dim2D){3u, 256u}; + cnn.outputDim = (Dim2D){1u, 256u}; + cnn.fftBits = 8; + cnn.learningRate = 1e-7f; + cnn.momentum = 1e-2f; + + // Train the model on the single (xin_0, ppg_0) sample for 1000 epochs, without logging every loss + Cnn_train(&cnn, xin_0, ppg_0, 1000, false); +} + +int main() { + PRINTF("\033[1;93m====== Test Train ========\n"); + PRINTF("\033[0m====== Test Weights 1 ====\n"); + test_lossTrack1(); + PRINTF("\033[1;32m====== Test passed =======\n"); + PRINTF("\033[0m====== Test Train end ====\n\n"); + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/sw/applications/l_train/sample_0_normalized.h b/sw/applications/l_train/sample_0_normalized.h new file mode 100644 index 000000000..bf8266be3 --- /dev/null +++ b/sw/applications/l_train/sample_0_normalized.h @@ -0,0 +1,10 @@ +#ifndef SAMPLE_0_NORMALIZED_H +#define SAMPLE_0_NORMALIZED_H + +#pragma GCC diagnostic ignored "-Wunused-variable" + +static float xin_0[] = { -1.17213f, -0.71518f, -0.56286f, 0.19873f, 0.65568f, 1.11263f, 0.80800f, 0.50336f, -0.10591f, -0.25823f, -0.10591f, 0.04641f, 0.50336f, 0.50336f, 0.50336f, 0.50336f, 0.35104f, 0.65568f, 0.80800f, 0.50336f, 0.35104f, 0.65568f, 0.96031f, 0.80800f, 0.19873f, 0.04641f, -0.10591f, -0.10591f, -0.10591f, -0.25823f, -0.71518f, -1.01981f, -1.93371f, -1.47676f, -1.01981f, 0.35104f, 0.96031f, 1.26495f, 1.26495f, 0.80800f, 0.04641f, -0.10591f, -0.25823f, -0.25823f, 0.19873f, 0.19873f, 0.19873f, 0.19873f, 0.50336f, 0.50336f, 0.80800f, 0.80800f, 0.65568f, 0.50336f, 0.80800f, 1.11263f, 0.96031f, 0.19873f, -0.10591f, 
-0.25823f, -0.25823f, -0.41054f, -0.56286f, -0.71518f, -1.01981f, -1.32445f, -1.32445f, -0.71518f, -0.10591f, 0.50336f, 0.65568f, 0.96031f, 0.96031f, 0.50336f, 0.19873f, 0.04641f, -0.10591f, 0.04641f, 0.35104f, 0.50336f, 0.50336f, 0.50336f, 0.35104f, 0.19873f, 0.04641f, -0.25823f, -0.10591f, 0.50336f, 0.65568f, 1.56958f, 1.72190f, 1.26495f, 0.50336f, 0.19873f, -0.10591f, -0.10591f, 0.04641f, 0.04641f, -0.10591f, -0.25823f, -0.10591f, 0.19873f, 0.50336f, 0.50336f, -0.10591f, -1.47676f, -1.47676f, -0.10591f, 0.19873f, 1.11263f, 1.26495f, 1.26495f, 0.80800f, 0.50336f, 0.19873f, 0.35104f, 0.50336f, 0.35104f, 0.19873f, 0.50336f, 0.50336f, 0.65568f, 0.65568f, 0.19873f, -0.10591f, -0.56286f, -1.17213f, -1.62908f, -1.93371f, -1.47676f, -0.86749f, -0.41054f, 0.19873f, 0.50336f, -0.10591f, -1.17213f, -1.47676f, -2.84762f, -3.76152f, -3.30457f, -3.60920f, -4.06615f, -4.52311f, -3.91384f, -3.30457f, -3.15225f, 1.41726f, 1.87422f, 1.11263f, 0.35104f, -0.41054f, -0.10591f, 0.65568f, 0.65568f, 0.50336f, 0.50336f, 0.50336f, -0.41054f, -0.71518f, -1.17213f, -1.17213f, -1.17213f, -1.32445f, -1.17213f, -1.32445f, -1.17213f, -0.41054f, 0.19873f, 0.50336f, 0.65568f, 1.11263f, 1.26495f, 1.26495f, 0.96031f, 0.50336f, 0.04641f, -0.10591f, -0.10591f, 0.50336f, 0.50336f, 0.80800f, 0.96031f, 0.96031f, 0.65568f, -0.10591f, -0.25823f, -0.10591f, 0.65568f, 0.96031f, 1.26495f, 1.11263f, 0.80800f, 0.65568f, 0.35104f, 0.19873f, 0.50336f, 0.50336f, 0.35104f, 0.19873f, 0.19873f, 0.80800f, 0.65568f, 0.50336f, 0.50336f, 0.35104f, -0.10591f, -0.10591f, 0.35104f, 0.35104f, -0.25823f, -0.41054f, -0.41054f, -0.25823f, -0.41054f, -0.86749f, -1.17213f, -1.47676f, -1.17213f, -1.01981f, -1.01981f, 0.04641f, 0.96031f, 1.26495f, 1.56958f, 1.41726f, 0.65568f, 0.35104f, 0.19873f, 0.04641f, 0.35104f, 0.50336f, 0.50336f, 0.35104f, 0.19873f, 0.35104f, 0.50336f, 0.65568f, 0.50336f, 0.50336f, 0.65568f, 0.65568f, 0.04641f, -0.25823f, -0.25823f, -0.25823f, -0.10591f, -0.10591f, -0.41054f, -0.71518f, -1.01981f, 
-1.17213f, -1.01981f, -0.41054f, 0.04641f, 0.65568f, 0.65568f, -0.34256f, -0.13987f, 0.19793f, 0.67086f, 0.40062f, -0.47768f, -1.01817f, -0.95061f, -0.88305f, -0.74792f, -0.13987f, 0.33306f, 1.00867f, 1.41404f, 2.22477f, 1.61672f, 0.73842f, -0.54524f, -0.34256f, -0.41012f, -0.61280f, -1.08573f, -1.28841f, -1.49110f, -1.28841f, -1.01817f, -0.54524f, -0.20743f, 0.60330f, 0.94111f, 1.14379f, 1.68428f, 1.00867f, -0.27500f, -0.20743f, -0.00475f, 0.06281f, -0.27500f, -0.47768f, -0.81549f, -0.61280f, -0.61280f, -0.13987f, 0.33306f, 0.67086f, 0.94111f, 1.41404f, 1.75184f, 1.27891f, -0.34256f, -0.68036f, -0.34256f, -0.34256f, -0.74792f, -1.01817f, -1.42354f, -1.49110f, -1.42354f, -1.08573f, -0.81549f, -0.00475f, 0.46818f, 1.07623f, 1.27891f, 2.08965f, 1.88696f, 0.33306f, 0.13037f, 0.26549f, 0.19793f, 0.26549f, -0.00475f, -0.27500f, -0.74792f, -0.88305f, -1.08573f, -0.81549f, -0.61280f, -0.20743f, 0.13037f, 1.27891f, 1.48160f, 2.08965f, 1.61672f, 0.87355f, -0.41012f, -0.41012f, -0.47768f, -0.54524f, -1.01817f, -1.28841f, -1.82890f, -1.96403f, -1.82890f, -1.49110f, -1.15329f, -0.41012f, -0.00475f, 0.46818f, 0.67086f, 1.07623f, 1.07623f, 1.34647f, 1.88696f, 2.08965f, 0.46818f, 0.13037f, 0.13037f, 0.33306f, 0.13037f, 0.19793f, 0.26549f, -0.27500f, -0.68036f, -1.28841f, -1.62622f, -2.09915f, -2.09915f, -1.55866f, -1.22085f, -1.08573f, -0.47768f, -0.20743f, 0.40062f, 0.60330f, 1.21135f, 2.02209f, 1.14379f, 0.26549f, 0.80598f, 0.80598f, 0.53574f, -0.27500f, -0.74792f, -1.55866f, -1.96403f, -1.96403f, -1.69378f, -1.55866f, -1.82890f, -1.89647f, -1.01817f, -0.41012f, 0.87355f, 3.37331f, 3.03551f, 0.94111f, 0.94111f, 0.73842f, 0.53574f, -0.20743f, -0.61280f, -1.28841f, -1.55866f, -1.62622f, -1.42354f, -1.22085f, -0.54524f, -0.27500f, 0.40062f, 1.00867f, 1.21135f, 1.48160f, 2.02209f, 1.54916f, 0.94111f, 0.60330f, 0.33306f, 0.13037f, -0.20743f, -0.61280f, -1.49110f, -1.69378f, -1.55866f, -1.22085f, -0.54524f, -0.20743f, 0.46818f, 0.94111f, 1.21135f, 1.41404f, 1.34647f, 1.54916f, 
1.27891f, 0.40062f, 0.19793f, 0.13037f, 0.73842f, 0.46818f, -0.27500f, -0.61280f, -0.74792f, -0.88305f, -1.15329f, -0.95061f, -0.61280f, -0.07231f, 0.19793f, 0.94111f, 1.07623f, 1.61672f, 1.27891f, -0.00475f, -0.13987f, 0.26549f, -0.07231f, -0.27500f, -0.61280f, -0.54524f, -0.47768f, -0.34256f, -0.27500f, -0.07231f, 0.40062f, 0.73842f, 0.87355f, 1.21135f, -0.00475f, -0.68036f, -0.27500f, 0.46818f, 0.19793f, -0.00475f, 0.26549f, 0.19793f, -0.34256f, -0.34256f, -0.27500f, -0.20743f, 0.33306f, 0.67086f, 0.80598f, 1.27891f, 1.00867f, -0.13987f, -0.34256f, 0.13037f, 0.19793f, -0.07231f, -0.54524f, -0.74792f, -0.74792f, -0.74792f, -0.68036f, -0.54524f, -0.00475f, 0.13037f, 0.53574f, 0.60330f, 0.94111f, 0.73842f, 0.13037f, 0.06281f, 0.26549f, 0.53574f, 0.46818f, 0.89063f, 0.09342f, 0.09342f, 0.25287f, -0.06602f, -0.38490f, -0.54435f, -0.54435f, -1.02267f, -1.18211f, -0.86323f, -0.70379f, -0.70379f, -0.86323f, -0.54435f, -0.54435f, -0.22546f, 0.41231f, 0.25287f, 0.41231f, 0.57175f, 0.57175f, 0.57175f, 0.57175f, 0.41231f, 0.41231f, -0.22546f, -0.38490f, -0.22546f, -0.06602f, -0.22546f, -0.06602f, 1.05008f, 0.41231f, 0.09342f, -0.06602f, -0.06602f, -0.06602f, -0.22546f, -0.22546f, -0.38490f, -0.70379f, -1.18211f, -1.02267f, -1.02267f, -0.86323f, -0.38490f, -0.38490f, -0.70379f, -0.06602f, 0.09342f, 0.09342f, 0.09342f, 0.09342f, -0.06602f, -0.06602f, -0.06602f, 0.09342f, -0.22546f, -0.22546f, -0.22546f, -0.06602f, 0.25287f, 0.25287f, 0.57175f, 1.20952f, 1.36896f, 0.25287f, 0.09342f, -0.38490f, -0.22546f, -0.22546f, -0.22546f, -0.22546f, -0.54435f, -1.18211f, -1.66044f, -1.50100f, -1.18211f, -1.02267f, -0.86323f, -1.02267f, -0.86323f, -0.70379f, -0.06602f, 1.20952f, 1.20952f, 1.05008f, 1.20952f, 1.52840f, 1.52840f, 1.20952f, 0.41231f, 0.09342f, -0.38490f, -0.38490f, -0.06602f, 0.09342f, 0.57175f, 0.89063f, 1.05008f, 0.89063f, 1.36896f, 2.00673f, 2.80394f, 2.00673f, 0.73119f, 0.41231f, 0.73119f, 0.57175f, -0.06602f, -0.06602f, -0.22546f, -0.38490f, -0.86323f, -1.18211f, 
-1.34156f, -0.86323f, -0.70379f, -0.54435f, -0.38490f, 0.09342f, 0.25287f, 0.73119f, 1.05008f, 1.20952f, 0.89063f, 1.05008f, 1.20952f, 1.36896f, 0.73119f, 0.41231f, -0.22546f, -0.38490f, -0.86323f, -1.02267f, -1.34156f, -1.50100f, -1.18211f, -0.38490f, -0.38490f, 0.57175f, 1.05008f, 1.05008f, 0.25287f, 1.84729f, 2.48505f, 0.73119f, -0.70379f, -1.18211f, -1.66044f, -1.50100f, -0.86323f, -0.38490f, -0.38490f, -0.54435f, -0.86323f, -1.66044f, -1.66044f, -1.50100f, -1.66044f, -1.97932f, -2.61709f, -2.77653f, -2.61709f, -1.66044f, -1.18211f, -1.66044f, -1.02267f, -0.54435f, -0.54435f, -0.38490f, -0.22546f, -0.06602f, 0.09342f, 0.89063f, 1.36896f, 2.16617f, 2.80394f, 3.28226f, 3.92003f, 3.92003f, 2.96338f, 2.80394f, 1.84729f, 1.36896f, 0.41231f, -0.06602f, -0.22546f, -0.70379f, -0.54435f, -0.38490f, -0.70379f, -1.18211f, -1.18211f, -0.86323f, -0.22546f, 0.09342f, 0.73119f, 0.73119f, -0.22546f, -0.22546f, 0.73119f, 0.73119f, 0.09342f, 0.25287f, 0.09342f, 0.09342f, 0.25287f, 0.41231f, 0.25287f, 0.09342f, -0.06602f, 0.09342f, 0.25287f, 0.09342f, 0.57175f, 0.73119f, 0.57175f, 0.57175f, 0.41231f, -0.06602f, -0.22546f, 0.09342f, 0.09342f, -0.38490f, -0.70379f, -1.18211f, -1.34156f, -1.02267f, -0.86323f, -0.70379f, -0.70379f, -0.54435f, 0.25287f, 0.25287f, 0.41231f, 0.25287f, 0.09342f, -0.06602f, 0.09342f, 0.09342f, -0.06602f, -0.70379f, -0.70379f, -0.22546f, -0.06602f, 0.09342f, 0.09342f, 0.09342f, 1.20952f, 0.89063f, 0.41231f, 0.25287f, -0.06602f, -0.06602f,}; + +static float ppg_0[] = { -0.92840f, -1.17374f, -1.44540f, -1.65505f, -1.65773f, -1.29418f, -0.60902f, 0.07704f, 0.45709f, 0.50169f, 0.37055f, 0.22201f, 0.19168f, 0.38750f, 0.80770f, 1.33004f, 1.80555f, 2.09371f, 2.07765f, 1.74489f, 1.23726f, 0.77736f, 0.51909f, 0.43924f, 0.33977f, 0.00745f, -0.55237f, -1.08319f, -1.31381f, -1.18489f, -0.85034f, -0.56441f, -0.57779f, -0.95204f, -1.45030f, -1.76567f, -1.80716f, -1.62070f, -1.21076f, -0.65184f, -0.18659f, 0.01592f, 0.04804f, 0.08908f, 0.22156f, 0.45129f, 0.74525f, 
1.04367f, 1.31711f, 1.55932f, 1.67396f, 1.51918f, 1.12351f, 0.73097f, 0.57574f, 0.67967f, 0.86212f, 0.89423f, 0.63150f, 0.08596f, -0.57199f, -1.09033f, -1.25849f, -1.06401f, -0.76157f, -0.69109f, -0.97301f, -1.43023f, -1.76523f, -1.76166f, -1.40971f, -0.88513f, -0.39669f, -0.04920f, 0.15777f, 0.25769f, 0.26706f, 0.19881f, 0.09220f, 0.02172f, 0.07704f, 0.30408f, 0.65291f, 1.04367f, 1.45762f, 1.84793f, 2.06695f, 2.04197f, 1.87737f, 1.64854f, 1.30729f, 0.85097f, 0.34200f, -0.20666f, -0.76916f, -1.21790f, -1.45789f, -1.54978f, -1.60598f, -1.64434f, -1.56807f, -1.22549f, -0.54791f, 0.29695f, 0.94865f, 1.13466f, 0.91297f, 0.58109f, 0.36876f, 0.30364f, 0.32371f, 0.37590f, 0.40133f, 0.36163f, 0.26974f, 0.12833f, -0.07329f, -0.26822f, -0.33156f, -0.23031f, -0.03627f, 0.17517f, 0.34958f, 0.46021f, 0.53158f, 0.58154f, 0.58823f, 0.55165f, 0.48296f, 0.34691f, 0.08506f, -0.29008f, -0.61972f, -0.64426f, -0.26778f, 0.25725f, 0.56637f, 0.51641f, 0.19524f, -0.25573f, -0.72633f, -1.15144f, -1.48153f, -1.63007f, -1.47395f, -1.00780f, -0.46315f, -0.06794f, 0.24699f, 0.67477f, 1.18195f, 1.58653f, 1.82161f, 1.88540f, 1.72526f, 1.31532f, 0.74525f, 0.15331f, -0.36502f, -0.75354f, -0.92483f, -0.80886f, -0.52739f, -0.31372f, -0.18258f, 0.06232f, 0.46556f, 0.86435f, 1.13913f, 1.25153f, 1.16678f, 0.92813f, 0.61678f, 0.21844f, -0.27982f, -0.74864f, -1.03814f, -1.11531f, -1.05286f, -0.97167f, -0.96587f, -0.96097f, -0.78522f, -0.44442f, -0.13440f, 0.04313f, 0.14974f, 0.23494f, 0.31702f, 0.45619f, 0.65336f, 0.78182f, 0.74748f, 0.58778f, 0.35315f, 0.02038f, -0.38643f, -0.67102f, -0.58225f, -0.10050f, 0.55121f, 1.16767f, 1.61731f, 1.72125f, 1.39874f, 0.85453f, 0.34958f, -0.06883f, -0.46226f, -0.84231f, -1.20943f, -1.55156f, -1.79199f, -1.85712f, -1.75586f, -1.51142f, -1.14252f, -0.75622f, -0.48992f, -0.36636f, -0.31417f, -0.27045f, -0.17678f, -0.00504f, 0.17205f, 0.24743f, 0.21398f, 0.13146f, 0.01682f, -0.14109f, -0.29454f, -0.27491f, 0.14305f, 0.99861f, 1.97863f, 2.62052f, 2.68609f, 2.26590f, 
1.62668f, 1.02672f, 0.57797f, 0.19569f, -0.24681f, -0.73258f, -1.15411f, -1.48688f, -1.75408f, -1.87452f, -1.70947f, -1.26474f, -0.75354f, -0.40918f, -0.27670f, -0.24636f, -0.23120f, -0.22808f,}; + +#endif // SAMPLE_0_NORMALIZED_H diff --git a/sw/applications/l_train/sylt-fft/LICENSE b/sw/applications/l_train/sylt-fft/LICENSE new file mode 100644 index 000000000..f1c15f58f --- /dev/null +++ b/sw/applications/l_train/sylt-fft/LICENSE @@ -0,0 +1,48 @@ +MHG (MORAL HIGH GROUND) LICENSE + +This software is released under the UNLICENSE license under the single +condition that good moral standards are maintained in its use. + +The term "moral standards" is chosen because it is impossible to define +legally as it varies with cultural and individual values, principles and +ideas. Hence, the following may or may not apply. + +If you find this software useful - use it in any way you like, but +consider dropping the author a "thank you" message. + +If you want to use this software to earn money - please do, but +consider mentioning the authors name or making a donation. + +If you use this software - consider letting the author know. Simply +knowing it is being put to good use is often rewarding. + +You should consider maintaining open source licensing for software +that uses or is derived from this software. + + +UNLICENSE + +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. 
We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to \ No newline at end of file diff --git a/sw/applications/l_train/sylt-fft/README.md b/sw/applications/l_train/sylt-fft/README.md new file mode 100644 index 000000000..530a2f314 --- /dev/null +++ b/sw/applications/l_train/sylt-fft/README.md @@ -0,0 +1,59 @@ +SYLT-FFT +======== +DEVSOUND (I)FFT(R) LIBRARY +------------------------------------- +And some other funky fixed-point maths like gray-coding and pow(2, f) + +**Optimized (C-level) for Keil C Compiler and GCC on Cortex-M4.** + +**Authors:** +* D. Taylor 2014 (gmail: senseitg) + +**License:** +* MHG (GPL compatible) - see LICENSE + +**Features:** +* FFT (Fast Fourier Transform) and IFFT (Inverse FFT) +* Fixed-point 32-bit, Radix-2 +* Complex or real (with slight conversion overhead) data +* No plan construction required before (I)FFT +* No reliance on other libraries (including libm if using precomputed tables) + +**Options (config.h):** +* DIT (decimation-in-time) or DIF (decimation-in-frequency) +* Rounding on divide (-speed, +accuracy) +* Saturating math (-speed, +stability) +* Table size vs. max. 
FFT length + +**Resource requirements:** +* Minimal memory requirements (in-place) +* Minimal stack use (non-recursive) +* Minimal twiddle tables (512 bytes for max N=512 FFT) + +**Notes:** +* Designed for optimal performance, not optimal accuracy + +**Caveats:** +* Care must be taken with input data to ensure no overflows +* Requires C99 (-std=c99 for GCC) + +**Performance:** +* Comparing against: CMSIS DSP arm_cortexM4I_math.lib(1.4.2) +* Platform: Freescale Kinetis K20 (Cortex-M4/ARMv7E-M) +* KEIL = Keil C Compiler 5.01 -O3 +* GCC = GNU Tools for ARM Embedded Processors 4.8.4 -O3 + +``` +Comparisons are of speed, +N% = faster than CMSIS, -N% = slower. +Please verify and do additional tests to add to the list. + + CMSIS-DSP SYLT-FFT N KEIL GCC +* arm_cfft_radix2_q31 fft_inverse 256 +25.6% +15.1% +``` + +**Thanks to:** +* [Wikipedia](http://www.wikipedia.org/) - for existing and taking donations +* [KATJA](http://www.katjaas.nl/) - for intelligible merge/split spectra algorithm +* [XCORE](https://github.com/xcore/) - for intelligible (I)FFT algorithm +* [CMLAB](http://www.cmlab.csie.ntu.edu.tw/cml/dsp/training/coding/transform/fft.html) - for intelligible FFT algorithm breakdown +* [BEVAN](http://web.ece.ucdavis.edu/~bbaas/281/slides/Handout.fft2.pdf) - for intelligible DIT vs. DIF, radix-2 vs. 4 overview diff --git a/sw/applications/l_train/sylt-fft/config.h b/sw/applications/l_train/sylt-fft/config.h new file mode 100644 index 000000000..5705d19dc --- /dev/null +++ b/sw/applications/l_train/sylt-fft/config.h @@ -0,0 +1,88 @@ +// CONFIGURATION FILE +// D. 
TAYLOR 2014 + +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +#include "stdint.h" +#include "stdbool.h" + +#ifndef __INLINE +#if defined(__GNUC__) +#define __INLINE __attribute__((always_inline)) inline +#else +#define __INLINE __inline +#endif +#endif + + +/* == MATH CONFIGURE =============================================== */ + +#define FPOW2_FBITS 23 // Number of fractional bits (1...28) NOTE: changed +#define FPOW2_LIMIT 8 // Limit accuracy to n fractional bits (1...FPOW2_FBITS-1) + +#define SINE_BITS 7 // Sine quality (2..14) vs. memory tradeoff +#define SINE_USE_TABLE 1 // Use pre-computed ROM table (vs. generate in RAM) +#define SINE_PRINTOUT 0 // Write sine table to screen (PC only) + +/* == FFT CONFIGURE =============================================== */ + +// Maximum FFT size: 4 << SINE_BITS (complex data points) +// Memory used by sine table: 4 << SINE_BITS (bytes) +// FFT is faster when SINE_USE_TABLE is 0 (located in RAM) + +#define FFT_DIT // Operation mode, FFT_DIT or FFT_DIF (slower) +#define FFT_ROUNDING 0 // Perform rounding when dividing (slower) +#define FFT_SATURATE 0 // Use saturating math where possible (slower) + +/* == WAVETABLE CONFIGURE ========================================== */ + +/* == GLOBAL DATA CONFIGURE ======================================== */ + +// PI +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +// LUT for sine wave, first quadrant only +#if SINE_USE_TABLE +// == PLACE GENERATED SINE TABLE HERE ======================== // +// ROM +#if SINE_BITS != 7 +#error "sinetable[] size does not match SINE_BITS" +#endif +const int32_t sinetable[] = { + 0x00000000, 0x01921d1f, 0x03242abe, 0x04b6195d, 0x0647d97c, 0x07d95b9e, 0x096a9049, 0x0afb6805, + 0x0c8bd35e, 0x0e1bc2e3, 0x0fab272b, 0x1139f0ce, 0x12c8106e, 0x145576b1, 0x15e21444, 0x176dd9de, + 0x18f8b83c, 0x1a82a025, 0x1c0b826a, 0x1d934fe5, 0x1f19f97b, 0x209f701c, 0x2223a4c5, 0x23a6887e, + 0x25280c5d, 0x26a82185, 0x2826b928, 0x29a3c484, 0x2b1f34eb, 0x2c98fbba, 
0x2e110a61, 0x2f875262, + 0x30fbc54d, 0x326e54c7, 0x33def287, 0x354d9056, 0x36ba2013, 0x382493b0, 0x398cdd32, 0x3af2eeb7, + 0x3c56ba70, 0x3db832a5, 0x3f1749b7, 0x4073f21d, 0x41ce1e64, 0x4325c135, 0x447acd50, 0x45cd358f, + 0x471cece6, 0x4869e664, 0x49b41533, 0x4afb6c97, 0x4c3fdff3, 0x4d8162c4, 0x4ebfe8a4, 0x4ffb654d, + 0x5133cc94, 0x5269126e, 0x539b2aef, 0x54ca0a4a, 0x55f5a4d2, 0x571deef9, 0x5842dd54, 0x59646497, + 0x5a827999, 0x5b9d1153, 0x5cb420df, 0x5dc79d7c, 0x5ed77c89, 0x5fe3b38d, 0x60ec382f, 0x61f1003e, + 0x62f201ac, 0x63ef328f, 0x64e88926, 0x65ddfbd3, 0x66cf811f, 0x67bd0fbc, 0x68a69e81, 0x698c246c, + 0x6a6d98a4, 0x6b4af278, 0x6c242960, 0x6cf934fb, 0x6dca0d14, 0x6e96a99c, 0x6f5f02b1, 0x70231099, + 0x70e2cbc6, 0x719e2cd2, 0x72552c84, 0x7307c3cf, 0x73b5ebd0, 0x745f9dd0, 0x7504d345, 0x75a585cf, + 0x7641af3c, 0x76d94988, 0x776c4edb, 0x77fab988, 0x78848413, 0x7909a92c, 0x798a23b1, 0x7a05eead, + 0x7a7d055b, 0x7aef6323, 0x7b5d039d, 0x7bc5e28f, 0x7c29fbee, 0x7c894bdd, 0x7ce3ceb1, 0x7d3980ec, + 0x7d8a5f3f, 0x7dd6668e, 0x7e1d93e9, 0x7e5fe493, 0x7e9d55fc, 0x7ed5e5c6, 0x7f0991c3, 0x7f3857f5, + 0x7f62368f, 0x7f872bf2, 0x7fa736b4, 0x7fc25596, 0x7fd8878d, 0x7fe9cbbf, 0x7ff62182, 0x7ffd885a, + 0x7fffffff, // <= space potato! +}; // <= sad monkey? 
+// == END OF GENERATED SINE TABLE ============================ // +#else +// RAM +int32_t sinetable[(1 << SINE_BITS) + 1]; +#endif + +// LUT for pow(2, fixedpoint) +// Only need to define up to FPOW2_LIMIT +const int32_t fpow2table[] = { + 0x6a09e668, 0x306fe0a3, 0x172b83c8, 0x0b5586d0, 0x059b0d31, 0x02c9a3e7, 0x0163daa0, 0x00b1afa6, +/*0x0058c86e, 0x002c605e, 0x00162f39, 0x000b175f, 0x00058ba0, 0x0002c5cc, 0x000162e5, 0x0000b172, + 0x000058b9, 0x00002c5d, 0x0000162e, 0x00000b17, 0x0000058c, 0x000002c6, 0x00000163, 0x000000b1, + 0x00000059, 0x0000002c, 0x00000016, 0x0000000b, 0x00000006, 0x00000003, 0x00000001, 0x00000001,*/ +}; + +#endif diff --git a/sw/applications/l_train/sylt-fft/fft.h b/sw/applications/l_train/sylt-fft/fft.h new file mode 100644 index 000000000..d6fb3ba76 --- /dev/null +++ b/sw/applications/l_train/sylt-fft/fft.h @@ -0,0 +1,423 @@ +// (I)FFT(R) +// D. TAYLOR 2014 + +#ifndef __FFT_H__ +#define __FFT_H__ + +#include "config.h" +#include "intrinsics.h" +#include "fpmath.h" + +/* == DECLARATIONS ================================================ */ + +// Fixed-point data type +typedef int32_t fft_t; + +// Complex number type +typedef struct { + fft_t r, i; +} fft_complex_t; + +// Readability macros +#define FFT_QCOS(K, SH) sinetable[(1 << SINE_BITS) - (K << SH)] +#define FFT_QSIN(K, SH) sinetable[K << SH] + +#if !((defined FFT_DIT) | (defined FFT_DIF)) +#error "Must define FFT_DIT or FFT_DIF" +#endif + + +/* == CODING STYLE DEFINITIONS ==================================== */ + +// GCC/ARMCC require different coding styles for optimal performance. +// These defines unify the different styles into one syntax. 
+ +// # Optimal performance on ARMCC (Keil) # +#if defined(__ARMCC_VERSION) +// Declare complex, assign complex +#define FFT_DECLC(VAR, ASG) fft_complex_t VAR = ASG; +// Declare complex, assign real, imaginary +#define FFT_DECLR(VAR, R, I) fft_complex_t VAR = (fft_complex_t){ .r = R, .i = I }; +// Assign real, imaginary +#define FFT_ASSGN(VAR, R, I) VAR = (fft_complex_t){ .r = R, .i = I }; +// Access real, imaginary +#define FFT(VAR, SUB) VAR.SUB +#endif + +// # Optimal performance on GCC # +#if defined(__GNUC__) +// Declare complex, assign complex +#define FFT_DECLC(VAR, ASG) fft_t VAR##r = ASG.r, VAR##i = ASG.i; +// Declare complex, assign real, imaginary +#define FFT_DECLR(VAR, R, I) fft_t VAR##r = R, VAR##i = I; +// Assign real, imaginary +#define FFT_ASSGN(VAR, R, I) VAR.r = R; VAR.i = I; +// Access real, imaginary +#define FFT(VAR, SUB) VAR##SUB +#endif + +#if FFT_SATURATE +#define FFT_A(A,B) qadd(A, B) // A + B (saturating) +#define FFT_S(A,B) qsub(A, B) // A - B (saturating) +#define FFT_M2(W) qadd(W, W) // W * 2 (saturating) +#else +#define FFT_A(A,B) ((A) + (B)) // A + B +#define FFT_S(A,B) ((A) - (B)) // A - B +#define FFT_M2(W) ((W) << 1) // W * 2 +#endif +#define FFT_M(A,B) smmulr(A, B) // A * B +#define FFT_MA(A,B,C) smmlar(A, B, C) // C + (A * B) +#define FFT_MS(A,B,C) smmlsr(A, B, C) // C - (A * B) +#if FFT_ROUNDING +#define FFT_D2(W) (((W) + 1) >> 1) // W / 2 (rounded) +#else +#define FFT_D2(W) ((W) >> 1) // W / 2 +#endif + +/* == FORWARD AND INVERSE FFT ===================================== */ + +// Forward FFT transform +// Permutation must be performed prior to (DIT)/after (DIF) call +void fft_forward(fft_complex_t data[], unsigned bits) { + unsigned size = 1 << bits; +#ifdef FFT_DIT + unsigned shift = SINE_BITS + 1; + for(unsigned stride = 2 ; stride <= size; stride <<= 1, shift--) { +#else//FFT_DIF + unsigned shift = SINE_BITS - (bits - 2); + for(unsigned stride = size; stride >= 2; stride >>= 1, shift++) { +#endif + // Twiddle and combine for 
k = 0, having trivial (0 and 1) twiddle factors + for(unsigned a = 0; a < size; a += stride) { + unsigned b = a + (stride >> 1); +/* + FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); + // # Radix-2 DIT/DIF trivial butterfly # + FFT_ASSGN(data[a], FFT_D2(FFT_ADD(FFT(A,r), FFT(B,r))), FFT_D2(FFT_ADD(FFT(A,i), FFT(B,i)))); + FFT_ASSGN(data[b], FFT_D2(FFT_SUB(FFT(A,r), FFT(B,r))), FFT_D2(FFT_SUB(FFT(A,i), FFT(B,i)))); +*/ + // Special case: GCC optimizes ARMCC style better here + fft_complex_t A = data[a], B = data[b]; + // # Radix-2 DIT/DIF trivial butterfly # + data[a] = (fft_complex_t){ .r = FFT_D2(FFT_A(A.r, B.r)), .i = FFT_D2(FFT_A(A.i, B.i)) }; + data[b] = (fft_complex_t){ .r = FFT_D2(FFT_S(A.r, B.r)), .i = FFT_D2(FFT_S(A.i, B.i)) }; + } + if(!(stride & 2)) { + for(unsigned a = (stride >> 2); a < (stride >> 2) + size; a += stride) { + unsigned b = a + (stride >> 1); + FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); +#ifdef FFT_DIT + // # Radix-2 DIT trivial butterfly # + FFT_ASSGN(data[a], FFT_D2(FFT_A(FFT(A,r), FFT(B,i))), FFT_D2(FFT_S(FFT(A,i), FFT(B,r)))); + FFT_ASSGN(data[b], FFT_D2(FFT_S(FFT(A,r), FFT(B,i))), FFT_D2(FFT_A(FFT(A,i), FFT(B,r)))); +#else//FFT_DIF + // # Radix-2 DIF trivial butterfly # + FFT_ASSGN(data[a], FFT_D2(FFT_A(FFT(A,r), FFT(B,r))), FFT_D2(FFT_A(FFT(A,i), FFT(B,i)))); + FFT_ASSGN(data[b], FFT_D2(FFT_S(FFT(A,i), FFT(B,i))), FFT_D2(FFT_S(FFT(B,r), FFT(A,r)))); +#endif + } + } + // Twiddle and combine + for(unsigned k = 1; k < (stride >> 2); k++) { + FFT_DECLR(W, FFT_QCOS(k, shift), FFT_QSIN(k, shift)); + for(unsigned a = k, b; a < size; a += (stride >> 2) + (stride >> 1)) { + b = a + (stride >> 1); + { // These two blocks prevent the compiler from confusing... 
+ FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); +#ifdef FFT_DIT + // # Radix-2 DIT butterfly # + FFT_DECLR(BW, FFT_MA(FFT(B,i), FFT(W,i), FFT_M(FFT(B,r), FFT(W,r))), + FFT_MS(FFT(B,r), FFT(W,i), FFT_M(FFT(B,i), FFT(W,r)))); + FFT_ASSGN(data[a], FFT_A(FFT_D2(FFT(A,r)), FFT(BW,r)), FFT_A(FFT_D2(FFT(A,i)), FFT(BW,i))); + FFT_ASSGN(data[b], FFT_S(FFT_D2(FFT(A,r)), FFT(BW,r)), FFT_S(FFT_D2(FFT(A,i)), FFT(BW,i))); +#else//FFT_DIF + // # Radix-2 DIF butterfly # + FFT_ASSGN(data[a], FFT_D2(FFT_A(FFT(A,r), FFT(B,r))), FFT_D2(FFT_A(FFT(A,i), FFT(B,i)))); + FFT_DECLR(D, FFT_S(FFT(A,r), FFT(B,r)), FFT_S(FFT(A,i), FFT(B,i))); + FFT_ASSGN(data[b], FFT_MA(FFT(D,r), FFT(W,r), FFT_M(FFT(D,i), FFT(W,i))), + FFT_MS(FFT(D,r), FFT(W,i), FFT_M(FFT(D,i), FFT(W,r)))); +#endif + } + a += (stride >> 2); b += (stride >> 2); + { // ...register use resulting in more efficient code + FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); +#ifdef FFT_DIT + // # Radix-2 DIT butterfly # + FFT_DECLR(BW, FFT_MS(FFT(B,r), FFT(W,i), FFT_M(FFT(B,i), FFT(W,r))), + FFT_MA(FFT(B,i), FFT(W,i), FFT_M(FFT(B,r), FFT(W,r)))); + FFT_ASSGN(data[a], FFT_A(FFT_D2(FFT(A,r)), FFT(BW,r)), FFT_S(FFT_D2(FFT(A,i)), FFT(BW,i))); + FFT_ASSGN(data[b], FFT_S(FFT_D2(FFT(A,r)), FFT(BW,r)), FFT_A(FFT_D2(FFT(A,i)), FFT(BW,i))); +#else//FFT_DIF + // # Radix-2 DIF butterfly # + FFT_ASSGN(data[a], FFT_D2(FFT_A(FFT(A,r), FFT(B,r))), FFT_D2(FFT_A(FFT(A,i), FFT(B,i)))); + FFT_DECLR(D, FFT_S(FFT(B,r), FFT(A,r)), FFT_S(FFT(B,i), FFT(A,i))); + FFT_ASSGN(data[b], FFT_MS(FFT(D,i), FFT(W,r), FFT_M(FFT(D,r), FFT(W,i))), + FFT_MA(FFT(D,i), FFT(W,i), FFT_M(FFT(D,r), FFT(W,r)))); +#endif + } + } + } + } +} + +// Inverse FFT transform +// Permutation must be performed prior to (DIT)/after (DIF) call +void fft_inverse(fft_complex_t data[], unsigned bits) { + unsigned size = 1 << bits; +#ifdef FFT_DIT + unsigned shift = SINE_BITS + 1; + for(unsigned stride = 2 ; stride <= size; stride <<= 1, shift--) { +#else//FFT_DIF + unsigned shift = SINE_BITS - (bits 
- 2); + for(unsigned stride = size; stride >= 2; stride >>= 1, shift++) { +#endif + // Twiddle and combine for k = 0, having trivial (0 and 1) twiddle factors + for(unsigned a = 0; a < size; a += stride) { + unsigned b = a + (stride >> 1); +/* + FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); + // # Radix-2 DIT/DIF trivial butterfly # + FFT_ASSGN(data[a], FFT_A(FFT(A,r), FFT(B,r)), FFT_A(FFT(A,i), FFT(B,i))); + FFT_ASSGN(data[b], FFT_S(FFT(A,r), FFT(B,r)), FFT_S(FFT(A,i), FFT(B,i))); +*/ + // Special case: GCC optimizes ARMCC style better here + fft_complex_t A = data[a], B = data[b]; + // # Radix-2 DIT/DIF trivial butterfly # + data[a] = (fft_complex_t){ .r = FFT_A(A.r, B.r), .i = FFT_A(A.i, B.i) }; + data[b] = (fft_complex_t){ .r = FFT_S(A.r, B.r), .i = FFT_S(A.i, B.i) }; + } + if(!(stride & 2)) { + for(unsigned a = (stride >> 2); a < size; a += stride) { + unsigned b = a + (stride >> 1); + FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); +#ifdef FFT_DIT + // # Radix-2 DIT trivial butterfly # + FFT_ASSGN(data[a], FFT_S(FFT(A,r), FFT(B,i)), FFT_A(FFT(A,i), FFT(B,r))); + FFT_ASSGN(data[b], FFT_A(FFT(A,r), FFT(B,i)), FFT_S(FFT(A,i), FFT(B,r))); +#else//FFT_DIF + // # Radix-2 DIF trivial butterfly # + FFT_ASSGN(data[a], FFT_A(FFT(A,r), FFT(B,r)), FFT_A(FFT(A,i), FFT(B,i))); + FFT_ASSGN(data[b], FFT_S(FFT(B,i), FFT(A,i)), FFT_S(FFT(A,r), FFT(B,r))); +#endif + } + } + // Twiddle and combine + for(unsigned k = 1; k < (stride >> 2); k++) { + FFT_DECLR(W, FFT_QCOS(k, shift), FFT_QSIN(k, shift)); + for(unsigned a = k, b; a < size; a += (stride >> 2) + (stride >> 1)) { + b = a + (stride >> 1); + { // These two blocks prevent the compiler from confusing... 
+ FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); +#ifdef FFT_DIT + // # Radix-2 DIT butterfly # + FFT_DECLR(BW, FFT_MS(FFT(B,r), FFT(W,r), FFT_M(FFT(B,i), FFT(W,i))), + FFT_MA(FFT(B,i), FFT(W,r), FFT_M(FFT(B,r), FFT(W,i)))); + FFT_ASSGN(data[a], FFT_S(FFT(A,r), FFT_M2(FFT(BW,r))), FFT_A(FFT(A,i), FFT_M2(FFT(BW,i)))); + FFT_ASSGN(data[b], FFT_A(FFT(A,r), FFT_M2(FFT(BW,r))), FFT_S(FFT(A,i), FFT_M2(FFT(BW,i)))); +#else//FFT_DIF + // # Radix-2 DIF butterfly # + FFT_ASSGN(data[a], FFT_A(FFT(A,r), FFT(B,r)), FFT_A(FFT(A,i), FFT(B,i))); + FFT_DECLR(D, FFT_S(FFT(A,r), FFT(B,r)), FFT_S(FFT(A,i), FFT(B,i))); + FFT_ASSGN(data[b], FFT_M2(FFT_MS(FFT(D,i), FFT(W,i), FFT_M(FFT(D,r), FFT(W,r)))), + FFT_M2(FFT_MA(FFT(D,i), FFT(W,r), FFT_M(FFT(D,r), FFT(W,i))))); +#endif + } + a += (stride >> 2); b += (stride >> 2); + { // ...register use resulting in more efficient code + FFT_DECLC(A, data[a]); FFT_DECLC(B, data[b]); +#ifdef FFT_DIT + // # Radix-2 DIT butterfly # + FFT_DECLR(BW, FFT_MA(FFT(B,i), FFT(W,r), FFT_M(FFT(B,r), FFT(W,i))), + FFT_MS(FFT(B,r), FFT(W,r), FFT_M(FFT(B,i), FFT(W,i)))); + FFT_ASSGN(data[a], FFT_S(FFT(A,r), FFT_M2(FFT(BW,r))), FFT_S(FFT(A,i), FFT_M2(FFT(BW,i)))); + FFT_ASSGN(data[b], FFT_A(FFT(A,r), FFT_M2(FFT(BW,r))), FFT_A(FFT(A,i), FFT_M2(FFT(BW,i)))); +#else//FFT_DIF + // # Radix-2 DIF butterfly # + FFT_ASSGN(data[a], FFT_A(FFT(A,r), FFT(B,r)), FFT_A(FFT(A,i), FFT(B,i))); + FFT_DECLR(D, FFT_S(FFT(A,r), FFT(B,r)), FFT_S(FFT(B,i), FFT(A,i))); + FFT_ASSGN(data[b], FFT_M2(FFT_MS(FFT(D,r), FFT(W,i), FFT_M(FFT(D,i), FFT(W,r)))), + FFT_M2(FFT_MA(FFT(D,i), FFT(W,i), FFT_M(FFT(D,r), FFT(W,r))))); +#endif + } + } + } + } +} + + +/* == DATA SET PROCESSING AND MANIPULATION ======================== */ + +// Process complex data to produce real-only output +// This allows us to output N*2 point of real data using a N point complex (I)FFT +// Even/odd real data will be found in the real/imaginary parts of every output bin upon completion +void fft_convert(fft_complex_t 
data[], unsigned bits, bool permutated, bool invert) { + unsigned size = 1 << --bits; + unsigned shift = SINE_BITS - bits++; + unsigned n, z, nc, zc; + fft_t rsum, rdif, isum, idif; + fft_t itwiddled, rtwiddled; + for(nc = zc = size; nc; nc--, zc++) { + if(permutated) { + n = RBITS(nc, bits); z = RBITS(zc, bits); + } else { + n = nc; z = zc; + } + rsum = data[n].r + data[z].r; isum = data[n].i + data[z].i; + rdif = data[n].r - data[z].r; idif = data[n].i - data[z].i; + fft_t r = FFT_QCOS(nc, shift); fft_t i = -FFT_QSIN(nc, shift); + if(invert) r = -r; + rtwiddled = FFT_MA(r, isum, FFT_M(i, rdif)) << 1; + itwiddled = FFT_MS(r, rdif, FFT_M(i, isum)) << 1; + data[n].r = rsum + rtwiddled; data[n].i = itwiddled + idif; + data[z].r = rsum - rtwiddled; data[z].i = itwiddled - idif; + } + fft_t data_0_tr = data[0].r; + data[0].r = (data[0].r + data[0].i); data[0].i = (data_0_tr - data[0].i); + if(!invert) { data[0].r <<= 1; data[0].i <<= 1; } +} + +// Perform bit-reversal permutation on data set +// (Reverses address bits for all data points) +void fft_permutate(fft_complex_t data[], unsigned bits) { + unsigned size = 1 << bits; + unsigned shift = 32 - bits; + for(unsigned i = 1; i < size - 1; i++) { + unsigned z = rbit(i) >> shift; + if(z > i) { + fft_t + t = data[i].r; data[i].r = data[z].r; data[z].r = t; + t = data[i].i; data[i].i = data[z].i; data[z].i = t; + } + } +} + + +/* == "HIGH"-LEVEL FUNCTIONS ====================================== */ + +// Perform forward FFT (including permutation) +__INLINE +void fft_fft(fft_complex_t *complex, unsigned bits) { +#ifdef FFT_DIT + fft_permutate(complex, bits); +#endif + fft_forward(complex, bits); +#ifdef FFT_DIF + fft_permutate(complex, bits); +#endif +} + +// Perform inverse FFT (including permutation) +__INLINE +void fft_ifft(fft_complex_t *complex, unsigned bits) { +#ifdef FFT_DIT + fft_permutate(complex, bits); +#endif + fft_inverse(complex, bits); +#ifdef FFT_DIF + fft_permutate(complex, bits); +#endif +} + +// Perform 
forward FFT (including permutation, real output conversion) +__INLINE +void fft_fftr(fft_complex_t *complex, unsigned bits) { + fft_fft(complex, bits); + fft_convert(complex, bits, false, false); +} + +// Perform inverse FFT (including permutation, real input conversion) +__INLINE +void fft_ifftr(fft_complex_t *complex, unsigned bits) { + fft_convert(complex, bits, false, true); + fft_ifft(complex, bits); +} + + +/* == DATA SET CONSTRUCTION ======================================= */ + +// Magnitude and phase => complex FFT bin [index] +// A data set built with this method does not require fft_permutate before DIT IFFT +__INLINE +void fft_phase_magnitude(fft_complex_t complex[], unsigned bits, unsigned index, int32_t mag, uint32_t pha) { +#ifdef FFT_DIT + unsigned n = RBITS(index, bits); +#else//FFT_DIF + unsigned n = index; +#endif + complex[n].r = FFT_M(mag, sine(pha)); + complex[n].i = FFT_M(mag, cosine(pha)); +} + +// Magnitude, phase:0 => complex FFT bin [index] +// A data set built with this method does not require fft_permutate before DIT IFFT +__INLINE +void fft_magnitude(fft_complex_t complex[], unsigned bits, unsigned index, int32_t mag) { +#ifdef FFT_DIT + unsigned n = RBITS(index, bits); +#else//FFT_DIF + unsigned n = index; +#endif + complex[n].r = 0; complex[n].i = mag; +} + +// REAL Symmetric DC offset => complex FFT bin [0] (DC) +// A data set built with this method does not require fft_permutate before DIT IFFT +// A data set built with this method does not require fft_convert before IFFT +__INLINE +void fft_real_dc(fft_complex_t data[], fft_t r, fft_t i) { + data[0].r = r + i; + data[0].i = r - i; +} + +// REAL Symmetric magnitude and phase => complex FFT bins [index], [size-index] +// A data set built with this method does not require fft_permutate before DIT IFFT +// A data set built with this method does not require fft_convert before IFFT +void fft_real_phase_magnitude(fft_complex_t complex[], unsigned bits, unsigned index, int32_t mag_lo, 
int32_t pha_lo, int32_t mag_hi, int32_t pha_hi) { + unsigned size = 1 << bits; + unsigned shift = SINE_BITS - (bits - 1); +#ifdef FFT_DIT + unsigned n = RBITS(index, bits); + unsigned z = RBITS(size - index, bits); +#else//FFT_DIF + unsigned n = index; + unsigned z = size - index; +#endif + fft_t rsum, rdif, isum, idif, r, i; + fft_t itwiddled, rtwiddled; + r = FFT_M(mag_lo, sine(pha_lo)); + i = FFT_M(mag_hi, sine(pha_hi)); + rsum = r + i; rdif = r - i; + r = FFT_M(mag_lo, cosine(pha_lo)); + i = FFT_M(mag_hi, cosine(pha_hi)); + isum = r + i; idif = r - i; + r = -FFT_QCOS(index, shift); i = -FFT_QSIN(index, shift); + rtwiddled = FFT_MA(r, isum, FFT_M(i, rdif)) << 1; + itwiddled = FFT_MS(r, rdif, FFT_M(i, isum)) << 1; + complex[n].r = rsum + rtwiddled; complex[n].i = itwiddled + idif; + complex[z].r = rsum - rtwiddled; complex[z].i = itwiddled - idif; +} + +// REAL Symmetric magnitude, phase:0 => complex FFT bins [index], [size-index] +// This method works with permutated (bit-reversed) addressing +// A data set built with this method does not require fft_permutate before DIT IFFT +// A data set built with this method does not require fft_convert before IFFT +void fft_real_magnitude(fft_complex_t complex[], unsigned bits, unsigned index, int32_t mag_lo, int32_t mag_hi) { + unsigned shift = SINE_BITS - bits + 1; +#ifdef FFT_DIT + unsigned n = RBITS(index, bits); + unsigned z = RBITS((1 << bits) - index, bits); +#else//FFT_DIF + unsigned n = index; + unsigned z = (1 << bits) - index; +#endif + fft_t isum, idif, r, i; + fft_t itwiddled, rtwiddled; + isum = (mag_lo + mag_hi); idif = mag_lo - mag_hi; + r = FFT_QCOS(index, shift); i = FFT_QSIN(index, shift); + rtwiddled = FFT_M(r, isum) << 1; + itwiddled = FFT_M(i, isum) << 1; + complex[n].r = -rtwiddled; complex[n].i = (idif - itwiddled); + complex[z].r = rtwiddled; complex[z].i = -(idif + itwiddled); +} + +#endif diff --git a/sw/applications/l_train/sylt-fft/fpmath.h b/sw/applications/l_train/sylt-fft/fpmath.h new file 
mode 100644 index 000000000..9385d3c07 --- /dev/null +++ b/sw/applications/l_train/sylt-fft/fpmath.h @@ -0,0 +1,139 @@ +// FIXED POINT MATHS +// D. TAYLOR 2014 + +#ifndef __FPMATH_H__ +#define __FPMATH_H__ + +#include "config.h" +#include "intrinsics.h" +#include "fpmath.h" + +#define SINE_SIZE (1 << SINE_BITS) // Sine table size +#define SINE_FBITS (32 - 2 - SINE_BITS) // Fractional bits +#define SINE_FMASK ((1 << SINE_FBITS) - 1) // Fraction mask + +// Linear/box interpolation (30 bit precision) +// y1 is first point, y2 second +// mu is interpolation point 00000000-FFFFFFFF +// floating-point equivalent return (y2 - y1) * mu + y1; +__INLINE +int32_t linear(int32_t y1, int32_t y2, uint32_t mu) { + return smmlar((y2 >> 1) - (y1 >> 1), mu >> 1, y1 >> 2) << 2; +} + +// Cubic interpolation +// y0...y3 need to be externally limited in range to prevent overflow +// y0...y3 are control points, interpolation is performed between y1 and y2 +// mu is interpolation point 00000000-FFFFFFFF +// floating-point equivalent return y1+mu/2*(y2-y0+mu*(2*y0-5*y1+4*y2-y3+mu*(3*(y1-y2)+y3-y0))) +__INLINE +int32_t cubic(int32_t y0, int32_t y1, int32_t y2, int32_t y3, uint32_t mu) { + mu >>= 1; + int32_t a = (3 * (y1 - y2) - y0 + y3); + int32_t b = 2 * y2 + y0 - (5 * y1 + y3) / 2; + int32_t c = (y2 - y0) / 2; + return smmlar(smmlar(smmlar(a, mu, b) << 1, mu, c) << 1, mu, y1); +} + +// Generate first quadrant (0 to PI/2) of sine wave +// Output table is in Q31 format, with 1 limited to 0x7FFFFFFF +void sine_init() { +#if !SINE_USE_TABLE + unsigned int n; +#if SINE_PRINTOUT + printf("// ROM\n"); + printf("#if SINE_BITS != 7\n"); + printf("#error \"sinetable[] size does not match SINE_BITS\"\n"); + printf("#endif\n"); + printf("const int32_t sinetable[] = {"); +#endif + for(n = 0; n <= SINE_SIZE; n++) { + uint64_t v = (sin(((double)n * M_PI) / (double)(SINE_SIZE * 2)) * 2147483648.0); + sinetable[n] = v > 2147483647 ?
2147483647 : v; +#if SINE_PRINTOUT + // Print table + if((n & 7) == 0) printf("\n "); + printf("0x%08x, ", sinetable[n]); +#endif + } +#if SINE_PRINTOUT + printf("// <= space potato!\n}; // <= sad monkey?\n"); +#endif +#endif +} + +// Sin by table lookup with interpolation +// pos = 00000000 to FFFFFFFF, corresponding to 0-2PI(less one) +int32_t sine(uint32_t pos) { + uint32_t fraction = (pos & SINE_FMASK) << (2 + SINE_BITS); + uint32_t index = (pos & 0x40000000) ? (0x40000000 + SINE_FMASK - (pos & 0x3FFFFFFF)) : (pos & 0x3FFFFFFF); + uint32_t indexa = index >> SINE_FBITS; + uint32_t indexb = pos & 0x40000000 ? indexa - 1 : indexa + 1; + int32_t sample = linear(sinetable[indexa], sinetable[indexb], fraction); + return pos & 0x80000000 ? -sample : sample; +} + +// Cos by table lookup with interpolation +// See sine +__INLINE +int32_t cosine(uint32_t pos) { + return sine(pos + 0x40000000); +} + +// Fast sin by table lookup +// Same as sine, but no interpolation +__INLINE +int32_t fastsin(uint32_t pos) { + uint32_t index = (pos & 0x40000000) ? 0x40000000 - (pos & 0x3fffffff) : (pos & 0x3fffffff); + int32_t sample = sinetable[index >> SINE_FBITS]; + return (pos & 0x80000000 ? 
-sample : sample); +} + +// Fast cos by table lookup +// See fastsin +__INLINE +int32_t fastcos(uint32_t pos) { + return fastsin(pos + 0x40000000); +} + +// Fixed point pow(2, e) +uint64_t fpow2(uint32_t e) { + uint32_t ipart = e >> FPOW2_FBITS; +#ifdef FPOW2_LIMIT + uint32_t fpart = (e >> (FPOW2_FBITS - FPOW2_LIMIT)) << (32 - FPOW2_LIMIT); +#else + uint32_t fpart = e << (32 - FPOW2_FBITS); +#endif + uint64_t final = 0x100000000; + if(fpart) { + uint32_t bit = clz(fpart); + uint32_t fcalc = fpow2table[bit++] >> 1; + fpart <<= bit; + while(fpart) { + uint32_t lzc = clz(fpart); + bit += lzc++; + int32_t fmul = fpow2table[bit++]; + fcalc += smmlar(fcalc, fmul, fmul >> 1); + fpart <<= lzc; + } + final += (uint64_t)fcalc << 1; + } + return final << ipart; +} + +// Convert binary to gray-code +unsigned bin2gray(unsigned bits) { + return (bits >> 1) ^ bits; +} + +// Convert gray-code to binary +unsigned gray2bin(unsigned bits) { + bits ^= bits >> 16; + bits ^= bits >> 8; + bits ^= bits >> 4; + bits ^= bits >> 2; + bits ^= bits >> 1; + return bits; +} + +#endif diff --git a/sw/applications/l_train/sylt-fft/intrinsics.h b/sw/applications/l_train/sylt-fft/intrinsics.h new file mode 100644 index 000000000..e2614baeb --- /dev/null +++ b/sw/applications/l_train/sylt-fft/intrinsics.h @@ -0,0 +1,153 @@ +// INTRINSICS +// D. 
TAYLOR 2014 + +#ifndef __INTRINSICS_H__ +#define __INTRINSICS_H__ +#include "config.h" + +// issue warnings when not using full hardware acceleration + +#if defined(__ARMCC_VERSION) || (defined(__GNUC__) && defined(__arm__)) +#if (__CORTEX_M < 0x03) +#warning "Cortex-M core < M3 detected; hardware acceleration for math operations not supported" +#elif (__CORTEX_M < 0x04) +#warning "Cortex-M core < M4 detected; partial hardware acceleration for math operations supported" +#endif +#endif + +// reverse bits (ARM: RBIT) +__INLINE +uint32_t rbit(uint32_t x) { + uint32_t result; +#if defined(__ARMCC_VERSION) && ((__CORTEX_M >= 0x03) || (__CORTEX_SC >= 300)) + __asm{ rbit result, x } +#elif defined(__GNUC__) && defined(__arm__) && ((__CORTEX_M >= 0x03) || (__CORTEX_SC >= 300)) + __asm("rbit %0, %1":"=r"(result):"r"(x)); +#else + x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1)); + x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2)); + x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4)); + x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8)); + result = (x >> 16) | (x << 16); +#endif + return result; +} + +#define RBITS(W, BITS) (rbit(W) >> (32 - (BITS))) + +// count leading zeroes (ARM: CLZ) +__INLINE +uint32_t clz(uint32_t x) { + uint32_t result; +#if defined(__ARMCC_VERSION) && ((__CORTEX_M >= 0x03) || (__CORTEX_SC >= 300)) + __asm{ clz result, x } +#elif defined(__GNUC__) && defined(__arm__) && ((__CORTEX_M >= 0x03) || (__CORTEX_SC >= 300)) + __asm("clz %0, %1":"=r"(result):"r"(x)); +#else + x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; + x -= 0x55555555 & (x >> 1); + x = (0x33333333 & x) + (0x33333333 & (x >> 2)); + result = 32 - ((0x01010101 * (0x0F0F0F0F & (x + (x >> 4)))) >> 24); +#endif + return result; +} + +// 32-bit signed multiply -> 32-bit result, add 32-bit (ARM: SMMLAR) +// floating point equivalent: return c + a * b +__INLINE +int32_t smmlar(int32_t a, int32_t b, int32_t c) { + int32_t result; +#if 
defined(__ARMCC_VERSION) && (__CORTEX_M >= 0x04U) + __asm{ smmlar result, a, b, c } +#elif defined(__GNUC__) && defined(__arm__) && (__CORTEX_M >= 0x04U) + __asm("smmlar %0, %1, %2, %3":"=r"(result):"r"(a),"r"(b),"r"(c)); +#else + result = c + ((((int64_t)a * b) + 0x80000000) >> 32); +#endif + return result; +} + +// 32-bit signed multiply -> 32-bit result, subtract 32-bit (ARM: SMMLSR) +// floating point equivalent: return c - a * b +__INLINE +int32_t smmlsr(int32_t a, int32_t b, int32_t c) { + int32_t result; +#if defined(__ARMCC_VERSION) && (__CORTEX_M >= 0x04U) + __asm{ smmlsr result, a, b, c } +#elif defined(__GNUC__) && defined(__arm__) && (__CORTEX_M >= 0x04U) + __asm("smmlsr %0, %1, %2, %3":"=r"(result):"r"(a),"r"(b),"r"(c)); +#else + result = c - ((((int64_t)a * b) + 0x80000000) >> 32); +#endif + return result; +} + +// 32-bit signed multiply -> 32-bit result (ARM: SMMULR) +// floating point equivalent: return a * b +__INLINE +int32_t smmulr(int32_t a, int32_t b) { + int32_t result; +#if defined(__ARMCC_VERSION) && (__CORTEX_M >= 0x04U) + __asm{ smmulr result, a, b } +#elif defined(__GNUC__) && defined(__arm__) && (__CORTEX_M >= 0x04U) + __asm("smmulr %0, %1, %2":"=r"(result):"r"(a),"r"(b)); +#else + result = ((((int64_t)a * b) + 0x80000000) >> 32); +#endif + return result; +} + +// saturating add (ARM: qadd) +// floating point equivalent: return max(min(a + b, 1), -1) +__INLINE +int32_t qadd(int32_t a, int32_t b) { + uint32_t result; +#if defined(__ARMCC_VERSION) + __asm{ qadd result, a, b } +#elif defined(__GNUC__) + __asm("qadd %0, %1, %2":"=r"(result):"r"(a),"r"(b)); +#else + int64_t c = (int64_t)a + b; + if(c > 2147483647) c = 2147483647; + if(c < -2147483648) c = -2147483648; + result = c; +#endif + return result; +} + +// saturating subtract (ARM: qsub) +// floating point equivalent: return max(min(a - b, 1), -1) +__INLINE +int32_t qsub(int32_t a, int32_t b) { + uint32_t result; +#if defined(__ARMCC_VERSION) + __asm{ qsub result, a, b } +#elif 
defined(__GNUC__) && defined(__arm__) + __asm("qsub %0, %1, %2":"=r"(result):"r"(a),"r"(b)); +#else + int64_t c = (int64_t)a - b; + if(c > 2147483647) c = 2147483647; + if(c < -2147483648) c = -2147483648; + result = c; +#endif + return result; +} + +// 32-bit arithmetic shift right with rounding (ARM: ASRS + ADC) +// floating point equivalent: return v / pow(2, s) +__INLINE +int32_t asrr(int32_t v, int32_t s) { + int32_t result; +#if defined(__ARMCC_VERSION) + __asm{ asrs result, v, s }; + __asm{ adc result, result }; +#elif defined(__GNUC__) && defined(__arm__) + __asm("asrs %0, %1, %2":"=r"(result):"r"(v),"r"(s):"cc"); + __asm("adc %0, %1, #0":"=r"(result):"r"(result)); +#else + result = (v + (1 << (s - 1))) >> s; +#endif + return result; +} + +#endif diff --git a/sw/applications/l_train/sylt-fft/main.c b/sw/applications/l_train/sylt-fft/main.c new file mode 100644 index 000000000..cf59fcadd --- /dev/null +++ b/sw/applications/l_train/sylt-fft/main.c @@ -0,0 +1,74 @@ +// BENCHMARKING FOR FRDM-K20D50M +// D. 
TAYLOR 2014 + +// * Do not build with operating system +// * Uses FTM0 as core clock cycle counter +// * Requires C99 standard + +#define BENCH_RUNS 100 // Not really necessary without OS + +#include + +#include +#include + +#include "config.h" +#include "intrinsics.h" +#include "fpmath.h" +#include "fft.h" + +static volatile unsigned count_hi; // FTM0 high counter +static volatile unsigned count; // Performance counter + +// FFT data structure +fft_complex_t complex[256]; + +// This function contains code to benchmark +static void benchmark(void) { + fft_inverse(complex, 8); +} + +// Initialize FTM0 +static void bench_init(void) { + SIM->SCGC6 |= SIM_SCGC6_FTM0_MASK; + FTM0->MOD = 0xFFFF; + NVIC_EnableIRQ(FTM0_IRQn); +} + +// Resets cycle counts, begins benchmarking +__INLINE +static void bench_begin(void) { + FTM0->SC = 0; + count_hi = 0; + FTM0->CNT = 0; + FTM0->SC = FTM_SC_CLKS(1) | FTM_SC_TOIE_MASK; +} + +// Ends benchmarking, returns cycle count +__INLINE +static unsigned bench_end(void) { + FTM0->SC = 0; + return (FTM0->CNT | (count_hi << 16)) - 2; +} + +// FTM0 overflow counter +void FTM0_IRQHandler(void) { + FTM0->SC &= ~FTM_SC_TOF_MASK; + count_hi++; +} + +int main() { + bench_init(); + while(1) { + // Perform benchmark + unsigned ack = 0; + for(unsigned n = 0; n < BENCH_RUNS; n++) { + bench_begin(); + benchmark(); + ack += bench_end(); + } + count = ack / BENCH_RUNS; + // Count is reported here - use a breakpoint or add communication code + count = count; + } +} diff --git a/sw/applications/l_train/utils.h b/sw/applications/l_train/utils.h new file mode 100644 index 000000000..11f075273 --- /dev/null +++ b/sw/applications/l_train/utils.h @@ -0,0 +1,228 @@ +/*!*********************************************************************************** + * @file utils.h + * @author Linus Crugnola + * @date 07.06.2024 + * + * Description: + * This file contains a number of utility functions for the tests. 
+ * + *************************************************************************************/ + +#ifndef UTILS_H +#define UTILS_H + +#include +#include +#include + +/** + * @def SIMULATION or TARGET for the code to run in verilator or on the pynq board + */ +// #define SIMULATION +#define TARGET + +/** + * @def Scale for printing floats (on the pynq board) + */ +#define SCL 100 + +/** + * @def ASSERT_FAIL if assertions should fail the program + * @note otherwise they will just print a message + */ +// #define ASSERT_FAIL + +#ifdef SIMULATION +#pragma message ("SIMULATION environment") +#endif +#ifdef TARGET +#pragma message ("TARGET environment") +#endif + +/** + * @def PRINTF macro to print to the console + * + * @note should be used instead of printf to avoid printing in the simulation environment + */ +#ifndef SIMULATION +#define PRINTF(...) printf(__VA_ARGS__) +#else +#define PRINTF(...) +#endif + +/** + * @brief Assert close function for float (closef) and fixed point (closei32) numbers + * + * @param a the first number + * @param b the second number + * @param prec the precision + * @param idx the index of the assertion handy for vectors (optional) + * + * @note The functions ending in _s keep track of the maximum difference found so far (in maxdiff / maxdiffxp) + * @note The functions ending in _si also print the index of the assertion + */ +static float maxdiff = 0; +static int32_t maxdiffxp = 0; + +// Assert functions, always print if failing +void assert_closef_si(float a, float b, float prec, int idx) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiff) { + maxdiff = diff; + if (maxdiff > 0.0001) +#if defined(SIMULATION) || defined(TARGET) + PRINTF("Max diff float (scaled x10^6): %d\n", (int)(maxdiff * SCL)); +#else + PRINTF("Max diff float: %f\n", maxdiff); +#endif + } + if (diff > prec) { +#if defined(SIMULATION) || defined(TARGET) + PRINTF("AF (scaled x10^6) %d %d %d %d\n", __LINE__, (int)(a*SCL), (int)(b*SCL), idx); +#else + PRINTF("AF %d %f %f %d\n",
__LINE__, a, b, idx); +#endif +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closei32_si(int32_t a, int32_t b, int32_t prec, int idx) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiffxp) { + maxdiffxp = diff; + if (maxdiffxp > 1) + PRINTF("Max diff fxp: %d\n", maxdiffxp); + } + if (diff > prec) { + printf("AI %d %d %d %d\n", __LINE__, a, b, idx); +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closef_s(float a, float b, float prec) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiff) { + maxdiff = diff; + if (maxdiff > 0.0001) +#if defined(SIMULATION) || defined(TARGET) + PRINTF("Max diff float (scaled x10^6): %d\n", (int)(maxdiff * SCL)); +#else + PRINTF("Max diff float: %f\n", maxdiff); +#endif + } + if (diff > prec) { +#if defined(SIMULATION) || defined(TARGET) + PRINTF("AF (scaled x10^6) %d %d %d\n", __LINE__, (int)(a*SCL), (int)(b*SCL)); +#else + PRINTF("AF %d %f %f\n", __LINE__, a, b); +#endif +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closei32_s(int32_t a, int32_t b, int32_t prec) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > maxdiffxp) { + maxdiffxp = diff; + if (maxdiffxp > 1) + PRINTF("Max diff fxp: %d\n", maxdiffxp); + } + if (diff > prec) { + printf("AI %d %d %d\n", __LINE__, a, b); +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closef_i(float a, float b, float prec, int idx) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { +#if defined(SIMULATION) || defined(TARGET) + PRINTF("AF (scaled x10^6) %d %d %d %d\n", __LINE__, (int)(a*SCL), (int)(b*SCL), idx); +#else + PRINTF("AF %d %f %f %d\n", __LINE__, a, b, idx); +#endif +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closei32_i(int32_t a, int32_t b, int32_t prec, int idx) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { + printf("AI %d %d %d %d\n", 
__LINE__, a, b, idx); +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closef(float a, float b, float prec) { + float diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { +#if defined(SIMULATION) || defined(TARGET) + PRINTF("AF (scaled x10^6) %d %d %d\n", __LINE__, (int)(a*SCL), (int)(b*SCL)); +#else + PRINTF("AF %d %f %f\n", __LINE__, a, b); +#endif +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +void assert_closei32(int32_t a, int32_t b, int32_t prec) { + int32_t diff = a - b; + if (diff < 0) diff = -diff; + if (diff > prec) { + printf("AI %d %d %d\n", __LINE__, a, b); +#ifdef ASSERT_FAIL + exit(EXIT_FAILURE); +#endif + } +} + +/** + * @brief Export a vector to a file + * + * @param a the vector to export + * @param size the size of the vector + * @param filename the name of the file + * + * @note This function is not available in the target and simulation environment + */ +#if defined(SIMULATION) || defined(TARGET) +#define VECTOR_EXPORT(...) 
+#else +void vectorExport(float* a, int size, char filename[]) { + FILE *filePointer; + filePointer = fopen(filename, "w"); + if (filePointer == NULL) { + printf("Failed to create file.\n"); + return; + } + for (int i=0; i Date: Thu, 6 Jun 2024 11:18:37 +0200 Subject: [PATCH 26/27] add timing test --- .../l_cnn_static_realsample/core_portme.c | 106 +++++++++ .../l_cnn_static_realsample/core_portme.h | 93 ++++++++ .../l_cnn_static_realsample/coremark.h | 212 ++++++++++++++++++ .../l_cnn_static_realsample/main.c | 67 +++--- 4 files changed, 449 insertions(+), 29 deletions(-) create mode 100644 sw/applications/l_cnn_static_realsample/core_portme.c create mode 100644 sw/applications/l_cnn_static_realsample/core_portme.h create mode 100644 sw/applications/l_cnn_static_realsample/coremark.h diff --git a/sw/applications/l_cnn_static_realsample/core_portme.c b/sw/applications/l_cnn_static_realsample/core_portme.c new file mode 100644 index 000000000..28279a9e0 --- /dev/null +++ b/sw/applications/l_cnn_static_realsample/core_portme.c @@ -0,0 +1,106 @@ +/* +Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Original Author: Shay Gal-on +*/ + +// Copyright 2020 OpenHW Group +// Copyright 2020 Silicon Labs, Inc. +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://solderpad.org/licenses/ +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier:Apache-2.0 WITH SHL-2.0 + +#include "csr.h" +#include "x-heep.h" + +#include "coremark.h" + +#define ITERATIONS 1 + +ee_u32 default_num_contexts = 1; + +static CORETIMETYPE start_time_val, stop_time_val; + +#if VALIDATION_RUN +volatile ee_s32 seed1_volatile = 0x3415; +volatile ee_s32 seed2_volatile = 0x3415; +volatile ee_s32 seed3_volatile = 0x66; +#endif +#if PERFORMANCE_RUN +volatile ee_s32 seed1_volatile = 0x0; +volatile ee_s32 seed2_volatile = 0x0; +volatile ee_s32 seed3_volatile = 0x66; +#endif +#if PROFILE_RUN +volatile ee_s32 seed1_volatile = 0x8; +volatile ee_s32 seed2_volatile = 0x8; +volatile ee_s32 seed3_volatile = 0x8; +#endif +volatile ee_s32 seed4_volatile = ITERATIONS; +volatile ee_s32 seed5_volatile = 0; + +void +portable_init(core_portable *p, int *argc, char *argv[]) +{ + // Don't need to do anything here atm. + (void)p; + (void)argc; + (void)argv; +} + +void +portable_fini(core_portable *p) +{ + // Don't need to do anything here atm. 
+ (void)p; +} + +void +start_time(void) +{ + // Enable mcycle counter and read value + CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + + CSR_READ(CSR_REG_MCYCLE, &start_time_val); +} + +void +stop_time(void) +{ + CSR_READ(CSR_REG_MCYCLE, &stop_time_val); +} + +CORE_TICKS +get_time(void) +{ + return (stop_time_val - start_time_val); +} + +secs_ret +time_in_secs(CORE_TICKS ticks) +{ + return ticks*1E-6; // Normalized to 1 MHz clock period +} \ No newline at end of file diff --git a/sw/applications/l_cnn_static_realsample/core_portme.h b/sw/applications/l_cnn_static_realsample/core_portme.h new file mode 100644 index 000000000..762bbdcdc --- /dev/null +++ b/sw/applications/l_cnn_static_realsample/core_portme.h @@ -0,0 +1,93 @@ +/* +Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Original Author: Shay Gal-on +*/ + +// Copyright 2020 OpenHW Group +// Copyright 2020 Silicon Labs, Inc. +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://solderpad.org/licenses/ +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier:Apache-2.0 WITH SHL-2.0 + +#include +#include + +typedef signed short ee_s16; +typedef unsigned short ee_u16; +typedef signed int ee_s32; +typedef double ee_f32; +typedef unsigned char ee_u8; +typedef unsigned int ee_u32; +typedef ee_u32 ee_ptr_int; +typedef size_t ee_size_t; + +typedef ee_u32 CORE_TICKS; + +typedef struct CORE_PORTABLE_S +{ + ee_u8 portable_id; +} core_portable; + +#ifndef MULTITHREAD +#define MULTITHREAD 1 // 1 means single-core +#define USE_PTHREAD 0 +#define USE_FORK 0 +#define USE_SOCKET 0 +#endif + +#ifndef COMPILER_VERSION +#ifdef __GNUC__ +#define COMPILER_VERSION "GCC"__VERSION__ +#else +#define COMPILER_VERSION "Undefined non-gcc compiler used" +#endif +#endif + +#ifndef COMPILER_FLAGS +#define COMPILER_FLAGS FLAGS_STR +#endif + +#ifndef MEM_LOCATION +#define MEM_LOCATION "" +#endif + +#ifndef SEED_METHOD +#define SEED_METHOD SEED_VOLATILE +#endif + +#ifndef HAS_PRINTF +#define HAS_PRINTF 1 +#endif + +#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3)) + +#define CORETIMETYPE ee_u32 + +extern ee_u32 default_num_contexts; + +void portable_init(core_portable *p, int *argc, char *argv[]); +void portable_fini(core_portable *p); \ No newline at end of file diff --git a/sw/applications/l_cnn_static_realsample/coremark.h b/sw/applications/l_cnn_static_realsample/coremark.h new file mode 100644 index 000000000..489c92574 --- /dev/null +++ b/sw/applications/l_cnn_static_realsample/coremark.h @@ -0,0 +1,212 @@ +/* +Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Original Author: Shay Gal-on +*/ + +// Copyright 2020 OpenHW Group +// Copyright 2020 Silicon Labs, Inc. +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://solderpad.org/licenses/ +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier:Apache-2.0 WITH SHL-2.0 + +/* Topic: Description + This file contains declarations of the various benchmark functions. +*/ + +/* Configuration: TOTAL_DATA_SIZE + Define total size for data algorithms will operate on +*/ +#ifndef TOTAL_DATA_SIZE +#define TOTAL_DATA_SIZE 2 * 1000 +#endif + +#define SEED_ARG 0 +#define SEED_FUNC 1 +#define SEED_VOLATILE 2 + +#define MEM_STATIC 0 +#define MEM_MALLOC 1 +#define MEM_STACK 2 + +#include "core_portme.h" + +#if HAS_STDIO +#include +#endif +#if HAS_PRINTF +/* By default, printfs are activated for FPGA and disabled for simulation. */ +#define PRINTF_IN_FPGA 1 +#define PRINTF_IN_SIM 0 + +#if TARGET_SIM && PRINTF_IN_SIM + #define ee_printf(fmt, ...) printf(fmt, ## __VA_ARGS__) +#elif PRINTF_IN_FPGA + #define ee_printf(fmt, ...) 
printf(fmt, ## __VA_ARGS__) +#else + #define ee_printf printf +#endif + +#endif + +/* Actual benchmark execution in iterate */ +void *iterate(void *pres); + +/* Typedef: secs_ret + For machines that have floating point support, get number of seconds as + a double. Otherwise an unsigned int. +*/ +#if HAS_FLOAT +typedef double secs_ret; +#else +typedef ee_u32 secs_ret; +#endif + +#if MAIN_HAS_NORETURN +#define MAIN_RETURN_VAL +#define MAIN_RETURN_TYPE void +#else +#define MAIN_RETURN_VAL 0 +#define MAIN_RETURN_TYPE int +#endif + +void start_time(void); +void stop_time(void); +void reset_time(); +CORE_TICKS get_time(void); +secs_ret time_in_secs(CORE_TICKS ticks); + +/* Misc useful functions */ +ee_u16 crcu8(ee_u8 data, ee_u16 crc); +ee_u16 crc16(ee_s16 newval, ee_u16 crc); +ee_u16 crcu16(ee_u16 newval, ee_u16 crc); +ee_u16 crcu32(ee_u32 newval, ee_u16 crc); +ee_u8 check_data_types(void); +void * portable_malloc(ee_size_t size); +void portable_free(void *p); +ee_s32 parseval(char *valstring); + +/* Algorithm IDS */ +#define ID_LIST (1 << 0) +#define ID_MATRIX (1 << 1) +#define ID_STATE (1 << 2) +#define ALL_ALGORITHMS_MASK (ID_LIST | ID_MATRIX | ID_STATE) +#define NUM_ALGORITHMS 3 + +/* list data structures */ +typedef struct list_data_s +{ + ee_s16 data16; + ee_s16 idx; +} list_data; + +typedef struct list_head_s +{ + struct list_head_s *next; + struct list_data_s *info; +} list_head; + +/*matrix benchmark related stuff */ +#define MATDAT_INT 1 +#if MATDAT_INT +typedef ee_s16 MATDAT; +typedef ee_s32 MATRES; +#else +typedef ee_f16 MATDAT; +typedef ee_f32 MATRES; +#endif + +typedef struct MAT_PARAMS_S +{ + int N; + MATDAT *A; + MATDAT *B; + MATRES *C; +} mat_params; + +/* state machine related stuff */ +/* List of all the possible states for the FSM */ +typedef enum CORE_STATE +{ + CORE_START = 0, + CORE_INVALID, + CORE_S1, + CORE_S2, + CORE_INT, + CORE_FLOAT, + CORE_EXPONENT, + CORE_SCIENTIFIC, + NUM_CORE_STATES +} core_state_e; + +/* Helper structure to hold results 
*/ +typedef struct RESULTS_S +{ + /* inputs */ + ee_s16 seed1; /* Initializing seed */ + ee_s16 seed2; /* Initializing seed */ + ee_s16 seed3; /* Initializing seed */ + void * memblock[4]; /* Pointer to safe memory location */ + ee_u32 size; /* Size of the data */ + ee_u32 iterations; /* Number of iterations to execute */ + ee_u32 execs; /* Bitmask of operations to execute */ + struct list_head_s *list; + mat_params mat; + /* outputs */ + ee_u16 crc; + ee_u16 crclist; + ee_u16 crcmatrix; + ee_u16 crcstate; + ee_s16 err; + /* Multithread specific */ + core_portable port; +} core_results; + +/* Multicore execution handling */ +#if (MULTITHREAD > 1) +ee_u8 core_start_parallel(core_results *res); +ee_u8 core_stop_parallel(core_results *res); +#endif + +/* list benchmark functions */ +list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed); +ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx); + +/* state benchmark functions */ +void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p); +ee_u16 core_bench_state(ee_u32 blksize, + ee_u8 *memblock, + ee_s16 seed1, + ee_s16 seed2, + ee_s16 step, + ee_u16 crc); + +/* matrix benchmark functions */ +ee_u32 core_init_matrix(ee_u32 blksize, + void * memblk, + ee_s32 seed, + mat_params *p); +ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc); \ No newline at end of file diff --git a/sw/applications/l_cnn_static_realsample/main.c b/sw/applications/l_cnn_static_realsample/main.c index 5b5c224a2..528f936aa 100644 --- a/sw/applications/l_cnn_static_realsample/main.c +++ b/sw/applications/l_cnn_static_realsample/main.c @@ -5,6 +5,7 @@ #include "cnn.h" #include "fxp32.h" #include "utils.h" +#include "coremark.h" // #define DYN_ALLOCATION #define COMP_PREC 0.0001f @@ -29,22 +30,22 @@ void compareVectorsFxp(fxp32* a, fxp32* b, int size, float prec) { float weights1[] = { -0.12116f, 0.15478f, 0.15130f, -0.15055f, -0.10036f, 0.14400f, -0.00204f, -0.03408f, -0.18936f, 0.03390f, 0.12200f, -0.13891f, 
0.05332f, -0.21001f, -0.18876f, -0.06263f, -0.00274f, -0.14772f, 0.06332f, 0.05349f, -0.05071f, 0.13313f, -0.07974f, -0.06264f, -0.03501f, -0.21790f, 0.05925f, -0.14536f, -0.21762f, 0.08203f, -0.08571f, 0.16886f, 0.12439f, 0.12544f, 0.04466f, -0.16849f, 0.04673f, 0.03604f, -0.04811f, 0.21726f, 0.17702f, 0.16138f, 0.08862f, 0.02971f, -0.05572f, -0.07597f, -0.02538f, 0.06561f, -0.08724f, -0.20014f, -0.09443f, -0.13890f, -0.00194f, 0.03216f, 0.14760f, 0.09256f, 0.13031f, -0.09160f, 0.03522f, -0.03609f, -0.08631f, -0.02040f, -0.19688f,}; float weights2[] = { -0.71606f, 0.06657f, -0.07737f,}; float xin[] = { -1.30298f, -0.96491f, -0.51415f, -0.17608f, 0.27468f, 0.50006f, 0.04930f, -0.73953f, -0.96491f, -1.97912f, -2.65526f, -2.31719f, -2.54257f, -2.88064f, -3.21871f, -2.76795f, -2.31719f, -2.20450f, 1.17620f, 1.51427f, 0.95082f, 0.38737f, -0.17608f, 0.04930f, 0.61275f, 0.61275f, 0.50006f, 0.50006f, 0.50006f, -0.17608f, -0.40146f, -0.73953f, -0.73953f, -0.73953f, -0.85222f, -0.73953f, -0.85222f, -0.73953f, -0.17608f, 0.27468f, 0.50006f, 0.61275f, 0.95082f, 1.06351f, 1.06351f, 0.83813f, 0.50006f, 0.16199f, 0.04930f, 0.04930f, 0.50006f, 0.50006f, 0.72544f, 0.83813f, 0.83813f, 0.61275f, 0.04930f, -0.06339f, 0.04930f, 0.61275f, 0.83813f, 1.06351f, 0.95082f, 0.72544f, 0.61275f, 0.38737f, 0.27468f, 0.50006f, 0.50006f, 0.38737f, 0.27468f, 0.27468f, 0.72544f, 0.61275f, 0.50006f, 0.50006f, 0.38737f, 0.04930f, 0.04930f, 0.38737f, 0.38737f, -0.06339f, -0.17608f, -0.17608f, -0.06339f, -0.17608f, -0.51415f, -0.73953f, -0.96491f, -0.73953f, -0.62684f, -0.62684f, 0.16199f, 0.83813f, 1.06351f, 1.28889f, 1.17620f, 0.61275f, 0.38737f, 0.27468f, 0.16199f, 0.38737f, 0.50006f, 0.50006f, 0.38737f, 0.27468f, 0.38737f, 0.50006f, 0.61275f, 0.50006f, 0.50006f, 0.61275f, 0.61275f, 0.16199f, -0.06339f, -0.06339f, -0.06339f, 0.04930f, 0.04930f, -0.17608f, -0.40146f, -0.62684f, -0.73953f, -0.62684f, -0.17608f, 0.16199f, 0.61275f, 0.61275f, 0.72544f, 0.50006f, 0.50006f, 0.27468f, 0.27468f, 0.27468f, 
0.38737f, 0.50006f, 0.50006f, 0.38737f, 0.38737f, 0.38737f, 0.27468f, 0.16199f, 0.04930f, 0.27468f, 0.50006f, 0.61275f, 0.72544f, 0.61275f, 0.50006f, -0.06339f, -0.28877f, -0.17608f, -0.06339f, 0.04930f, -0.06339f, -0.17608f, -0.40146f, -0.62684f, -0.62684f, -0.40146f, -0.06339f, 0.27468f, 0.72544f, 0.95082f, 1.06351f, 0.72544f, 0.50006f, 0.38737f, 0.27468f, 0.27468f, 0.27468f, 0.38737f, 0.38737f, 0.27468f, -0.06339f, 0.04930f, 0.27468f, 0.38737f, 0.61275f, 0.61275f, 0.27468f, 0.38737f, 0.50006f, 0.61275f, 0.38737f, -0.06339f, -0.17608f, 0.04930f, 0.27468f, 0.38737f, 0.16199f, -0.06339f, -0.28877f, -0.28877f, -0.40146f, -0.40146f, -0.17608f, 0.16199f, 0.38737f, 0.95082f, 1.17620f, 1.17620f, 0.95082f, 0.27468f, 0.04930f, -0.06339f, 0.16199f, 0.27468f, 0.04930f, -0.28877f, -0.28877f, -0.28877f, -0.17608f, -0.28877f, -0.73953f, -0.85222f, -0.85222f, -0.51415f, -0.17608f, 0.38737f, 0.61275f, 0.72544f, 0.83813f, 0.95082f, 1.06351f, 1.06351f, 0.95082f, 0.95082f, 0.50006f, 0.38737f, 0.04930f, 0.16199f, 0.27468f, -0.40146f, -1.19029f, -2.09181f, -1.86643f, -0.28877f, -2.65526f, -9.75473f, -2.65526f, -1.07760f, -0.06339f, 0.16199f, 0.61275f, 0.38737f, 0.04930f, 0.27468f, 0.04930f, -0.51415f, -0.62684f, -0.51415f, -0.51415f, -0.85222f, -1.19029f, -1.19029f, 0.33916f, 0.92881f, 0.92881f, 0.63399f, -0.25048f, -0.76643f, -1.65090f, -2.09313f, -2.09313f, -1.79831f, -1.65090f, -1.94572f, -2.01942f, -1.06125f, -0.39790f, 1.00251f, 3.72963f, 3.36110f, 1.07622f, 1.07622f, 0.85510f, 0.63399f, -0.17678f, -0.61901f, -1.35607f, -1.65090f, -1.72460f, -1.50348f, -1.28237f, -0.54531f, -0.25048f, 0.48657f, 1.14993f, 1.37104f, 1.66587f, 2.25551f, 1.73957f, 1.07622f, 0.70769f, 0.41287f, 0.19175f, -0.17678f, -0.61901f, -1.57719f, -1.79831f, -1.65090f, -1.28237f, -0.54531f, -0.17678f, 0.56028f, 1.07622f, 1.37104f, 1.59216f, 1.51846f, 1.73957f, 1.44475f, 0.48657f, 0.26546f, 0.19175f, 0.85510f, 0.56028f, -0.25048f, -0.61901f, -0.76643f, -0.91384f, -1.20866f, -0.98754f, -0.61901f, -0.02937f, 
0.26546f, 1.07622f, 1.22363f, 1.81328f, 1.44475f, 0.04434f, -0.10307f, 0.33916f, -0.02937f, -0.25048f, -0.61901f, -0.54531f, -0.47160f, -0.32419f, -0.25048f, -0.02937f, 0.48657f, 0.85510f, 1.00251f, 1.37104f, 0.04434f, -0.69272f, -0.25048f, 0.56028f, 0.26546f, 0.04434f, 0.33916f, 0.26546f, -0.32419f, -0.32419f, -0.25048f, -0.17678f, 0.41287f, 0.78140f, 0.92881f, 1.44475f, 1.14993f, -0.10307f, -0.32419f, 0.19175f, 0.26546f, -0.02937f, -0.54531f, -0.76643f, -0.76643f, -0.76643f, -0.69272f, -0.54531f, 0.04434f, 0.19175f, 0.63399f, 0.70769f, 1.07622f, 0.85510f, 0.19175f, 0.11804f, 0.33916f, 0.63399f, 0.56028f, 0.11804f, -0.17678f, -0.47160f, -0.47160f, -0.61901f, -0.69272f, -0.47160f, -0.17678f, 0.04434f, 1.00251f, 1.22363f, 1.51846f, 1.51846f, 1.07622f, -0.32419f, -0.47160f, -0.02937f, -0.32419f, -0.54531f, -0.98754f, -1.13495f, -0.98754f, -0.98754f, -0.69272f, -0.39790f, 0.26546f, 0.70769f, 0.70769f, 0.92881f, 1.59216f, 0.41287f, 0.11804f, 0.33916f, 0.41287f, 0.56028f, 0.04434f, -0.17678f, -0.39790f, -0.61901f, -1.06125f, -1.06125f, -0.76643f, -0.54531f, 0.11804f, 0.85510f, 1.07622f, 1.51846f, 1.96069f, 1.51846f, 0.63399f, -0.54531f, 0.11804f, 0.11804f, -0.39790f, -0.61901f, -0.91384f, -0.98754f, -1.06125f, -0.98754f, -0.47160f, -0.17678f, 0.04434f, 0.56028f, 0.70769f, 0.92881f, 1.29734f, 1.73957f, 1.00251f, -0.69272f, -0.17678f, 0.11804f, 0.33916f, 0.04434f, -0.54531f, -0.84013f, -1.06125f, -1.28237f, -1.79831f, -1.57719f, -1.28237f, -0.54531f, 0.04434f, 0.63399f, 0.78140f, 1.44475f, 1.66587f, 2.18181f, 2.03440f, 1.44475f, 0.33916f, 0.48657f, 0.63399f, 0.41287f, -0.32419f, -0.47160f, -0.39790f, -0.84013f, -1.06125f, -1.35607f, -1.42978f, -1.50348f, -1.57719f, -1.42978f, -1.28237f, -1.20866f, -0.84013f, -0.69272f, -0.69272f, -0.69272f, -0.17678f, 0.48657f, 1.51846f, 1.00251f, 0.56028f, -0.02937f, 0.33916f, 0.33916f, 0.33916f, -0.10307f, -0.54531f, -1.20866f, -1.50348f, -1.42978f, -1.42978f, -1.35607f, -1.06125f, -0.84013f, 0.04434f, 1.10447f, 1.23962f, 0.69900f, 
0.42869f, -0.11193f, -0.24708f, -0.65255f, -0.78770f, -1.05801f, -1.19317f, -0.92286f, -0.24708f, -0.24708f, 0.56385f, 0.96931f, 0.96931f, 0.29354f, 1.64509f, 2.18571f, 0.69900f, -0.51739f, -0.92286f, -1.32832f, -1.19317f, -0.65255f, -0.24708f, -0.24708f, -0.38224f, -0.65255f, -1.32832f, -1.32832f, -1.19317f, -1.32832f, -1.59863f, -2.13925f, -2.27441f, -2.13925f, -1.32832f, -0.92286f, -1.32832f, -0.78770f, -0.38224f, -0.38224f, -0.24708f, -0.11193f, 0.02323f, 0.15838f, 0.83416f, 1.23962f, 1.91540f, 2.45602f, 2.86148f, 3.40211f, 3.40211f, 2.59117f, 2.45602f, 1.64509f, 1.23962f, 0.42869f, 0.02323f, -0.11193f, -0.51739f, -0.38224f, -0.24708f, -0.51739f, -0.92286f, -0.92286f, -0.65255f, -0.11193f, 0.15838f, 0.69900f, 0.69900f, -0.11193f, -0.11193f, 0.69900f, 0.69900f, 0.15838f, 0.29354f, 0.15838f, 0.15838f, 0.29354f, 0.42869f, 0.29354f, 0.15838f, 0.02323f, 0.15838f, 0.29354f, 0.15838f, 0.56385f, 0.69900f, 0.56385f, 0.56385f, 0.42869f, 0.02323f, -0.11193f, 0.15838f, 0.15838f, -0.24708f, -0.51739f, -0.92286f, -1.05801f, -0.78770f, -0.65255f, -0.51739f, -0.51739f, -0.38224f, 0.29354f, 0.29354f, 0.42869f, 0.29354f, 0.15838f, 0.02323f, 0.15838f, 0.15838f, 0.02323f, -0.51739f, -0.51739f, -0.11193f, 0.02323f, 0.15838f, 0.15838f, 0.15838f, 1.10447f, 0.83416f, 0.42869f, 0.29354f, 0.02323f, 0.02323f, -0.24708f, -0.38224f, -0.38224f, -0.65255f, -0.78770f, -1.19317f, -0.92286f, -0.65255f, -0.51739f, -0.24708f, -0.24708f, -0.24708f, -0.11193f, 0.29354f, 1.10447f, 0.83416f, 0.69900f, 0.56385f, 0.42869f, 0.29354f, 0.29354f, 0.02323f, 0.02323f, -0.24708f, -0.11193f, 0.29354f, 0.42869f, 0.56385f, 0.42869f, 0.69900f, 1.23962f, 0.96931f, 0.69900f, 0.56385f, 0.29354f, -0.24708f, -0.38224f, -0.38224f, -0.38224f, -0.92286f, -1.05801f, -1.32832f, -1.32832f, -0.92286f, -0.51739f, -0.38224f, -0.11193f, 0.02323f, 0.02323f, 0.29354f, 0.83416f, 0.42869f, 0.29354f, 0.15838f, 0.15838f, 0.15838f, 0.15838f, 0.15838f, 0.02323f, 0.02323f, 0.29354f, 0.42869f, 0.56385f, 0.56385f, 0.56385f, 0.56385f, 
1.10447f, 1.37478f, 0.56385f, 0.42869f, 0.42869f, 0.02323f, -0.24708f, -0.51739f, -0.65255f, -0.65255f, -0.92286f, -1.46348f, -1.46348f, -1.32832f, -0.92286f, -0.78770f, -0.78770f, -0.65255f, -0.24708f, -0.11193f, 0.02323f, -0.51739f, -0.51739f, 0.29354f, 0.29354f, 0.42869f, 0.29354f, -0.24708f, -0.24708f, -0.24708f, -0.24708f, -0.38224f, -0.38224f, -0.51739f, -0.38224f, -0.24708f, 0.15838f, 0.69900f, 0.96931f, 1.10447f, 0.83416f, 0.83416f, 0.83416f, 1.23962f, 2.18571f, 7.18645f, 1.78024f, 0.29354f, -1.46348f, -2.00410f, -1.59863f, -1.59863f, -1.05801f, -0.92286f, -1.32832f, -1.19317f, -1.05801f, -1.05801f, -1.32832f, -1.19317f, -0.92286f, -0.78770f,}; -float xout[] = { 0.39249f, 0.48451f, 0.73383f, 0.73852f, 1.10356f, 1.51326f, 1.76721f, 1.83251f, 1.31946f, 0.81513f, 0.02872f, 0.09573f, -0.31142f, -1.19899f, -1.17897f, -1.59128f, -1.68418f, -1.58064f, -1.61839f, -0.72027f, -0.35841f, 0.28717f, 1.03874f, 1.20660f, 1.62155f, 1.03099f, 0.66879f, 0.72900f, 0.17669f, -0.18862f, -0.35105f, -0.40824f, -0.50307f, -0.60400f, -0.51233f, -0.42268f, -0.19640f, -0.11939f, -0.08316f, 0.09401f, -0.03073f, 0.05474f, 0.00948f, -0.14916f, -0.29699f, -0.46144f, -0.41988f, -0.36489f, -0.28950f, -0.25506f, -0.41477f, -0.55599f, -0.60255f, -0.37059f, -0.06804f, 0.10718f, 0.32548f, 0.28716f, 0.30881f, 0.11494f, 0.19378f, 0.36549f, 0.49032f, 0.39248f, -0.11330f, -0.16019f, -0.25388f, -0.31549f, -0.40915f, -0.69023f, -0.56602f, -0.27259f, -0.03218f, 0.30087f, 0.40573f, 0.69364f, 0.80954f, 0.71345f, 0.81998f, 0.72108f, 0.49698f, 0.16719f, -0.10408f, -0.19166f, -0.49458f, -0.48501f, -0.35172f, -0.08854f, 0.20221f, 0.22341f, 0.01684f, -0.12514f, -0.43663f, -0.55029f, -0.36411f, -0.09131f, -0.07201f, -0.16178f, -0.05504f, 0.10212f, -0.12362f, -0.37001f, -0.33644f, -0.23595f, -0.00672f, 0.11853f, 0.06942f, 0.20967f, 0.21001f, 0.23882f, 0.36883f, 0.66184f, 0.71750f, 0.42784f, 0.21827f, 0.07058f, -0.03356f, -0.28731f, -0.41314f, -0.34149f, -0.14244f, 0.07088f, 0.01262f, -0.05669f, -0.19078f, 
-0.33944f, -0.39268f, -0.18032f, 0.07529f, 0.33591f, 0.39141f, 0.32471f, 0.16026f, -0.03295f, -0.25818f, -0.64155f, -0.79985f, -0.93584f, -0.66718f, -0.20500f, 0.17531f, 0.48883f, 0.47296f, 0.40285f, 0.35651f, 0.18949f, 0.29055f, 0.61037f, 0.46581f, 0.27488f, -0.01284f, -0.15280f, -0.17538f, -0.58741f, -0.57524f, -0.47580f, -0.21400f, -0.06589f, -0.10277f, -0.08980f, -0.12787f, -0.20503f, -0.26596f, -0.03148f, 0.32823f, 0.54433f, 0.48218f, 0.34929f, 0.02126f, -0.20263f, -0.49552f, -0.67534f, -0.62950f, -0.68360f, -0.35470f, -0.08379f, 0.14763f, 0.26742f, 0.29058f, 0.28133f, 0.20008f, 0.14494f, 0.41903f, 0.54173f, 0.52405f, 0.29799f, -0.07209f, -0.14607f, -0.46453f, -0.58305f, -0.57501f, -0.69478f, -0.46508f, -0.23234f, 0.07366f, 0.14385f, 0.05992f, -0.20362f, -0.39168f, -0.48250f, -0.07809f, 0.42712f, 0.71610f, 0.81517f, 0.82542f, 0.97280f, 0.77453f, 0.36336f, 0.04949f, -0.26119f, -0.45929f, -0.60788f, -0.69065f, -0.31942f, -0.13976f, 0.09962f, 0.01459f, -0.23469f, -0.43849f, -0.63786f, -0.63111f, -0.59381f, -0.51142f, -0.44169f, -0.36079f, 0.11435f, 0.52288f, 0.56995f, 0.72460f, 1.52536f, 1.96464f, 1.74206f, 0.39785f, 0.85132f, 0.47575f, -0.53417f, 0.15703f, 0.37793f, 0.68815f, 0.26998f, -0.36431f, -0.00991f, -1.89429f, -1.31357f, -0.37315f, -0.73792f, 0.22693f, -0.13869f, -0.04304f, 0.91745f, 0.28913f, 0.00824f, -0.09014f, -0.22067f, -0.39314f, -0.55479f,}; -float ppg[] = { 0.46327f, 0.32949f, 0.07201f, -0.29688f, -0.62103f, -0.64516f, -0.27495f, 0.24132f, 0.54530f, 0.49617f, 0.18035f, -0.26311f, -0.72587f, -1.14389f, -1.46848f, -1.61454f, -1.46102f, -1.00265f, -0.46707f, -0.07844f, 0.23123f, 0.65189f, 1.15061f, 1.54846f, 1.77962f, 1.84234f, 1.68487f, 1.28177f, 0.72119f, 0.13912f, -0.37057f, -0.75262f, -0.92106f, -0.80702f, -0.53024f, -0.32013f, -0.19117f, 0.04964f, 0.44617f, 0.83831f, 1.10851f, 1.21904f, 1.13570f, 0.90103f, 0.59486f, 0.20316f, -0.28679f, -0.74780f, -1.03247f, -1.10836f, -1.04695f, -0.96712f, -0.96142f, -0.95659f, -0.78377f, -0.44865f, -0.14380f, 
0.03078f, 0.13561f, 0.21939f, 0.30010f, 0.43695f, 0.63083f, 0.75716f, 0.72338f, 0.56635f, 0.33563f, 0.00841f, -0.39163f, -0.67148f, -0.58419f, -0.11046f, 0.53038f, 1.13658f, 1.57872f, 1.68092f, 1.36379f, 0.82866f, 0.33212f, -0.07932f, -0.46620f, -0.83991f, -1.20091f, -1.53734f, -1.77377f, -1.83781f, -1.73824f, -1.49787f, -1.13511f, -0.75526f, -0.49339f, -0.37189f, -0.32057f, -0.27758f, -0.18547f, -0.01659f, 0.15754f, 0.23167f, 0.19878f, 0.11763f, 0.00490f, -0.15038f, -0.30127f, -0.28197f, 0.12903f, 0.97034f, 1.93402f, 2.56521f, 2.62969f, 2.21650f, 1.58793f, 0.99797f, 0.55670f, 0.18079f, -0.25433f, -0.73201f, -1.14652f, -1.47374f, -1.73648f, -1.85492f, -1.69262f, -1.25530f, -0.75262f, -0.41400f, -0.28372f, -0.25390f, -0.23898f, -0.23591f, -0.24337f, -0.23372f, -0.20477f, -0.17889f, -0.16924f, -0.19249f, -0.29381f, -0.44251f, -0.44909f, -0.11573f, 0.54179f, 1.30633f, 1.97086f, 2.42310f, 2.54723f, 2.23185f, 1.54012f, 0.72251f, -0.00212f, -0.55436f, -0.94782f, -1.27811f, -1.66060f, -2.05800f, -2.24311f, -2.00405f, -1.36847f, -0.59866f, 0.01893f, 0.34660f, 0.38344f, 0.21808f, 0.01586f, -0.09994f, -0.15038f, -0.19249f, -0.21223f, -0.21924f, -0.30873f, -0.49646f, -0.61182f, -0.48769f, -0.15257f, 0.26633f, 0.70189f, 1.12386f, 1.46906f, 1.69321f, 1.80988f, 1.82217f, 1.65329f, 1.23659f, 0.63917f, -0.00124f, -0.61752f, -1.16319f, -1.48558f, -1.43953f, -1.09388f, -0.68244f, -0.39163f, -0.23328f, -0.11704f, 0.00753f, 0.09087f, 0.11280f, 0.12552f, 0.13561f, 0.09613f, -0.01616f, -0.26618f, -0.68815f, -0.99387f, -0.72192f, 0.13210f, 1.01376f, 1.45108f, 1.42871f, 1.11026f, 0.58521f, -0.03677f, -0.55261f, -0.79868f, -0.76710f, -0.55568f, -0.30302f, -0.15871f, -0.13897f, -0.00300f, 0.50143f, 1.26905f, 1.93840f, 2.29326f, 2.33712f, 2.12044f, 1.71207f, 1.24931f, 0.88787f, 0.68522f, 0.52337f, 0.20053f, -0.38417f, -1.12722f, -1.75578f, -2.00668f, -1.83562f, -1.42110f, -0.94255f, -0.41356f, 0.19527f, 0.57732f, 0.19132f, -0.83114f, -1.50488f, -1.29566f, -0.74561f, -0.39163f, -0.17714f, 
-0.09511f, -0.24337f, -0.29601f, 0.11807f, 0.73567f, 1.00543f, 0.84269f, 0.53345f, 0.17465f, -0.34645f,}; -float ppgf[] = { 0.07078f, -0.15502f, -0.66182f, -1.03541f, -1.72459f, -2.15842f, -2.04216f, -1.59119f, -0.77417f, -0.31896f, 0.15163f, -0.35884f, -0.41444f, 0.05510f, -0.28951f, -0.02326f, 0.22316f, 0.57799f, 1.15132f, 0.64183f, 0.58965f, 0.36472f, 0.11188f, 0.34186f, 0.15807f, 0.81135f, 1.01608f, 0.55277f, 0.54450f, 0.32774f, -0.01952f, -0.34439f, -0.41799f, -0.20301f, -0.01791f, 0.10255f, 0.00523f, 0.16903f, 0.52932f, 0.74430f, 1.13924f, 1.16430f, 1.12622f, 1.05019f, 0.89185f, 0.66460f, 0.13308f, -0.38291f, -0.74298f, -0.85330f, -0.63218f, -0.41113f, -0.35886f, -0.58600f, -0.71572f, -0.55583f, -0.46928f, -0.25638f, -0.17320f, 0.10445f, 0.10632f, 0.07146f, 0.14051f, 0.36468f, 0.83668f, 0.72655f, 0.58951f, 0.32389f, 0.01752f, 0.01875f, -0.01817f, 0.16213f, 0.56257f, 0.83571f, 1.17299f, 0.98729f, 0.55425f, 0.11521f, -0.48786f, -0.80040f, -0.96317f, -1.00710f, -1.09683f, -1.34569f, -1.27919f, -1.35280f, -1.38652f, -1.40933f, -1.33732f, -0.97867f, -0.51023f, -0.24675f, 0.11606f, 0.27271f, 0.17864f, 0.07471f, 0.22955f, 0.39345f, 0.25381f, 0.01551f, 0.12852f, 0.21963f, 0.03517f, -0.04602f, 0.13576f, 0.85180f, 1.86460f, 2.35555f, 2.41968f, 1.97768f, 1.21910f, 0.33612f, -0.16080f, -0.24705f, -0.47261f, -0.80259f, -1.11296f, -1.18643f, -1.32334f, -1.51343f, -1.55018f, -1.32618f, -0.76524f, -0.35730f, -0.09294f, 0.08555f, 0.15369f, -0.05559f, -0.31866f, -0.56963f, -0.59618f, -0.50360f, -0.32950f, -0.15953f, -0.03563f, 0.19904f, 0.35076f, 0.82012f, 1.20897f, 1.51133f, 1.79555f, 1.93426f, 2.07427f, 1.82900f, 1.18362f, 0.53301f, -0.29267f, -1.16473f, -1.41363f, -1.55299f, -1.64776f, -1.90520f, -2.06773f, -1.41664f, -0.79323f, -0.12286f, 0.23294f, 0.41249f, 0.48621f, 0.30788f, 0.14374f, 0.10509f, 0.11559f, -0.16101f, -0.54046f, -0.76358f, -0.79090f, -0.84575f, -0.63308f, -0.28506f, 0.34295f, 0.94167f, 1.33139f, 1.80746f, 1.82377f, 1.77700f, 1.66225f, 1.55474f, 1.36271f, 
0.95526f, 0.43909f, -0.14618f, -1.03655f, -1.70492f, -2.00963f, -1.73752f, -1.02179f, -0.53637f, 0.07291f, 0.34977f, 0.45797f, 0.70231f, 0.55596f, 0.34514f, 0.05186f, -0.00824f, 0.03621f, 0.18746f, 0.12550f, -0.20564f, -0.91578f, -1.14904f, -0.58400f, 0.19859f, 0.62565f, 0.45591f, 0.33573f, 0.22186f, -0.08626f, -0.29142f, -0.33939f, -0.15922f, 0.13497f, 0.01640f, -0.01895f, -0.23859f, -0.01758f, 0.73613f, 1.70753f, 2.57627f, 2.92437f, 2.93094f, 2.63186f, 2.15376f, 1.61009f, 0.77352f, 0.16235f, -0.04659f, -0.52407f, -1.90953f, -3.09186f, -3.49785f, -2.40454f, -2.68693f, -1.89685f, -0.40838f, -0.57059f, -0.18267f, -0.11083f, -0.07867f, -0.46683f, -1.49498f, 0.59864f, 0.56797f, -0.01848f, 0.56078f, -0.32204f, -0.10468f, -0.25296f, -0.79938f, 0.44654f, 0.99719f, 0.93283f, 0.75412f, 0.56779f, 0.20835f,}; -int32_t xin_fxp[] = { -10930188, -8094251, -4313003, -1477066, 2304182, 4194807, 413558, -6203627, -8094251, -16602062, -22273936, -19437998, -21328624, -24164560, -27000496, -23219248, -19437998, -18492686, 9866681, 12702617, 7976056, 3249495, -1477066, 413558, 5140119, 5140119, 4194807, 4194807, 4194807, -1477066, -3367690, -6203627, -6203627, -6203627, -7148939, -6203627, -7148939, -6203627, -1477066, 2304182, 4194807, 5140119, 7976056, 8921368, 8921368, 7030744, 4194807, 1358870, 413558, 413558, 4194807, 4194807, 6085432, 7030744, 7030744, 5140119, 413558, -531753, 413558, 5140119, 7030744, 8921368, 7976056, 6085432, 5140119, 3249495, 2304182, 4194807, 4194807, 3249495, 2304182, 2304182, 6085432, 5140119, 4194807, 4194807, 3249495, 413558, 413558, 3249495, 3249495, -531753, -1477066, -1477066, -531753, -1477066, -4313003, -6203627, -8094251, -6203627, -5258315, -5258315, 1358870, 7030744, 8921368, 10811993, 9866681, 5140119, 3249495, 2304182, 1358870, 3249495, 4194807, 4194807, 3249495, 2304182, 3249495, 4194807, 5140119, 4194807, 4194807, 5140119, 5140119, 1358870, -531753, -531753, -531753, 413558, 413558, -1477066, -3367690, -5258315, -6203627, -5258315, 
-1477066, 1358870, 5140119, 5140119, 6085432, 4194807, 4194807, 2304182, 2304182, 2304182, 3249495, 4194807, 4194807, 3249495, 3249495, 3249495, 2304182, 1358870, 413558, 2304182, 4194807, 5140119, 6085432, 5140119, 4194807, -531753, -2422378, -1477066, -531753, 413558, -531753, -1477066, -3367690, -5258315, -5258315, -3367690, -531753, 2304182, 6085432, 7976056, 8921368, 6085432, 4194807, 3249495, 2304182, 2304182, 2304182, 3249495, 3249495, 2304182, -531753, 413558, 2304182, 3249495, 5140119, 5140119, 2304182, 3249495, 4194807, 5140119, 3249495, -531753, -1477066, 413558, 2304182, 3249495, 1358870, -531753, -2422378, -2422378, -3367690, -3367690, -1477066, 1358870, 3249495, 7976056, 9866681, 9866681, 7976056, 2304182, 413558, -531753, 1358870, 2304182, 413558, -2422378, -2422378, -2422378, -1477066, -2422378, -6203627, -7148939, -7148939, -4313003, -1477066, 3249495, 5140119, 6085432, 7030744, 7976056, 8921368, 8921368, 7976056, 7976056, 4194807, 3249495, 413558, 1358870, 2304182, -3367690, -9984876, -17547374, -15656750, -2422378, -22273936, -81828608, -22273936, -9039564, -531753, 1358870, 5140119, 3249495, 413558, 2304182, 413558, -4313003, -5258315, -4313003, -4313003, -7148939, -9984876, -9984876, 2845080, 7791423, 7791423, 5318293, -2101178, -6429281, -13848753, -17558448, -17558448, -15085318, -13848753, -16321882, -16940122, -8902410, -3337827, 8409663, 31286404, 28194950, 9027988, 9027988, 7173098, 5318293, -1482938, -5192632, -11375540, -13848753, -14466993, -12612104, -10757299, -4574392, -2101178, 4081645, 9646312, 11501117, 13974330, 18920590, 14592571, 9027988, 5936534, 3463404, 1608515, -1482938, -5192632, -13230429, -15085318, -13848753, -10757299, -4574392, -1482938, 4699969, 9027988, 11501117, 13356006, 12737766, 14592571, 12119441, 4081645, 2226840, 1608515, 7173098, 4699969, -2101178, -5192632, -6429281, -7665845, -10138975, -8284086, -5192632, -246373, 2226840, 9027988, 10264552, 15210895, 12119441, 371950, -864613, 2845080, -246373, 
-2101178, -5192632, -4574392, -3956067, -2719502, -2101178, -246373, 4081645, 7173098, 8409663, 11501117, 371950, -5810956, -2101178, 4699969, 2226840, 371950, 2845080, 2226840, -2719502, -2719502, -2101178, -1482938, 3463404, 6554858, 7791423, 12119441, 9646312, -864613, -2719502, 1608515, 2226840, -246373, -4574392, -6429281, -6429281, -6429281, -5810956, -4574392, 371950, 1608515, 5318293, 5936534, 9027988, 7173098, 1608515, 990191, 2845080, 5318293, 4699969, 990191, -1482938, -3956067, -3956067, -5192632, -5810956, -3956067, -1482938, 371950, 8409663, 10264552, 12737766, 12737766, 9027988, -2719502, -3956067, -246373, -2719502, -4574392, -8284086, -9520651, -8284086, -8284086, -5810956, -3337827, 2226840, 5936534, 5936534, 7791423, 13356006, 3463404, 990191, 2845080, 3463404, 4699969, 371950, -1482938, -3337827, -5192632, -8902410, -8902410, -6429281, -4574392, 990191, 7173098, 9027988, 12737766, 16447460, 12737766, 5318293, -4574392, 990191, 990191, -3337827, -5192632, -7665845, -8284086, -8902410, -8284086, -3956067, -1482938, 371950, 4699969, 5936534, 7791423, 10882877, 14592571, 8409663, -5810956, -1482938, 990191, 2845080, 371950, -4574392, -7047521, -8902410, -10757299, -15085318, -13230429, -10757299, -4574392, 371950, 5318293, 6554858, 12119441, 13974330, 18302348, 17065784, 12119441, 2845080, 4081645, 5318293, 3463404, -2719502, -3956067, -3337827, -7047521, -8902410, -11375540, -11993864, -12612104, -13230429, -11993864, -10757299, -10138975, -7047521, -5810956, -5810956, -5810956, -1482938, 4081645, 12737766, 8409663, 4699969, -246373, 2845080, 2845080, 2845080, -864613, -4574392, -10138975, -12612104, -11993864, -11993864, -11375540, -8902410, -7047521, 371950, 9264966, 10398686, 5863637, 3596112, -938936, -2072657, -5473986, -6607706, -8875231, -10009035, -7741511, -2072657, -2072657, 4729916, 8131161, 8131161, 2462392, 13800015, 18335064, 5863637, -4340182, -7741511, -11142756, -10009035, -5473986, -2072657, -2072657, -3206461, -5473986, 
-11142756, -11142756, -10009035, -11142756, -13410280, -17945330, -19079134, -17945330, -11142756, -7741511, -11142756, -6607706, -3206461, -3206461, -2072657, -938936, 194867, 1328587, 6997441, 10398686, 16067540, 20602590, 24003834, 28538968, 28538968, 21736310, 20602590, 13800015, 10398686, 3596112, 194867, -938936, -4340182, -3206461, -2072657, -4340182, -7741511, -7741511, -5473986, -938936, 1328587, 5863637, 5863637, -938936, -938936, 5863637, 5863637, 1328587, 2462392, 1328587, 1328587, 2462392, 3596112, 2462392, 1328587, 194867, 1328587, 2462392, 1328587, 4729916, 5863637, 4729916, 4729916, 3596112, 194867, -938936, 1328587, 1328587, -2072657, -4340182, -7741511, -8875231, -6607706, -5473986, -4340182, -4340182, -3206461, 2462392, 2462392, 3596112, 2462392, 1328587, 194867, 1328587, 1328587, 194867, -4340182, -4340182, -938936, 194867, 1328587, 1328587, 1328587, 9264966, 6997441, 3596112, 2462392, 194867, 194867, -2072657, -3206461, -3206461, -5473986, -6607706, -10009035, -7741511, -5473986, -4340182, -2072657, -2072657, -2072657, -938936, 2462392, 9264966, 6997441, 5863637, 4729916, 3596112, 2462392, 2462392, 194867, 194867, -2072657, -938936, 2462392, 3596112, 4729916, 3596112, 5863637, 10398686, 8131161, 5863637, 4729916, 2462392, -2072657, -3206461, -3206461, -3206461, -7741511, -8875231, -11142756, -11142756, -7741511, -4340182, -3206461, -938936, 194867, 194867, 2462392, 6997441, 3596112, 2462392, 1328587, 1328587, 1328587, 1328587, 1328587, 194867, 194867, 2462392, 3596112, 4729916, 4729916, 4729916, 4729916, 9264966, 11532491, 4729916, 3596112, 3596112, 194867, -2072657, -4340182, -5473986, -5473986, -7741511, -12276560, -12276560, -11142756, -7741511, -6607706, -6607706, -5473986, -2072657, -938936, 194867, -4340182, -4340182, 2462392, 2462392, 3596112, 2462392, -2072657, -2072657, -2072657, -2072657, -3206461, -3206461, -4340182, -3206461, -2072657, 1328587, 5863637, 8131161, 9264966, 6997441, 6997441, 6997441, 10398686, 18335064, 60284312, 
14933736, 2462392, -12276560, -16811610, -13410280, -13410280, -8875231, -7741511, -11142756, -10009035, -8875231, -8875231, -11142756, -10009035, -7741511, -6607706,}; -int32_t xout_fxp[] = { 3292444, 4064364, 6155812, 6195155, 9257332, 12694145, 14824432, 15372208, 11068433, 6837806, 240920, 803041, -2612380, -10057857, -9889917, -13348624, -14127926, -13259369, -13576039, -6042062, -3006561, 2408956, 8713583, 10121694, 13602547, 8648571, 5610217, 6115295, 1482183, -1582259, -2944820, -3424565, -4220057, -5066719, -4297735, -3545696, -1647522, -1001515, -697596, 788613, -257781, 459192, 79524, -1251244, -2491332, -3870839, -3522208, -3060919, -2428502, -2139598, -3479343, -4663982, -5054556, -3108734, -570760, 899091, 2730324, 2408872, 2590486, 964186, 1625544, 3065952, 4113102, 3292360, -950429, -1343771, -2129699, -2646522, -3432199, -5790069, -4748120, -2286650, -269945, 2523880, 3403510, 5818674, 6790913, 5984852, 6878491, 6048857, 4168970, 1402491, -873086, -1607760, -4148837, -4068558, -2950441, -742727, 1696260, 1874098, 141264, -1049750, -3662718, -4616167, -3054376, -765963, -604063, -1357109, -461708, 856644, -1036999, -3103868, -2822263, -1979292, -56371, 994301, 582337, 1758839, 1761691, 2003367, 3093970, 5551916, 6018826, 3588982, 1830981, 592067, -281521, -2410131, -3465669, -2864625, -1194873, 594584, 105864, -475550, -1600378, -2847429, -3294038, -1512633, 631578, 2817817, 3283385, 2723865, 1344358, -276404, -2165770, -5381711, -6709628, -7850395, -5596711, -1719664, 1470606, 4100603, 3967476, 3379350, 2990622, 1589557, 2437310, 5120154, 3907497, 2305860, -107709, -1281779, -1471194, -4927552, -4825463, -3991299, -1795162, -552725, -862097, -753297, -1072651, -1719916, -2231034, -264073, 2753392, 4566171, 4044819, 2930057, 178341, -1699783, -4156723, -5665162, -5280628, -5734452, -2975439, -702881, 1238410, 2243281, 2437561, 2359967, 1678392, 1215844, 3515078, 4544360, 4396050, 2499721, -604734, -1225324, -3896760, -4890978, -4823533, -5828237, 
-3901373, -1949009, 617904, 1206701, 502645, -1708088, -3285650, -4047503, -655066, 3582942, 6007082, 6838141, 6924125, 8160438, 6497228, 3048084, 415152, -2191020, -3852803, -5099267, -5793592, -2679489, -1172391, 835673, 122389, -1968722, -3678320, -5350757, -5294134, -4981239, -4290102, -3705164, -3026526, 959237, 4386235, 4781087, 6078385, 12795647, 16480595, 14613458, 3337407, 7141390, 3990880, -4480942, 1317263, 3170306, 5772620, 2264756, -3056053, -83131, -15890456, -11019024, -3130209, -6190121, 1903626, -1163416, -361045, 7696128, 2425398, 69122, -756149, -1851114, -3297897, -4653916,}; +// float xout[] = { 0.39249f, 0.48451f, 0.73383f, 0.73852f, 1.10356f, 1.51326f, 1.76721f, 1.83251f, 1.31946f, 0.81513f, 0.02872f, 0.09573f, -0.31142f, -1.19899f, -1.17897f, -1.59128f, -1.68418f, -1.58064f, -1.61839f, -0.72027f, -0.35841f, 0.28717f, 1.03874f, 1.20660f, 1.62155f, 1.03099f, 0.66879f, 0.72900f, 0.17669f, -0.18862f, -0.35105f, -0.40824f, -0.50307f, -0.60400f, -0.51233f, -0.42268f, -0.19640f, -0.11939f, -0.08316f, 0.09401f, -0.03073f, 0.05474f, 0.00948f, -0.14916f, -0.29699f, -0.46144f, -0.41988f, -0.36489f, -0.28950f, -0.25506f, -0.41477f, -0.55599f, -0.60255f, -0.37059f, -0.06804f, 0.10718f, 0.32548f, 0.28716f, 0.30881f, 0.11494f, 0.19378f, 0.36549f, 0.49032f, 0.39248f, -0.11330f, -0.16019f, -0.25388f, -0.31549f, -0.40915f, -0.69023f, -0.56602f, -0.27259f, -0.03218f, 0.30087f, 0.40573f, 0.69364f, 0.80954f, 0.71345f, 0.81998f, 0.72108f, 0.49698f, 0.16719f, -0.10408f, -0.19166f, -0.49458f, -0.48501f, -0.35172f, -0.08854f, 0.20221f, 0.22341f, 0.01684f, -0.12514f, -0.43663f, -0.55029f, -0.36411f, -0.09131f, -0.07201f, -0.16178f, -0.05504f, 0.10212f, -0.12362f, -0.37001f, -0.33644f, -0.23595f, -0.00672f, 0.11853f, 0.06942f, 0.20967f, 0.21001f, 0.23882f, 0.36883f, 0.66184f, 0.71750f, 0.42784f, 0.21827f, 0.07058f, -0.03356f, -0.28731f, -0.41314f, -0.34149f, -0.14244f, 0.07088f, 0.01262f, -0.05669f, -0.19078f, -0.33944f, -0.39268f, -0.18032f, 0.07529f, 0.33591f, 
0.39141f, 0.32471f, 0.16026f, -0.03295f, -0.25818f, -0.64155f, -0.79985f, -0.93584f, -0.66718f, -0.20500f, 0.17531f, 0.48883f, 0.47296f, 0.40285f, 0.35651f, 0.18949f, 0.29055f, 0.61037f, 0.46581f, 0.27488f, -0.01284f, -0.15280f, -0.17538f, -0.58741f, -0.57524f, -0.47580f, -0.21400f, -0.06589f, -0.10277f, -0.08980f, -0.12787f, -0.20503f, -0.26596f, -0.03148f, 0.32823f, 0.54433f, 0.48218f, 0.34929f, 0.02126f, -0.20263f, -0.49552f, -0.67534f, -0.62950f, -0.68360f, -0.35470f, -0.08379f, 0.14763f, 0.26742f, 0.29058f, 0.28133f, 0.20008f, 0.14494f, 0.41903f, 0.54173f, 0.52405f, 0.29799f, -0.07209f, -0.14607f, -0.46453f, -0.58305f, -0.57501f, -0.69478f, -0.46508f, -0.23234f, 0.07366f, 0.14385f, 0.05992f, -0.20362f, -0.39168f, -0.48250f, -0.07809f, 0.42712f, 0.71610f, 0.81517f, 0.82542f, 0.97280f, 0.77453f, 0.36336f, 0.04949f, -0.26119f, -0.45929f, -0.60788f, -0.69065f, -0.31942f, -0.13976f, 0.09962f, 0.01459f, -0.23469f, -0.43849f, -0.63786f, -0.63111f, -0.59381f, -0.51142f, -0.44169f, -0.36079f, 0.11435f, 0.52288f, 0.56995f, 0.72460f, 1.52536f, 1.96464f, 1.74206f, 0.39785f, 0.85132f, 0.47575f, -0.53417f, 0.15703f, 0.37793f, 0.68815f, 0.26998f, -0.36431f, -0.00991f, -1.89429f, -1.31357f, -0.37315f, -0.73792f, 0.22693f, -0.13869f, -0.04304f, 0.91745f, 0.28913f, 0.00824f, -0.09014f, -0.22067f, -0.39314f, -0.55479f,}; +// float ppg[] = { 0.46327f, 0.32949f, 0.07201f, -0.29688f, -0.62103f, -0.64516f, -0.27495f, 0.24132f, 0.54530f, 0.49617f, 0.18035f, -0.26311f, -0.72587f, -1.14389f, -1.46848f, -1.61454f, -1.46102f, -1.00265f, -0.46707f, -0.07844f, 0.23123f, 0.65189f, 1.15061f, 1.54846f, 1.77962f, 1.84234f, 1.68487f, 1.28177f, 0.72119f, 0.13912f, -0.37057f, -0.75262f, -0.92106f, -0.80702f, -0.53024f, -0.32013f, -0.19117f, 0.04964f, 0.44617f, 0.83831f, 1.10851f, 1.21904f, 1.13570f, 0.90103f, 0.59486f, 0.20316f, -0.28679f, -0.74780f, -1.03247f, -1.10836f, -1.04695f, -0.96712f, -0.96142f, -0.95659f, -0.78377f, -0.44865f, -0.14380f, 0.03078f, 0.13561f, 0.21939f, 0.30010f, 0.43695f, 
0.63083f, 0.75716f, 0.72338f, 0.56635f, 0.33563f, 0.00841f, -0.39163f, -0.67148f, -0.58419f, -0.11046f, 0.53038f, 1.13658f, 1.57872f, 1.68092f, 1.36379f, 0.82866f, 0.33212f, -0.07932f, -0.46620f, -0.83991f, -1.20091f, -1.53734f, -1.77377f, -1.83781f, -1.73824f, -1.49787f, -1.13511f, -0.75526f, -0.49339f, -0.37189f, -0.32057f, -0.27758f, -0.18547f, -0.01659f, 0.15754f, 0.23167f, 0.19878f, 0.11763f, 0.00490f, -0.15038f, -0.30127f, -0.28197f, 0.12903f, 0.97034f, 1.93402f, 2.56521f, 2.62969f, 2.21650f, 1.58793f, 0.99797f, 0.55670f, 0.18079f, -0.25433f, -0.73201f, -1.14652f, -1.47374f, -1.73648f, -1.85492f, -1.69262f, -1.25530f, -0.75262f, -0.41400f, -0.28372f, -0.25390f, -0.23898f, -0.23591f, -0.24337f, -0.23372f, -0.20477f, -0.17889f, -0.16924f, -0.19249f, -0.29381f, -0.44251f, -0.44909f, -0.11573f, 0.54179f, 1.30633f, 1.97086f, 2.42310f, 2.54723f, 2.23185f, 1.54012f, 0.72251f, -0.00212f, -0.55436f, -0.94782f, -1.27811f, -1.66060f, -2.05800f, -2.24311f, -2.00405f, -1.36847f, -0.59866f, 0.01893f, 0.34660f, 0.38344f, 0.21808f, 0.01586f, -0.09994f, -0.15038f, -0.19249f, -0.21223f, -0.21924f, -0.30873f, -0.49646f, -0.61182f, -0.48769f, -0.15257f, 0.26633f, 0.70189f, 1.12386f, 1.46906f, 1.69321f, 1.80988f, 1.82217f, 1.65329f, 1.23659f, 0.63917f, -0.00124f, -0.61752f, -1.16319f, -1.48558f, -1.43953f, -1.09388f, -0.68244f, -0.39163f, -0.23328f, -0.11704f, 0.00753f, 0.09087f, 0.11280f, 0.12552f, 0.13561f, 0.09613f, -0.01616f, -0.26618f, -0.68815f, -0.99387f, -0.72192f, 0.13210f, 1.01376f, 1.45108f, 1.42871f, 1.11026f, 0.58521f, -0.03677f, -0.55261f, -0.79868f, -0.76710f, -0.55568f, -0.30302f, -0.15871f, -0.13897f, -0.00300f, 0.50143f, 1.26905f, 1.93840f, 2.29326f, 2.33712f, 2.12044f, 1.71207f, 1.24931f, 0.88787f, 0.68522f, 0.52337f, 0.20053f, -0.38417f, -1.12722f, -1.75578f, -2.00668f, -1.83562f, -1.42110f, -0.94255f, -0.41356f, 0.19527f, 0.57732f, 0.19132f, -0.83114f, -1.50488f, -1.29566f, -0.74561f, -0.39163f, -0.17714f, -0.09511f, -0.24337f, -0.29601f, 0.11807f, 0.73567f, 
1.00543f, 0.84269f, 0.53345f, 0.17465f, -0.34645f,}; +// float ppgf[] = { 0.07078f, -0.15502f, -0.66182f, -1.03541f, -1.72459f, -2.15842f, -2.04216f, -1.59119f, -0.77417f, -0.31896f, 0.15163f, -0.35884f, -0.41444f, 0.05510f, -0.28951f, -0.02326f, 0.22316f, 0.57799f, 1.15132f, 0.64183f, 0.58965f, 0.36472f, 0.11188f, 0.34186f, 0.15807f, 0.81135f, 1.01608f, 0.55277f, 0.54450f, 0.32774f, -0.01952f, -0.34439f, -0.41799f, -0.20301f, -0.01791f, 0.10255f, 0.00523f, 0.16903f, 0.52932f, 0.74430f, 1.13924f, 1.16430f, 1.12622f, 1.05019f, 0.89185f, 0.66460f, 0.13308f, -0.38291f, -0.74298f, -0.85330f, -0.63218f, -0.41113f, -0.35886f, -0.58600f, -0.71572f, -0.55583f, -0.46928f, -0.25638f, -0.17320f, 0.10445f, 0.10632f, 0.07146f, 0.14051f, 0.36468f, 0.83668f, 0.72655f, 0.58951f, 0.32389f, 0.01752f, 0.01875f, -0.01817f, 0.16213f, 0.56257f, 0.83571f, 1.17299f, 0.98729f, 0.55425f, 0.11521f, -0.48786f, -0.80040f, -0.96317f, -1.00710f, -1.09683f, -1.34569f, -1.27919f, -1.35280f, -1.38652f, -1.40933f, -1.33732f, -0.97867f, -0.51023f, -0.24675f, 0.11606f, 0.27271f, 0.17864f, 0.07471f, 0.22955f, 0.39345f, 0.25381f, 0.01551f, 0.12852f, 0.21963f, 0.03517f, -0.04602f, 0.13576f, 0.85180f, 1.86460f, 2.35555f, 2.41968f, 1.97768f, 1.21910f, 0.33612f, -0.16080f, -0.24705f, -0.47261f, -0.80259f, -1.11296f, -1.18643f, -1.32334f, -1.51343f, -1.55018f, -1.32618f, -0.76524f, -0.35730f, -0.09294f, 0.08555f, 0.15369f, -0.05559f, -0.31866f, -0.56963f, -0.59618f, -0.50360f, -0.32950f, -0.15953f, -0.03563f, 0.19904f, 0.35076f, 0.82012f, 1.20897f, 1.51133f, 1.79555f, 1.93426f, 2.07427f, 1.82900f, 1.18362f, 0.53301f, -0.29267f, -1.16473f, -1.41363f, -1.55299f, -1.64776f, -1.90520f, -2.06773f, -1.41664f, -0.79323f, -0.12286f, 0.23294f, 0.41249f, 0.48621f, 0.30788f, 0.14374f, 0.10509f, 0.11559f, -0.16101f, -0.54046f, -0.76358f, -0.79090f, -0.84575f, -0.63308f, -0.28506f, 0.34295f, 0.94167f, 1.33139f, 1.80746f, 1.82377f, 1.77700f, 1.66225f, 1.55474f, 1.36271f, 0.95526f, 0.43909f, -0.14618f, -1.03655f, 
-1.70492f, -2.00963f, -1.73752f, -1.02179f, -0.53637f, 0.07291f, 0.34977f, 0.45797f, 0.70231f, 0.55596f, 0.34514f, 0.05186f, -0.00824f, 0.03621f, 0.18746f, 0.12550f, -0.20564f, -0.91578f, -1.14904f, -0.58400f, 0.19859f, 0.62565f, 0.45591f, 0.33573f, 0.22186f, -0.08626f, -0.29142f, -0.33939f, -0.15922f, 0.13497f, 0.01640f, -0.01895f, -0.23859f, -0.01758f, 0.73613f, 1.70753f, 2.57627f, 2.92437f, 2.93094f, 2.63186f, 2.15376f, 1.61009f, 0.77352f, 0.16235f, -0.04659f, -0.52407f, -1.90953f, -3.09186f, -3.49785f, -2.40454f, -2.68693f, -1.89685f, -0.40838f, -0.57059f, -0.18267f, -0.11083f, -0.07867f, -0.46683f, -1.49498f, 0.59864f, 0.56797f, -0.01848f, 0.56078f, -0.32204f, -0.10468f, -0.25296f, -0.79938f, 0.44654f, 0.99719f, 0.93283f, 0.75412f, 0.56779f, 0.20835f,}; +// int32_t xin_fxp[] = { -10930188, -8094251, -4313003, -1477066, 2304182, 4194807, 413558, -6203627, -8094251, -16602062, -22273936, -19437998, -21328624, -24164560, -27000496, -23219248, -19437998, -18492686, 9866681, 12702617, 7976056, 3249495, -1477066, 413558, 5140119, 5140119, 4194807, 4194807, 4194807, -1477066, -3367690, -6203627, -6203627, -6203627, -7148939, -6203627, -7148939, -6203627, -1477066, 2304182, 4194807, 5140119, 7976056, 8921368, 8921368, 7030744, 4194807, 1358870, 413558, 413558, 4194807, 4194807, 6085432, 7030744, 7030744, 5140119, 413558, -531753, 413558, 5140119, 7030744, 8921368, 7976056, 6085432, 5140119, 3249495, 2304182, 4194807, 4194807, 3249495, 2304182, 2304182, 6085432, 5140119, 4194807, 4194807, 3249495, 413558, 413558, 3249495, 3249495, -531753, -1477066, -1477066, -531753, -1477066, -4313003, -6203627, -8094251, -6203627, -5258315, -5258315, 1358870, 7030744, 8921368, 10811993, 9866681, 5140119, 3249495, 2304182, 1358870, 3249495, 4194807, 4194807, 3249495, 2304182, 3249495, 4194807, 5140119, 4194807, 4194807, 5140119, 5140119, 1358870, -531753, -531753, -531753, 413558, 413558, -1477066, -3367690, -5258315, -6203627, -5258315, -1477066, 1358870, 5140119, 5140119, 6085432, 
4194807, 4194807, 2304182, 2304182, 2304182, 3249495, 4194807, 4194807, 3249495, 3249495, 3249495, 2304182, 1358870, 413558, 2304182, 4194807, 5140119, 6085432, 5140119, 4194807, -531753, -2422378, -1477066, -531753, 413558, -531753, -1477066, -3367690, -5258315, -5258315, -3367690, -531753, 2304182, 6085432, 7976056, 8921368, 6085432, 4194807, 3249495, 2304182, 2304182, 2304182, 3249495, 3249495, 2304182, -531753, 413558, 2304182, 3249495, 5140119, 5140119, 2304182, 3249495, 4194807, 5140119, 3249495, -531753, -1477066, 413558, 2304182, 3249495, 1358870, -531753, -2422378, -2422378, -3367690, -3367690, -1477066, 1358870, 3249495, 7976056, 9866681, 9866681, 7976056, 2304182, 413558, -531753, 1358870, 2304182, 413558, -2422378, -2422378, -2422378, -1477066, -2422378, -6203627, -7148939, -7148939, -4313003, -1477066, 3249495, 5140119, 6085432, 7030744, 7976056, 8921368, 8921368, 7976056, 7976056, 4194807, 3249495, 413558, 1358870, 2304182, -3367690, -9984876, -17547374, -15656750, -2422378, -22273936, -81828608, -22273936, -9039564, -531753, 1358870, 5140119, 3249495, 413558, 2304182, 413558, -4313003, -5258315, -4313003, -4313003, -7148939, -9984876, -9984876, 2845080, 7791423, 7791423, 5318293, -2101178, -6429281, -13848753, -17558448, -17558448, -15085318, -13848753, -16321882, -16940122, -8902410, -3337827, 8409663, 31286404, 28194950, 9027988, 9027988, 7173098, 5318293, -1482938, -5192632, -11375540, -13848753, -14466993, -12612104, -10757299, -4574392, -2101178, 4081645, 9646312, 11501117, 13974330, 18920590, 14592571, 9027988, 5936534, 3463404, 1608515, -1482938, -5192632, -13230429, -15085318, -13848753, -10757299, -4574392, -1482938, 4699969, 9027988, 11501117, 13356006, 12737766, 14592571, 12119441, 4081645, 2226840, 1608515, 7173098, 4699969, -2101178, -5192632, -6429281, -7665845, -10138975, -8284086, -5192632, -246373, 2226840, 9027988, 10264552, 15210895, 12119441, 371950, -864613, 2845080, -246373, -2101178, -5192632, -4574392, -3956067, -2719502, 
-2101178, -246373, 4081645, 7173098, 8409663, 11501117, 371950, -5810956, -2101178, 4699969, 2226840, 371950, 2845080, 2226840, -2719502, -2719502, -2101178, -1482938, 3463404, 6554858, 7791423, 12119441, 9646312, -864613, -2719502, 1608515, 2226840, -246373, -4574392, -6429281, -6429281, -6429281, -5810956, -4574392, 371950, 1608515, 5318293, 5936534, 9027988, 7173098, 1608515, 990191, 2845080, 5318293, 4699969, 990191, -1482938, -3956067, -3956067, -5192632, -5810956, -3956067, -1482938, 371950, 8409663, 10264552, 12737766, 12737766, 9027988, -2719502, -3956067, -246373, -2719502, -4574392, -8284086, -9520651, -8284086, -8284086, -5810956, -3337827, 2226840, 5936534, 5936534, 7791423, 13356006, 3463404, 990191, 2845080, 3463404, 4699969, 371950, -1482938, -3337827, -5192632, -8902410, -8902410, -6429281, -4574392, 990191, 7173098, 9027988, 12737766, 16447460, 12737766, 5318293, -4574392, 990191, 990191, -3337827, -5192632, -7665845, -8284086, -8902410, -8284086, -3956067, -1482938, 371950, 4699969, 5936534, 7791423, 10882877, 14592571, 8409663, -5810956, -1482938, 990191, 2845080, 371950, -4574392, -7047521, -8902410, -10757299, -15085318, -13230429, -10757299, -4574392, 371950, 5318293, 6554858, 12119441, 13974330, 18302348, 17065784, 12119441, 2845080, 4081645, 5318293, 3463404, -2719502, -3956067, -3337827, -7047521, -8902410, -11375540, -11993864, -12612104, -13230429, -11993864, -10757299, -10138975, -7047521, -5810956, -5810956, -5810956, -1482938, 4081645, 12737766, 8409663, 4699969, -246373, 2845080, 2845080, 2845080, -864613, -4574392, -10138975, -12612104, -11993864, -11993864, -11375540, -8902410, -7047521, 371950, 9264966, 10398686, 5863637, 3596112, -938936, -2072657, -5473986, -6607706, -8875231, -10009035, -7741511, -2072657, -2072657, 4729916, 8131161, 8131161, 2462392, 13800015, 18335064, 5863637, -4340182, -7741511, -11142756, -10009035, -5473986, -2072657, -2072657, -3206461, -5473986, -11142756, -11142756, -10009035, -11142756, -13410280, 
-17945330, -19079134, -17945330, -11142756, -7741511, -11142756, -6607706, -3206461, -3206461, -2072657, -938936, 194867, 1328587, 6997441, 10398686, 16067540, 20602590, 24003834, 28538968, 28538968, 21736310, 20602590, 13800015, 10398686, 3596112, 194867, -938936, -4340182, -3206461, -2072657, -4340182, -7741511, -7741511, -5473986, -938936, 1328587, 5863637, 5863637, -938936, -938936, 5863637, 5863637, 1328587, 2462392, 1328587, 1328587, 2462392, 3596112, 2462392, 1328587, 194867, 1328587, 2462392, 1328587, 4729916, 5863637, 4729916, 4729916, 3596112, 194867, -938936, 1328587, 1328587, -2072657, -4340182, -7741511, -8875231, -6607706, -5473986, -4340182, -4340182, -3206461, 2462392, 2462392, 3596112, 2462392, 1328587, 194867, 1328587, 1328587, 194867, -4340182, -4340182, -938936, 194867, 1328587, 1328587, 1328587, 9264966, 6997441, 3596112, 2462392, 194867, 194867, -2072657, -3206461, -3206461, -5473986, -6607706, -10009035, -7741511, -5473986, -4340182, -2072657, -2072657, -2072657, -938936, 2462392, 9264966, 6997441, 5863637, 4729916, 3596112, 2462392, 2462392, 194867, 194867, -2072657, -938936, 2462392, 3596112, 4729916, 3596112, 5863637, 10398686, 8131161, 5863637, 4729916, 2462392, -2072657, -3206461, -3206461, -3206461, -7741511, -8875231, -11142756, -11142756, -7741511, -4340182, -3206461, -938936, 194867, 194867, 2462392, 6997441, 3596112, 2462392, 1328587, 1328587, 1328587, 1328587, 1328587, 194867, 194867, 2462392, 3596112, 4729916, 4729916, 4729916, 4729916, 9264966, 11532491, 4729916, 3596112, 3596112, 194867, -2072657, -4340182, -5473986, -5473986, -7741511, -12276560, -12276560, -11142756, -7741511, -6607706, -6607706, -5473986, -2072657, -938936, 194867, -4340182, -4340182, 2462392, 2462392, 3596112, 2462392, -2072657, -2072657, -2072657, -2072657, -3206461, -3206461, -4340182, -3206461, -2072657, 1328587, 5863637, 8131161, 9264966, 6997441, 6997441, 6997441, 10398686, 18335064, 60284312, 14933736, 2462392, -12276560, -16811610, -13410280, 
-13410280, -8875231, -7741511, -11142756, -10009035, -8875231, -8875231, -11142756, -10009035, -7741511, -6607706,}; +// int32_t xout_fxp[] = { 3292444, 4064364, 6155812, 6195155, 9257332, 12694145, 14824432, 15372208, 11068433, 6837806, 240920, 803041, -2612380, -10057857, -9889917, -13348624, -14127926, -13259369, -13576039, -6042062, -3006561, 2408956, 8713583, 10121694, 13602547, 8648571, 5610217, 6115295, 1482183, -1582259, -2944820, -3424565, -4220057, -5066719, -4297735, -3545696, -1647522, -1001515, -697596, 788613, -257781, 459192, 79524, -1251244, -2491332, -3870839, -3522208, -3060919, -2428502, -2139598, -3479343, -4663982, -5054556, -3108734, -570760, 899091, 2730324, 2408872, 2590486, 964186, 1625544, 3065952, 4113102, 3292360, -950429, -1343771, -2129699, -2646522, -3432199, -5790069, -4748120, -2286650, -269945, 2523880, 3403510, 5818674, 6790913, 5984852, 6878491, 6048857, 4168970, 1402491, -873086, -1607760, -4148837, -4068558, -2950441, -742727, 1696260, 1874098, 141264, -1049750, -3662718, -4616167, -3054376, -765963, -604063, -1357109, -461708, 856644, -1036999, -3103868, -2822263, -1979292, -56371, 994301, 582337, 1758839, 1761691, 2003367, 3093970, 5551916, 6018826, 3588982, 1830981, 592067, -281521, -2410131, -3465669, -2864625, -1194873, 594584, 105864, -475550, -1600378, -2847429, -3294038, -1512633, 631578, 2817817, 3283385, 2723865, 1344358, -276404, -2165770, -5381711, -6709628, -7850395, -5596711, -1719664, 1470606, 4100603, 3967476, 3379350, 2990622, 1589557, 2437310, 5120154, 3907497, 2305860, -107709, -1281779, -1471194, -4927552, -4825463, -3991299, -1795162, -552725, -862097, -753297, -1072651, -1719916, -2231034, -264073, 2753392, 4566171, 4044819, 2930057, 178341, -1699783, -4156723, -5665162, -5280628, -5734452, -2975439, -702881, 1238410, 2243281, 2437561, 2359967, 1678392, 1215844, 3515078, 4544360, 4396050, 2499721, -604734, -1225324, -3896760, -4890978, -4823533, -5828237, -3901373, -1949009, 617904, 1206701, 502645, 
-1708088, -3285650, -4047503, -655066, 3582942, 6007082, 6838141, 6924125, 8160438, 6497228, 3048084, 415152, -2191020, -3852803, -5099267, -5793592, -2679489, -1172391, 835673, 122389, -1968722, -3678320, -5350757, -5294134, -4981239, -4290102, -3705164, -3026526, 959237, 4386235, 4781087, 6078385, 12795647, 16480595, 14613458, 3337407, 7141390, 3990880, -4480942, 1317263, 3170306, 5772620, 2264756, -3056053, -83131, -15890456, -11019024, -3130209, -6190121, 1903626, -1163416, -361045, 7696128, 2425398, 69122, -756149, -1851114, -3297897, -4653916,}; float result[256]; float layer1Output[3*256]; -fxp32* weights1Fxp[63]; -fxp32* weights2Fxp[3]; +// fxp32* weights1Fxp[63]; +// fxp32* weights2Fxp[3]; -fxp32 layer1OutputFxp[3*256]; +// fxp32 layer1OutputFxp[3*256]; -fxp32 resultFxp[256]; +// fxp32 resultFxp[256]; #endif // clang-format on @@ -58,46 +59,54 @@ int main() { layer1.dim = (Dim2D){3u, 21u}; layer1.padding = SAME; layer1.weightsFloat = weights1; - layer1.weightsFxp = weights1Fxp; + // layer1.weightsFxp = weights1Fxp; Conv2DLayer layer2; layer2.dim = (Dim2D){3u, 1u}; layer2.padding = VALID; layer2.weightsFloat = weights2; - layer2.weightsFxp = weights2Fxp; + // layer2.weightsFxp = weights2Fxp; Cnn cnn; cnn.layer1 = &layer1; cnn.layer2 = &layer2; cnn.inputDim = (Dim2D){3u, 256u}; cnn.outputDim = (Dim2D){1u, 256u}; - cnn.layer1Output = layer1Output; - cnn.layer1OutputFxp = layer1OutputFxp; + // cnn.layer1Output = layer1Output; + // cnn.layer1OutputFxp = layer1OutputFxp; // Forward pass printf("Forward pass\n"); - Cnn_forwardFloat(&cnn, xin, result); + start_time(); + Conv2DLayer_forwardFloat(&layer1, cnn.inputDim, xin, layer1Output); + stop_time(); + printf("ticks: %d\n", get_time()); - printf("Comparing results\n"); - compareVectorsFloat(result, xout, 256, COMP_PREC); - printf("Test passed\n"); + start_time(); + Conv2DLayer_forwardFloat(&layer2, cnn.inputDim, layer1Output, result); + stop_time(); + printf("ticks: %d\n", get_time()); - printf("Test predict 
method\n"); - Cnn_predictFloat(&cnn, xin, ppg, result); + // printf("Comparing results\n"); + // compareVectorsFloat(result, xout, 256, COMP_PREC); + // printf("Test passed\n"); - compareVectorsFloat(result, ppgf, 256, COMP_PREC); - printf("Test passed\n"); + // printf("Test predict method\n"); + // Cnn_predictFloat(&cnn, xin, ppg, result); - // Freeze model - printf("Freezing model\n"); - Cnn_freezeModel(&cnn); + // compareVectorsFloat(result, ppgf, 256, COMP_PREC); + // printf("Test passed\n"); - printf("Running cnn test with fixed point\n"); - Cnn_forwardFxp(&cnn, xin_fxp, resultFxp); + // // Freeze model + // printf("Freezing model\n"); + // Cnn_freezeModel(&cnn); - printf("Comparing results\n"); - compareVectorsFxp(resultFxp, xout_fxp, 256, COMP_PREC_I32); - printf("Test passed\n"); + // printf("Running cnn test with fixed point\n"); + // Cnn_forwardFxp(&cnn, xin_fxp, resultFxp); + + // printf("Comparing results\n"); + // compareVectorsFxp(resultFxp, xout_fxp, 256, COMP_PREC_I32); + // printf("Test passed\n"); printf("CNN test finished\n"); From f1e74868ae4a2bb554b2aeabc220d4b101a168b0 Mon Sep 17 00:00:00 2001 From: LinusCrugnola Date: Fri, 7 Jun 2024 14:45:02 +0200 Subject: [PATCH 27/27] finish --- mcu_cfg.hjson | 4 +- sw/applications/l_train/cnn.c | 65 +++----- sw/applications/l_train/config-cnn.h | 2 +- sw/applications/l_train/core_portme.c | 106 +++++++++++++ sw/applications/l_train/core_portme.h | 93 +++++++++++ sw/applications/l_train/coremark.h | 212 ++++++++++++++++++++++++++ sw/applications/l_train/main.c | 2 +- 7 files changed, 436 insertions(+), 48 deletions(-) create mode 100644 sw/applications/l_train/core_portme.c create mode 100644 sw/applications/l_train/core_portme.h create mode 100644 sw/applications/l_train/coremark.h diff --git a/mcu_cfg.hjson b/mcu_cfg.hjson index 9a780c8c9..8174d2452 100644 --- a/mcu_cfg.hjson +++ b/mcu_cfg.hjson @@ -8,8 +8,8 @@ cpu_type: cv32e20 linker_script: { - stack_size: 0xC00, - heap_size: 0x800, + stack_size: 
0x1600, + heap_size: 0x200, } debug: { diff --git a/sw/applications/l_train/cnn.c b/sw/applications/l_train/cnn.c index 3942d87a2..46a4cb0b6 100644 --- a/sw/applications/l_train/cnn.c +++ b/sw/applications/l_train/cnn.c @@ -14,6 +14,8 @@ #include "cnn.h" +#include "coremark.h" + #include #include // clang-format on @@ -22,7 +24,7 @@ static complex_t ppgfft[256]; static complex_t outputfft[256]; -static float output[256]; +static float forwardPass[256]; static float layer1Output[3 * 256]; static complex_t gradGToW2[256]; @@ -30,12 +32,8 @@ static float gradLToM[3 * 256]; static float w1Velocity[3 * 21] = {0.0f}; static float w2Velocity[3] = {0.0f}; -// #ifndef DYN_ALLOCATION -// static fxp32 layer1OutputFxp[3 * 256]; -// #endif - -#ifdef TRACK_LOSS -static float absV[256]; +#ifndef DYN_ALLOCATION +static fxp32 layer1OutputFxp[3 * 256]; #endif #ifdef DYN_ALLOCATION @@ -69,8 +67,8 @@ void Cnn_forwardFxp(CnnHandle self, fxp32* input, fxp32* output) { Conv2DLayer_forwardFxp(self->layer2, self->inputDim, layer1Output, output); free(layer1Output); #else - // Conv2DLayer_forwardFxp(self->layer1, self->inputDim, input, layer1OutputFxp); - // Conv2DLayer_forwardFxp(self->layer2, self->inputDim, layer1OutputFxp, output); + Conv2DLayer_forwardFxp(self->layer1, self->inputDim, input, layer1OutputFxp); + Conv2DLayer_forwardFxp(self->layer2, self->inputDim, layer1OutputFxp, output); #endif } @@ -95,22 +93,13 @@ void Cnn_predictFloat(CnnHandle self, float* acc, float* ppg, float* output) { float Cnn_sampleLoss(CnnHandle self, complex_t* ypredfft, complex_t* ytruefft) { float loss = 0.0f; - // NOTE: could be optimized by reusing .r and .i for the abs -#ifdef DYN_ALLOCATION - fxpMul* absV = (fxpMul*)calloc(self->outputDim.y * self->outputDim.x, sizeof(fxpMul)); -#elif !defined(TRACK_LOSS) - fxpMul absV[self->outputDim.y * self->outputDim.x]; -#endif float ytr, ypr, yti, ypi; for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { ytr = fxp32_toFloat(ytruefft[i].r); ypr = 
fxp32_toFloat(ypredfft[i].r); yti = fxp32_toFloat(ytruefft[i].i); ypi = fxp32_toFloat(ypredfft[i].i); - absV[i] = (ytr - ypr) * (ytr - ypr) + (yti - ypi) * (yti - ypi); - } - for (int i = 0; i < self->outputDim.y * self->outputDim.x; ++i) { - loss += absV[i]; + loss += (ytr - ypr) * (ytr - ypr) + (yti - ypi) * (yti - ypi); } return loss; } @@ -169,14 +158,14 @@ void Cnn_sgdStep(CnnHandle self, float* acc, float* ppg) { // get ppg-output in time domain and store in output for (int i = 0; i < self->outputDim.y * self->outputDim.x; i++) { - output[i] = ppg[i] - output[i]; + forwardPass[i] = ppg[i] - forwardPass[i]; } // fill the values in the dL/dm matrix for (int i = 0; i < self->inputDim.x; i++) { float factor = -512 * self->layer2->weightsFloat[i]; for (int j = 0; j < self->inputDim.y; j++) { - gradLToM[i * self->inputDim.y + j] = output[j] * factor; + gradLToM[i * self->inputDim.y + j] = forwardPass[j] * factor; } } @@ -219,33 +208,21 @@ void Cnn_train(CnnHandle self, float* acc, float* ppg, int nEpochs, bool logAllL // FFT of the ppg arrToComplex(ppg, ppgfft, self->outputDim.y * self->outputDim.x, 0); fft(ppgfft, self->fftBits); - // array to track loss -#ifdef TRACK_LOSS - // float lossArr[nEpochs]; -#endif for (int i = 0; i < nEpochs; ++i) { - Cnn_forwardFloat(self, acc, output); - arrToComplex(output, outputfft, self->outputDim.y * self->outputDim.x, 0); + start_time(); + Cnn_forwardFloat(self, acc, forwardPass); + arrToComplex(forwardPass, outputfft, self->outputDim.y * self->outputDim.x, 0); fft(outputfft, self->fftBits); - if (i == 0 || i == nEpochs - 1) { - printf("Loss epoch %d: %d\n", i, (int)(Cnn_sampleLoss(self, outputfft, ppgfft))); - } -#ifdef TRACK_LOSS - lossArr[i] = Cnn_sampleLoss(self, outputfft, ppgfft); -#endif +// #ifdef TRACK_LOSS +// if (logAllLosses) +// printf("loss epoch %d: %d\n", i, (int)Cnn_sampleLoss(self, outputfft, ppgfft)); +// else if (i == 0 || i == nEpochs - 1) +// printf("loss epoch %d: %d\n", i + 1, (int)Cnn_sampleLoss(self, 
outputfft, ppgfft)); +// #endif Cnn_sgdStep(self, acc, ppg); + stop_time(); + printf("Time epoch %d: %u\n", i+1, get_time()); } -#ifdef TRACK_LOSS - if (logAllLosses) { - for (int i = 0; i < nEpochs; ++i) { - printf("Loss epoch %d: %f\n", i, lossArr[i]); - } - } - else { - printf("First loss: %f\n", lossArr[0]); - printf("Final loss: %f\n", lossArr[nEpochs - 1]); - } -#endif } void Cnn_freezeModel(CnnHandle self) { diff --git a/sw/applications/l_train/config-cnn.h b/sw/applications/l_train/config-cnn.h index 451fa3056..0b2748af1 100644 --- a/sw/applications/l_train/config-cnn.h +++ b/sw/applications/l_train/config-cnn.h @@ -21,7 +21,7 @@ * @def TRACK_LOSS * @brief define this macro to enable tracking of the loss */ -// #define TRACK_LOSS +#define TRACK_LOSS /** * @def LOG_GRADIENTS diff --git a/sw/applications/l_train/core_portme.c b/sw/applications/l_train/core_portme.c new file mode 100644 index 000000000..28279a9e0 --- /dev/null +++ b/sw/applications/l_train/core_portme.c @@ -0,0 +1,106 @@ +/* +Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Original Author: Shay Gal-on +*/ + +// Copyright 2020 OpenHW Group +// Copyright 2020 Silicon Labs, Inc. +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://solderpad.org/licenses/ +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier:Apache-2.0 WITH SHL-2.0 + +#include "csr.h" +#include "x-heep.h" + +#include "coremark.h" + +#define ITERATIONS 1 + +ee_u32 default_num_contexts = 1; + +static CORETIMETYPE start_time_val, stop_time_val; + +#if VALIDATION_RUN +volatile ee_s32 seed1_volatile = 0x3415; +volatile ee_s32 seed2_volatile = 0x3415; +volatile ee_s32 seed3_volatile = 0x66; +#endif +#if PERFORMANCE_RUN +volatile ee_s32 seed1_volatile = 0x0; +volatile ee_s32 seed2_volatile = 0x0; +volatile ee_s32 seed3_volatile = 0x66; +#endif +#if PROFILE_RUN +volatile ee_s32 seed1_volatile = 0x8; +volatile ee_s32 seed2_volatile = 0x8; +volatile ee_s32 seed3_volatile = 0x8; +#endif +volatile ee_s32 seed4_volatile = ITERATIONS; +volatile ee_s32 seed5_volatile = 0; + +void +portable_init(core_portable *p, int *argc, char *argv[]) +{ + // Don't need to do anything here atm. + (void)p; + (void)argc; + (void)argv; +} + +void +portable_fini(core_portable *p) +{ + // Don't need to do anything here atm. 
+ (void)p; +} + +void +start_time(void) +{ + // Enable mcycle counter and read value + CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + + CSR_READ(CSR_REG_MCYCLE, &start_time_val); +} + +void +stop_time(void) +{ + CSR_READ(CSR_REG_MCYCLE, &stop_time_val); +} + +CORE_TICKS +get_time(void) +{ + return (stop_time_val - start_time_val); +} + +secs_ret +time_in_secs(CORE_TICKS ticks) +{ + return (secs_ret)ticks / 1000000; // Normalized to 1 MHz clock period; the cast keeps exact integer math when secs_ret is ee_u32 +} \ No newline at end of file diff --git a/sw/applications/l_train/core_portme.h b/sw/applications/l_train/core_portme.h new file mode 100644 index 000000000..762bbdcdc --- /dev/null +++ b/sw/applications/l_train/core_portme.h @@ -0,0 +1,93 @@ +/* +Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Original Author: Shay Gal-on +*/ + +// Copyright 2020 OpenHW Group +// Copyright 2020 Silicon Labs, Inc. +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://solderpad.org/licenses/ +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier:Apache-2.0 WITH SHL-2.0 + +#include +#include + +typedef signed short ee_s16; +typedef unsigned short ee_u16; +typedef signed int ee_s32; +typedef double ee_f32; +typedef unsigned char ee_u8; +typedef unsigned int ee_u32; +typedef ee_u32 ee_ptr_int; +typedef size_t ee_size_t; + +typedef ee_u32 CORE_TICKS; + +typedef struct CORE_PORTABLE_S +{ + ee_u8 portable_id; +} core_portable; + +#ifndef MULTITHREAD +#define MULTITHREAD 1 // 1 means single-core +#define USE_PTHREAD 0 +#define USE_FORK 0 +#define USE_SOCKET 0 +#endif + +#ifndef COMPILER_VERSION +#ifdef __GNUC__ +#define COMPILER_VERSION "GCC"__VERSION__ +#else +#define COMPILER_VERSION "Undefined non-gcc compiler used" +#endif +#endif + +#ifndef COMPILER_FLAGS +#define COMPILER_FLAGS FLAGS_STR +#endif + +#ifndef MEM_LOCATION +#define MEM_LOCATION "" +#endif + +#ifndef SEED_METHOD +#define SEED_METHOD SEED_VOLATILE +#endif + +#ifndef HAS_PRINTF +#define HAS_PRINTF 1 +#endif + +#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3)) + +#define CORETIMETYPE ee_u32 + +extern ee_u32 default_num_contexts; + +void portable_init(core_portable *p, int *argc, char *argv[]); +void portable_fini(core_portable *p); \ No newline at end of file diff --git a/sw/applications/l_train/coremark.h b/sw/applications/l_train/coremark.h new file mode 100644 index 000000000..489c92574 --- /dev/null +++ b/sw/applications/l_train/coremark.h @@ -0,0 +1,212 @@ +/* +Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. + +Original Author: Shay Gal-on +*/ + +// Copyright 2020 OpenHW Group +// Copyright 2020 Silicon Labs, Inc. +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://solderpad.org/licenses/ +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier:Apache-2.0 WITH SHL-2.0 + +/* Topic: Description + This file contains declarations of the various benchmark functions. +*/ + +/* Configuration: TOTAL_DATA_SIZE + Define total size for data algorithms will operate on +*/ +#ifndef TOTAL_DATA_SIZE +#define TOTAL_DATA_SIZE (2 * 1000) +#endif + +#define SEED_ARG 0 +#define SEED_FUNC 1 +#define SEED_VOLATILE 2 + +#define MEM_STATIC 0 +#define MEM_MALLOC 1 +#define MEM_STACK 2 + +#include "core_portme.h" + +#if HAS_STDIO +#include +#endif +#if HAS_PRINTF +/* By default, printfs are activated for FPGA and disabled for simulation. */ +#define PRINTF_IN_FPGA 1 +#define PRINTF_IN_SIM 0 + +#if TARGET_SIM && PRINTF_IN_SIM + #define ee_printf(fmt, ...) printf(fmt, ## __VA_ARGS__) +#elif PRINTF_IN_FPGA + #define ee_printf(fmt, ...) printf(fmt, ## __VA_ARGS__) +#else + #define ee_printf printf +#endif + +#endif + +/* Actual benchmark execution in iterate */ +void *iterate(void *pres); + +/* Typedef: secs_ret + For machines that have floating point support, get number of seconds as + a double. Otherwise an unsigned int. 
+*/ +#if HAS_FLOAT +typedef double secs_ret; +#else +typedef ee_u32 secs_ret; +#endif + +#if MAIN_HAS_NORETURN +#define MAIN_RETURN_VAL +#define MAIN_RETURN_TYPE void +#else +#define MAIN_RETURN_VAL 0 +#define MAIN_RETURN_TYPE int +#endif + +void start_time(void); +void stop_time(void); +void reset_time(void); +CORE_TICKS get_time(void); +secs_ret time_in_secs(CORE_TICKS ticks); + +/* Misc useful functions */ +ee_u16 crcu8(ee_u8 data, ee_u16 crc); +ee_u16 crc16(ee_s16 newval, ee_u16 crc); +ee_u16 crcu16(ee_u16 newval, ee_u16 crc); +ee_u16 crcu32(ee_u32 newval, ee_u16 crc); +ee_u8 check_data_types(void); +void * portable_malloc(ee_size_t size); +void portable_free(void *p); +ee_s32 parseval(char *valstring); + +/* Algorithm IDS */ +#define ID_LIST (1 << 0) +#define ID_MATRIX (1 << 1) +#define ID_STATE (1 << 2) +#define ALL_ALGORITHMS_MASK (ID_LIST | ID_MATRIX | ID_STATE) +#define NUM_ALGORITHMS 3 + +/* list data structures */ +typedef struct list_data_s +{ + ee_s16 data16; + ee_s16 idx; +} list_data; + +typedef struct list_head_s +{ + struct list_head_s *next; + struct list_data_s *info; +} list_head; + +/*matrix benchmark related stuff */ +#define MATDAT_INT 1 +#if MATDAT_INT +typedef ee_s16 MATDAT; +typedef ee_s32 MATRES; +#else +typedef ee_f16 MATDAT; +typedef ee_f32 MATRES; +#endif + +typedef struct MAT_PARAMS_S +{ + int N; + MATDAT *A; + MATDAT *B; + MATRES *C; +} mat_params; + +/* state machine related stuff */ +/* List of all the possible states for the FSM */ +typedef enum CORE_STATE +{ + CORE_START = 0, + CORE_INVALID, + CORE_S1, + CORE_S2, + CORE_INT, + CORE_FLOAT, + CORE_EXPONENT, + CORE_SCIENTIFIC, + NUM_CORE_STATES +} core_state_e; + +/* Helper structure to hold results */ +typedef struct RESULTS_S +{ + /* inputs */ + ee_s16 seed1; /* Initializing seed */ + ee_s16 seed2; /* Initializing seed */ + ee_s16 seed3; /* Initializing seed */ + void * memblock[4]; /* Pointer to safe memory location */ + ee_u32 size; /* Size of the data */ + ee_u32 iterations; /* 
Number of iterations to execute */ + ee_u32 execs; /* Bitmask of operations to execute */ + struct list_head_s *list; + mat_params mat; + /* outputs */ + ee_u16 crc; + ee_u16 crclist; + ee_u16 crcmatrix; + ee_u16 crcstate; + ee_s16 err; + /* Multithread specific */ + core_portable port; +} core_results; + +/* Multicore execution handling */ +#if (MULTITHREAD > 1) +ee_u8 core_start_parallel(core_results *res); +ee_u8 core_stop_parallel(core_results *res); +#endif + +/* list benchmark functions */ +list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed); +ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx); + +/* state benchmark functions */ +void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p); +ee_u16 core_bench_state(ee_u32 blksize, + ee_u8 *memblock, + ee_s16 seed1, + ee_s16 seed2, + ee_s16 step, + ee_u16 crc); + +/* matrix benchmark functions */ +ee_u32 core_init_matrix(ee_u32 blksize, + void * memblk, + ee_s32 seed, + mat_params *p); +ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc); \ No newline at end of file diff --git a/sw/applications/l_train/main.c b/sw/applications/l_train/main.c index 7865dc611..353a052c5 100644 --- a/sw/applications/l_train/main.c +++ b/sw/applications/l_train/main.c @@ -61,7 +61,7 @@ void test_lossTrack1() { cnn.momentum = 1e-2f; // Forward pass - Cnn_train(&cnn, xin_0, ppg_0, 1000, false); + Cnn_train(&cnn, xin_0, ppg_0, 100, true); } int main() {