From 49787fff359779ee0e0582b74ee375fd1d63c724 Mon Sep 17 00:00:00 2001 From: stnolting Date: Sun, 22 Dec 2024 08:48:21 +0100 Subject: [PATCH 1/9] =?UTF-8?q?=E2=9A=A0=EF=B8=8F=20[sysinfo]=20remove=20c?= =?UTF-8?q?lock-gating=20flag?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/datasheet/soc_sysinfo.adoc | 2 +- rtl/core/neorv32_sysinfo.vhd | 3 +-- sw/lib/include/neorv32_sysinfo.h | 2 +- sw/svd/neorv32.svd | 1 - 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/datasheet/soc_sysinfo.adoc b/docs/datasheet/soc_sysinfo.adoc index 0c25a271a..f104b18e5 100644 --- a/docs/datasheet/soc_sysinfo.adoc +++ b/docs/datasheet/soc_sysinfo.adoc @@ -80,7 +80,7 @@ Bit fields in this register are set to all-zero if the according memory system i | `4` | `SYSINFO_SOC_OCD` | set if on-chip debugger is implemented (via top's `OCD_EN` generic) | `5` | `SYSINFO_SOC_ICACHE` | set if processor-internal instruction cache is implemented (via top's `ICACHE_EN` generic) | `6` | `SYSINFO_SOC_DCACHE` | set if processor-internal data cache is implemented (via top's `DCACHE_EN` generic) -| `7` | `SYSINFO_SOC_CLOCK_GATING` | set if CPU clock gating is implemented (via top's `CLOCK_GATING_EN` generic) +| `7` | - |_reserved_, read as zero | `8` | `SYSINFO_SOC_XBUS_CACHE` | set if external bus interface cache is implemented (via top's `XBUS_CACHE_EN` generic) | `9` | `SYSINFO_SOC_XIP` | set if XIP module is implemented (via top's `XIP_EN` generic) | `10` | `SYSINFO_SOC_XIP_CACHE` | set if XIP cache is implemented (via top's `XIP_CACHE_EN` generic) diff --git a/rtl/core/neorv32_sysinfo.vhd b/rtl/core/neorv32_sysinfo.vhd index 0272d4c8d..59fd3adf5 100644 --- a/rtl/core/neorv32_sysinfo.vhd +++ b/rtl/core/neorv32_sysinfo.vhd @@ -18,7 +18,6 @@ use neorv32.neorv32_package.all; entity neorv32_sysinfo is generic ( CLOCK_FREQUENCY : natural; -- clock frequency of clk_i in Hz - CLOCK_GATING_EN : boolean; -- enable clock gating when in sleep mode 
BOOT_MODE_SELECT : natural; -- boot configuration select (default = 0 = bootloader) INT_BOOTLOADER_EN : boolean; -- boot configuration: true = boot explicit bootloader; false = boot from int/ext (I)MEM MEM_INT_IMEM_EN : boolean; -- implement processor-internal instruction memory @@ -118,7 +117,7 @@ begin sysinfo(2)(4) <= '1' when OCD_EN else '0'; -- on-chip debugger implemented? sysinfo(2)(5) <= '1' when ICACHE_EN else '0'; -- processor-internal instruction cache implemented? sysinfo(2)(6) <= '1' when DCACHE_EN else '0'; -- processor-internal data cache implemented? - sysinfo(2)(7) <= '1' when CLOCK_GATING_EN else '0'; -- enable clock gating when in sleep mode + sysinfo(2)(7) <= '0'; -- reserved sysinfo(2)(8) <= '1' when xcache_en_c else '0'; -- external bus interface cache implemented? sysinfo(2)(9) <= '1' when XIP_EN else '0'; -- execute in-place module implemented? sysinfo(2)(10) <= '1' when xip_cache_en_c else '0'; -- execute in-place cache implemented? diff --git a/sw/lib/include/neorv32_sysinfo.h b/sw/lib/include/neorv32_sysinfo.h index ae7093b5b..2da7dbcf4 100644 --- a/sw/lib/include/neorv32_sysinfo.h +++ b/sw/lib/include/neorv32_sysinfo.h @@ -51,7 +51,7 @@ enum NEORV32_SYSINFO_SOC_enum { SYSINFO_SOC_OCD = 4, /**< SYSINFO_SOC (4) (r/-): On-chip debugger implemented when 1 (via OCD_EN generic) */ SYSINFO_SOC_ICACHE = 5, /**< SYSINFO_SOC (5) (r/-): Processor-internal instruction cache implemented when 1 (via ICACHE_EN generic) */ SYSINFO_SOC_DCACHE = 6, /**< SYSINFO_SOC (6) (r/-): Processor-internal instruction cache implemented when 1 (via DCACHE_EN generic) */ - SYSINFO_SOC_CLOCK_GATING = 7, /**< SYSINFO_SOC (7) (r/-): Clock gating implemented when 1 (via CLOCK_GATING_EN generic) */ + SYSINFO_SOC_XBUS_CACHE = 8, /**< SYSINFO_SOC (8) (r/-): External bus cache implemented when 1 (via XBUS_CACHE_EN generic) */ SYSINFO_SOC_XIP = 9, /**< SYSINFO_SOC (9) (r/-): Execute in-place module implemented when 1 (via XIP_EN generic) */ SYSINFO_SOC_XIP_CACHE = 10, /**< 
SYSINFO_SOC (10) (r/-): Execute in-place cache implemented when 1 (via XIP_CACHE_EN generic) */ diff --git a/sw/svd/neorv32.svd b/sw/svd/neorv32.svd index b86d74605..ccc53b1d2 100644 --- a/sw/svd/neorv32.svd +++ b/sw/svd/neorv32.svd @@ -1763,7 +1763,6 @@ SYSINFO_SOC_OCD[4:4]On-chip debugger implemented SYSINFO_SOC_ICACHE[5:5]Processor-internal instruction cache implemented SYSINFO_SOC_DCACHE[6:6]Processor-internal data cache implemented - SYSINFO_SOC_CLOCK_GATING[7:7]Clock gating implemented SYSINFO_SOC_XBUS_CACHE[8:8]External bus cache implemented SYSINFO_SOC_XIP[9:9]Execute in place module implemented SYSINFO_SOC_XIP_CACHE[10:10]Execute in place cache implemented From a2e826b6cb8fb67e17f55e6a67ee74d6d09d7932 Mon Sep 17 00:00:00 2001 From: stnolting Date: Sun, 22 Dec 2024 08:49:53 +0100 Subject: [PATCH 2/9] [mxisa csr] add clock gating flag "cpu tuning option" --- docs/datasheet/cpu_csr.adoc | 9 +++++---- rtl/core/neorv32_cpu_control.vhd | 33 ++++++++++++++------------------ sw/lib/include/neorv32_cpu_csr.h | 3 ++- sw/lib/source/neorv32_rte.c | 2 +- 4 files changed, 22 insertions(+), 25 deletions(-) diff --git a/docs/datasheet/cpu_csr.adoc b/docs/datasheet/cpu_csr.adoc index 873d55cfd..6002aa07a 100644 --- a/docs/datasheet/cpu_csr.adoc +++ b/docs/datasheet/cpu_csr.adoc @@ -979,9 +979,10 @@ discover ISA sub-extensions and CPU configuration options | 23 | `CSR_MXISA_ZBB` | r/- | <<_zbb_isa_extension>> available | 24 | `CSR_MXISA_ZBS` | r/- | <<_zbs_isa_extension>> available | 25 | `CSR_MXISA_ZALRSC` | r/- | <<_zalrsc_isa_extension>> available -| 27:26 | - | r/- | _reserved_, hardwired to zero -| 28 | `CSR_MXISA_RFHWRST` | r/- | full hardware reset of register file available when set (`REGFILE_HW_RST`) -| 29 | `CSR_MXISA_FASTMUL` | r/- | fast multiplication available when set (`FAST_MUL_EN`) -| 30 | `CSR_MXISA_FASTSHIFT` | r/- | fast shifts available when set (`FAST_SHIFT_EN`) +| 26 | - | r/- | _reserved_, hardwired to zero +| 27 | `CSR_MXISA_CLKGATE` | r/- | 
sleep-mode clock gating implemented when set (`CLOCK_GATING_EN`), see <<_cpu_tuning_options>> +| 28 | `CSR_MXISA_RFHWRST` | r/- | full hardware reset of register file available when set (`REGFILE_HW_RST`), see <<_cpu_tuning_options>> +| 29 | `CSR_MXISA_FASTMUL` | r/- | fast multiplication available when set (`FAST_MUL_EN`), see <<_cpu_tuning_options>> +| 30 | `CSR_MXISA_FASTSHIFT` | r/- | fast shifts available when set (`FAST_SHIFT_EN`), see <<_cpu_tuning_options>> | 31 | `CSR_MXISA_IS_SIM` | r/- | set if CPU is being **simulated** (⚠️ not guaranteed) |======================= diff --git a/rtl/core/neorv32_cpu_control.vhd b/rtl/core/neorv32_cpu_control.vhd index 232a93298..a7974716b 100644 --- a/rtl/core/neorv32_cpu_control.vhd +++ b/rtl/core/neorv32_cpu_control.vhd @@ -65,6 +65,7 @@ entity neorv32_cpu_control is RISCV_ISA_Sdtrig : boolean; -- implement trigger module extension RISCV_ISA_Smpmp : boolean; -- implement physical memory protection -- Tuning Options -- + CLOCK_GATING_EN : boolean; -- enable clock gating when in sleep mode FAST_MUL_EN : boolean; -- use DSPs for M extension's multiplier FAST_SHIFT_EN : boolean; -- use barrel shifter for shift operations REGFILE_HW_RST : boolean; -- implement full hardware reset for register file @@ -980,7 +981,7 @@ begin -- ------------------------------------------------------------ -- Privilege level -- ------------------------------------------------------------ - if (csr_addr_v(11 downto 2) = csr_dcsr_c(11 downto 2)) and -- debug-mode-only CSR (dcsr, dpc, dscratch)? + if (csr_addr_v(11 downto 4) = csr_dcsr_c(11 downto 4)) and -- debug-mode-only CSR? RISCV_ISA_Sdext and (debug_ctrl.run = '0') then -- debug-mode implemented and not running? csr_valid(0) <= '0'; -- invalid access elsif RISCV_ISA_Zicntr and RISCV_ISA_U and (csr.privilege_eff = '0') and -- any user-mode counters available and in user-mode? 
@@ -1001,16 +1002,16 @@ begin -- ------------------------------------------------------------------------------------------- illegal_check: process(exe_engine, csr, csr_valid, debug_ctrl) begin + illegal_cmd <= '1'; -- default: illegal case exe_engine.ir(instr_opcode_msb_c downto instr_opcode_lsb_c) is -- check entire opcode when opcode_lui_c | opcode_auipc_c | opcode_jal_c => -- U-instruction type illegal_cmd <= '0'; -- all encodings are valid when opcode_jalr_c => -- unconditional jump-and-link - case exe_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) is - when "000" => illegal_cmd <= '0'; - when others => illegal_cmd <= '1'; - end case; + if (exe_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "000") then + illegal_cmd <= '0'; + end if; when opcode_branch_c => -- conditional branch case exe_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) is @@ -1031,21 +1032,18 @@ begin end case; when opcode_amo_c => -- atomic memory operation (LR/SC) - if RISCV_ISA_Zalrsc and (exe_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "010") and - (exe_engine.ir(instr_funct7_lsb_c+6 downto instr_funct7_lsb_c+3) = "0001") then -- LR.W/SC.W + if (exe_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = "010") and RISCV_ISA_Zalrsc and + (exe_engine.ir(instr_funct7_lsb_c+6 downto instr_funct7_lsb_c+3) = "0001") then -- LR.W/SC.W illegal_cmd <= '0'; - else - illegal_cmd <= '1'; end if; - when opcode_alu_c | opcode_alui_c | opcode_fop_c | opcode_cust0_c | opcode_cust1_c => -- ALU[I] / FPU / CFU operation + when opcode_alu_c | opcode_alui_c | opcode_fop_c | opcode_cust0_c | opcode_cust1_c => -- ALU[I] / FPU / custom operations illegal_cmd <= '0'; -- [NOTE] valid if not terminated/invalidated by the "instruction execution monitor" when opcode_fence_c => -- memory ordering - case exe_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) is - when funct3_fence_c | funct3_fencei_c => illegal_cmd <= '0'; - when others => illegal_cmd <= '1'; - end case; 
+ if (exe_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c+1) = funct3_fence_c(2 downto 1)) then + illegal_cmd <= '0'; + end if; when opcode_system_c => -- CSR / system instruction if (exe_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_env_c) then -- system environment @@ -1058,12 +1056,8 @@ begin when funct12_wfi_c => illegal_cmd <= (not csr.privilege) and csr.mstatus_tw; -- wfi allowed in M-mode or if TW is zero when others => illegal_cmd <= '1'; -- undefined end case; - else - illegal_cmd <= '1'; end if; - elsif (csr_valid /= "111") or (exe_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_csril_c) then -- invalid CSR operation - illegal_cmd <= '1'; - else + elsif (csr_valid = "111") and (exe_engine.ir(instr_funct3_msb_c downto instr_funct3_lsb_c) /= funct3_csril_c) then -- valid CSR operation illegal_cmd <= '0'; end if; @@ -1862,6 +1856,7 @@ begin csr.rdata(26) <= '0'; -- reserved csr.rdata(27) <= '0'; -- reserved -- tuning options -- + csr.rdata(27) <= bool_to_ulogic_f(CLOCK_GATING_EN); -- enable clock gating when in sleep mode csr.rdata(28) <= bool_to_ulogic_f(REGFILE_HW_RST); -- full hardware reset of register file csr.rdata(29) <= bool_to_ulogic_f(FAST_MUL_EN); -- DSP-based multiplication (M extensions only) csr.rdata(30) <= bool_to_ulogic_f(FAST_SHIFT_EN); -- parallel logic for shifts (barrel shifters) diff --git a/sw/lib/include/neorv32_cpu_csr.h b/sw/lib/include/neorv32_cpu_csr.h index dc9ce141e..56ffe80af 100644 --- a/sw/lib/include/neorv32_cpu_csr.h +++ b/sw/lib/include/neorv32_cpu_csr.h @@ -300,7 +300,7 @@ enum NEORV32_CSR_MISA_enum { /**********************************************************************//** - * CPU mxisa CSR (r/-): Machine _extended_ instruction set extensions (NEORV32-specific) + * CPU mxisa CSR (r/-): Machine extended instruction set extensions (NEORV32-specific) **************************************************************************/ enum NEORV32_CSR_XISA_enum { // ISA (sub-)extensions @@ 
-331,6 +331,7 @@ enum NEORV32_CSR_XISA_enum { CSR_MXISA_ZBS = 24, /**< CPU mxisa CSR (24): single-bit bit-manipulation operations (r/-)*/ CSR_MXISA_ZALRSC = 25, /**< CPU mxisa CSR (25): atomic reservation-set operations (r/-)*/ // Tuning options + CSR_MXISA_CLKGATE = 27, /**< CPU mxisa CSR (27): clock gating enabled (r/-)*/ CSR_MXISA_RFHWRST = 28, /**< CPU mxisa CSR (28): register file has full hardware reset (r/-)*/ CSR_MXISA_FASTMUL = 29, /**< CPU mxisa CSR (29): DSP-based multiplication (M extensions only) (r/-)*/ CSR_MXISA_FASTSHIFT = 30, /**< CPU mxisa CSR (30): parallel logic for shifts (barrel shifters) (r/-)*/ diff --git a/sw/lib/source/neorv32_rte.c b/sw/lib/source/neorv32_rte.c index ec5b535ad..a7410ed7a 100644 --- a/sw/lib/source/neorv32_rte.c +++ b/sw/lib/source/neorv32_rte.c @@ -402,7 +402,7 @@ void neorv32_rte_print_hw_config(void) { neorv32_uart0_printf("Clock speed: %u Hz\n", neorv32_sysinfo_get_clk()); neorv32_uart0_printf("Clock gating: "); - if (NEORV32_SYSINFO->SOC & (1 << SYSINFO_SOC_CLOCK_GATING)) { neorv32_uart0_printf("enabled\n"); } + if (neorv32_cpu_csr_read(CSR_MXISA) & (1 << CSR_MXISA_CLKGATE)) { neorv32_uart0_printf("enabled\n"); } else { neorv32_uart0_printf("disabled\n"); } neorv32_uart0_printf("On-chip debugger: "); From 79d11abc5a3b97d9d43f7b0c632cf00a935bd1c9 Mon Sep 17 00:00:00 2001 From: stnolting Date: Sun, 22 Dec 2024 08:53:41 +0100 Subject: [PATCH 3/9] [cpu] add clock gating (switch) --- rtl/core/neorv32_cpu.vhd | 47 +++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/rtl/core/neorv32_cpu.vhd b/rtl/core/neorv32_cpu.vhd index 1a39fa370..fe39ba448 100644 --- a/rtl/core/neorv32_cpu.vhd +++ b/rtl/core/neorv32_cpu.vhd @@ -54,6 +54,7 @@ entity neorv32_cpu is RISCV_ISA_Sdtrig : boolean; -- implement trigger module extension RISCV_ISA_Smpmp : boolean; -- implement physical memory protection -- Tuning Options -- + CLOCK_GATING_EN : boolean; -- enable clock gating when in sleep mode 
FAST_MUL_EN : boolean; -- use DSPs for M extension's multiplier FAST_SHIFT_EN : boolean; -- use barrel shifter for shift operations REGFILE_HW_RST : boolean; -- implement full hardware reset for register file @@ -69,7 +70,6 @@ entity neorv32_cpu is port ( -- global control -- clk_i : in std_ulogic; -- switchable global clock, rising edge - clk_aux_i : in std_ulogic; -- always-on clock, rising edge rstn_i : in std_ulogic; -- global reset, low-active, async sleep_o : out std_ulogic; -- cpu is in sleep mode when set debug_o : out std_ulogic; -- cpu is in debug mode when set @@ -108,6 +108,7 @@ architecture neorv32_cpu_rtl of neorv32_cpu is signal xcsr_rdata_res : std_ulogic_vector(XLEN-1 downto 0); -- local signals -- + signal clk_gated : std_ulogic; -- switchable clock (clock gating) signal ctrl : ctrl_bus_t; -- main control bus signal alu_imm : std_ulogic_vector(XLEN-1 downto 0); -- immediate signal rf_wdata : std_ulogic_vector(XLEN-1 downto 0); -- register file write data @@ -178,6 +179,25 @@ begin assert not is_simulation_c report "[NEORV32] Assuming this is a simulation." 
severity warning; + -- Clock Gating --------------------------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + neorv32_cpu_clockgate_enabled: + if CLOCK_GATING_EN generate + neorv32_cpu_clockgate_inst: entity neorv32.neorv32_clockgate + port map ( + clk_i => clk_i, + rstn_i => rstn_i, + halt_i => ctrl.cpu_sleep, + clk_o => clk_gated + ); + end generate; + + neorv32_cpu_clockgate_disabled: + if not CLOCK_GATING_EN generate + clk_gated <= clk_i; + end generate; + + -- Control Unit --------------------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- neorv32_cpu_control_inst: entity neorv32.neorv32_cpu_control @@ -219,6 +239,7 @@ begin RISCV_ISA_Sdtrig => RISCV_ISA_Sdtrig, -- implement trigger module extension RISCV_ISA_Smpmp => RISCV_ISA_Smpmp, -- implement physical memory protection -- Tuning Options -- + CLOCK_GATING_EN => CLOCK_GATING_EN, -- enable clock gating when in sleep mode FAST_MUL_EN => FAST_MUL_EN, -- use DSPs for M extension's multiplier FAST_SHIFT_EN => FAST_SHIFT_EN, -- use barrel shifter for shift operations REGFILE_HW_RST => REGFILE_HW_RST, -- implement full hardware reset for register file @@ -228,8 +249,8 @@ begin ) port map ( -- global control -- - clk_i => clk_i, -- global clock, rising edge - clk_aux_i => clk_aux_i, -- always-on clock, rising edge + clk_i => clk_gated, -- global clock, rising edge + clk_aux_i => clk_i, -- always-on clock, rising edge rstn_i => rstn_i, -- global reset, low-active, async ctrl_o => ctrl, -- main control bus -- instruction fetch interface -- @@ -283,14 +304,14 @@ begin ) port map ( -- global control -- - clk_i => clk_i, -- global clock, rising edge - rstn_i => rstn_i, -- global reset, low-active, async - ctrl_i => ctrl, -- main control bus + clk_i => clk_gated, -- global clock, rising edge + rstn_i => rstn_i, -- global 
reset, low-active, async + ctrl_i => ctrl, -- main control bus -- operands -- - rd_i => rf_wdata, -- destination operand rd - rs1_o => rs1, -- source operand rs1 - rs2_o => rs2, -- source operand rs2 - rs3_o => rs3 -- source operand rs3 + rd_i => rf_wdata, -- destination operand rd + rs1_o => rs1, -- source operand rs1 + rs2_o => rs2, -- source operand rs2 + rs3_o => rs3 -- source operand rs3 ); -- all buses are zero unless there is an according operation -- @@ -324,7 +345,7 @@ begin ) port map ( -- global control -- - clk_i => clk_i, -- global clock, rising edge + clk_i => clk_gated, -- global clock, rising edge rstn_i => rstn_i, -- global reset, low-active, async ctrl_i => ctrl, -- main control bus -- CSR interface -- @@ -355,7 +376,7 @@ begin ) port map ( -- global control -- - clk_i => clk_i, -- global clock, rising edge + clk_i => clk_gated, -- global clock, rising edge rstn_i => rstn_i, -- global reset, low-active, async ctrl_i => ctrl, -- main control bus -- cpu data access interface -- @@ -385,7 +406,7 @@ begin ) port map ( -- global control -- - clk_i => clk_i, -- global clock, rising edge + clk_i => clk_gated, -- global clock, rising edge rstn_i => rstn_i, -- global reset, low-active, async ctrl_i => ctrl, -- main control bus -- CSR interface -- From a54b39fae5cb4fbe3748d704b288910c2173fbbc Mon Sep 17 00:00:00 2001 From: stnolting Date: Sun, 22 Dec 2024 08:55:02 +0100 Subject: [PATCH 4/9] [top] remove clock gating switch CLOCK_GATING_EN is now a CPU-only tuning option --- rtl/core/neorv32_package.vhd | 4 ++-- rtl/core/neorv32_top.vhd | 27 +++------------------------ 2 files changed, 5 insertions(+), 26 deletions(-) diff --git a/rtl/core/neorv32_package.vhd b/rtl/core/neorv32_package.vhd index aa6ee9847..5040d0f78 100644 --- a/rtl/core/neorv32_package.vhd +++ b/rtl/core/neorv32_package.vhd @@ -29,7 +29,7 @@ package neorv32_package is -- Architecture Constants ----------------------------------------------------------------- -- 
------------------------------------------------------------------------------------------- - constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100706"; -- hardware version + constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100707"; -- hardware version constant archid_c : natural := 19; -- official RISC-V architecture ID constant XLEN : natural := 32; -- native data path width @@ -717,7 +717,6 @@ package neorv32_package is generic ( -- Processor Clocking -- CLOCK_FREQUENCY : natural := 0; - CLOCK_GATING_EN : boolean := false; -- Identification -- HART_ID : std_ulogic_vector(31 downto 0) := x"00000000"; JEDEC_ID : std_ulogic_vector(10 downto 0) := "00000000000"; @@ -751,6 +750,7 @@ package neorv32_package is RISCV_ISA_Zksh : boolean := false; RISCV_ISA_Zxcfu : boolean := false; -- Tuning Options -- + CLOCK_GATING_EN : boolean := false; FAST_MUL_EN : boolean := false; FAST_SHIFT_EN : boolean := false; REGFILE_HW_RST : boolean := false; diff --git a/rtl/core/neorv32_top.vhd b/rtl/core/neorv32_top.vhd index 1b9affa41..ef2f4be2d 100644 --- a/rtl/core/neorv32_top.vhd +++ b/rtl/core/neorv32_top.vhd @@ -23,7 +23,6 @@ entity neorv32_top is generic ( -- Processor Clocking -- CLOCK_FREQUENCY : natural := 0; -- clock frequency of clk_i in Hz - CLOCK_GATING_EN : boolean := false; -- enable clock gating when in sleep mode -- Core Identification -- HART_ID : std_ulogic_vector(31 downto 0) := x"00000000"; -- hardware thread ID @@ -62,6 +61,7 @@ entity neorv32_top is RISCV_ISA_Zxcfu : boolean := false; -- implement custom (instr.) 
functions unit -- Tuning Options -- + CLOCK_GATING_EN : boolean := false; -- enable clock gating when in sleep mode FAST_MUL_EN : boolean := false; -- use DSPs for M extension's multiplier FAST_SHIFT_EN : boolean := false; -- use barrel shifter for shift operations REGFILE_HW_RST : boolean := false; -- implement full hardware reset for register file @@ -290,7 +290,6 @@ architecture neorv32_top_rtl of neorv32_top is signal rstn_wdt, rstn_sys, rstn_ext : std_ulogic; -- clock system -- - signal clk_cpu : std_ulogic; -- CPU core clock, can be switched off signal clk_gen : std_ulogic_vector(7 downto 0); -- scaled clock-enables -- type clk_gen_en_enum_t is ( @@ -463,25 +462,6 @@ begin core_complex: if true generate - -- CPU Clock Gating ----------------------------------------------------------------------- - -- ------------------------------------------------------------------------------------------- - neorv32_cpu_clockgate_inst_true: - if CLOCK_GATING_EN generate - neorv32_cpu_clockgate_inst: entity neorv32.neorv32_clockgate - port map ( - clk_i => clk_i, - rstn_i => rstn_sys, - halt_i => cpu_sleep, - clk_o => clk_cpu - ); - end generate; - - neorv32_cpu_clockgate_inst_false: - if not CLOCK_GATING_EN generate - clk_cpu <= clk_i; - end generate; - - -- CPU Core ------------------------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- neorv32_cpu_inst: entity neorv32.neorv32_cpu @@ -519,6 +499,7 @@ begin RISCV_ISA_Sdtrig => OCD_EN, RISCV_ISA_Smpmp => cpu_smpmp_c, -- Tuning Options -- + CLOCK_GATING_EN => CLOCK_GATING_EN, FAST_MUL_EN => FAST_MUL_EN, FAST_SHIFT_EN => FAST_SHIFT_EN, REGFILE_HW_RST => REGFILE_HW_RST, @@ -533,8 +514,7 @@ begin ) port map ( -- global control -- - clk_i => clk_cpu, -- switchable clock - clk_aux_i => clk_i, -- always-on clock + clk_i => clk_i, rstn_i => rstn_sys, sleep_o => cpu_sleep, debug_o => cpu_debug, @@ -1624,7 +1604,6 @@ begin 
neorv32_sysinfo_inst: entity neorv32.neorv32_sysinfo generic map ( CLOCK_FREQUENCY => CLOCK_FREQUENCY, - CLOCK_GATING_EN => CLOCK_GATING_EN, BOOT_MODE_SELECT => BOOT_MODE_SELECT, INT_BOOTLOADER_EN => bootrom_en_c, MEM_INT_IMEM_EN => MEM_INT_IMEM_EN, From 8cf48eb05cf32b515fe62a4f5278244dfc1bd0b2 Mon Sep 17 00:00:00 2001 From: stnolting Date: Sun, 22 Dec 2024 08:55:38 +0100 Subject: [PATCH 5/9] [top] re-arrange generics --- rtl/system_integration/neorv32_vivado_ip.vhd | 2 +- sim/neorv32_tb.vhd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rtl/system_integration/neorv32_vivado_ip.vhd b/rtl/system_integration/neorv32_vivado_ip.vhd index c06228390..d63f4ed1c 100644 --- a/rtl/system_integration/neorv32_vivado_ip.vhd +++ b/rtl/system_integration/neorv32_vivado_ip.vhd @@ -354,7 +354,6 @@ begin generic map ( -- Clocking -- CLOCK_FREQUENCY => CLOCK_FREQUENCY, - CLOCK_GATING_EN => false, -- clock gating is not supported here -- Identification -- HART_ID => std_ulogic_vector(HART_ID), JEDEC_ID => std_ulogic_vector(JEDEC_ID), @@ -388,6 +387,7 @@ begin RISCV_ISA_Zksh => RISCV_ISA_Zksh, RISCV_ISA_Zxcfu => RISCV_ISA_Zxcfu, -- Extension Options -- + CLOCK_GATING_EN => false, -- clock gating is not supported here FAST_MUL_EN => FAST_MUL_EN, FAST_SHIFT_EN => FAST_SHIFT_EN, REGFILE_HW_RST => REGFILE_HW_RST, diff --git a/sim/neorv32_tb.vhd b/sim/neorv32_tb.vhd index c51053b4e..26a6595f2 100644 --- a/sim/neorv32_tb.vhd +++ b/sim/neorv32_tb.vhd @@ -109,7 +109,6 @@ begin generic map ( -- Clocking -- CLOCK_FREQUENCY => CLOCK_FREQUENCY, - CLOCK_GATING_EN => true, -- Identification -- HART_ID => x"00000000", JEDEC_ID => "00000000000", @@ -143,6 +142,7 @@ begin RISCV_ISA_Zmmul => RISCV_ISA_Zmmul, RISCV_ISA_Zxcfu => RISCV_ISA_Zxcfu, -- Extension Options -- + CLOCK_GATING_EN => true, FAST_MUL_EN => FAST_MUL_EN, FAST_SHIFT_EN => FAST_SHIFT_EN, REGFILE_HW_RST => REGFILE_HW_RST, From 24abdd05c8b364680b43f45a1bc94bc63ebfb154 Mon Sep 17 00:00:00 2001 From: stnolting Date: Sun, 
22 Dec 2024 08:55:51 +0100 Subject: [PATCH 6/9] [rtl] update CPU file-list file --- rtl/file_list_cpu.f | 1 + 1 file changed, 1 insertion(+) diff --git a/rtl/file_list_cpu.f b/rtl/file_list_cpu.f index c830a770d..2df0a0b67 100644 --- a/rtl/file_list_cpu.f +++ b/rtl/file_list_cpu.f @@ -1,4 +1,5 @@ NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_package.vhd +NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_clockgate.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_fifo.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_cpu_decompressor.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_cpu_control.vhd From ef771c33944f956fcfdfbc243953d8c16c46548e Mon Sep 17 00:00:00 2001 From: stnolting Date: Sun, 22 Dec 2024 14:34:41 +0100 Subject: [PATCH 7/9] [changelog] add v1.10.7.7 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8be0fbaaf..c042972ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12 | Date | Version | Comment | Ticket | |:----:|:-------:|:--------|:------:| +| 22.12.2024 | 1.10.7.7 | :warning: move clock gating switch from processor top to CPU clock; `CLOCK_GATING_EN` is now a CPU tuning option | [#1124](https://github.com/stnolting/neorv32/pull/1124) | | 21.12.2024 | 1.10.7.6 | minor rtl cleanups and optimizations | [#1123](https://github.com/stnolting/neorv32/pull/1123) | | 19.12.2024 | 1.10.7.5 | :test_tube: use time-multiplex PMP architecture (reducing area footprint) | [#1105](https://github.com/stnolting/neorv32/pull/1105) | | 14.12.2024 | 1.10.7.4 | :sparkles: add new module: I2C-compatible **Two-Wire Device Controller (TWD)** | [#1121](https://github.com/stnolting/neorv32/pull/1121) | From 10ccc8646592ba5a37179788485dad1ad6c04873 Mon Sep 17 00:00:00 2001 From: stnolting Date: Sun, 22 Dec 2024 14:43:39 +0100 Subject: [PATCH 8/9] [docs] cleanup; add clock gating --- docs/datasheet/cpu.adoc | 72 +++++++++++++++++++++++++++++------------ 1 file changed, 51 
insertions(+), 21 deletions(-) diff --git a/docs/datasheet/cpu.adoc b/docs/datasheet/cpu.adoc index 8012e3551..b931ff355 100644 --- a/docs/datasheet/cpu.adoc +++ b/docs/datasheet/cpu.adoc @@ -65,23 +65,22 @@ direction as seen from the CPU. |======================= | Signal | Width/Type | Dir | Description 4+^| **Global Signals** -| `clk_i` | 1 | in | Global clock line, all registers triggering on rising edge, this clock can be switched off during <<_sleep_mode>> -| `clk_aux_i` | 1 | in | Always-on clock, used to keep the the sleep control active when `clk_i` is switched off -| `rstn_i` | 1 | in | Global reset, low-active -| `sleep_o` | 1 | out | CPU is in <<_sleep_mode>> when set -| `debug_o` | 1 | out | CPU is in <<_cpu_debug_mode,debug mode>> when set +| `clk_i` | 1 | in | Global clock line, all registers triggering on rising edge. +| `rstn_i` | 1 | in | Global reset, low-active. +| `sleep_o` | 1 | out | CPU is in <<_sleep_mode>> when set. +| `debug_o` | 1 | out | CPU is in <<_cpu_debug_mode,debug mode>> when set. 4+^| **Interrupts (<<_traps_exceptions_and_interrupts>>)** -| `msi_i` | 1 | in | RISC-V machine software interrupt -| `mei_i` | 1 | in | RISC-V machine external interrupt -| `mti_i` | 1 | in | RISC-V machine timer interrupt -| `firq_i` | 16 | in | Custom fast interrupt request signals -| `dbi_i` | 1 | in | Request CPU to halt and enter debug mode (RISC-V <<_on_chip_debugger_ocd>>) +| `msi_i` | 1 | in | RISC-V machine software interrupt. +| `mei_i` | 1 | in | RISC-V machine external interrupt. +| `mti_i` | 1 | in | RISC-V machine timer interrupt. +| `firq_i` | 16 | in | Custom fast interrupt request signals. +| `dbi_i` | 1 | in | Request CPU to halt and enter debug mode (RISC-V <<_on_chip_debugger_ocd>>). 4+^| **Instruction <<_bus_interface>>** -| `ibus_req_o` | `bus_req_t` | out | Instruction fetch bus request -| `ibus_rsp_i` | `bus_rsp_t` | in | Instruction fetch bus response +| `ibus_req_o` | `bus_req_t` | out | Instruction fetch bus request. 
+| `ibus_rsp_i` | `bus_rsp_t` | in | Instruction fetch bus response. 4+^| **Data <<_bus_interface>>** -| `dbus_req_o` | `bus_req_t` | out | Data access (load/store) bus request -| `dbus_rsp_i` | `bus_rsp_t` | in | Data access (load/store) bus response +| `dbus_req_o` | `bus_req_t` | out | Data access (load/store) bus request. +| `dbus_rsp_i` | `bus_rsp_t` | in | Data access (load/store) bus response. |======================= .Bus Interface Protocol @@ -110,6 +109,7 @@ The generic type "suv(x:y)" represents a `std_ulogic_vector(x downto y)`. [options="header",grid="rows"] |======================= | Name | Type | Description +| `HART_ID` | suv(31:0) | Value for the <<_mhartid>> CSR. | `VENDOR_ID` | suv(31:0) | Value for the <<_mvendorid>> CSR. | `BOOT_ADDR` | suv(31:0) | CPU reset address. See section <<_address_space>>. | `DEBUG_PARK_ADDR` | suv(31:0) | "Park loop" entry address for the <<_on_chip_debugger_ocd>>, has to be 4-byte aligned. @@ -119,6 +119,10 @@ The generic type "suv(x:y)" represents a `std_ulogic_vector(x downto y)`. | `RISCV_ISA_Smpmp` | boolean | Implement RISC-V-compatible physical memory protection (PMP). See section <<_smpmp_isa_extension>>. |======================= +.Tuning Option Generics +[TIP] +Additional generics that are related to certain _tuning options_ are listed in section <<_cpu_tuning_options>>. + <<< // #################################################################################################################### @@ -253,6 +257,21 @@ Note that these configuration options have no impact on the actual functionality Software can check for configured tuning options via specific flags in the <<_mxisa>> CSR. +{empty} + +[discrete] +===== **`CLOCK_GATING_EN`** + +[cols="<1,<8"] +[frame="topbot",grid="none"] +|======================= +| Name | Clock gating +| Type | `boolean` +| Default | `false`, disabled +| Description | When **enabled** the CPU's primary clock is switched off when the CPU enters <<_sleep_mode>>. 
See <<_cpu_clock_gating>>. +| | When **disabled** the CPU clock system is implemented as single always-on clock domain. +|======================= + + {empty} + [discrete] ===== **`FAST_MUL_EN`** @@ -314,7 +333,7 @@ like blockRAM. Note that these primitives do not provide any kind of hardware re ==== Sleep Mode The NEORV32 CPU provides a single sleep mode that can be entered to power-down the core reducing -dynamic power consumption. Sleep mode is entered by executing the `wfi` ("wait for interrupt") instruction. +dynamic power consumption. Sleep mode is entered by executing the RISC-V `wfi` ("wait for interrupt") instruction. .Execution Details [NOTE] @@ -323,7 +342,7 @@ if `TW` in <<_mstatus>> is set. When executed in debug-mode or during single-ste simple `nop` without entering sleep mode. After executing the `wfi` instruction the CPU's `sleep_o` signal (<<_cpu_top_entity_signals>>) will become set -as soon as the CPU has fully halted ("CPU is sleeping"): +as soon as the CPU has fully halted: [start=1] .The front-end (instruction fetch) has stopped. There is no pending instruction fetch bus access. @@ -331,14 +350,25 @@ as soon as the CPU has fully halted ("CPU is sleeping"): .There is no enabled interrupt being pending. CPU-external modules like memories, timers and peripheral interfaces are not affected by this. Furthermore, the CPU will -continue to buffer/enqueue incoming interrupt. The CPU will leave sleep mode as soon as any _enabled_ interrupt (via <<_mie>>) +continue to buffer/enqueue incoming interrupts. The CPU will leave sleep mode as soon as any _enabled_ interrupt (via <<_mie>>) source becomes _pending_ or if a debug session is started. -===== Power-Down Mode -Optionally, the sleep mode can also be used to shut down the CPU's main clock to further reduce power consumption -by halting the core's clock tree. This clock gating mode is enabled by the `CLOCK_GATING_EN` generic -(<<_processor_top_entity_generics>>). 
See section <<_processor_clocking>> for more information. +==== CPU Clock Gating + +The single clock domain of the CPU core can be split into an always-on clock domain and a switchable clock domain. +The switchable clock domain can be deactivated to further reduce dynamic power consumption. CPU-external modules +like timers, interfaces and memories are not affected by the clock gating. + +The splitting into two clock domains is enabled by the `CLOCK_GATING_EN` generic (<<_processor_top_entity_generics>> / +<<_cpu_tuning_options>>). When enabled, a generic clock switching gate is added to decouple the switchable clock from +the always-on clock domain. Whenever the CPU enters <<_sleep_mode>> the switchable clock domain is shut down. + +.Clock Switch Hardware +[NOTE] +By default, a generic clock switch is used (`rtl/core/neorv32_clockgate.vhd`). Especially for FPGA setups it is highly +recommended to replace this default module by a technology-specific primitive or macro wrapper to improve synthesis results +(clock skew, global clock tree usage, etc.). ==== Full Virtualization From facd50aa8083bde9427f1b118bdfed5b9085e5cc Mon Sep 17 00:00:00 2001 From: stnolting Date: Sun, 22 Dec 2024 14:43:52 +0100 Subject: [PATCH 9/9] [docs] cleanups --- docs/datasheet/soc.adoc | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/docs/datasheet/soc.adoc b/docs/datasheet/soc.adoc index ae63d6d3b..bfd115a21 100644 --- a/docs/datasheet/soc.adoc +++ b/docs/datasheet/soc.adoc @@ -210,7 +210,6 @@ The generic type "`suv(x:y)`" is an abbreviation for "`std_ulogic_vector(x downt | Name | Type | Default | Description 4+^| **<<_processor_clocking>>** | `CLOCK_FREQUENCY` | natural | 0 | The clock frequency of the processor's `clk_i` input port in Hertz (Hz). -| `CLOCK_GATING_EN` | boolean | false | Enable clock gating when CPU is in sleep mode (see sections <<_sleep_mode>> and <<_processor_clocking>>). 
4+^| **Core Identification** | `HART_ID` | suv(31:0) | x"00000000" | The hart thread ID of the CPU (passed to <<_mhartid>> CSR). | `JEDEC_ID` | suv(10:0) | "00000000000" | JEDEC ID; continuation codes plus vendor ID (passed to <<_mvendorid>> CSR and to the <<_debug_transport_module_dtm>>). @@ -243,7 +242,8 @@ The generic type "`suv(x:y)`" is an abbreviation for "`std_ulogic_vector(x downt | `RISCV_ISA_Zksh` | boolean | false | Enable <<_zksh_isa_extension>> (scalar cryptography ShangMi hash functions). | `RISCV_ISA_Zmmul` | boolean | false | Enable <<_zmmul_isa_extension>> (hardware-based integer multiplication). | `RISCV_ISA_Zxcfu` | boolean | false | Enable NEORV32-specific <<_zxcfu_isa_extension>> (custom RISC-V instructions). -4+^| **CPU <<_architecture>> Tuning Options** +4+^| **<<_cpu_tuning_options>>** +| `CLOCK_GATING_EN` | boolean | false | Implement sleep-mode clock gating (see sections <<_sleep_mode>> and <<_processor_clocking>>). | `FAST_MUL_EN` | boolean | false | Implement fast but large full-parallel multipliers (trying to infer DSP blocks); see section <<_cpu_arithmetic_logic_unit>>. | `FAST_SHIFT_EN` | boolean | false | Implement fast but large full-parallel barrel shifters; see section <<_cpu_arithmetic_logic_unit>>. | `REGFILE_HW_RST` | boolean | false | Implement full hardware reset for register file (use individual FFs instead of BRAM); see section <<_cpu_register_file>>. @@ -329,27 +329,13 @@ The generic type "`suv(x:y)`" is an abbreviation for "`std_ulogic_vector(x downt The processor is implemented as fully-synchronous logic design using a single clock domain that is driven entirely by the top's `clk_i` signal. This clock signal is used by all internal registers and memories. All of them trigger -on the **rising edge** of this clock signal - the only exception it the default <<_clock_gating>> module. 
External -"clocks" like the OCD's JTAG clock or the SDI's serial clock are synchronized into the processor's clock domain -before being used as "general logic signal" (and not as a dedicated clock). +on the **rising edge** of this clock signal. External "clocks" like the OCD's JTAG clock or the SDI's serial clock +are synchronized into the processor's clock domain before being used as "general logic signal" (and not as a dedicated clock). -==== Clock Gating - -The single clock domain of the processor can be split into an always-on clock domain and a switchable clock domain. -The switchable clock domain is used to clock the CPU core, the CPU's bus switch and - if implemented - the caches. -This domain can be deactivated to reduce power consumption. The always-on clock domain is used to clock all other -processor modules like peripherals, memories and IO devices. Hence, these modules can continue operation (e.g. a -timer keeps running) even if the CPU is shut down. - -The splitting into two clock domain is enabled by the `CLOCK_GATING_EN` generic (<<_processor_top_entity_generics>>). -When enabled, a generic clock switching gate is added to decouple the switchable clock from the always-on clock domain -(VHDL file `neorv32_clockgate.vhd`). Whenever the CPU enters <<_sleep_mode>> the CPU clock domain ist shut down. - -.Clock Switch Hardware +.CPU Clock Gating [NOTE] -By default, a generic clock gate is used (`rtl/core/neorv32_clockgate.vhd`) to shut down the CPU clock. -Especially for FPGA setups it is highly recommended to replace this default version by a technology-specific primitive -or macro wrapper to improve efficiency (clock skew, global clock tree usage, etc.). +The CPU core provides an optional clock-gating feature to switch off large parts of the core when sleep mode is entered. +See section <<_cpu_clock_gating>> for more information. ==== Peripheral Clocks