[cpu] add hardware reset for all FFs

stnolting · Nov 10, 2023 · 6b0018d · 6b0018d
1 parent aff0493
commit 6b0018d
Show file tree

Hide file tree

Showing 6 changed files with 119 additions and 45 deletions.
diff --git a/rtl/core/neorv32_cpu_control.vhd b/rtl/core/neorv32_cpu_control.vhd
@@ -502,9 +502,11 @@ begin
   issue_engine_enabled:
   if (CPU_EXTENSION_RISCV_C = true) generate
 
-    issue_engine_fsm_sync: process(clk_i)
+    issue_engine_fsm_sync: process(rstn_i, clk_i)
     begin
-      if rising_edge(clk_i) then
+      if (rstn_i = '0') then
+        issue_engine.align <= '0'; -- start aligned after reset
+      elsif rising_edge(clk_i) then
         if (fetch_engine.restart = '1') then
           issue_engine.align <= execute_engine.pc(1); -- branch to unaligned address?
         elsif (issue_engine.ack = '1') then
@@ -561,9 +563,11 @@ begin
 
   -- Immediate Generator --------------------------------------------------------------------
   -- -------------------------------------------------------------------------------------------
-  imm_gen: process(clk_i)
+  imm_gen: process(rstn_i, clk_i)
   begin
-    if rising_edge(clk_i) then
+    if (rstn_i = '0') then
+      imm_o <= (others => '0');
+    elsif rising_edge(clk_i) then
       -- default = I-immediate: ALU-immediate, load, jump-and-link with register --
       imm_o(XLEN-1 downto 11) <= (others => execute_engine.ir(31)); -- sign extension
       imm_o(10 downto 01)     <= execute_engine.ir(30 downto 21);
@@ -1460,9 +1464,11 @@ begin
 
   -- Trap Priority Logic --------------------------------------------------------------------
   -- -------------------------------------------------------------------------------------------
-  trap_priority: process(clk_i)
+  trap_priority: process(rstn_i, clk_i)
   begin
-    if rising_edge(clk_i) then
+    if (rstn_i = '0') then
+      trap_ctrl.cause <= (others => '0');
+    elsif rising_edge(clk_i) then
       -- standard RISC-V exceptions --
       if    (trap_ctrl.exc_buf(exc_ialign_c)   = '1') then trap_ctrl.cause <= trap_ima_c;  -- instruction address misaligned
       elsif (trap_ctrl.exc_buf(exc_iaccess_c)  = '1') then trap_ctrl.cause <= trap_iaf_c;  -- instruction access fault
@@ -1473,6 +1479,8 @@ begin
       elsif (trap_ctrl.exc_buf(exc_lalign_c)   = '1') then trap_ctrl.cause <= trap_lma_c;  -- load address misaligned
       elsif (trap_ctrl.exc_buf(exc_saccess_c)  = '1') then trap_ctrl.cause <= trap_saf_c;  -- store access fault
       elsif (trap_ctrl.exc_buf(exc_laccess_c)  = '1') then trap_ctrl.cause <= trap_laf_c;  -- load access fault
+
+
       -- standard RISC-V debug mode exceptions and interrupts --
       elsif (trap_ctrl.irq_buf(irq_db_halt_c)  = '1') then trap_ctrl.cause <= trap_db_halt_c;  -- external halt request (async)
       elsif (trap_ctrl.exc_buf(exc_db_hw_c)    = '1') then trap_ctrl.cause <= trap_db_trig_c;  -- hardware trigger (sync)
@@ -2243,9 +2251,11 @@ begin
 
   -- Counter Increment Control (Trigger Events) ---------------------------------------------
   -- -------------------------------------------------------------------------------------------
-  counter_event: process(clk_i)
+  counter_event: process(rstn_i, clk_i)
   begin -- increment if an enabled event fires; do not increment if CPU is in debug mode or if counter is inhibited
-    if rising_edge(clk_i) then
+    if (rstn_i = '0') then
+      cnt.inc <= (others => '0');
+    elsif rising_edge(clk_i) then
       cnt.inc <= (others => '0'); -- default
       -- base counters --
       cnt.inc(0) <= cnt_event(hpmcnt_event_cy_c) and (not csr.mcountinhibit(0)) and (not debug_ctrl.running);

diff --git a/rtl/core/neorv32_cpu_cp_bitmanip.vhd b/rtl/core/neorv32_cpu_cp_bitmanip.vhd
@@ -305,9 +305,14 @@ begin
   serial_shifter:
   if (FAST_SHIFT_EN = false) generate
 
-    shifter_unit: process(clk_i)
+    shifter_unit: process(rstn_i, clk_i)
     begin
-      if rising_edge(clk_i) then
+      if (rstn_i = '0') then
+        shifter.cnt     <= (others => '0');
+        shifter.sreg    <= (others => '0');
+        shifter.cnt_max <= (others => '0');
+        shifter.bcnt    <= (others => '0');
+      elsif rising_edge(clk_i) then
         if (shifter.start = '1') then -- trigger new shift
           shifter.cnt <= (others => '0');
           -- shift operand --
@@ -420,9 +425,12 @@ begin
 
   -- Carry-Less Multiplication Core ---------------------------------------------------------
   -- -------------------------------------------------------------------------------------------
-  clmul_core: process(clk_i)
+  clmul_core: process(rstn_i, clk_i)
   begin
-    if rising_edge(clk_i) then
+    if (rstn_i = '0') then
+      clmul.cnt  <= (others => '0');
+      clmul.prod <= (others => '0');
+    elsif rising_edge(clk_i) then
       if (clmul.start = '1') then -- start new multiplication
         clmul.cnt                 <= (others => '0');
         clmul.cnt(clmul.cnt'left) <= '1';
@@ -544,9 +552,11 @@ begin
 
   -- Output Gate ----------------------------------------------------------------------------
   -- -------------------------------------------------------------------------------------------
-  output_gate: process(clk_i)
+  output_gate: process(rstn_i, clk_i)
   begin
-    if rising_edge(clk_i) then
+    if (rstn_i = '0') then
+      res_o <= (others => '0');
+    elsif rising_edge(clk_i) then
       res_o <= (others => '0'); -- default
       if (valid = '1') then
         res_o <= res_out(op_andn_c)   or res_out(op_orn_c)    or res_out(op_xnor_c)  or

diff --git a/rtl/core/neorv32_cpu_cp_cfu.vhd b/rtl/core/neorv32_cpu_cp_cfu.vhd
@@ -320,9 +320,15 @@ begin
   madd.done <= madd.sreg(madd.sreg'left);
 
   -- arithmetic core --
-  madd_core: process(clk_i)
+  madd_core: process(rstn_i, clk_i)
   begin
-    if rising_edge(clk_i) then
+    if (rstn_i = '0') then
+      madd.opa <= (others => '0');
+      madd.opb <= (others => '0');
+      madd.opc <= (others => '0');
+      madd.mul <= (others => '0');
+      madd.res <= (others => '0');
+    elsif rising_edge(clk_i) then
       -- stage 0: buffer input operands --
       madd.opa <= rs1_i;
       madd.opb <= rs2_i;

diff --git a/rtl/core/neorv32_cpu_cp_fpu.vhd b/rtl/core/neorv32_cpu_cp_fpu.vhd
@@ -495,10 +495,15 @@ begin
 
   -- Floating-Point Comparator --------------------------------------------------------------
   -- -------------------------------------------------------------------------------------------
-  float_comparator: process(clk_i)
+  float_comparator: process(rstn_i, clk_i)
     variable cond_v : std_ulogic_vector(1 downto 0);
   begin
-    if rising_edge(clk_i) then
+    if (rstn_i = '0') then
+      comp_equal_ff   <= '0';
+      comp_less_ff    <= '0';
+      fu_compare.done <= '0';
+      fu_min_max.done <= '0';
+    elsif rising_edge(clk_i) then
       -- equal --
       if ((fpu_operands.rs1_class(fp_class_pos_inf_c)   = '1') and (fpu_operands.rs2_class(fp_class_pos_inf_c) = '1')) or -- +inf == +inf
          ((fpu_operands.rs1_class(fp_class_neg_inf_c)   = '1') and (fpu_operands.rs2_class(fp_class_neg_inf_c) = '1')) or -- -inf == -inf
@@ -643,11 +648,15 @@ begin
 
   -- Convert: [unsigned] Integer to Float (FCVT.S.W) ----------------------------------------
   -- -------------------------------------------------------------------------------------------
-  convert_i2f: process(clk_i)
+  convert_i2f: process(rstn_i, clk_i)
   begin
     -- this process only computes the absolute input value
     -- the actual conversion is done by the normalizer
-    if rising_edge(clk_i) then
+    if (rstn_i = '0') then
+      fu_conv_i2f.result <= (others => '0');
+      fu_conv_i2f.sign   <= '0';
+      fu_conv_i2f.done   <= '0';
+    elsif rising_edge(clk_i) then
       if (ctrl_i.ir_funct12(0) = '0') and (rs1_i(31) = '1') then -- convert signed integer
         fu_conv_i2f.result <= std_ulogic_vector(0 - unsigned(rs1_i));
         fu_conv_i2f.sign   <= rs1_i(31); -- original sign
@@ -662,9 +671,18 @@ begin
 
   -- Multiplier Core (FMUL) -----------------------------------------------------------------
   -- -------------------------------------------------------------------------------------------
-  multiplier_core: process(clk_i)
+  multiplier_core: process(rstn_i, clk_i)
   begin
-    if rising_edge(clk_i) then
+    if (rstn_i = '0') then
+      multiplier.opa     <= (others => '0');
+      multiplier.opb     <= (others => '0');
+      multiplier.buf_ff  <= (others => '0');
+      multiplier.product <= (others => '0');
+      multiplier.sign    <= '0';
+      multiplier.exp_res <= (others => '0');
+      multiplier.flags   <= (others => '0');
+      multiplier.latency <= (others => '0');
+    elsif rising_edge(clk_i) then
       -- multiplier core --
       if (multiplier.start = '1') then -- FIXME / TODO remove buffer?
         multiplier.opa <= unsigned('1' & fpu_operands.rs1(22 downto 0)); -- append hidden one
@@ -694,6 +712,10 @@ begin
         ((fpu_operands.rs1_class(fp_class_pos_inf_c)  or fpu_operands.rs1_class(fp_class_neg_inf_c)) and
          (fpu_operands.rs2_class(fp_class_pos_zero_c) or fpu_operands.rs2_class(fp_class_neg_zero_c))); -- mul(+/-inf, +/-zero)
 
+      -- unused exception flags --
+      multiplier.flags(fp_exc_dz_c) <= '0'; -- division by zero: not possible here
+      multiplier.flags(fp_exc_nx_c) <= '0'; -- inexact: not possible here
+
       -- latency shift register --
       multiplier.latency <= multiplier.latency(multiplier.latency'left-1 downto 0) & multiplier.start;
     end if;
@@ -707,20 +729,18 @@ begin
   multiplier.done  <= multiplier.latency(multiplier.latency'left);
   fu_mul.done      <= multiplier.done;
 
-  -- unused exception flags --
-  multiplier.flags(fp_exc_dz_c) <= '0'; -- division by zero: not possible here
-  multiplier.flags(fp_exc_nx_c) <= '0'; -- inexcat: not possible here
-
 
   -- result class --
-  multiplier_class_core: process(clk_i)
+  multiplier_class_core: process(rstn_i, clk_i)
     variable a_pos_norm_v, a_neg_norm_v, b_pos_norm_v, b_neg_norm_v : std_ulogic;
     variable a_pos_subn_v, a_neg_subn_v, b_pos_subn_v, b_neg_subn_v : std_ulogic;
     variable a_pos_zero_v, a_neg_zero_v, b_pos_zero_v, b_neg_zero_v : std_ulogic;
     variable a_pos_inf_v,  a_neg_inf_v,  b_pos_inf_v,  b_neg_inf_v  : std_ulogic;
     variable a_snan_v,     a_qnan_v,     b_snan_v,     b_qnan_v     : std_ulogic;
   begin
-    if rising_edge(clk_i) then
+    if (rstn_i = '0') then
+      multiplier.res_class <= (others => '0');
+    elsif rising_edge(clk_i) then
       -- minions --
       a_pos_norm_v := fpu_operands.rs1_class(fp_class_pos_norm_c);    b_pos_norm_v := fpu_operands.rs2_class(fp_class_pos_norm_c);
       a_neg_norm_v := fpu_operands.rs1_class(fp_class_neg_norm_c);    b_neg_norm_v := fpu_operands.rs2_class(fp_class_neg_norm_c);
@@ -813,9 +833,21 @@ begin
 
   -- Adder/Subtractor Core (FADD, FSUB) -----------------------------------------------------
   -- -------------------------------------------------------------------------------------------
-  adder_subtractor_core: process(clk_i)
+  adder_subtractor_core: process(rstn_i, clk_i)
   begin
-    if rising_edge(clk_i) then
+    if (rstn_i = '0') then
+      addsub.latency            <= (others => '0');
+      addsub.exp_comp           <= (others => '0');
+      addsub.man_sreg           <= (others => '0');
+      addsub.exp_cnt            <= (others => '0');
+      addsub.man_g_ext          <= '0';
+      addsub.man_r_ext          <= '0';
+      addsub.man_s_ext          <= '0';
+      addsub.man_comp           <= '0';
+      addsub.add_stage          <= (others => '0');
+      addsub.res_sign           <= '0';
+      addsub.flags(fp_exc_nv_c) <= '0';
+    elsif rising_edge(clk_i) then
       -- arbitration / latency --
       if (ctrl_engine.state = S_IDLE) then -- hacky "reset"
         addsub.latency <= (others => '0');
@@ -948,14 +980,16 @@ begin
 
 
   -- result class --
-  adder_subtractor_class_core: process(clk_i)
+  adder_subtractor_class_core: process(rstn_i, clk_i)
     variable a_pos_norm_v, a_neg_norm_v, b_pos_norm_v, b_neg_norm_v : std_ulogic;
     variable a_pos_subn_v, a_neg_subn_v, b_pos_subn_v, b_neg_subn_v : std_ulogic;
     variable a_pos_zero_v, a_neg_zero_v, b_pos_zero_v, b_neg_zero_v : std_ulogic;
     variable a_pos_inf_v,  a_neg_inf_v,  b_pos_inf_v,  b_neg_inf_v  : std_ulogic;
     variable a_snan_v,     a_qnan_v,     b_snan_v,     b_qnan_v     : std_ulogic;
   begin
-    if rising_edge(clk_i) then
+    if (rstn_i = '0') then
+      addsub.res_class <= (others => '0');
+    elsif rising_edge(clk_i) then
       -- minions --
       a_pos_norm_v := fpu_operands.rs1_class(fp_class_pos_norm_c);    b_pos_norm_v := fpu_operands.rs2_class(fp_class_pos_norm_c);
       a_neg_norm_v := fpu_operands.rs1_class(fp_class_neg_norm_c);    b_neg_norm_v := fpu_operands.rs2_class(fp_class_neg_norm_c);
@@ -1163,9 +1197,14 @@ begin
 
   -- Output Result to CPU Pipeline ----------------------------------------------------------
   -- -------------------------------------------------------------------------------------------
-  output_gate: process(clk_i)
+  output_gate: process(rstn_i, clk_i)
   begin
-    if rising_edge(clk_i) then
+    if (rstn_i = '0') then
+      res_o  <= (others => '0');
+      fflags <= (others => '0');
+    elsif rising_edge(clk_i) then
+      res_o  <= (others => '0');
+      fflags <= (others => '0');
       if (ctrl_engine.valid = '1') then
         case funct_ff is
           when op_class_c =>
@@ -1187,9 +1226,6 @@ begin
             res_o  <= normalizer.result;
             fflags <= normalizer.flags_out;
         end case;
-      else
-        res_o  <= (others => '0');
-        fflags <= (others => '0');
       end if;
     end if;
   end process output_gate;

diff --git a/rtl/core/neorv32_cpu_cp_muldiv.vhd b/rtl/core/neorv32_cpu_cp_muldiv.vhd
@@ -194,9 +194,13 @@ begin
   if (FAST_MUL_EN = true) generate
 
     -- direct approach --
-    multiplier_core: process(clk_i)
+    multiplier_core: process(rstn_i, clk_i)
     begin
-      if rising_edge(clk_i) then
+      if (rstn_i = '0') then
+        mul.dsp_x <= (others => '0');
+        mul.dsp_y <= (others => '0');
+        mul.prod  <= (others => '0');
+      elsif rising_edge(clk_i) then
         if (mul.start = '1') then
           mul.dsp_x <= signed((rs1_i(rs1_i'left) and ctrl.rs1_is_signed) & rs1_i);
           mul.dsp_y <= signed((rs2_i(rs2_i'left) and ctrl.rs2_is_signed) & rs2_i);
@@ -225,9 +229,11 @@ begin
   if (FAST_MUL_EN = false) generate
 
     -- shift-and-add algorithm --
-    multiplier_core: process(clk_i)
+    multiplier_core: process(rstn_i, clk_i)
     begin
-      if rising_edge(clk_i) then
+      if (rstn_i = '0') then
+        mul.prod <= (others => '0');
+      elsif rising_edge(clk_i) then
         if (mul.start = '1') then -- start new multiplication
           mul.prod(63 downto 32) <= (others => '0');
           mul.prod(31 downto 00) <= rs1_i;
@@ -271,9 +277,12 @@ begin
   if (DIVISION_EN = true) generate
 
     -- restoring division algorithm --
-    divider_core: process(clk_i)
+    divider_core: process(rstn_i, clk_i)
     begin
-      if rising_edge(clk_i) then
+      if (rstn_i = '0') then
+        div.quotient  <= (others => '0');
+        div.remainder <= (others => '0');
+      elsif rising_edge(clk_i) then
         if (div.start = '1') then -- start new division
           if ((rs1_i(rs1_i'left) and ctrl.rs1_is_signed) = '1') then -- signed division?
             div.quotient <= std_ulogic_vector(0 - unsigned(rs1_i)); -- make positive

diff --git a/rtl/core/neorv32_cpu_cp_shifter.vhd b/rtl/core/neorv32_cpu_cp_shifter.vhd
@@ -150,9 +150,12 @@ begin
     end process barrel_shifter_core;
 
     -- pipeline register --
-    barrel_shifter_buf: process(clk_i)
+    barrel_shifter_buf: process(rstn_i, clk_i)
     begin
-      if rising_edge(clk_i) then
+      if (rstn_i = '0') then
+        bs_start  <= '0';
+        bs_result <= (others => '0');
+      elsif rising_edge(clk_i) then
         bs_start  <= start_i;
         bs_result <= bs_level(0); -- this register can be moved by the register balancing
       end if;