Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 fix minor bug in FPU MUL instruction #1028

Merged
merged 4 commits into from
Sep 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12

| Date | Version | Comment | Ticket |
|:----:|:-------:|:--------|:------:|
| 20.09-2024 | 1.10.4.2 | :bug: fix minor bug in FPU's multiplication instruction (invalid-check logic if any operand is sNAN) | [#1028](https://github.com/stnolting/neorv32/pull/1028) |
| 20.09.2024 | 1.10.4.1 | rtl signal renamings to make the code more readable | [#1026](https://github.com/stnolting/neorv32/pull/1026) |
| 16.09.2024 | [**:rocket:1.10.4**](https://github.com/stnolting/neorv32/releases/tag/v1.10.4) | **New release** | |
| 15.09.2024 | 1.10.3.10 | :bug: SW: fix stack-alignment (has to be 128-bit-aligned) before entering the very first procedure (`main`) | [#1021](https://github.com/stnolting/neorv32/pull/1021) |
Expand Down
84 changes: 38 additions & 46 deletions rtl/core/neorv32_cpu_cp_fpu.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -367,8 +367,8 @@ begin
op_e_all_one_v := and_reduce_f(op_data(i)(30 downto 23));

-- check special cases --
op_is_zero_v := op_e_all_zero_v and op_m_all_zero_v; -- zero
op_is_inf_v := op_e_all_one_v and op_m_all_zero_v; -- infinity
op_is_zero_v := op_e_all_zero_v and op_m_all_zero_v; -- zero
op_is_inf_v := op_e_all_one_v and op_m_all_zero_v; -- infinity
-- As we are flushing subnormals before classification they will show up as 0.0
-- So we check calculate the denorm value is the non-flushed mantissa gated by the op_e_all_zero
if (i = 0) then
Expand All @@ -381,7 +381,7 @@ begin
--if (i = 2) then
-- op_is_denorm_v := or_reduce_f(rs3_i(22 downto 0)) and op_e_all_zero_v; -- set the number to subnormal
--end if;
op_is_nan_v := op_e_all_one_v and (not op_m_all_zero_v); -- NaN
op_is_nan_v := op_e_all_one_v and (not op_m_all_zero_v); -- NaN

-- actual attributes --
op_class(i)(fp_class_neg_inf_c) <= op_data(i)(31) and op_is_inf_v; -- negative infinity
Expand Down Expand Up @@ -772,14 +772,10 @@ begin
elsif rising_edge(clk_i) then
-- multiplier core --
-- if the inputs to the multiplier is +/- zero or +/- denorm the result will always be +/- zero
if ((fpu_operands.rs1_class(fp_class_pos_zero_c) or
fpu_operands.rs1_class(fp_class_neg_zero_c) or
fpu_operands.rs2_class(fp_class_pos_zero_c) or
fpu_operands.rs2_class(fp_class_neg_zero_c) or
fpu_operands.rs1_class(fp_class_pos_denorm_c) or
fpu_operands.rs1_class(fp_class_neg_denorm_c) or
fpu_operands.rs2_class(fp_class_pos_denorm_c) or
fpu_operands.rs2_class(fp_class_neg_denorm_c)) = '1') then
if ((fpu_operands.rs1_class(fp_class_pos_zero_c) or fpu_operands.rs1_class(fp_class_neg_zero_c) or
fpu_operands.rs2_class(fp_class_pos_zero_c) or fpu_operands.rs2_class(fp_class_neg_zero_c) or
fpu_operands.rs1_class(fp_class_pos_denorm_c) or fpu_operands.rs1_class(fp_class_neg_denorm_c) or
fpu_operands.rs2_class(fp_class_pos_denorm_c) or fpu_operands.rs2_class(fp_class_neg_denorm_c)) = '1') then
if (multiplier.start = '1') then
-- the result will be 0 so force it to be 0
multiplier.product <= (others => '0');
Expand All @@ -794,7 +790,7 @@ begin
multiplier.product <= std_ulogic_vector(multiplier.buf_ff(47 downto 0)); -- let the register balancing do the magic here
multiplier.exp_res <= std_ulogic_vector(unsigned('0' & multiplier.exp_sum) - 127);
end if;
multiplier.sign <= fpu_operands.rs1(31) xor fpu_operands.rs2(31); -- resulting sign
multiplier.sign <= fpu_operands.rs1(31) xor fpu_operands.rs2(31); -- resulting sign

-- exponent computation --
-- assume we are exact and the operation hasn't over/under flown
Expand All @@ -804,14 +800,10 @@ begin

-- Multiplier exception handling
-- Check that one operand is not inf or NAN before potentially setting OF, UF, and NX flags
if ((fpu_operands.rs1_class(fp_class_pos_inf_c) or
fpu_operands.rs2_class(fp_class_pos_inf_c) or
fpu_operands.rs1_class(fp_class_neg_inf_c) or
fpu_operands.rs2_class(fp_class_neg_inf_c) or
fpu_operands.rs1_class(fp_class_snan_c) or
fpu_operands.rs2_class(fp_class_snan_c) or
fpu_operands.rs1_class(fp_class_qnan_c) or
fpu_operands.rs2_class(fp_class_qnan_c)) = '0') then
if ((fpu_operands.rs1_class(fp_class_pos_inf_c) or fpu_operands.rs2_class(fp_class_pos_inf_c) or
fpu_operands.rs1_class(fp_class_neg_inf_c) or fpu_operands.rs2_class(fp_class_neg_inf_c) or
fpu_operands.rs1_class(fp_class_snan_c) or fpu_operands.rs2_class(fp_class_snan_c) or
fpu_operands.rs1_class(fp_class_qnan_c) or fpu_operands.rs2_class(fp_class_qnan_c)) = '0') then
if (multiplier.exp_res(multiplier.exp_res'left) = '1') then -- underflow (exp_res is "negative")
multiplier.flags(fp_exc_of_c) <= '0';
multiplier.flags(fp_exc_uf_c) <= '1';
Expand All @@ -826,12 +818,12 @@ begin
end if;

-- invalid operation --
-- Any multiplication between +/- inf and +/- zoer is a not valid operation
-- Any multiplication between +/- inf and +/- zero is a not valid operation
-- Any multiplication with sNAN is not a valid operation
-- If subnormals are flushed to zero we need to treat them as zero for exception handling
if (not FPU_SUBNORMAL_SUPPORT) then
multiplier.flags(fp_exc_nv_c) <=
((fpu_operands.rs2_class(fp_class_snan_c) or fpu_operands.rs2_class(fp_class_snan_c))) or -- mul(sNAN, X) or mul(X, sNAN)
((fpu_operands.rs1_class(fp_class_snan_c) or fpu_operands.rs2_class(fp_class_snan_c))) or -- mul(sNAN, X) or mul(X, sNAN)
((fpu_operands.rs1_class(fp_class_pos_denorm_c) or fpu_operands.rs1_class(fp_class_neg_denorm_c)) and
(fpu_operands.rs2_class(fp_class_pos_inf_c) or fpu_operands.rs2_class(fp_class_neg_inf_c))) or -- mul(+/-denorm, +/-inf)
((fpu_operands.rs1_class(fp_class_pos_inf_c) or fpu_operands.rs1_class(fp_class_neg_inf_c)) and
Expand Down Expand Up @@ -877,16 +869,16 @@ begin
multiplier.res_class <= (others => '0');
elsif rising_edge(clk_i) then
-- minions --
a_pos_norm_v := fpu_operands.rs1_class(fp_class_pos_norm_c); b_pos_norm_v := fpu_operands.rs2_class(fp_class_pos_norm_c);
a_neg_norm_v := fpu_operands.rs1_class(fp_class_neg_norm_c); b_neg_norm_v := fpu_operands.rs2_class(fp_class_neg_norm_c);
a_pos_subn_v := fpu_operands.rs1_class(fp_class_pos_denorm_c); b_pos_subn_v := fpu_operands.rs2_class(fp_class_pos_denorm_c);
a_neg_subn_v := fpu_operands.rs1_class(fp_class_neg_denorm_c); b_neg_subn_v := fpu_operands.rs2_class(fp_class_neg_denorm_c);
a_pos_zero_v := fpu_operands.rs1_class(fp_class_pos_zero_c); b_pos_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c);
a_neg_zero_v := fpu_operands.rs1_class(fp_class_neg_zero_c); b_neg_zero_v := fpu_operands.rs2_class(fp_class_neg_zero_c);
a_pos_inf_v := fpu_operands.rs1_class(fp_class_pos_inf_c); b_pos_inf_v := fpu_operands.rs2_class(fp_class_pos_inf_c);
a_neg_inf_v := fpu_operands.rs1_class(fp_class_neg_inf_c); b_neg_inf_v := fpu_operands.rs2_class(fp_class_neg_inf_c);
a_snan_v := fpu_operands.rs1_class(fp_class_snan_c); b_snan_v := fpu_operands.rs2_class(fp_class_snan_c);
a_qnan_v := fpu_operands.rs1_class(fp_class_qnan_c); b_qnan_v := fpu_operands.rs2_class(fp_class_qnan_c);
a_pos_norm_v := fpu_operands.rs1_class(fp_class_pos_norm_c); b_pos_norm_v := fpu_operands.rs2_class(fp_class_pos_norm_c);
a_neg_norm_v := fpu_operands.rs1_class(fp_class_neg_norm_c); b_neg_norm_v := fpu_operands.rs2_class(fp_class_neg_norm_c);
a_pos_subn_v := fpu_operands.rs1_class(fp_class_pos_denorm_c); b_pos_subn_v := fpu_operands.rs2_class(fp_class_pos_denorm_c);
a_neg_subn_v := fpu_operands.rs1_class(fp_class_neg_denorm_c); b_neg_subn_v := fpu_operands.rs2_class(fp_class_neg_denorm_c);
a_pos_zero_v := fpu_operands.rs1_class(fp_class_pos_zero_c); b_pos_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c);
a_neg_zero_v := fpu_operands.rs1_class(fp_class_neg_zero_c); b_neg_zero_v := fpu_operands.rs2_class(fp_class_neg_zero_c);
a_pos_inf_v := fpu_operands.rs1_class(fp_class_pos_inf_c); b_pos_inf_v := fpu_operands.rs2_class(fp_class_pos_inf_c);
a_neg_inf_v := fpu_operands.rs1_class(fp_class_neg_inf_c); b_neg_inf_v := fpu_operands.rs2_class(fp_class_neg_inf_c);
a_snan_v := fpu_operands.rs1_class(fp_class_snan_c); b_snan_v := fpu_operands.rs2_class(fp_class_snan_c);
a_qnan_v := fpu_operands.rs1_class(fp_class_qnan_c); b_qnan_v := fpu_operands.rs2_class(fp_class_qnan_c);

-- +normal --
multiplier.res_class(fp_class_pos_norm_c) <=
Expand Down Expand Up @@ -1094,8 +1086,8 @@ begin
end if;
else
-- also use denorm for the check as we flush denorms.
if ((fpu_operands.rs1_class(fp_class_pos_zero_c ) or fpu_operands.rs2_class(fp_class_pos_zero_c) or
fpu_operands.rs1_class(fp_class_neg_zero_c ) or fpu_operands.rs2_class(fp_class_neg_zero_c) or
if ((fpu_operands.rs1_class(fp_class_pos_zero_c) or fpu_operands.rs2_class(fp_class_pos_zero_c) or
fpu_operands.rs1_class(fp_class_neg_zero_c) or fpu_operands.rs2_class(fp_class_neg_zero_c) or
fpu_operands.rs1_class(fp_class_pos_denorm_c) or fpu_operands.rs2_class(fp_class_pos_denorm_c) or
fpu_operands.rs1_class(fp_class_neg_denorm_c) or fpu_operands.rs2_class(fp_class_neg_denorm_c)) = '0') then -- no input is zero
addsub.man_sreg <= addsub.small_man;
Expand Down Expand Up @@ -1218,13 +1210,13 @@ begin
addsub.flags(fp_exc_nv_c) <= '0';
if (ctrl_i.ir_funct12(7) = '0') then -- add
-- Do we have 2 infinities of opposite sign?
if (((fpu_operands.rs1_class(fp_class_pos_inf_c) and fpu_operands.rs2_class(fp_class_neg_inf_c)) or
if (((fpu_operands.rs1_class(fp_class_pos_inf_c) and fpu_operands.rs2_class(fp_class_neg_inf_c)) or
(fpu_operands.rs1_class(fp_class_neg_inf_c) and fpu_operands.rs2_class(fp_class_pos_inf_c))) = '1') then
addsub.flags(fp_exc_nv_c) <= '1';
end if;
else -- sub
-- Do we have 2 infinities of same sign?
if (((fpu_operands.rs1_class(fp_class_pos_inf_c) and fpu_operands.rs2_class(fp_class_pos_inf_c)) or
if (((fpu_operands.rs1_class(fp_class_pos_inf_c) and fpu_operands.rs2_class(fp_class_pos_inf_c)) or
(fpu_operands.rs1_class(fp_class_neg_inf_c) and fpu_operands.rs2_class(fp_class_neg_inf_c))) = '1') then
addsub.flags(fp_exc_nv_c) <= '1';
end if;
Expand Down Expand Up @@ -1269,30 +1261,30 @@ begin
addsub.res_class <= (others => '0');
elsif rising_edge(clk_i) then
-- minions --
a_pos_norm_v := fpu_operands.rs1_class(fp_class_pos_norm_c); b_pos_norm_v := fpu_operands.rs2_class(fp_class_pos_norm_c);
a_neg_norm_v := fpu_operands.rs1_class(fp_class_neg_norm_c); b_neg_norm_v := fpu_operands.rs2_class(fp_class_neg_norm_c);
a_pos_norm_v := fpu_operands.rs1_class(fp_class_pos_norm_c); b_pos_norm_v := fpu_operands.rs2_class(fp_class_pos_norm_c);
a_neg_norm_v := fpu_operands.rs1_class(fp_class_neg_norm_c); b_neg_norm_v := fpu_operands.rs2_class(fp_class_neg_norm_c);
-- as we can now correctly classify subnormals we need to override the post-add class
-- if we don't support subnormals as part of the add/sub circuit
if (FPU_SUBNORMAL_SUPPORT) then
a_pos_subn_v := fpu_operands.rs1_class(fp_class_pos_denorm_c); b_pos_subn_v := fpu_operands.rs2_class(fp_class_pos_denorm_c);
a_neg_subn_v := fpu_operands.rs1_class(fp_class_neg_denorm_c); b_neg_subn_v := fpu_operands.rs2_class(fp_class_neg_denorm_c);
a_pos_subn_v := fpu_operands.rs1_class(fp_class_pos_denorm_c); b_pos_subn_v := fpu_operands.rs2_class(fp_class_pos_denorm_c);
a_neg_subn_v := fpu_operands.rs1_class(fp_class_neg_denorm_c); b_neg_subn_v := fpu_operands.rs2_class(fp_class_neg_denorm_c);
else
a_pos_subn_v := '0'; b_pos_subn_v := '0';
a_neg_subn_v := '0'; b_neg_subn_v := '0';
end if;
if (FPU_SUBNORMAL_SUPPORT) then
a_pos_zero_v := fpu_operands.rs1_class(fp_class_pos_zero_c); b_pos_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c);
a_neg_zero_v := fpu_operands.rs1_class(fp_class_neg_zero_c); b_neg_zero_v := fpu_operands.rs2_class(fp_class_neg_zero_c);
a_pos_zero_v := fpu_operands.rs1_class(fp_class_pos_zero_c); b_pos_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c);
a_neg_zero_v := fpu_operands.rs1_class(fp_class_neg_zero_c); b_neg_zero_v := fpu_operands.rs2_class(fp_class_neg_zero_c);
else
a_pos_zero_v := fpu_operands.rs1_class(fp_class_pos_zero_c) or fpu_operands.rs1_class(fp_class_pos_denorm_c);
b_pos_zero_v := fpu_operands.rs2_class(fp_class_pos_zero_c) or fpu_operands.rs2_class(fp_class_pos_denorm_c);
a_neg_zero_v := fpu_operands.rs1_class(fp_class_neg_zero_c) or fpu_operands.rs1_class(fp_class_neg_denorm_c);
b_neg_zero_v := fpu_operands.rs2_class(fp_class_neg_zero_c) or fpu_operands.rs2_class(fp_class_neg_denorm_c);
end if;
a_pos_inf_v := fpu_operands.rs1_class(fp_class_pos_inf_c); b_pos_inf_v := fpu_operands.rs2_class(fp_class_pos_inf_c);
a_neg_inf_v := fpu_operands.rs1_class(fp_class_neg_inf_c); b_neg_inf_v := fpu_operands.rs2_class(fp_class_neg_inf_c);
a_snan_v := fpu_operands.rs1_class(fp_class_snan_c); b_snan_v := fpu_operands.rs2_class(fp_class_snan_c);
a_qnan_v := fpu_operands.rs1_class(fp_class_qnan_c); b_qnan_v := fpu_operands.rs2_class(fp_class_qnan_c);
a_pos_inf_v := fpu_operands.rs1_class(fp_class_pos_inf_c); b_pos_inf_v := fpu_operands.rs2_class(fp_class_pos_inf_c);
a_neg_inf_v := fpu_operands.rs1_class(fp_class_neg_inf_c); b_neg_inf_v := fpu_operands.rs2_class(fp_class_neg_inf_c);
a_snan_v := fpu_operands.rs1_class(fp_class_snan_c); b_snan_v := fpu_operands.rs2_class(fp_class_snan_c);
a_qnan_v := fpu_operands.rs1_class(fp_class_qnan_c); b_qnan_v := fpu_operands.rs2_class(fp_class_qnan_c);

if (ctrl_i.ir_funct12(7) = '0') then -- addition
-- +infinity --
Expand Down
2 changes: 1 addition & 1 deletion rtl/core/neorv32_package.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ package neorv32_package is

-- Architecture Constants -----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100401"; -- hardware version
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100402"; -- hardware version
constant archid_c : natural := 19; -- official RISC-V architecture ID
constant XLEN : natural := 32; -- native data path width

Expand Down