From f0a78717a5a5074994af89ac2086141e449a789f Mon Sep 17 00:00:00 2001 From: otoomey Date: Fri, 8 Mar 2024 21:03:41 +0000 Subject: [PATCH] Working state --- hw/snitch_cluster/src/snitch_vfpr.sv | 41 ++++++++++++------- .../sw/apps/tutorial/src/axpy.c | 19 +++++++-- 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/hw/snitch_cluster/src/snitch_vfpr.sv b/hw/snitch_cluster/src/snitch_vfpr.sv index a11a3112b..97550db69 100644 --- a/hw/snitch_cluster/src/snitch_vfpr.sv +++ b/hw/snitch_cluster/src/snitch_vfpr.sv @@ -2,6 +2,7 @@ module snitch_vfpr import snitch_pkg::*; #( parameter int unsigned DataWidth = 0, parameter int unsigned AddrWidth = 0, parameter int unsigned TCDMMemAddrWidth = 0, + parameter int unsigned RspBufferDepth = 3, parameter type tcdm_req_t = logic, parameter type tcdm_rsp_t = logic, parameter type tcdm_user_t = logic, @@ -63,6 +64,16 @@ module snitch_vfpr import snitch_pkg::*; #( ); for (genvar i = 0; i < 3; i++) begin + logic cong_out_valid, cong_out_ready; + logic [1:0] rsp_congestion; + stream_stall i_full_stall ( + .valid_i(rvalid_fork[i]), + .ready_o(rready_fork[i]), + .stall(rsp_congestion == (RspBufferDepth - 1)), + .valid_o(cong_out_valid), + .ready_i(cong_out_ready) + ); + logic ic_in_valid, ic_in_ready; logic track_in_valid, track_in_ready; @@ -71,8 +82,8 @@ module snitch_vfpr import snitch_pkg::*; #( ) i_tcdm_bypass ( .clk_i, .rst_ni(~rst_i), - .valid_i(rvalid_fork[i]), - .ready_o(rready_fork[i]), + .valid_i(cong_out_valid), + .ready_o(cong_out_ready), .valid_o({ic_in_valid, track_in_valid}), .ready_i({ic_in_ready, track_in_ready}) ); @@ -113,22 +124,24 @@ module snitch_vfpr import snitch_pkg::*; #( // buffer the interconnect output - necessary because // the ic expects output to be always ready logic ic_out_valid, ic_out_ready; - fall_through_register #( - .T(data_t) + stream_fifo #( + .FALL_THROUGH ( 1'b0 ), + .DEPTH ( RspBufferDepth ), + .T ( data_t ) ) i_rsp_buffer ( .clk_i, - .rst_ni(~rst_i), - .clr_i('0), - .testmode_i('0), - .valid_i(vfpr_rsp[i].p_valid), - .ready_o(/* unused */), - .data_i(vfpr_rsp[i].p.data), - .valid_o(ic_out_valid), - .ready_i(ic_out_ready), - .data_o(rdata_o[i]) + .rst_ni (~rst_i), + .flush_i (1'b0), + .testmode_i(1'b0), + .usage_o (rsp_congestion), + .data_i (vfpr_rsp[i].p.data), + .valid_i (vfpr_rsp[i].p_valid), + .ready_o (/* open */), + .data_o (rdata_o[i]), + .valid_o (ic_out_valid), + .ready_i (ic_out_ready) ); - stream_merge #( .N_INP(2) ) i_rsp_join ( diff --git a/target/snitch_cluster/sw/apps/tutorial/src/axpy.c b/target/snitch_cluster/sw/apps/tutorial/src/axpy.c index 263537fc2..f579f8b04 100644 --- a/target/snitch_cluster/sw/apps/tutorial/src/axpy.c +++ b/target/snitch_cluster/sw/apps/tutorial/src/axpy.c @@ -4,9 +4,22 @@ // Define your kernel void axpy(uint32_t l, double a, double *x, double *y, double *z) { - for (uint32_t i = 0; i < l ; i++) { - z[i] = a * x[i] + y[i]; - } + // for (uint32_t i = 0; i < l ; i++) { + // z[i] = a * x[i] + y[i]; + // } + // asm volatile( + // "frep.o %[n_frep], %[unroll], 8, 13 \n" + // "fmadd.d f16, f0, %[acc], f8 \n" // rd = rs1 x rs2 + rs3 + // : [ acc ] "+f"(a) + // : [ n_frep ] "r"(32), [ unroll ] "i"(1) + // : "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23" + // ); + asm volatile( + "frep.o %[n_frep], %[unroll], 0, 0 \n" + "fmadd.d f16, f0, %[acc], f8 \n" + : [ acc ] "+f"(a) + : [ n_frep ] "r"(31), [ unroll ] "i"(1) + : "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23"); snrt_fpu_fence(); }