From 6e3d30ae28cd5d8d6d7d61ab4075efd54e9cb213 Mon Sep 17 00:00:00 2001 From: Maximilian Heer <119745617+maximilianheer@users.noreply.github.com> Date: Tue, 19 Nov 2024 07:57:02 +0100 Subject: [PATCH] =?UTF-8?q?Bugfix=20for=20the=20retrans-issue=20that=20sto?= =?UTF-8?q?pped=20RDMA-writes=20at=208k:=20We=20now=20h=E2=80=A6=20(#86)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Bugfix for the retrans-issue that stopped RDMA-writes at 8k: We now have a ddr-write-access counter in rdma_mux_retrans that allows to split the incoming data stream in multiple MTU-sized bursts for writing to the retrans-memory in HBM. --- hw/hdl/network/rdma/rdma_mux_retrans.sv | 104 ++++++++++++------------ hw/hdl/network/rdma/roce_stack.sv | 34 ++++++++ 2 files changed, 87 insertions(+), 51 deletions(-) diff --git a/hw/hdl/network/rdma/rdma_mux_retrans.sv b/hw/hdl/network/rdma/rdma_mux_retrans.sv index 53fb6c8d..e80388d4 100644 --- a/hw/hdl/network/rdma/rdma_mux_retrans.sv +++ b/hw/hdl/network/rdma/rdma_mux_retrans.sv @@ -169,7 +169,7 @@ logic [0:0] state_C, state_N; logic rd_C, rd_N; logic actv_C, actv_N; -logic [LEN_BITS-BEAT_LOG_BITS:0] cnt_C, cnt_N; +logic [LEN_BITS-BEAT_LOG_BITS:0] cnt_C, cnt_N, cnt_ddr_wr; logic tr_done; @@ -295,6 +295,22 @@ always_comb begin: DP endcase end +// Counts the 64-byte beats still to be written to the retrans buffer for the current command (axis_ddr_wr.tlast is raised while this counter equals 1) +always_ff @ (posedge aclk) begin + + if(aresetn == 1'b0) begin + cnt_ddr_wr <= '0; + end else begin + if(s_req_net.valid & s_req_net.ready) begin + // Load only when the command is actually accepted (valid & ready), so a stalled command cannot overwrite the count of a burst that is still being written. The byte length is rounded up to whole 64-byte beats; a plain len/64 would drop a partial final beat and never produce tlast for lengths below 64 + cnt_ddr_wr <= s_req_net.data.len[LEN_BITS-1:6] + (|s_req_net.data.len[5:0]); + end else begin + // Decrement the counter with every successful write to the retrans-memory + cnt_ddr_wr <= (axis_ddr_wr.tvalid & axis_ddr_wr.tready) ? 
(cnt_ddr_wr-1) : cnt_ddr_wr; + end + end +end + // Mux always_comb begin if(state_C == ST_MUX) begin @@ -343,62 +359,48 @@ assign axis_net.tlast = actv_C ? (rd_C ? s_axis_user_rsp.tlast : s_axis_user_req // Data-loop? Not exactly what this is for. Seems to loop data back from the top-level module to the top-level module assign axis_ddr_wr.tdata = s_axis_user_req.tdata; assign axis_ddr_wr.tkeep = s_axis_user_req.tkeep; -assign axis_ddr_wr.tlast = s_axis_user_req.tlast; +assign axis_ddr_wr.tlast = (cnt_ddr_wr == 1); // // DEBUG // - -// create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_retrans -// set_property -dict [list CONFIG.C_DATA_DEPTH {8192} CONFIG.C_PROBE29_WIDTH {22} CONFIG.C_PROBE23_WIDTH {28} CONFIG.C_NUM_OF_PROBES {35} CONFIG.Component_Name {ila_retrans} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_PROBE34_MU_CNT {2} CONFIG.C_PROBE33_MU_CNT {2} CONFIG.C_PROBE32_MU_CNT {2} CONFIG.C_PROBE31_MU_CNT {2} CONFIG.C_PROBE30_MU_CNT {2} CONFIG.C_PROBE29_MU_CNT {2} CONFIG.C_PROBE28_MU_CNT {2} CONFIG.C_PROBE27_MU_CNT {2} CONFIG.C_PROBE26_MU_CNT {2} CONFIG.C_PROBE25_MU_CNT {2} CONFIG.C_PROBE24_MU_CNT {2} CONFIG.C_PROBE23_MU_CNT {2} CONFIG.C_PROBE22_MU_CNT {2} CONFIG.C_PROBE21_MU_CNT {2} CONFIG.C_PROBE20_MU_CNT {2} CONFIG.C_PROBE19_MU_CNT {2} CONFIG.C_PROBE18_MU_CNT {2} CONFIG.C_PROBE17_MU_CNT {2} CONFIG.C_PROBE16_MU_CNT {2} CONFIG.C_PROBE15_MU_CNT {2} CONFIG.C_PROBE14_MU_CNT {2} CONFIG.C_PROBE13_MU_CNT {2} CONFIG.C_PROBE12_MU_CNT {2} CONFIG.C_PROBE11_MU_CNT {2} CONFIG.C_PROBE10_MU_CNT {2} CONFIG.C_PROBE9_MU_CNT {2} CONFIG.C_PROBE8_MU_CNT {2} CONFIG.C_PROBE7_MU_CNT {2} CONFIG.C_PROBE6_MU_CNT {2} CONFIG.C_PROBE5_MU_CNT {2} CONFIG.C_PROBE4_MU_CNT {2} CONFIG.C_PROBE3_MU_CNT {2} CONFIG.C_PROBE2_MU_CNT {2} CONFIG.C_PROBE1_MU_CNT {2} CONFIG.C_PROBE0_MU_CNT {2} CONFIG.ALL_PROBE_SAME_MU_CNT {2}] [get_ips ila_retrans] - /* ila_retrans inst_ila_retrans ( - .clk(aclk), - - .probe0(s_req_net.valid), - .probe1(s_req_net.ready), - - .probe2(m_req_user.valid), - 
.probe3(m_req_user.ready), - - .probe4(s_axis_user_rsp.tvalid), - .probe5(s_axis_user_rsp.tready), - .probe6(s_axis_user_rsp.tlast), - - .probe7(s_axis_user_req.tvalid), - .probe8(s_axis_user_req.tready), - .probe9(s_axis_user_req.tlast), - - .probe10(m_axis_net.tvalid), - .probe11(m_axis_net.tready), - .probe12(m_axis_net.tlast), - - .probe13(m_req_ddr_rd.valid), - .probe14(m_req_ddr_rd.ready), - .probe15(m_req_ddr_wr.valid), - .probe16(m_req_ddr_wr.ready), - - .probe17(s_axis_ddr.tvalid), - .probe18(s_axis_ddr.tready), - .probe19(s_axis_ddr.tlast), - - .probe20(m_axis_ddr.tvalid), - .probe21(m_axis_ddr.tready), - .probe22(m_axis_ddr.tlast), - - .probe23(len_snk[27:0]), // 28 - .probe24(actv_snk), - .probe25(rd_snk), - .probe26(seq_snk_valid), - .probe27(seq_snk_ready), - .probe28(state_C), - .probe29(cnt_C[21:0]), // 22 - .probe30(rd_C), - .probe31(actv_C), - .probe32(tr_done), - .probe33(req_user.ready), - .probe34(req_user.valid) + .clk(aclk), + .probe0(s_req_net.valid), + .probe1(s_req_net.data), // 128 + .probe2(s_req_net.ready), + .probe3(s_axis_user_req.tvalid), + .probe4(s_axis_user_req.tdata), // 512 + .probe5(s_axis_user_req.tkeep), // 64 + .probe6(s_axis_user_req.tready), + .probe7(s_axis_user_req.tlast), + .probe8(m_axis_net.tvalid), + .probe9(m_axis_net.tdata), // 512 + .probe10(m_axis_net.tkeep), // 64 + .probe11(m_axis_net.tready), + .probe12(m_axis_net.tlast), + .probe13(m_req_ddr_wr.valid), + .probe14(m_req_ddr_wr.data), // 128 + .probe15(m_req_ddr_wr.ready), + .probe16(m_axis_ddr.tvalid), + .probe17(m_axis_ddr.tdata), // 512 + .probe18(m_axis_ddr.tkeep), // 64 + .probe19(m_axis_ddr.tready), + .probe20(m_axis_ddr.tlast), + .probe21(seq_snk_valid), + .probe22(seq_snk_ready), + .probe23(rd_snk), + .probe24(actv_snk), + .probe25(cnt_C), // 26 + .probe26(state_C), + .probe27(cnt_ddr_wr), // 26 + .probe28(tr_done) ); */ +/* NOTE(review): the IP settings below still describe the previous 35-probe layout (C_NUM_OF_PROBES {35}, PROBE23 width 28, PROBE29 width 22), while the instance above connects 29 probes (probe0..probe28) with the widths given in its inline comments - regenerate these settings before enabling the ILA. +create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_retrans +set_property -dict [list 
CONFIG.C_PROBE29_WIDTH {22} CONFIG.C_PROBE23_WIDTH {28} CONFIG.C_NUM_OF_PROBES {35} CONFIG.Component_Name {ila_retrans} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_PROBE34_MU_CNT {2} CONFIG.C_PROBE33_MU_CNT {2} CONFIG.C_PROBE32_MU_CNT {2} CONFIG.C_PROBE31_MU_CNT {2} CONFIG.C_PROBE30_MU_CNT {2} CONFIG.C_PROBE29_MU_CNT {2} CONFIG.C_PROBE28_MU_CNT {2} CONFIG.C_PROBE27_MU_CNT {2} CONFIG.C_PROBE26_MU_CNT {2} CONFIG.C_PROBE25_MU_CNT {2} CONFIG.C_PROBE24_MU_CNT {2} CONFIG.C_PROBE23_MU_CNT {2} CONFIG.C_PROBE22_MU_CNT {2} CONFIG.C_PROBE21_MU_CNT {2} CONFIG.C_PROBE20_MU_CNT {2} CONFIG.C_PROBE19_MU_CNT {2} CONFIG.C_PROBE18_MU_CNT {2} CONFIG.C_PROBE17_MU_CNT {2} CONFIG.C_PROBE16_MU_CNT {2} CONFIG.C_PROBE15_MU_CNT {2} CONFIG.C_PROBE14_MU_CNT {2} CONFIG.C_PROBE13_MU_CNT {2} CONFIG.C_PROBE12_MU_CNT {2} CONFIG.C_PROBE11_MU_CNT {2} CONFIG.C_PROBE10_MU_CNT {2} CONFIG.C_PROBE9_MU_CNT {2} CONFIG.C_PROBE8_MU_CNT {2} CONFIG.C_PROBE7_MU_CNT {2} CONFIG.C_PROBE6_MU_CNT {2} CONFIG.C_PROBE5_MU_CNT {2} CONFIG.C_PROBE4_MU_CNT {2} CONFIG.C_PROBE3_MU_CNT {2} CONFIG.C_PROBE2_MU_CNT {2} CONFIG.C_PROBE1_MU_CNT {2} CONFIG.C_PROBE0_MU_CNT {2} CONFIG.ALL_PROBE_SAME_MU_CNT {2}] [get_ips ila_retrans] +*/ + endmodule \ No newline at end of file diff --git a/hw/hdl/network/rdma/roce_stack.sv b/hw/hdl/network/rdma/roce_stack.sv index 650f4abb..543aa530 100644 --- a/hw/hdl/network/rdma/roce_stack.sv +++ b/hw/hdl/network/rdma/roce_stack.sv @@ -220,6 +220,40 @@ assign rdma_wr_req.ready = m_rdma_wr_req.ready; // RoCE stack // +/* ila_rdma inst_ila_rdma ( + .clk(nclk), + .probe0(s_rdma_qp_interface.valid), + .probe1(s_rdma_qp_interface.ready), + .probe2(s_rdma_qp_interface.data), // 184 + .probe3(s_rdma_conn_interface.valid), + .probe4(s_rdma_conn_interface.ready), + .probe5(s_rdma_conn_interface.data), // 184 + .probe6(s_rdma_sq.valid), + .probe7(s_rdma_sq.ready), + .probe8(s_rdma_sq.data), // 256 + .probe9(m_rdma_rd_req.valid), + .probe10(m_rdma_rd_req.ready), + .probe11(m_rdma_rd_req.data), // 128 + 
.probe12(m_rdma_wr_req.valid), + .probe13(m_rdma_wr_req.ready), + .probe14(m_rdma_wr_req.data), // 128 + .probe15(m_rdma_mem_rd_cmd.valid), + .probe16(m_rdma_mem_rd_cmd.ready), + .probe17(m_rdma_mem_rd_cmd.data), // 96 + .probe18(m_rdma_mem_wr_cmd.valid), + .probe19(m_rdma_mem_wr_cmd.ready), + .probe20(m_rdma_mem_wr_cmd.data), // 96 + .probe21(s_axis_rdma_rd_req.tvalid), + .probe22(s_axis_rdma_rd_req.tdata), // 512 + .probe23(s_axis_rdma_rd_req.tkeep), // 64 + .probe24(s_axis_rdma_rd_req.tready), + .probe25(s_axis_rdma_rd_req.tlast), + .probe26(m_axis_rdma_wr.tvalid), + .probe27(m_axis_rdma_wr.tdata), // 512 + .probe28(m_axis_rdma_wr.tkeep), // 64 + .probe29(m_axis_rdma_wr.tready), + .probe30(m_axis_rdma_wr.tlast) +); */ // NOTE(review): the commented-out block that follows reuses the instance name inst_ila_rdma - enable only one of the two /* ila_rdma inst_ila_rdma (