From 0c28a751f8fd30ff500ad2681bc0f539db0b7070 Mon Sep 17 00:00:00 2001 From: Xinhu Date: Fri, 9 Dec 2022 16:07:20 +0100 Subject: [PATCH] Replace the dma with iDMA from PULP, add the driver of iDMA --- core-v-mini-mcu.core | 2 +- hw/core-v-mini-mcu/ao_peripheral_subsystem.sv | 23 +- hw/ip/iDMA/Makefile | 23 + hw/ip/iDMA/axi/axi_to_mem.sv | 758 ++++++++++++++++++ hw/ip/iDMA/axi/axi_to_mem_split.sv | 258 ++++++ hw/ip/iDMA/frontend/idma_reg32_frontend.h | 47 ++ hw/ip/iDMA/frontend/idma_reg32_frontend.hjson | 104 +++ hw/ip/iDMA/frontend/idma_reg32_frontend.html | 107 +++ hw/ip/iDMA/frontend/idma_reg32_frontend.sv | 138 ++++ .../frontend/idma_reg32_frontend_reg_pkg.sv | 97 +++ .../frontend/idma_reg32_frontend_reg_top.sv | 377 +++++++++ hw/ip/iDMA/frontend/idma_transfer_id_gen.sv | 79 ++ hw/ip/iDMA/frontend/reg_html.css | 74 ++ hw/ip/iDMA/iDMA.core | 40 + hw/ip/iDMA/idma_reg32_wrap.sv | 152 ++++ hw/ip/iDMA/include/axi/assign.svh | 655 +++++++++++++++ hw/ip/iDMA/include/axi/axi_pkg.sv | 423 ++++++++++ hw/ip/iDMA/include/axi/typedef.svh | 211 +++++ hw/ip/iDMA/include/idma/idma_pkg.sv | 82 ++ hw/ip/iDMA/include/idma/typedef.svh | 95 +++ hw/ip/iDMA/src/idma_axi_transport_layer.sv | 469 +++++++++++ hw/ip/iDMA/src/idma_backend.sv | 699 ++++++++++++++++ hw/ip/iDMA/src/idma_buffer.sv | 54 ++ hw/ip/iDMA/src/idma_channel_coupler.sv | 183 +++++ hw/ip/iDMA/src/idma_legalizer.sv | 411 ++++++++++ hw/ip/iDMA/src/idma_stream_fifo.sv | 127 +++ sw/device/lib/drivers/dma/dma.c | 28 +- sw/device/lib/drivers/dma/dma.h | 47 +- sw/device/lib/drivers/dma/dma_regs.h | 62 +- 29 files changed, 5735 insertions(+), 90 deletions(-) create mode 100644 hw/ip/iDMA/Makefile create mode 100644 hw/ip/iDMA/axi/axi_to_mem.sv create mode 100644 hw/ip/iDMA/axi/axi_to_mem_split.sv create mode 100644 hw/ip/iDMA/frontend/idma_reg32_frontend.h create mode 100644 hw/ip/iDMA/frontend/idma_reg32_frontend.hjson create mode 100644 hw/ip/iDMA/frontend/idma_reg32_frontend.html create mode 100644 
hw/ip/iDMA/frontend/idma_reg32_frontend.sv create mode 100644 hw/ip/iDMA/frontend/idma_reg32_frontend_reg_pkg.sv create mode 100644 hw/ip/iDMA/frontend/idma_reg32_frontend_reg_top.sv create mode 100644 hw/ip/iDMA/frontend/idma_transfer_id_gen.sv create mode 100644 hw/ip/iDMA/frontend/reg_html.css create mode 100644 hw/ip/iDMA/iDMA.core create mode 100644 hw/ip/iDMA/idma_reg32_wrap.sv create mode 100644 hw/ip/iDMA/include/axi/assign.svh create mode 100644 hw/ip/iDMA/include/axi/axi_pkg.sv create mode 100644 hw/ip/iDMA/include/axi/typedef.svh create mode 100644 hw/ip/iDMA/include/idma/idma_pkg.sv create mode 100644 hw/ip/iDMA/include/idma/typedef.svh create mode 100644 hw/ip/iDMA/src/idma_axi_transport_layer.sv create mode 100644 hw/ip/iDMA/src/idma_backend.sv create mode 100644 hw/ip/iDMA/src/idma_buffer.sv create mode 100644 hw/ip/iDMA/src/idma_channel_coupler.sv create mode 100644 hw/ip/iDMA/src/idma_legalizer.sv create mode 100644 hw/ip/iDMA/src/idma_stream_fifo.sv diff --git a/core-v-mini-mcu.core b/core-v-mini-mcu.core index 1c0d6c818..c26ccfb62 100644 --- a/core-v-mini-mcu.core +++ b/core-v-mini-mcu.core @@ -28,7 +28,7 @@ filesets: - yosyshq:picorv32_spimemio:0-r1 - x-heep:obi_spimemio:0.1.0 - x-heep:ip:boot_rom - - x-heep:ip:dma + - x-heep:ip:idma - x-heep:ip:power_manager - x-heep:ip:fast_intr_ctrl files: diff --git a/hw/core-v-mini-mcu/ao_peripheral_subsystem.sv b/hw/core-v-mini-mcu/ao_peripheral_subsystem.sv index 1ba744a35..84f2356da 100644 --- a/hw/core-v-mini-mcu/ao_peripheral_subsystem.sv +++ b/hw/core-v-mini-mcu/ao_peripheral_subsystem.sv @@ -316,11 +316,15 @@ module ao_peripheral_subsystem .intr_timer_expired_1_0_o(rv_timer_1_intr_o) ); - dma #( - .reg_req_t (reg_pkg::reg_req_t), - .reg_rsp_t (reg_pkg::reg_rsp_t), - .obi_req_t (obi_pkg::obi_req_t), - .obi_resp_t(obi_pkg::obi_resp_t) + dma_reg32_wrap #( + .reg_req_t(reg_pkg::reg_req_t), + .reg_rsp_t(reg_pkg::reg_rsp_t), + .obi_req_t(obi_pkg::obi_req_t), + .obi_resp_t(obi_pkg::obi_resp_t), + 
+      .OBI_DATA_WIDTH(32),
+      .OBI_ADDR_WIDTH(32),
+      .OBI_USER_WIDTH(4),
+      .OBI_ID_WIDTH(4)
   ) dma_i (
       .clk_i,
       .rst_ni,
@@ -330,13 +334,14 @@ module ao_peripheral_subsystem
       .dma_master0_ch0_resp_i,
       .dma_master1_ch0_req_o,
       .dma_master1_ch0_resp_i,
-      .spi_rx_valid_i(spi_rx_valid),
-      .spi_tx_ready_i(spi_tx_ready),
-      .spi_flash_rx_valid_i(spi_flash_rx_valid),
-      .spi_flash_tx_ready_i(spi_flash_tx_ready),
+      .spi_rx_valid_i(),
+      .spi_tx_ready_i(),
+      .spi_flash_rx_valid_i(),
+      .spi_flash_tx_ready_i(),
       .dma_intr_o
   );

+
   assign pad_req_o = ao_peripheral_slv_req[core_v_mini_mcu_pkg::PAD_CONTROL_IDX];
   assign ao_peripheral_slv_rsp[core_v_mini_mcu_pkg::PAD_CONTROL_IDX] = pad_resp_i;

diff --git a/hw/ip/iDMA/Makefile b/hw/ip/iDMA/Makefile
new file mode 100644
index 000000000..a218cd84e
--- /dev/null
+++ b/hw/ip/iDMA/Makefile
@@ -0,0 +1,23 @@
+PYTHON ?= python3
+BENDER ?= bender
+REG_PATH ?= $(shell $(BENDER) path register_interface)
+REG_TOOL ?= $(REG_PATH)/vendor/lowrisc_opentitan/util/regtool.py
+
+REG32_FE_DIR = ./frontend
+REG32_HJSON = $(REG32_FE_DIR)/idma_reg32_frontend.hjson
+
+REG_HTML_STRING = "\n\n\n\n\n"
+# reg32_regs: run regtool on REG32_HJSON (-r into REG32_FE_DIR, -D to the C header, -d appended to the HTML page), then copy reg_html.css.
+reg32_regs:
+	$(PYTHON) $(REG_TOOL) $(REG32_HJSON) -t $(REG32_FE_DIR) -r
+	$(PYTHON) $(REG_TOOL) $(REG32_HJSON) -D > $(REG32_FE_DIR)/idma_reg32_frontend.h
+	printf $(REG_HTML_STRING) > $(REG32_FE_DIR)/idma_reg32_frontend.html
+	$(PYTHON) $(REG_TOOL) $(REG32_HJSON) -d >> $(REG32_FE_DIR)/idma_reg32_frontend.html
+	printf "\n" >> $(REG32_FE_DIR)/idma_reg32_frontend.html
+	cp $(REG_PATH)/vendor/lowrisc_opentitan/util/reggen/reg_html.css $(REG32_FE_DIR)
+# clean: delete the generated header, register package/top and stylesheet (the HTML page is not removed here).
+clean:
+	rm -f $(REG32_FE_DIR)/idma_reg32_frontend.h
+	rm -f $(REG32_FE_DIR)/idma_reg32_frontend_reg_pkg.sv
+	rm -f $(REG32_FE_DIR)/idma_reg32_frontend_reg_top.sv
+	rm -f $(REG32_FE_DIR)/reg_html.css
\ No newline at end of file
diff --git a/hw/ip/iDMA/axi/axi_to_mem.sv b/hw/ip/iDMA/axi/axi_to_mem.sv
new file mode 100644
index 000000000..cc1b395ce
--- /dev/null
+++ b/hw/ip/iDMA/axi/axi_to_mem.sv
@@ -0,0 +1,758 @@
+// Copyright 2020 ETH Zurich and
University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// Authors:
+// - Michael Rogenmoser
+
+`include "common_cells/registers.svh"
+/// AXI4+ATOP slave module which translates AXI bursts into a memory stream.
+/// If both read and write channels of the AXI4+ATOP are active, both will have a
+/// utilization of 50%.
+module axi_to_mem #(
+  /// AXI4+ATOP request type. See `include/axi/typedef.svh`.
+  parameter type         axi_req_t    = logic,
+  /// AXI4+ATOP response type. See `include/axi/typedef.svh`.
+  parameter type         axi_resp_t   = logic,
+  /// Address width, has to be less than or equal to the width of the AXI address field.
+  /// Determines the width of `mem_addr_o`. Has to be wide enough to emit the memory region
+  /// which should be accessible.
+  parameter int unsigned AddrWidth    = 0,
+  /// AXI4+ATOP data width.
+  parameter int unsigned DataWidth    = 0,
+  /// AXI4+ATOP ID width.
+  parameter int unsigned IdWidth      = 0,
+  /// Number of banks at output, must evenly divide `DataWidth`.
+  parameter int unsigned NumBanks     = 0,
+  /// Depth of memory response buffer. This should be equal to the memory response latency.
+  parameter int unsigned BufDepth     = 1,
+  /// Hide write requests if the strb == '0
+  parameter bit          HideStrb     = 1'b0,
+  /// Depth of output fifo/fall_through_register. Increase for asymmetric backpressure (contention) on banks.
+  parameter int unsigned OutFifoDepth = 1,
+  /// Dependent parameter, do not override. Memory address type.
+  localparam type addr_t     = logic [AddrWidth-1:0],
+  /// Dependent parameter, do not override. Memory data type.
+  localparam type mem_data_t = logic [DataWidth/NumBanks-1:0],
+  /// Dependent parameter, do not override. Memory write strobe type.
+  localparam type mem_strb_t = logic [DataWidth/NumBanks/8-1:0]
+) (
+  /// Clock input.
+  input  logic                           clk_i,
+  /// Asynchronous reset, active low.
+  input  logic                           rst_ni,
+  /// The unit is busy handling an AXI4+ATOP request.
+  output logic                           busy_o,
+  /// AXI4+ATOP slave port, request input.
+  input  axi_req_t                       axi_req_i,
+  /// AXI4+ATOP slave port, response output.
+  output axi_resp_t                      axi_resp_o,
+  /// Memory stream master, request is valid for this bank.
+  output logic           [NumBanks-1:0]  mem_req_o,
+  /// Memory stream master, request can be granted by this bank.
+  input  logic           [NumBanks-1:0]  mem_gnt_i,
+  /// Memory stream master, byte address of the request.
+  output addr_t          [NumBanks-1:0]  mem_addr_o,
+  /// Memory stream master, write data for this bank. Valid when `mem_req_o`.
+  output mem_data_t      [NumBanks-1:0]  mem_wdata_o,
+  /// Memory stream master, byte-wise strobe (byte enable).
+  output mem_strb_t      [NumBanks-1:0]  mem_strb_o,
+  /// Memory stream master, `axi_pkg::atop_t` signal associated with this request.
+  output axi_pkg::atop_t [NumBanks-1:0]  mem_atop_o,
+  /// Memory stream master, write enable. When asserted, a store of `mem_wdata_o` is requested.
+  output logic           [NumBanks-1:0]  mem_we_o,
+  /// Memory stream master, response is valid. This module always expects a valid response for a
+  /// request, regardless of whether the request was a write or a read.
+  input  logic           [NumBanks-1:0]  mem_rvalid_i,
+  /// Memory stream master, read response data.
+  input  mem_data_t      [NumBanks-1:0]  mem_rdata_i
+);
+
+  typedef logic [DataWidth-1:0]   axi_data_t;
+  typedef logic [DataWidth/8-1:0] axi_strb_t;
+  typedef logic [IdWidth-1:0]     axi_id_t;
+
+  typedef struct packed {
+    addr_t          addr;
+    axi_pkg::atop_t atop;
+    axi_strb_t      strb;
+    axi_data_t      wdata;
+    logic           we;
+  } mem_req_t;
+
+  typedef struct packed {
+    addr_t          addr;
+    axi_pkg::atop_t atop;
+    axi_id_t        id;
+    logic           last;
+    axi_pkg::qos_t  qos;
+    axi_pkg::size_t size;
+    logic           write;
+  } meta_t;
+
+  axi_data_t      mem_rdata,
+                  m2s_resp;
+  axi_pkg::len_t  r_cnt_d,        r_cnt_q,
+                  w_cnt_d,        w_cnt_q;
+  logic           arb_valid,      arb_ready,
+                  rd_valid,       rd_ready,
+                  wr_valid,       wr_ready,
+                  sel_b,          sel_buf_b,
+                  sel_r,          sel_buf_r,
+                  sel_valid,      sel_ready,
+                  sel_buf_valid,  sel_buf_ready,
+                  sel_lock_d,     sel_lock_q,
+                  meta_valid,     meta_ready,
+                  meta_buf_valid, meta_buf_ready,
+                  meta_sel_d,     meta_sel_q,
+                  m2s_req_valid,  m2s_req_ready,
+                  m2s_resp_valid, m2s_resp_ready,
+                  mem_req_valid,  mem_req_ready,
+                  mem_rvalid;
+  mem_req_t       m2s_req,
+                  mem_req;
+  meta_t          rd_meta,
+                  rd_meta_d,      rd_meta_q,
+                  wr_meta,
+                  wr_meta_d,      wr_meta_q,
+                  meta,           meta_buf;
+
+  assign busy_o = axi_req_i.aw_valid | axi_req_i.ar_valid | axi_req_i.w_valid |
+                    axi_resp_o.b_valid | axi_resp_o.r_valid |
+                    (r_cnt_q > 0) | (w_cnt_q > 0);
+
+  // Handle reads.
+  always_comb begin
+    // Default assignments
+    axi_resp_o.ar_ready = 1'b0;
+    rd_meta_d           = rd_meta_q;
+    rd_meta             = meta_t'{default: '0};
+    rd_valid            = 1'b0;
+    r_cnt_d             = r_cnt_q;
+    // Handle R burst in progress.
+    if (r_cnt_q > '0) begin
+      rd_meta_d.last = (r_cnt_q == 8'd1);
+      rd_meta        = rd_meta_d;
+      rd_meta.addr   = rd_meta_q.addr + axi_pkg::num_bytes(rd_meta_q.size);
+      rd_valid       = 1'b1;
+      if (rd_ready) begin
+        r_cnt_d--;
+        rd_meta_d.addr = rd_meta.addr;
+      end
+    // Handle new AR if there is one.
+    end else if (axi_req_i.ar_valid) begin
+      rd_meta_d = '{
+        addr:  addr_t'(axi_pkg::aligned_addr(axi_req_i.ar.addr, axi_req_i.ar.size)),
+        atop:  '0,
+        id:    axi_req_i.ar.id,
+        last:  (axi_req_i.ar.len == '0),
+        qos:   axi_req_i.ar.qos,
+        size:  axi_req_i.ar.size,
+        write: 1'b0
+      };
+      rd_meta      = rd_meta_d;
+      rd_meta.addr = addr_t'(axi_req_i.ar.addr); // first beat uses the address as issued; rd_meta_d keeps the size-aligned one
+      rd_valid     = 1'b1;
+      if (rd_ready) begin
+        r_cnt_d             = axi_req_i.ar.len; // beats still to issue after this first one
+        axi_resp_o.ar_ready = 1'b1;
+      end
+    end
+  end
+
+  // Handle writes.
+  always_comb begin
+    // Default assignments
+    axi_resp_o.aw_ready = 1'b0;
+    axi_resp_o.w_ready  = 1'b0;
+    wr_meta_d           = wr_meta_q;
+    wr_meta             = meta_t'{default: '0};
+    wr_valid            = 1'b0;
+    w_cnt_d             = w_cnt_q;
+    // Handle W bursts in progress.
+    if (w_cnt_q > '0) begin
+      wr_meta_d.last = (w_cnt_q == 8'd1);
+      wr_meta        = wr_meta_d;
+      wr_meta.addr   = wr_meta_q.addr + axi_pkg::num_bytes(wr_meta_q.size);
+      if (axi_req_i.w_valid) begin
+        wr_valid = 1'b1;
+        if (wr_ready) begin
+          axi_resp_o.w_ready = 1'b1;
+          w_cnt_d--;
+          wr_meta_d.addr = wr_meta.addr;
+        end
+      end
+    // Handle new AW if there is one.
+    end else if (axi_req_i.aw_valid && axi_req_i.w_valid) begin
+      wr_meta_d = '{
+        addr:  addr_t'(axi_pkg::aligned_addr(axi_req_i.aw.addr, axi_req_i.aw.size)),
+        atop:  axi_req_i.aw.atop,
+        id:    axi_req_i.aw.id,
+        last:  (axi_req_i.aw.len == '0),
+        qos:   axi_req_i.aw.qos,
+        size:  axi_req_i.aw.size,
+        write: 1'b1
+      };
+      wr_meta      = wr_meta_d;
+      wr_meta.addr = addr_t'(axi_req_i.aw.addr); // first beat uses the address as issued; wr_meta_d keeps the size-aligned one
+      wr_valid = 1'b1;
+      if (wr_ready) begin
+        w_cnt_d             = axi_req_i.aw.len; // beats still to accept after this first one
+        axi_resp_o.aw_ready = 1'b1; // AW and the first W beat are accepted in the same cycle
+        axi_resp_o.w_ready  = 1'b1;
+      end
+    end
+  end
+
+  // Arbitrate between reads and writes.
+  stream_mux #(
+    .DATA_T ( meta_t ),
+    .N_INP  ( 32'd2  )
+  ) i_ax_mux (
+    .inp_data_i   ({wr_meta,  rd_meta }),
+    .inp_valid_i  ({wr_valid, rd_valid}),
+    .inp_ready_o  ({wr_ready, rd_ready}),
+    .inp_sel_i    ( meta_sel_d         ),
+    .oup_data_o   ( meta               ),
+    .oup_valid_o  ( arb_valid          ),
+    .oup_ready_i  ( arb_ready          )
+  );
+  always_comb begin
+    meta_sel_d = meta_sel_q;
+    sel_lock_d = sel_lock_q;
+    if (sel_lock_q) begin
+      meta_sel_d = meta_sel_q;
+      if (arb_valid && arb_ready) begin
+        sel_lock_d = 1'b0;
+      end
+    end else begin
+      if (wr_valid ^ rd_valid) begin
+        // If either write or read is valid but not both, select the valid one.
+        meta_sel_d = wr_valid;
+      end else if (wr_valid && rd_valid) begin
+        // If both write and read are valid, decide according to QoS then burst properties.
+        // Prioritize higher QoS.
+        if (wr_meta.qos > rd_meta.qos) begin
+          meta_sel_d = 1'b1;
+        end else if (rd_meta.qos > wr_meta.qos) begin
+          meta_sel_d = 1'b0;
+        // Decide requests with identical QoS.
+        end else if (wr_meta.qos == rd_meta.qos) begin
+          // 1. Prioritize individual writes over read bursts.
+          // Rationale: Read bursts can be interleaved on AXI but write bursts cannot.
+          if (wr_meta.last && !rd_meta.last) begin
+            meta_sel_d = 1'b1;
+          // 2. Prioritize ongoing burst.
+          // Rationale: Stalled bursts create back-pressure or require costly buffers.
+          end else if (w_cnt_q > '0) begin
+            meta_sel_d = 1'b1;
+          end else if (r_cnt_q > '0) begin
+            meta_sel_d = 1'b0;
+          // 3. Otherwise arbitrate round robin to prevent starvation.
+          end else begin
+            meta_sel_d = ~meta_sel_q;
+          end
+        end
+      end
+      // Lock arbitration if valid but not yet ready.
+      if (arb_valid && !arb_ready) begin
+        sel_lock_d = 1'b1;
+      end
+    end
+  end
+
+  // Fork arbitrated stream to meta data, memory requests, and R/B channel selection.
+  stream_fork #(
+    .N_OUP ( 32'd3 )
+  ) i_fork (
+    .clk_i,
+    .rst_ni,
+    .valid_i ( arb_valid ),
+    .ready_o ( arb_ready ),
+    .valid_o ({sel_valid, meta_valid, m2s_req_valid}),
+    .ready_i ({sel_ready, meta_ready, m2s_req_ready})
+  );
+
+  assign sel_b = meta.write & meta.last; // B response only for the last beat of a write
+  assign sel_r = ~meta.write | meta.atop[5]; // every read beat; NOTE(review): atop[5] presumably marks atomics that also return read data - confirm in axi_pkg
+
+  stream_fifo #(
+    .FALL_THROUGH ( 1'b1             ),
+    .DEPTH        ( 32'd1 + BufDepth ),
+    .T            ( logic[1:0]       )
+  ) i_sel_buf (
+    .clk_i,
+    .rst_ni,
+    .flush_i    ( 1'b0 ),
+    .testmode_i ( 1'b0 ),
+    .data_i     ({sel_b, sel_r }),
+    .valid_i    ( sel_valid ),
+    .ready_o    ( sel_ready ),
+    .data_o     ({sel_buf_b, sel_buf_r}),
+    .valid_o    ( sel_buf_valid ),
+    .ready_i    ( sel_buf_ready ),
+    .usage_o    ( /* unused */ )
+  );
+
+  stream_fifo #(
+    .FALL_THROUGH ( 1'b1             ),
+    .DEPTH        ( 32'd1 + BufDepth ),
+    .T            ( meta_t           )
+  ) i_meta_buf (
+    .clk_i,
+    .rst_ni,
+    .flush_i    ( 1'b0 ),
+    .testmode_i ( 1'b0 ),
+    .data_i     ( meta ),
+    .valid_i    ( meta_valid ),
+    .ready_o    ( meta_ready ),
+    .data_o     ( meta_buf ),
+    .valid_o    ( meta_buf_valid ),
+    .ready_i    ( meta_buf_ready ),
+    .usage_o    ( /* unused */ )
+  );
+
+  // Assemble the actual memory request from meta information and write data.
+  assign m2s_req = mem_req_t'{
+    addr:  meta.addr,
+    atop:  meta.atop,
+    strb:  axi_req_i.w.strb,
+    wdata: axi_req_i.w.data,
+    we:    meta.write
+  };
+
+  // Interface memory as stream.
+  stream_to_mem #(
+    .mem_req_t  ( mem_req_t  ),
+    .mem_resp_t ( axi_data_t ),
+    .BufDepth   ( BufDepth   )
+  ) i_stream_to_mem (
+    .clk_i,
+    .rst_ni,
+    .req_i            ( m2s_req ),
+    .req_valid_i      ( m2s_req_valid ),
+    .req_ready_o      ( m2s_req_ready ),
+    .resp_o           ( m2s_resp ),
+    .resp_valid_o     ( m2s_resp_valid ),
+    .resp_ready_i     ( m2s_resp_ready ),
+    .mem_req_o        ( mem_req ),
+    .mem_req_valid_o  ( mem_req_valid ),
+    .mem_req_ready_i  ( mem_req_ready ),
+    .mem_resp_i       ( mem_rdata ),
+    .mem_resp_valid_i ( mem_rvalid )
+  );
+
+  // Split single memory request to desired number of banks.
+  mem_to_banks #(
+    .AddrWidth ( AddrWidth ),
+    .DataWidth ( DataWidth ),
+    .NumBanks  ( NumBanks ),
+    .HideStrb  ( HideStrb ),
+    .MaxTrans  ( BufDepth ),
+    .FifoDepth ( OutFifoDepth )
+  ) i_mem_to_banks (
+    .clk_i,
+    .rst_ni,
+    .req_i         ( mem_req_valid ),
+    .gnt_o         ( mem_req_ready ),
+    .addr_i        ( mem_req.addr ),
+    .wdata_i       ( mem_req.wdata ),
+    .strb_i        ( mem_req.strb ),
+    .atop_i        ( mem_req.atop ),
+    .we_i          ( mem_req.we ),
+    .rvalid_o      ( mem_rvalid ),
+    .rdata_o       ( mem_rdata ),
+    .bank_req_o    ( mem_req_o ),
+    .bank_gnt_i    ( mem_gnt_i ),
+    .bank_addr_o   ( mem_addr_o ),
+    .bank_wdata_o  ( mem_wdata_o ),
+    .bank_strb_o   ( mem_strb_o ),
+    .bank_atop_o   ( mem_atop_o ),
+    .bank_we_o     ( mem_we_o ),
+    .bank_rvalid_i ( mem_rvalid_i ),
+    .bank_rdata_i  ( mem_rdata_i )
+  );
+
+  // Join memory read data and meta data stream.
+  logic mem_join_valid, mem_join_ready;
+  stream_join #(
+    .N_INP ( 32'd2 )
+  ) i_join (
+    .inp_valid_i ({m2s_resp_valid, meta_buf_valid}),
+    .inp_ready_o ({m2s_resp_ready, meta_buf_ready}),
+    .oup_valid_o ( mem_join_valid ),
+    .oup_ready_i ( mem_join_ready )
+  );
+
+  // Dynamically fork the joined stream to B and R channels.
+  stream_fork_dynamic #(
+    .N_OUP ( 32'd2 )
+  ) i_fork_dynamic (
+    .clk_i,
+    .rst_ni,
+    .valid_i     ( mem_join_valid ),
+    .ready_o     ( mem_join_ready ),
+    .sel_i       ({sel_buf_b, sel_buf_r }),
+    .sel_valid_i ( sel_buf_valid ),
+    .sel_ready_o ( sel_buf_ready ),
+    .valid_o     ({axi_resp_o.b_valid, axi_resp_o.r_valid}),
+    .ready_i     ({axi_req_i.b_ready, axi_req_i.r_ready })
+  );
+
+  // Compose B responses.
+  assign axi_resp_o.b = '{
+    id:   meta_buf.id,
+    resp: axi_pkg::RESP_OKAY,
+    user: '0
+  };
+
+  // Compose R responses.
+  assign axi_resp_o.r = '{
+    data: m2s_resp,
+    id:   meta_buf.id,
+    last: meta_buf.last,
+    resp: axi_pkg::RESP_OKAY,
+    user: '0
+  };
+
+  // Registers
+  `FFARN(meta_sel_q, meta_sel_d, 1'b0, clk_i, rst_ni)
+  `FFARN(sel_lock_q, sel_lock_d, 1'b0, clk_i, rst_ni)
+  `FFARN(rd_meta_q, rd_meta_d, meta_t'{default: '0}, clk_i, rst_ni)
+  `FFARN(wr_meta_q, wr_meta_d, meta_t'{default: '0}, clk_i, rst_ni)
+  `FFARN(r_cnt_q, r_cnt_d, '0, clk_i, rst_ni)
+  `FFARN(w_cnt_q, w_cnt_d, '0, clk_i, rst_ni)
+
+  // Assertions
+  // pragma translate_off
+  `ifndef VERILATOR
+  default disable iff (!rst_ni);
+  assume property (@(posedge clk_i)
+      axi_req_i.ar_valid && !axi_resp_o.ar_ready |=> $stable(axi_req_i.ar))
+    else $error("AR must remain stable until handshake has happened!");
+  assert property (@(posedge clk_i)
+      axi_resp_o.r_valid && !axi_req_i.r_ready |=> $stable(axi_resp_o.r))
+    else $error("R must remain stable until handshake has happened!");
+  assume property (@(posedge clk_i)
+      axi_req_i.aw_valid && !axi_resp_o.aw_ready |=> $stable(axi_req_i.aw))
+    else $error("AW must remain stable until handshake has happened!");
+  assume property (@(posedge clk_i)
+      axi_req_i.w_valid && !axi_resp_o.w_ready |=> $stable(axi_req_i.w))
+    else $error("W must remain stable until handshake has happened!");
+  assert property (@(posedge clk_i)
+      axi_resp_o.b_valid && !axi_req_i.b_ready |=> $stable(axi_resp_o.b))
+    else $error("B must remain stable until handshake has happened!");
+  assert property (@(posedge clk_i) axi_req_i.ar_valid && axi_req_i.ar.len > 0 |->
+      axi_req_i.ar.burst == axi_pkg::BURST_INCR)
+    else $error("Non-incrementing bursts are not supported!");
+  assert property (@(posedge clk_i) axi_req_i.aw_valid && axi_req_i.aw.len > 0 |->
+      axi_req_i.aw.burst == axi_pkg::BURST_INCR)
+    else $error("Non-incrementing bursts are not supported!");
+  assert property (@(posedge clk_i) meta_valid && meta.atop != '0 |-> meta.write)
+    else $warning("Unexpected atomic operation on read.");
+  `endif
+  // pragma translate_on
+endmodule
+
+
+`include "../include/axi/assign.svh"
+`include "../include/axi/typedef.svh"
+/// Interface wrapper for module `axi_to_mem`.
+module axi_to_mem_intf #(
+  /// See `axi_to_mem`, parameter `AddrWidth`.
+  parameter int unsigned ADDR_WIDTH     = 32'd0,
+  /// See `axi_to_mem`, parameter `DataWidth`.
+  parameter int unsigned DATA_WIDTH     = 32'd0,
+  /// AXI4+ATOP ID width.
+  parameter int unsigned ID_WIDTH       = 32'd0,
+  /// AXI4+ATOP user width.
+  parameter int unsigned USER_WIDTH     = 32'd0,
+  /// See `axi_to_mem`, parameter `NumBanks`.
+  parameter int unsigned NUM_BANKS      = 32'd0,
+  /// See `axi_to_mem`, parameter `BufDepth`.
+  parameter int unsigned BUF_DEPTH      = 32'd1,
+  /// Hide write requests if the strb == '0
+  parameter bit          HIDE_STRB      = 1'b0,
+  /// Depth of output fifo/fall_through_register. Increase for asymmetric backpressure (contention) on banks.
+  parameter int unsigned OUT_FIFO_DEPTH = 32'd1,
+  /// Dependent parameter, do not override. See `axi_to_mem`, parameter `addr_t`.
+  localparam type addr_t     = logic [ADDR_WIDTH-1:0],
+  /// Dependent parameter, do not override. See `axi_to_mem`, parameter `mem_data_t`.
+  localparam type mem_data_t = logic [DATA_WIDTH/NUM_BANKS-1:0],
+  /// Dependent parameter, do not override. See `axi_to_mem`, parameter `mem_strb_t`.
+  localparam type mem_strb_t = logic [DATA_WIDTH/NUM_BANKS/8-1:0]
+) (
+  /// Clock input.
+  input  logic                            clk_i,
+  /// Asynchronous reset, active low.
+  input  logic                            rst_ni,
+  /// See `axi_to_mem`, port `busy_o`.
+  output logic                            busy_o,
+  /// AXI4+ATOP slave interface port.
+  AXI_BUS.Slave                           slv,
+  /// See `axi_to_mem`, port `mem_req_o`.
+  output logic           [NUM_BANKS-1:0]  mem_req_o,
+  /// See `axi_to_mem`, port `mem_gnt_i`.
+  input  logic           [NUM_BANKS-1:0]  mem_gnt_i,
+  /// See `axi_to_mem`, port `mem_addr_o`.
+  output addr_t          [NUM_BANKS-1:0]  mem_addr_o,
+  /// See `axi_to_mem`, port `mem_wdata_o`.
+  output mem_data_t      [NUM_BANKS-1:0]  mem_wdata_o,
+  /// See `axi_to_mem`, port `mem_strb_o`.
+  output mem_strb_t      [NUM_BANKS-1:0]  mem_strb_o,
+  /// See `axi_to_mem`, port `mem_atop_o`.
+  output axi_pkg::atop_t [NUM_BANKS-1:0]  mem_atop_o,
+  /// See `axi_to_mem`, port `mem_we_o`.
+  output logic           [NUM_BANKS-1:0]  mem_we_o,
+  /// See `axi_to_mem`, port `mem_rvalid_i`.
+  input  logic           [NUM_BANKS-1:0]  mem_rvalid_i,
+  /// See `axi_to_mem`, port `mem_rdata_i`.
+  input  mem_data_t      [NUM_BANKS-1:0]  mem_rdata_i
+);
+  typedef logic [ID_WIDTH-1:0]     id_t;
+  typedef logic [DATA_WIDTH-1:0]   data_t;
+  typedef logic [DATA_WIDTH/8-1:0] strb_t;
+  typedef logic [USER_WIDTH-1:0]   user_t;
+  `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t)
+  `AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t)
+  `AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t)
+  `AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t)
+  `AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t)
+  `AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t)
+  req_t   req;
+  resp_t  resp;
+  `AXI_ASSIGN_TO_REQ(req, slv)
+  `AXI_ASSIGN_FROM_RESP(slv, resp)
+  axi_to_mem #(
+    .axi_req_t    ( req_t    ),
+    .axi_resp_t   ( resp_t   ),
+    .AddrWidth    ( ADDR_WIDTH ),
+    .DataWidth    ( DATA_WIDTH ),
+    .IdWidth      ( ID_WIDTH ),
+    .NumBanks     ( NUM_BANKS ),
+    .BufDepth     ( BUF_DEPTH ),
+    .HideStrb     ( HIDE_STRB ),
+    .OutFifoDepth ( OUT_FIFO_DEPTH )
+  ) i_axi_to_mem (
+    .clk_i,
+    .rst_ni,
+    .busy_o,
+    .axi_req_i  ( req ),
+    .axi_resp_o ( resp ),
+    .mem_req_o,
+    .mem_gnt_i,
+    .mem_addr_o,
+    .mem_wdata_o,
+    .mem_strb_o,
+    .mem_atop_o,
+    .mem_we_o,
+    .mem_rvalid_i,
+    .mem_rdata_i
+  );
+endmodule
+
+/// Split memory access over multiple parallel banks, where each bank has its own req/gnt
+/// request and valid response direction.
+module mem_to_banks #(
+  /// Input address width.
+  parameter int unsigned AddrWidth = 32'd0,
+  /// Input data width, must be a power of two.
+  parameter int unsigned DataWidth = 32'd0,
+  /// Number of banks at output, must evenly divide `DataWidth`.
+  parameter int unsigned NumBanks  = 32'd0,
+  /// Remove transactions that have zero strobe
+  parameter bit          HideStrb  = 1'b0,
+  /// Number of outstanding transactions
+  parameter int unsigned MaxTrans  = 32'b1,
+  /// FIFO depth, must be >=1
+  parameter int unsigned FifoDepth = 1,
+  /// Dependent parameter, do not override! Address type.
+  localparam type addr_t     = logic [AddrWidth-1:0],
+  /// Dependent parameter, do not override! Input data type.
+  localparam type inp_data_t = logic [DataWidth-1:0],
+  /// Dependent parameter, do not override! Input write strobe type.
+  localparam type inp_strb_t = logic [DataWidth/8-1:0],
+  /// Dependent parameter, do not override! Output data type.
+  localparam type oup_data_t = logic [DataWidth/NumBanks-1:0],
+  /// Dependent parameter, do not override! Output write strobe type.
+  localparam type oup_strb_t = logic [DataWidth/NumBanks/8-1:0]
+) (
+  /// Clock input.
+  input  logic                          clk_i,
+  /// Asynchronous reset, active low.
+  input  logic                          rst_ni,
+  /// Memory request to split, request is valid.
+  input  logic                          req_i,
+  /// Memory request to split, request can be granted.
+  output logic                          gnt_o,
+  /// Memory request to split, request address, byte-wise.
+  input  addr_t                         addr_i,
+  /// Memory request to split, request write data.
+  input  inp_data_t                     wdata_i,
+  /// Memory request to split, request write strobe.
+  input  inp_strb_t                     strb_i,
+  /// Memory request to split, request Atomic signal from AXI4+ATOP.
+  input  axi_pkg::atop_t                atop_i,
+  /// Memory request to split, request write enable, active high.
+  input  logic                          we_i,
+  /// Memory request to split, response is valid. Required for read and write requests
+  output logic                          rvalid_o,
+  /// Memory request to split, response read data.
+  output inp_data_t                     rdata_o,
+  /// Memory bank request, request is valid.
+  output logic           [NumBanks-1:0] bank_req_o,
+  /// Memory bank request, request can be granted.
+  input  logic           [NumBanks-1:0] bank_gnt_i,
+  /// Memory bank request, request address, byte-wise. Will be different for each bank.
+  output addr_t          [NumBanks-1:0] bank_addr_o,
+  /// Memory bank request, request write data.
+  output oup_data_t      [NumBanks-1:0] bank_wdata_o,
+  /// Memory bank request, request write strobe.
+  output oup_strb_t      [NumBanks-1:0] bank_strb_o,
+  /// Memory bank request, request Atomic signal from AXI4+ATOP.
+  output axi_pkg::atop_t [NumBanks-1:0] bank_atop_o,
+  /// Memory bank request, request write enable, active high.
+  output logic           [NumBanks-1:0] bank_we_o,
+  /// Memory bank request, response is valid. Required for read and write requests
+  input  logic           [NumBanks-1:0] bank_rvalid_i,
+  /// Memory bank request, response read data.
+  input  oup_data_t      [NumBanks-1:0] bank_rdata_i
+);
+
+  localparam DataBytes    = $bits(inp_strb_t);
+  localparam BitsPerBank  = $bits(oup_data_t);
+  localparam BytesPerBank = $bits(oup_strb_t);
+
+  typedef struct packed {
+    addr_t          addr;
+    oup_data_t      wdata;
+    oup_strb_t      strb;
+    axi_pkg::atop_t atop;
+    logic           we;
+  } req_t;
+
+  logic                 req_valid;
+  logic [NumBanks-1:0]  req_ready,
+                        resp_valid, resp_ready;
+  req_t [NumBanks-1:0]  bank_req,
+                        bank_oup;
+  logic [NumBanks-1:0]  bank_req_internal, bank_gnt_internal, zero_strobe, dead_response;
+  logic                 dead_write_fifo_full;
+
+  function automatic addr_t align_addr(input addr_t addr);
+    return (addr >> $clog2(DataBytes)) << $clog2(DataBytes); // clear the byte-offset bits inside one DataWidth-wide word
+  endfunction
+
+  // Handle requests.
+  assign req_valid = req_i & gnt_o; // pushed into every bank FIFO in the same cycle, only once gnt_o is high
+  for (genvar i = 0; unsigned'(i) < NumBanks; i++) begin : gen_reqs
+    assign bank_req[i].addr  = align_addr(addr_i) + i * BytesPerBank;
+    assign bank_req[i].wdata = wdata_i[i*BitsPerBank+:BitsPerBank];
+    assign bank_req[i].strb  = strb_i[i*BytesPerBank+:BytesPerBank];
+    assign bank_req[i].atop  = atop_i;
+    assign bank_req[i].we    = we_i;
+    stream_fifo #(
+      .FALL_THROUGH ( 1'b1         ),
+      .DATA_WIDTH   ( $bits(req_t) ),
+      .DEPTH        ( FifoDepth    ),
+      .T            ( req_t        )
+    ) i_ft_reg (
+      .clk_i,
+      .rst_ni,
+      .flush_i    ( 1'b0 ),
+      .testmode_i ( 1'b0 ),
+      .usage_o    (),
+      .data_i     ( bank_req[i] ),
+      .valid_i    ( req_valid ),
+      .ready_o    ( req_ready[i] ),
+      .data_o     ( bank_oup[i] ),
+      .valid_o    ( bank_req_internal[i] ),
+      .ready_i    ( bank_gnt_internal[i] )
+    );
+    assign bank_addr_o[i]  = bank_oup[i].addr;
+    assign bank_wdata_o[i] = bank_oup[i].wdata;
+    assign bank_strb_o[i]  = bank_oup[i].strb;
+    assign bank_atop_o[i]  = bank_oup[i].atop;
+    assign bank_we_o[i]    = bank_oup[i].we;
+
+    assign zero_strobe[i] = (bank_oup[i].strb == '0);
+
+    if (HideStrb) begin
+      assign bank_req_o[i] = (bank_oup[i].we && zero_strobe[i]) ? 1'b0 : bank_req_internal[i]; // a write with all-zero strobe is never shown to the bank ...
+      assign bank_gnt_internal[i] = (bank_oup[i].we && zero_strobe[i]) ? 1'b1 : bank_gnt_i[i]; // ... and is granted locally so it leaves the FIFO
+    end else begin
+      assign bank_req_o[i] = bank_req_internal[i];
+      assign bank_gnt_internal[i] = bank_gnt_i[i];
+    end
+  end
+
+  // Grant output if all our requests have been granted.
+  assign gnt_o = (&req_ready) & (&resp_ready) & !dead_write_fifo_full;
+
+  if (HideStrb) begin : gen_dead_write_fifo
+    fifo_v3 #(
+      .FALL_THROUGH ( 1'b1       ),
+      .DEPTH        ( MaxTrans+1 ),
+      .DATA_WIDTH   ( NumBanks   )
+    ) i_dead_write_fifo (
+      .clk_i,
+      .rst_ni,
+      .flush_i    ( 1'b0 ),
+      .testmode_i ( 1'b0 ),
+      .full_o     ( dead_write_fifo_full ),
+      .empty_o    (),
+      .usage_o    (),
+      .data_i     ( bank_we_o & zero_strobe ), // NOTE(review): sampled at the request-FIFO outputs, not from the request being pushed - confirm this lines up when FifoDepth > 1
+      .push_i     ( req_i & gnt_o ),
+      .data_o     ( dead_response ),
+      .pop_i      ( rvalid_o )
+    );
+  end else begin
+    assign dead_response = '0;
+    assign dead_write_fifo_full = 1'b0;
+  end
+
+  // Handle responses.
+  for (genvar i = 0; unsigned'(i) < NumBanks; i++) begin : gen_resp_regs
+    stream_fifo #(
+      .FALL_THROUGH ( 1'b1              ),
+      .DATA_WIDTH   ( $bits(oup_data_t) ),
+      .DEPTH        ( FifoDepth         ),
+      .T            ( oup_data_t        )
+    ) i_ft_reg (
+      .clk_i,
+      .rst_ni,
+      .flush_i    ( 1'b0 ),
+      .testmode_i ( 1'b0 ),
+      .usage_o    (),
+      .data_i     ( bank_rdata_i[i] ),
+      .valid_i    ( bank_rvalid_i[i] ),
+      .ready_o    ( resp_ready[i] ),
+      .data_o     ( rdata_o[i*BitsPerBank+:BitsPerBank] ),
+      .valid_o    ( resp_valid[i] ),
+      .ready_i    ( rvalid_o & !dead_response[i] ) // banks marked dead are not popped for this response
+    );
+  end
+  assign rvalid_o = &(resp_valid | dead_response); // valid once every bank has either a response or a dead marker
+
+  // Assertions
+  // pragma translate_off
+  `ifndef VERILATOR
+    initial begin
+      assume (DataWidth != 0 && (DataWidth & (DataWidth - 1)) == 0)
+        else $fatal(1, "Data width must be a power of two!");
+      assume (DataWidth % NumBanks == 0)
+        else $fatal(1, "Data width must be evenly divisible over banks!");
+      assume ((DataWidth / NumBanks) % 8 == 0)
+        else $fatal(1, "Data width of each bank must be divisible into 8-bit bytes!");
+    end
+  `endif
+  // pragma translate_on
+endmodule
diff --git a/hw/ip/iDMA/axi/axi_to_mem_split.sv b/hw/ip/iDMA/axi/axi_to_mem_split.sv
new file mode 100644
index 000000000..01140641f
--- /dev/null
+++ b/hw/ip/iDMA/axi/axi_to_mem_split.sv
@@ -0,0 +1,258 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Authors:
+// - Michael Rogenmoser
+
+`include "../include/axi/assign.svh"
+/// AXI4+ATOP to memory-protocol interconnect. Completely separates the read and write channel to
+/// individual mem ports. This can only be used when addresses for the same bank are accessible
+/// from different memory ports.
+module axi_to_mem_split #(
+  /// AXI4+ATOP request type. See `include/axi/typedef.svh`.
+  parameter type axi_req_t = logic,
+  /// AXI4+ATOP response type. See `include/axi/typedef.svh`.
+  parameter type axi_resp_t = logic,
+  /// Address width, has to be less than or equal to the width of the AXI address field.
+  /// Determines the width of `mem_addr_o`. Has to be wide enough to emit the memory region
+  /// which should be accessible.
+  parameter int unsigned AddrWidth = 0,
+  /// AXI4+ATOP data width.
+  parameter int unsigned AxiDataWidth = 0,
+  /// AXI4+ATOP ID width.
+  parameter int unsigned IdWidth = 0,
+  /// Memory data width, must evenly divide `AxiDataWidth`.
+  parameter int unsigned MemDataWidth = 0, // must divide `AxiDataWidth` without remainder
+  /// Depth of memory response buffer. This should be equal to the memory response latency.
+  parameter int unsigned BufDepth = 0,
+  /// Hide write requests if the strb == '0
+  parameter bit HideStrb = 1'b0,
+  /// Depth of output fifo/fall_through_register. Increase for asymmetric backpressure (contention) on banks.
+  parameter int unsigned OutFifoDepth = 1,
+  /// Dependent parameter, do not override. Number of memory ports (lower half: read, upper half: write).
+  parameter int unsigned NumMemPorts = 2*AxiDataWidth/MemDataWidth,
+  /// Dependent parameter, do not override. Memory address type.
+  parameter type addr_t = logic [AddrWidth-1:0],
+  /// Dependent parameter, do not override. Memory data type.
+  parameter type mem_data_t = logic [MemDataWidth-1:0],
+  /// Dependent parameter, do not override. Memory write strobe type.
+  parameter type mem_strb_t = logic [MemDataWidth/8-1:0]
+) (
+  /// Clock input.
+  input logic clk_i,
+  /// Asynchronous reset, active low.
+  input logic rst_ni,
+  /// The unit is busy handling an AXI4+ATOP request.
+  output logic busy_o,
+  /// AXI4+ATOP slave port, request input.
+  input axi_req_t axi_req_i,
+  /// AXI4+ATOP slave port, response output.
+  output axi_resp_t axi_resp_o,
+  /// Memory stream master, request is valid for this bank.
+  output logic [NumMemPorts-1:0] mem_req_o,
+  /// Memory stream master, request can be granted by this bank.
+  input logic [NumMemPorts-1:0] mem_gnt_i,
+  /// Memory stream master, byte address of the request.
+  output addr_t [NumMemPorts-1:0] mem_addr_o, // byte address
+  /// Memory stream master, write data for this bank. Valid when `mem_req_o`.
+  output mem_data_t [NumMemPorts-1:0] mem_wdata_o, // write data
+  /// Memory stream master, byte-wise strobe (byte enable).
+  output mem_strb_t [NumMemPorts-1:0] mem_strb_o, // byte-wise strobe
+  /// Memory stream master, `axi_pkg::atop_t` signal associated with this request.
+  output axi_pkg::atop_t [NumMemPorts-1:0] mem_atop_o, // atomic operation
+  /// Memory stream master, write enable. When asserted, a store of `mem_wdata_o` is requested.
+  output logic [NumMemPorts-1:0] mem_we_o, // write enable
+  /// Memory stream master, response is valid. This module always expects a response valid for a
+  /// request regardless if the request was a write or a read.
+  input logic [NumMemPorts-1:0] mem_rvalid_i, // response valid
+  /// Memory stream master, read response data.
+  input mem_data_t [NumMemPorts-1:0] mem_rdata_i // read data
+);
+  // Per-direction copies of the AXI request and response.
+  axi_req_t axi_read_req, axi_write_req;
+  axi_resp_t axi_read_resp, axi_write_resp;
+
+  logic read_busy, write_busy;
+  // AR/R go to the read converter, AW/W/B to the write converter; unused request fields are zeroed.
+  always_comb begin: proc_axi_rw_split
+    `AXI_SET_R_STRUCT(axi_resp_o.r, axi_read_resp.r)
+    axi_resp_o.r_valid = axi_read_resp.r_valid;
+    axi_resp_o.ar_ready = axi_read_resp.ar_ready;
+    `AXI_SET_B_STRUCT(axi_resp_o.b, axi_write_resp.b)
+    axi_resp_o.b_valid = axi_write_resp.b_valid;
+    axi_resp_o.aw_ready = axi_write_resp.aw_ready;
+    axi_resp_o.w_ready = axi_write_resp.w_ready;
+
+    axi_write_req = '0;
+    `AXI_SET_AW_STRUCT(axi_write_req.aw, axi_req_i.aw)
+    axi_write_req.aw_valid = axi_req_i.aw_valid;
+    `AXI_SET_W_STRUCT(axi_write_req.w, axi_req_i.w)
+    axi_write_req.w_valid = axi_req_i.w_valid;
+    axi_write_req.b_ready = axi_req_i.b_ready;
+
+    axi_read_req = '0;
+    `AXI_SET_AR_STRUCT(axi_read_req.ar, axi_req_i.ar)
+    axi_read_req.ar_valid = axi_req_i.ar_valid;
+    axi_read_req.r_ready = axi_req_i.r_ready;
+  end
+
+  assign busy_o = read_busy || write_busy;
+  // Read path: lower half of the memory ports; strobe hiding is always disabled here.
+  axi_to_mem #(
+    .axi_req_t ( axi_req_t ),
+    .axi_resp_t ( axi_resp_t ),
+    .AddrWidth ( AddrWidth ),
+    .DataWidth ( AxiDataWidth ),
+    .IdWidth ( IdWidth ),
+    .NumBanks ( NumMemPorts/2 ),
+    .BufDepth ( BufDepth ),
+    .HideStrb ( 1'b0 ),
+    .OutFifoDepth ( OutFifoDepth )
+  ) i_axi_to_mem_read (
+    .clk_i,
+    .rst_ni,
+    .busy_o ( read_busy ),
+    .axi_req_i ( axi_read_req ),
+    .axi_resp_o ( axi_read_resp ),
+    .mem_req_o ( mem_req_o [NumMemPorts/2-1:0] ),
+    .mem_gnt_i ( mem_gnt_i [NumMemPorts/2-1:0] ),
+    .mem_addr_o ( mem_addr_o [NumMemPorts/2-1:0] ),
+    .mem_wdata_o ( mem_wdata_o [NumMemPorts/2-1:0] ),
+    .mem_strb_o ( mem_strb_o [NumMemPorts/2-1:0] ),
+    .mem_atop_o ( mem_atop_o [NumMemPorts/2-1:0] ),
+    .mem_we_o ( mem_we_o [NumMemPorts/2-1:0] ),
+    .mem_rvalid_i ( mem_rvalid_i [NumMemPorts/2-1:0] ),
+    .mem_rdata_i ( mem_rdata_i [NumMemPorts/2-1:0] )
+  );
+  // Write path: upper half of the memory ports; strobe hiding follows `HideStrb`.
+  axi_to_mem #(
+    .axi_req_t ( axi_req_t ),
+    .axi_resp_t ( axi_resp_t ),
+    .AddrWidth ( AddrWidth ),
+    .DataWidth ( AxiDataWidth ),
+    .IdWidth ( IdWidth ),
+    .NumBanks ( NumMemPorts/2 ),
+    .BufDepth ( BufDepth ),
+    .HideStrb ( HideStrb ),
+    .OutFifoDepth ( OutFifoDepth )
+  ) i_axi_to_mem_write (
+    .clk_i,
+    .rst_ni,
+    .busy_o ( write_busy ),
+    .axi_req_i ( axi_write_req ),
+    .axi_resp_o ( axi_write_resp ),
+    .mem_req_o ( mem_req_o [NumMemPorts-1:NumMemPorts/2] ),
+    .mem_gnt_i ( mem_gnt_i [NumMemPorts-1:NumMemPorts/2] ),
+    .mem_addr_o ( mem_addr_o [NumMemPorts-1:NumMemPorts/2] ),
+    .mem_wdata_o ( mem_wdata_o [NumMemPorts-1:NumMemPorts/2] ),
+    .mem_strb_o ( mem_strb_o [NumMemPorts-1:NumMemPorts/2] ),
+    .mem_atop_o ( mem_atop_o [NumMemPorts-1:NumMemPorts/2] ),
+    .mem_we_o ( mem_we_o [NumMemPorts-1:NumMemPorts/2] ),
+    .mem_rvalid_i ( mem_rvalid_i [NumMemPorts-1:NumMemPorts/2] ),
+    .mem_rdata_i ( mem_rdata_i [NumMemPorts-1:NumMemPorts/2] )
+  );
+
+endmodule
+
+`include "../include/axi/typedef.svh"
+/// AXI4+ATOP interface wrapper for `axi_to_mem_split`
+module axi_to_mem_split_intf #(
+  /// AXI4+ATOP ID width
+  parameter int unsigned AXI_ID_WIDTH = 32'b0,
+  /// AXI4+ATOP address width
+  parameter int unsigned AXI_ADDR_WIDTH = 32'b0,
+  /// AXI4+ATOP data width
+  parameter int unsigned AXI_DATA_WIDTH = 32'b0,
+  /// AXI4+ATOP user width
+  parameter int unsigned AXI_USER_WIDTH = 32'b0,
+  /// Memory data width, must evenly divide `AXI_DATA_WIDTH`.
+  parameter int unsigned MEM_DATA_WIDTH = 32'b0,
+  /// See `axi_to_mem`, parameter `BufDepth`.
+  parameter int unsigned BUF_DEPTH = 0,
+  /// Hide write requests if the strb == '0
+  parameter bit HIDE_STRB = 1'b0,
+  /// Depth of output fifo/fall_through_register. Increase for asymmetric backpressure (contention) on banks.
+  parameter int unsigned OUT_FIFO_DEPTH = 32'd1,
+  /// Dependent parameter, do not override. Number of memory ports.
+  parameter int unsigned NUM_MEM_PORTS = 2*AXI_DATA_WIDTH/MEM_DATA_WIDTH,
+  /// Dependent parameter, do not override. See `axi_to_mem`, parameter `addr_t`.
+  parameter type addr_t = logic [AXI_ADDR_WIDTH-1:0],
+  /// Dependent parameter, do not override. See `axi_to_mem`, parameter `mem_data_t`.
+  parameter type mem_data_t = logic [MEM_DATA_WIDTH-1:0],
+  /// Dependent parameter, do not override. See `axi_to_mem`, parameter `mem_strb_t`.
+  parameter type mem_strb_t = logic [MEM_DATA_WIDTH/8-1:0]
+) (
+  /// Clock input.
+  input logic clk_i,
+  /// Asynchronous reset, active low.
+  input logic rst_ni,
+  /// See `axi_to_mem_split`, port `busy_o`.
+  output logic busy_o,
+  /// AXI4+ATOP slave interface port.
+  AXI_BUS.Slave axi_bus,
+  /// See `axi_to_mem_split`, port `mem_req_o`.
+  output logic [NUM_MEM_PORTS-1:0] mem_req_o,
+  /// See `axi_to_mem_split`, port `mem_gnt_i`.
+  input logic [NUM_MEM_PORTS-1:0] mem_gnt_i,
+  /// See `axi_to_mem_split`, port `mem_addr_o`.
+  output addr_t [NUM_MEM_PORTS-1:0] mem_addr_o,
+  /// See `axi_to_mem_split`, port `mem_wdata_o`.
+  output mem_data_t [NUM_MEM_PORTS-1:0] mem_wdata_o,
+  /// See `axi_to_mem_split`, port `mem_strb_o`.
+  output mem_strb_t [NUM_MEM_PORTS-1:0] mem_strb_o,
+  /// See `axi_to_mem_split`, port `mem_atop_o`.
+  output axi_pkg::atop_t [NUM_MEM_PORTS-1:0] mem_atop_o,
+  /// See `axi_to_mem_split`, port `mem_we_o`.
+  output logic [NUM_MEM_PORTS-1:0] mem_we_o,
+  /// See `axi_to_mem_split`, port `mem_rvalid_i`.
+  input logic [NUM_MEM_PORTS-1:0] mem_rvalid_i,
+  /// See `axi_to_mem_split`, port `mem_rdata_i`.
+  input mem_data_t [NUM_MEM_PORTS-1:0] mem_rdata_i
+);
+
+  typedef logic [AXI_ID_WIDTH-1:0] id_t;
+  typedef logic [AXI_DATA_WIDTH-1:0] data_t;
+  typedef logic [AXI_DATA_WIDTH/8-1:0] strb_t;
+  typedef logic [AXI_USER_WIDTH-1:0] user_t;
+  `AXI_TYPEDEF_ALL(axi, addr_t, id_t, data_t, strb_t, user_t)
+
+  axi_req_t axi_req;
+  axi_resp_t axi_resp;
+  `AXI_ASSIGN_TO_REQ(axi_req, axi_bus)
+  `AXI_ASSIGN_FROM_RESP(axi_bus, axi_resp)
+
+  axi_to_mem_split #(
+    .axi_req_t ( axi_req_t ),
+    .axi_resp_t ( axi_resp_t ),
+    .AxiDataWidth ( AXI_DATA_WIDTH ),
+    .AddrWidth ( AXI_ADDR_WIDTH ),
+    .IdWidth ( AXI_ID_WIDTH ),
+    .MemDataWidth ( MEM_DATA_WIDTH ), // must divide `AxiDataWidth` without remainder
+    .BufDepth ( BUF_DEPTH ),
+    .HideStrb ( HIDE_STRB ),
+    .OutFifoDepth ( OUT_FIFO_DEPTH )
+  ) i_axi_to_mem_split (
+    .clk_i,
+    .rst_ni,
+    .busy_o,
+    .axi_req_i (axi_req),
+    .axi_resp_o (axi_resp),
+    .mem_req_o,
+    .mem_gnt_i,
+    .mem_addr_o,
+    .mem_wdata_o,
+    .mem_strb_o,
+    .mem_atop_o,
+    .mem_we_o,
+    .mem_rvalid_i,
+    .mem_rdata_i
+  );
+
+endmodule
diff --git a/hw/ip/iDMA/frontend/idma_reg32_frontend.h b/hw/ip/iDMA/frontend/idma_reg32_frontend.h
new file mode 100644
index 000000000..e937446aa
--- /dev/null
+++ b/hw/ip/iDMA/frontend/idma_reg32_frontend.h
@@ -0,0 +1,47 @@
+// Generated register defines for idma_reg32_frontend
+
+// Copyright information found in source file:
+// Copyright 2022 ETH Zurich and University of Bologna.
+ +// Licensing information found in source file: +// Licensed under Solderpad Hardware License, Version 0.51 +// SPDX-License-Identifier: SHL-0.51 + +#ifndef _IDMA_REG32_FRONTEND_REG_DEFS_ +#define _IDMA_REG32_FRONTEND_REG_DEFS_ + +#ifdef __cplusplus +extern "C" { +#endif +// Register width +#define IDMA_REG32_FRONTEND_PARAM_REG_WIDTH 32 + +// Source Address +#define IDMA_REG32_FRONTEND_SRC_ADDR_REG_OFFSET 0x0 + +// Destination Address +#define IDMA_REG32_FRONTEND_DST_ADDR_REG_OFFSET 0x4 + +// Number of bytes +#define IDMA_REG32_FRONTEND_NUM_BYTES_REG_OFFSET 0x8 + +// Configuration Register for DMA settings +#define IDMA_REG32_FRONTEND_CONF_REG_OFFSET 0xc +#define IDMA_REG32_FRONTEND_CONF_DECOUPLE_BIT 0 +#define IDMA_REG32_FRONTEND_CONF_DEBURST_BIT 1 + +// DMA Status +#define IDMA_REG32_FRONTEND_STATUS_REG_OFFSET 0x10 +#define IDMA_REG32_FRONTEND_STATUS_BUSY_BIT 0 + +// Next ID, launches transfer, returns 0 if transfer not set up properly. +#define IDMA_REG32_FRONTEND_NEXT_ID_REG_OFFSET 0x14 + +// Get ID of finished transactions. +#define IDMA_REG32_FRONTEND_DONE_REG_OFFSET 0x18 + +#ifdef __cplusplus +} // extern "C" +#endif +#endif // _IDMA_REG32_FRONTEND_REG_DEFS_ +// End generated register defines for idma_reg32_frontend \ No newline at end of file diff --git a/hw/ip/iDMA/frontend/idma_reg32_frontend.hjson b/hw/ip/iDMA/frontend/idma_reg32_frontend.hjson new file mode 100644 index 000000000..ca6e9f73f --- /dev/null +++ b/hw/ip/iDMA/frontend/idma_reg32_frontend.hjson @@ -0,0 +1,104 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// Licensed under Solderpad Hardware License, Version 0.51 + +{ + name: "idma_reg32_frontend", + clock_primary: "clk_i", + reset_primary: "rst_ni", + bus_interfaces: [ + { protocol: "reg_iface", + direction: "device" + } + ], + regwidth: "32", + registers: [ + { name: "src_addr", + desc: "Source Address", + swaccess: "rw", + hwaccess: "hro", + fields: [ + { bits: "31:0", + name: "src_addr", + desc: "Source Address" + } + ] + }, + { name: "dst_addr", + desc: "Destination Address", + swaccess: "rw", + hwaccess: "hro", + fields: [ + { bits: "31:0", + name: "dst_addr", + desc: "Destination Address" + } + ] + }, + { name: "num_bytes", + desc: "Number of bytes", + swaccess: "rw", + hwaccess: "hro", + fields: [ + { bits: "31:0", + name: "num_bytes", + desc: "Number of bytes" + } + ] + }, + { name: "conf", + desc: "Configuration Register for DMA settings", + swaccess: "rw", + hwaccess: "hro", + fields: [ + { bits: "0", + name: "decouple", + desc: "Decouple enable" + }, + { bits: "1", + name: "deburst", + desc: "Deburst enable" + } + ] + }, + { name: "status", + desc: "DMA Status", + swaccess: "ro", + hwaccess: "hwo", + hwext: "true", + fields: [ + { bits: "0", + name: "busy", + desc: "DMA busy" + } + ] + }, + { name: "next_id", + desc: "Next ID, launches transfer, returns 0 if transfer not set up properly.", + swaccess: "ro", + hwaccess: "hrw", + hwext: "true", + hwre: "true", + fields: [ + { bits: "31:0", + name: "next_id", + desc: "Next ID, launches transfer, returns 0 if transfer not set up properly." + } + ] + }, + { name: "done", + desc: "Get ID of finished transactions.", + swaccess: "ro", + hwaccess: "hrw", + hwext: "true", + hwre: "true", + fields: [ + { bits: "31:0", + name: "done", + desc: "Get ID of finished transactions." 
+ } + ] + } + ] +} diff --git a/hw/ip/iDMA/frontend/idma_reg32_frontend.html b/hw/ip/iDMA/frontend/idma_reg32_frontend.html new file mode 100644 index 000000000..5e64ae402 --- /dev/null +++ b/hw/ip/iDMA/frontend/idma_reg32_frontend.html @@ -0,0 +1,107 @@ + + + + + + + + + + +
+
idma_reg32_frontend.src_addr @ 0x0
+

Source Address

+
Reset default = 0x0, mask 0xffffffff
+
+ + +
31302928272625242322212019181716
src_addr...
1514131211109876543210
...src_addr
BitsTypeResetNameDescription
31:0rwxsrc_addr

Source Address

+
+ + + + + +
+
idma_reg32_frontend.dst_addr @ 0x4
+

Destination Address

+
Reset default = 0x0, mask 0xffffffff
+
+ + +
31302928272625242322212019181716
dst_addr...
1514131211109876543210
...dst_addr
BitsTypeResetNameDescription
31:0rwxdst_addr

Destination Address

+
+ + + + + +
+
idma_reg32_frontend.num_bytes @ 0x8
+

Number of bytes

+
Reset default = 0x0, mask 0xffffffff
+
+ + +
31302928272625242322212019181716
num_bytes...
1514131211109876543210
...num_bytes
BitsTypeResetNameDescription
31:0rwxnum_bytes

Number of bytes

+
+ + + + + +
+
idma_reg32_frontend.conf @ 0xc
+

Configuration Register for DMA settings

+
Reset default = 0x0, mask 0x3
+
+ + + + +
31302928272625242322212019181716
 
1514131211109876543210
 deburstdecouple
BitsTypeResetNameDescription
0rwxdecouple

Decouple enable

1rwxdeburst

Deburst enable

+
+ + + + + +
+
idma_reg32_frontend.status @ 0x10
+

DMA Status

+
Reset default = 0x0, mask 0x1
+
+ + + +
31302928272625242322212019181716
 
1514131211109876543210
 busy
BitsTypeResetNameDescription
0roxbusy

DMA busy

+
+ + + + + +
+
idma_reg32_frontend.next_id @ 0x14
+

Next ID, launches transfer, returns 0 if transfer not set up properly.

+
Reset default = 0x0, mask 0xffffffff
+
+ + +
31302928272625242322212019181716
next_id...
1514131211109876543210
...next_id
BitsTypeResetNameDescription
31:0roxnext_id

Next ID, launches transfer, returns 0 if transfer not set up properly.

+
+ + + + + +
+
idma_reg32_frontend.done @ 0x18
+

Get ID of finished transactions.

+
Reset default = 0x0, mask 0xffffffff
+
+ + +
31302928272625242322212019181716
done...
1514131211109876543210
...done
BitsTypeResetNameDescription
31:0roxdone

Get ID of finished transactions.

+
+
diff --git a/hw/ip/iDMA/frontend/idma_reg32_frontend.sv b/hw/ip/iDMA/frontend/idma_reg32_frontend.sv
new file mode 100644
index 000000000..10c59f7ca
--- /dev/null
+++ b/hw/ip/iDMA/frontend/idma_reg32_frontend.sv
@@ -0,0 +1,138 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Xinhu Liu
+//
+// Description: DMA frontend module that includes 32bit config and status reg handling
+
+module idma_reg32_frontend #(
+  /// address width of iDMA AXI Master Port (not used in this module, kept for instantiation compatibility)
+  parameter int unsigned DMAAddrWidth = -1,
+  /// register_interface request type
+  parameter type dma_regs_req_t = logic,
+  /// register_interface response type
+  parameter type dma_regs_rsp_t = logic,
+  /// idma request type
+  parameter type idma_req_t = logic,
+  /// idma response type
+  parameter type idma_rsp_t = logic
+) (
+  input clk_i, // Clock
+  input rst_ni, // Asynchronous reset active low
+  /// register interface control slave
+  input dma_regs_req_t dma_ctrl_req_i,
+  output dma_regs_rsp_t dma_ctrl_rsp_o,
+  /// DMA backend signals
+  output idma_req_t idma_req_o,
+  output logic valid_o,
+  input logic ready_i,
+  input logic backend_idle_i,
+  input logic trans_complete_i
+
+);
+  localparam int unsigned DMARegisterWidth = 32;
+
+  idma_reg32_frontend_reg_pkg::idma_reg32_frontend_reg2hw_t dma_reg2hw;
+  idma_reg32_frontend_reg_pkg::idma_reg32_frontend_hw2reg_t dma_hw2reg;
+
+  // transfer ids: same width as the id generator (IdWidth) and the 32-bit NEXT_ID/DONE fields
+  logic [DMARegisterWidth-1:0] next_id, done_id;
+  // high while a launch request is being accepted by the backend
+  logic issue;
+
+  dma_regs_rsp_t dma_ctrl_rsp_tmp;
+
+  idma_reg32_frontend_reg_top #(
+    .reg_req_t(dma_regs_req_t),
+    .reg_rsp_t(dma_regs_rsp_t)
+  ) i_dma_conf_regs (
+    .clk_i,
+    .rst_ni,
+    .reg_req_i(dma_ctrl_req_i),
+    .reg_rsp_o(dma_ctrl_rsp_tmp),
+    .reg2hw (dma_reg2hw),
+    .hw2reg (dma_hw2reg),
+    .devmode_i(1'b0) // if 1, explicit error return for unmapped register access
+  );
+
+  // register read side effects and the valid/ready handshake towards the backend
+  always_comb begin : proc_process_regs
+    // defaults: no launch, NEXT_ID reads 0, DONE reads the last completed id
+    valid_o = '0;
+    dma_hw2reg.next_id.d = '0;
+    dma_hw2reg.done.d = done_id;
+    dma_hw2reg.status.d = ~backend_idle_i;
+
+    dma_ctrl_rsp_o = dma_ctrl_rsp_tmp;
+
+    // start transaction upon next_id read (and having a valid config);
+    // while launching, the register response's ready follows the backend's ready_i
+    if (dma_reg2hw.next_id.re) begin
+      if (dma_reg2hw.num_bytes.q != '0) begin
+        valid_o = 1'b1;
+        dma_hw2reg.next_id.d = next_id;
+        dma_ctrl_rsp_o.ready = ready_i;
+      end
+    end
+  end : proc_process_regs
+
+  always_comb begin : hw_req_conv
+    idma_req_o = '0;
+
+    idma_req_o.length = dma_reg2hw.num_bytes.q;
+    idma_req_o.src_addr = dma_reg2hw.src_addr.q;
+    idma_req_o.dst_addr = dma_reg2hw.dst_addr.q;
+
+    // Current backend only supports one ID
+    idma_req_o.opt.axi_id = '0;
+    // DMA only supports incremental burst
+    idma_req_o.opt.src.burst = axi_pkg::BURST_INCR;
+    // this frontend currently does not support cache variations
+    idma_req_o.opt.src.cache = '0;
+    // AXI4 does not support locked transactions, use atomics
+    idma_req_o.opt.src.lock = '0;
+    // unprivileged, secure, data access
+    idma_req_o.opt.src.prot = '0;
+    // not participating in qos
+    idma_req_o.opt.src.qos = '0;
+    // only one region
+    idma_req_o.opt.src.region = '0;
+    // DMA only supports incremental burst
+    idma_req_o.opt.dst.burst = axi_pkg::BURST_INCR;
+    // this frontend currently does not support cache variations
+    idma_req_o.opt.dst.cache = '0;
+    // AXI4 does not support locked transactions, use atomics
+    idma_req_o.opt.dst.lock = '0;
+    // unprivileged, secure, data access
+    idma_req_o.opt.dst.prot = '0;
+    // not participating in qos
+    idma_req_o.opt.dst.qos = '0;
+    // only one region in system
+    idma_req_o.opt.dst.region = '0;
+    // ensure coupled AW to avoid deadlocks
+    idma_req_o.opt.beo.decouple_aw = '0;
+    idma_req_o.opt.beo.decouple_rw = dma_reg2hw.conf.decouple.q;
+    // this frontend currently only supports completely debursting
+    idma_req_o.opt.beo.src_max_llen = '0;
+    // this frontend currently only supports completely debursting
+    idma_req_o.opt.beo.dst_max_llen = '0;
+    idma_req_o.opt.beo.src_reduce_len = dma_reg2hw.conf.deburst.q;
+    idma_req_o.opt.beo.dst_reduce_len = dma_reg2hw.conf.deburst.q;
+  end : hw_req_conv
+
+  idma_transfer_id_gen #(
+    .IdWidth(DMARegisterWidth)
+  ) i_idma_transfer_id_gen (
+    .clk_i,
+    .rst_ni,
+    .issue_i (issue),
+    .retire_i (trans_complete_i),
+    .next_o (next_id),
+    .completed_o(done_id)
+  );
+
+  // advance the issue counter only when a launch request is accepted by the backend
+  assign issue = ready_i && valid_o;
+
+endmodule : idma_reg32_frontend
diff --git a/hw/ip/iDMA/frontend/idma_reg32_frontend_reg_pkg.sv b/hw/ip/iDMA/frontend/idma_reg32_frontend_reg_pkg.sv
new file mode 100644
index 000000000..e6e2794da
--- /dev/null
+++ b/hw/ip/iDMA/frontend/idma_reg32_frontend_reg_pkg.sv
@@ -0,0 +1,97 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Register Package auto-generated by `reggen` containing data structure
+
+package idma_reg32_frontend_reg_pkg;
+
+  // Address widths within the block
+  parameter int BlockAw = 5;
+
+  ////////////////////////////
+  // Typedefs for registers //
+  ////////////////////////////
+
+  typedef struct packed {logic [31:0] q;} idma_reg32_frontend_reg2hw_src_addr_reg_t;
+
+  typedef struct packed {logic [31:0] q;} idma_reg32_frontend_reg2hw_dst_addr_reg_t;
+
+  typedef struct packed {logic [31:0] q;} idma_reg32_frontend_reg2hw_num_bytes_reg_t;
+
+  typedef struct packed {
+    struct packed {logic q;} decouple;
+    struct packed {logic q;} deburst;
+  } idma_reg32_frontend_reg2hw_conf_reg_t;
+
+  typedef struct packed {
+    logic [31:0] q;
+    logic re;
+  } idma_reg32_frontend_reg2hw_next_id_reg_t;
+
+  typedef struct packed {
+    logic [31:0] q;
+    logic re;
+  } idma_reg32_frontend_reg2hw_done_reg_t;
+
+  typedef struct packed {logic d;} idma_reg32_frontend_hw2reg_status_reg_t;
+
+  typedef struct packed {logic [31:0] d;} idma_reg32_frontend_hw2reg_next_id_reg_t;
+
+  typedef struct packed {logic [31:0] d;}
idma_reg32_frontend_hw2reg_done_reg_t; + + // Register -> HW type + typedef struct packed { + idma_reg32_frontend_reg2hw_src_addr_reg_t src_addr; // [163:132] + idma_reg32_frontend_reg2hw_dst_addr_reg_t dst_addr; // [131:100] + idma_reg32_frontend_reg2hw_num_bytes_reg_t num_bytes; // [99:68] + idma_reg32_frontend_reg2hw_conf_reg_t conf; // [67:66] + idma_reg32_frontend_reg2hw_next_id_reg_t next_id; // [65:33] + idma_reg32_frontend_reg2hw_done_reg_t done; // [32:0] + } idma_reg32_frontend_reg2hw_t; + + // HW -> register type + typedef struct packed { + idma_reg32_frontend_hw2reg_status_reg_t status; // [64:64] + idma_reg32_frontend_hw2reg_next_id_reg_t next_id; // [63:32] + idma_reg32_frontend_hw2reg_done_reg_t done; // [31:0] + } idma_reg32_frontend_hw2reg_t; + + // Register offsets + parameter logic [BlockAw-1:0] IDMA_REG32_FRONTEND_SRC_ADDR_OFFSET = 5'h0; + parameter logic [BlockAw-1:0] IDMA_REG32_FRONTEND_DST_ADDR_OFFSET = 5'h4; + parameter logic [BlockAw-1:0] IDMA_REG32_FRONTEND_NUM_BYTES_OFFSET = 5'h8; + parameter logic [BlockAw-1:0] IDMA_REG32_FRONTEND_CONF_OFFSET = 5'hc; + parameter logic [BlockAw-1:0] IDMA_REG32_FRONTEND_STATUS_OFFSET = 5'h10; + parameter logic [BlockAw-1:0] IDMA_REG32_FRONTEND_NEXT_ID_OFFSET = 5'h14; + parameter logic [BlockAw-1:0] IDMA_REG32_FRONTEND_DONE_OFFSET = 5'h18; + + // Reset values for hwext registers and their fields + parameter logic [0:0] IDMA_REG32_FRONTEND_STATUS_RESVAL = 1'h0; + parameter logic [31:0] IDMA_REG32_FRONTEND_NEXT_ID_RESVAL = 32'h0; + parameter logic [31:0] IDMA_REG32_FRONTEND_DONE_RESVAL = 32'h0; + + // Register index + typedef enum int { + IDMA_REG32_FRONTEND_SRC_ADDR, + IDMA_REG32_FRONTEND_DST_ADDR, + IDMA_REG32_FRONTEND_NUM_BYTES, + IDMA_REG32_FRONTEND_CONF, + IDMA_REG32_FRONTEND_STATUS, + IDMA_REG32_FRONTEND_NEXT_ID, + IDMA_REG32_FRONTEND_DONE + } idma_reg32_frontend_id_e; + + // Register width information to check illegal writes + parameter logic [3:0] IDMA_REG32_FRONTEND_PERMIT[7] = '{ + 4'b1111, // 
index[0] IDMA_REG32_FRONTEND_SRC_ADDR + 4'b1111, // index[1] IDMA_REG32_FRONTEND_DST_ADDR + 4'b1111, // index[2] IDMA_REG32_FRONTEND_NUM_BYTES + 4'b0001, // index[3] IDMA_REG32_FRONTEND_CONF + 4'b0001, // index[4] IDMA_REG32_FRONTEND_STATUS + 4'b1111, // index[5] IDMA_REG32_FRONTEND_NEXT_ID + 4'b1111 // index[6] IDMA_REG32_FRONTEND_DONE + }; + +endpackage + diff --git a/hw/ip/iDMA/frontend/idma_reg32_frontend_reg_top.sv b/hw/ip/iDMA/frontend/idma_reg32_frontend_reg_top.sv new file mode 100644 index 000000000..4c5f872c5 --- /dev/null +++ b/hw/ip/iDMA/frontend/idma_reg32_frontend_reg_top.sv @@ -0,0 +1,377 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Register Top module auto-generated by `reggen` + + +`include "common_cells/assertions.svh" + +module idma_reg32_frontend_reg_top #( + parameter type reg_req_t = logic, + parameter type reg_rsp_t = logic, + parameter int AW = 5 +) ( + input clk_i, + input rst_ni, + input reg_req_t reg_req_i, + output reg_rsp_t reg_rsp_o, + // To HW + output idma_reg32_frontend_reg_pkg::idma_reg32_frontend_reg2hw_t reg2hw, // Write + input idma_reg32_frontend_reg_pkg::idma_reg32_frontend_hw2reg_t hw2reg, // Read + + + // Config + input devmode_i // If 1, explicit error return for unmapped register access +); + + import idma_reg32_frontend_reg_pkg::*; + + localparam int DW = 32; + localparam int DBW = DW / 8; // Byte Width + + // register signals + logic reg_we; + logic reg_re; + logic [ AW-1:0] reg_addr; + logic [ DW-1:0] reg_wdata; + logic [DBW-1:0] reg_be; + logic [ DW-1:0] reg_rdata; + logic reg_error; + + logic addrmiss, wr_err; + + logic [DW-1:0] reg_rdata_next; + + // Below register interface can be changed + reg_req_t reg_intf_req; + reg_rsp_t reg_intf_rsp; + + + assign reg_intf_req = reg_req_i; + assign reg_rsp_o = reg_intf_rsp; + + + assign reg_we = reg_intf_req.valid & reg_intf_req.write; + assign reg_re = 
reg_intf_req.valid & ~reg_intf_req.write; + assign reg_addr = reg_intf_req.addr; + assign reg_wdata = reg_intf_req.wdata; + assign reg_be = reg_intf_req.wstrb; + assign reg_intf_rsp.rdata = reg_rdata; + assign reg_intf_rsp.error = reg_error; + assign reg_intf_rsp.ready = 1'b1; + + assign reg_rdata = reg_rdata_next; + assign reg_error = (devmode_i & addrmiss) | wr_err; + + + // Define SW related signals + // Format: __{wd|we|qs} + // or _{wd|we|qs} if field == 1 or 0 + logic [31:0] src_addr_qs; + logic [31:0] src_addr_wd; + logic src_addr_we; + logic [31:0] dst_addr_qs; + logic [31:0] dst_addr_wd; + logic dst_addr_we; + logic [31:0] num_bytes_qs; + logic [31:0] num_bytes_wd; + logic num_bytes_we; + logic conf_decouple_qs; + logic conf_decouple_wd; + logic conf_decouple_we; + logic conf_deburst_qs; + logic conf_deburst_wd; + logic conf_deburst_we; + logic status_qs; + logic status_re; + logic [31:0] next_id_qs; + logic next_id_re; + logic [31:0] done_qs; + logic done_re; + + // Register instances + // R[src_addr]: V(False) + + prim_subreg #( + .DW (32), + .SWACCESS("RW"), + .RESVAL (32'h0) + ) u_src_addr ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(src_addr_we), + .wd(src_addr_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(), + .q (reg2hw.src_addr.q), + + // to register interface (read) + .qs(src_addr_qs) + ); + + + // R[dst_addr]: V(False) + + prim_subreg #( + .DW (32), + .SWACCESS("RW"), + .RESVAL (32'h0) + ) u_dst_addr ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(dst_addr_we), + .wd(dst_addr_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(), + .q (reg2hw.dst_addr.q), + + // to register interface (read) + .qs(dst_addr_qs) + ); + + + // R[num_bytes]: V(False) + + prim_subreg #( + .DW (32), + .SWACCESS("RW"), + .RESVAL (32'h0) + ) u_num_bytes ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + 
.we(num_bytes_we), + .wd(num_bytes_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(), + .q (reg2hw.num_bytes.q), + + // to register interface (read) + .qs(num_bytes_qs) + ); + + + // R[conf]: V(False) + + // F[decouple]: 0:0 + prim_subreg #( + .DW (1), + .SWACCESS("RW"), + .RESVAL (1'h0) + ) u_conf_decouple ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(conf_decouple_we), + .wd(conf_decouple_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(), + .q (reg2hw.conf.decouple.q), + + // to register interface (read) + .qs(conf_decouple_qs) + ); + + + // F[deburst]: 1:1 + prim_subreg #( + .DW (1), + .SWACCESS("RW"), + .RESVAL (1'h0) + ) u_conf_deburst ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(conf_deburst_we), + .wd(conf_deburst_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(), + .q (reg2hw.conf.deburst.q), + + // to register interface (read) + .qs(conf_deburst_qs) + ); + + + // R[status]: V(True) + + prim_subreg_ext #( + .DW(1) + ) u_status ( + .re (status_re), + .we (1'b0), + .wd ('0), + .d (hw2reg.status.d), + .qre(), + .qe (), + .q (), + .qs (status_qs) + ); + + + // R[next_id]: V(True) + + prim_subreg_ext #( + .DW(32) + ) u_next_id ( + .re (next_id_re), + .we (1'b0), + .wd ('0), + .d (hw2reg.next_id.d), + .qre(reg2hw.next_id.re), + .qe (), + .q (reg2hw.next_id.q), + .qs (next_id_qs) + ); + + + // R[done]: V(True) + + prim_subreg_ext #( + .DW(32) + ) u_done ( + .re (done_re), + .we (1'b0), + .wd ('0), + .d (hw2reg.done.d), + .qre(reg2hw.done.re), + .qe (), + .q (reg2hw.done.q), + .qs (done_qs) + ); + + + + + logic [6:0] addr_hit; + always_comb begin + addr_hit = '0; + addr_hit[0] = (reg_addr == IDMA_REG32_FRONTEND_SRC_ADDR_OFFSET); + addr_hit[1] = (reg_addr == IDMA_REG32_FRONTEND_DST_ADDR_OFFSET); + addr_hit[2] = (reg_addr == IDMA_REG32_FRONTEND_NUM_BYTES_OFFSET); + 
addr_hit[3] = (reg_addr == IDMA_REG32_FRONTEND_CONF_OFFSET); + addr_hit[4] = (reg_addr == IDMA_REG32_FRONTEND_STATUS_OFFSET); + addr_hit[5] = (reg_addr == IDMA_REG32_FRONTEND_NEXT_ID_OFFSET); + addr_hit[6] = (reg_addr == IDMA_REG32_FRONTEND_DONE_OFFSET); + end + + assign addrmiss = (reg_re || reg_we) ? ~|addr_hit : 1'b0; + + // Check sub-word write is permitted + always_comb begin + wr_err = (reg_we & + ((addr_hit[0] & (|(IDMA_REG32_FRONTEND_PERMIT[0] & ~reg_be))) | + (addr_hit[1] & (|(IDMA_REG32_FRONTEND_PERMIT[1] & ~reg_be))) | + (addr_hit[2] & (|(IDMA_REG32_FRONTEND_PERMIT[2] & ~reg_be))) | + (addr_hit[3] & (|(IDMA_REG32_FRONTEND_PERMIT[3] & ~reg_be))) | + (addr_hit[4] & (|(IDMA_REG32_FRONTEND_PERMIT[4] & ~reg_be))) | + (addr_hit[5] & (|(IDMA_REG32_FRONTEND_PERMIT[5] & ~reg_be))) | + (addr_hit[6] & (|(IDMA_REG32_FRONTEND_PERMIT[6] & ~reg_be))))); + end + + assign src_addr_we = addr_hit[0] & reg_we & !reg_error; + assign src_addr_wd = reg_wdata[31:0]; + + assign dst_addr_we = addr_hit[1] & reg_we & !reg_error; + assign dst_addr_wd = reg_wdata[31:0]; + + assign num_bytes_we = addr_hit[2] & reg_we & !reg_error; + assign num_bytes_wd = reg_wdata[31:0]; + + assign conf_decouple_we = addr_hit[3] & reg_we & !reg_error; + assign conf_decouple_wd = reg_wdata[0]; + + assign conf_deburst_we = addr_hit[3] & reg_we & !reg_error; + assign conf_deburst_wd = reg_wdata[1]; + + assign status_re = addr_hit[4] & reg_re & !reg_error; + + assign next_id_re = addr_hit[5] & reg_re & !reg_error; + + assign done_re = addr_hit[6] & reg_re & !reg_error; + + // Read data return + always_comb begin + reg_rdata_next = '0; + unique case (1'b1) + addr_hit[0]: begin + reg_rdata_next[31:0] = src_addr_qs; + end + + addr_hit[1]: begin + reg_rdata_next[31:0] = dst_addr_qs; + end + + addr_hit[2]: begin + reg_rdata_next[31:0] = num_bytes_qs; + end + + addr_hit[3]: begin + reg_rdata_next[0] = conf_decouple_qs; + reg_rdata_next[1] = conf_deburst_qs; + end + + addr_hit[4]: begin + reg_rdata_next[0] = 
status_qs; + end + + addr_hit[5]: begin + reg_rdata_next[31:0] = next_id_qs; + end + + addr_hit[6]: begin + reg_rdata_next[31:0] = done_qs; + end + + default: begin + reg_rdata_next = '1; + end + endcase + end + + // Unused signal tieoff + + // wdata / byte enable are not always fully used + // add a blanket unused statement to handle lint waivers + logic unused_wdata; + logic unused_be; + assign unused_wdata = ^reg_wdata; + assign unused_be = ^reg_be; + + // Assertions for Register Interface + `ASSERT(en2addrHit, (reg_we || reg_re) |-> $onehot0(addr_hit)) + +endmodule diff --git a/hw/ip/iDMA/frontend/idma_transfer_id_gen.sv b/hw/ip/iDMA/frontend/idma_transfer_id_gen.sv new file mode 100644 index 000000000..5a801dd3d --- /dev/null +++ b/hw/ip/iDMA/frontend/idma_transfer_id_gen.sv @@ -0,0 +1,79 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Author: Thomas Benz + + +/// DMA transaction id generator. Increases the transaction id on every request. 
+module idma_transfer_id_gen #(
+  parameter int unsigned IdWidth = -1
+) (
+  input logic clk_i,
+  input logic rst_ni,
+  // a new transfer is issued: advances the next id
+  input logic issue_i,
+  // a transfer is retired: advances the completed id
+  input logic retire_i,
+  // id for the next issued transfer (2 after reset)
+  output logic [IdWidth-1:0] next_o,
+  // id of the last retired transfer (1 after reset)
+  output logic [IdWidth-1:0] completed_o
+);
+
+  // Two independent counters; `*_d` is the value `*_q` takes on the next clock edge.
+  // At all-ones a counter continues with 1, or with 2 when its event fires in that cycle.
+  logic [IdWidth-1:0] next_d, next_q, completed_d, completed_q;
+
+  //--------------------------------------
+  // state
+  //--------------------------------------
+  always_ff @(posedge clk_i or negedge rst_ni) begin : proc_id_gen
+    if (!rst_ni) begin
+      next_q <= 2;
+      completed_q <= 1;
+    end else begin
+      next_q <= next_d;
+      completed_q <= completed_d;
+    end
+  end
+
+  assign next_o = next_q;
+  assign completed_o = completed_q;
+
+  // next-state logic of the issue counter
+  always_comb begin : proc_next_id
+    if (next_q == '1) begin
+      // wrap-around
+      if (issue_i) begin
+        next_d = 'h2;
+      end else begin
+        next_d = 'h1;
+      end
+    end else begin
+      // hold, or count up on issue
+      next_d = next_q;
+      if (issue_i) begin
+        next_d = next_q + 'h1;
+      end
+    end
+  end
+
+  always_comb begin : proc_next_completed
+    if (completed_q == '1) begin
+      // wrap-around
+      if (retire_i) begin
+        completed_d = 'h2;
+      end else begin
+        completed_d = 'h1;
+      end
+    end else begin
+      // hold, or count up on retire
+      completed_d = completed_q;
+      if (retire_i) begin
+        completed_d = completed_q + 'h1;
+      end
+    end
+  end
+
+endmodule : idma_transfer_id_gen
diff --git a/hw/ip/iDMA/frontend/reg_html.css b/hw/ip/iDMA/frontend/reg_html.css
new file mode 100644
index 000000000..4cb48edb7
--- /dev/null
+++ b/hw/ip/iDMA/frontend/reg_html.css
@@ -0,0 +1,74 @@
+/* Stylesheet for reggen HTML register output */
+/* Copyright lowRISC contributors. */
+/* Licensed under the Apache License, Version 2.0, see LICENSE for details.
*/ +/* SPDX-License-Identifier: Apache-2.0 */ + +table.regpic { + width: 95%; + border-collapse: collapse; + margin-left:auto; + margin-right:auto; + table-layout:fixed; +} + +table.regdef { + border: 1px solid black; + width: 80%; + border-collapse: collapse; + margin-left:auto; + margin-right:auto; + table-layout:auto; +} + +table.regdef th { + border: 1px solid black; + font-family: sans-serif; + +} + +td.bitnum { + font-size: 60%; + text-align: center; +} + +td.unused { + border: 1px solid black; + background-color: gray; +} + +td.fname { + border: 1px solid black; + text-align: center; + font-family: sans-serif; +} + + +td.regbits, td.regperm, td.regrv { + border: 1px solid black; + text-align: center; + font-family: sans-serif; +} + +td.regde, td.regfn { + border: 1px solid black; +} + +table.cfgtable { + border: 1px solid black; + width: 80%; + border-collapse: collapse; + margin-left:auto; + margin-right:auto; + table-layout:auto; +} + +table.cfgtable th { + border: 1px solid black; + font-family: sans-serif; + font-weight: bold; +} + +table.cfgtable td { + border: 1px solid black; + font-family: sans-serif; +} diff --git a/hw/ip/iDMA/iDMA.core b/hw/ip/iDMA/iDMA.core new file mode 100644 index 000000000..23722ff8c --- /dev/null +++ b/hw/ip/iDMA/iDMA.core @@ -0,0 +1,40 @@ +CAPI=2: + +name: "x-heep:ip:idma" +description: "core-v-mini-mcu dma peripheral" + +# Copyright 2021 OpenHW Group +# Solderpad Hardware License, Version 2.1, see LICENSE.md for details. 
+# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +filesets: + files_rtl: + depend: + - pulp-platform.org::common_cells + - pulp-platform.org::register_interface + files: + - ./include/idma/typedef.svh: {is_include_file: true} + - ./include/axi/assign.svh: {is_include_file: true} + - ./include/axi/typedef.svh: {is_include_file: true} + + - ./include/axi/axi_pkg.sv + - ./include/idma/idma_pkg.sv + - ./frontend/idma_transfer_id_gen.sv + - ./frontend/idma_reg32_frontend_reg_pkg.sv + - ./frontend/idma_reg32_frontend_reg_top.sv + - ./frontend/idma_reg32_frontend.sv + - ./src/idma_axi_transport_layer.sv + - ./src/idma_channel_coupler.sv + - ./src/idma_stream_fifo.sv + - ./axi/axi_to_mem.sv + - ./axi/axi_to_mem_split.sv + - ./src/idma_buffer.sv + - ./src/idma_legalizer.sv + - ./src/idma_backend.sv + - ./idma_reg32_wrap.sv + file_type: systemVerilogSource + +targets: + default: + filesets: + - files_rtl diff --git a/hw/ip/iDMA/idma_reg32_wrap.sv b/hw/ip/iDMA/idma_reg32_wrap.sv new file mode 100644 index 000000000..40a465d37 --- /dev/null +++ b/hw/ip/iDMA/idma_reg32_wrap.sv @@ -0,0 +1,152 @@ + +`include "./include/axi/assign.svh" +`include "./include/axi/typedef.svh" +`include "./include/idma/typedef.svh" +// `include "register_interface/typedef.svh" + +module dma_reg32_wrap #( + parameter type reg_req_t = logic, + parameter type reg_rsp_t = logic, + parameter type obi_req_t = logic, + parameter type obi_resp_t = logic, + parameter int unsigned OBI_DATA_WIDTH = -1, + parameter int unsigned OBI_ADDR_WIDTH = -1, + parameter int unsigned OBI_USER_WIDTH = -1, + parameter int unsigned OBI_ID_WIDTH = -1 + +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + + input reg_req_t reg_req_i, + output reg_rsp_t reg_rsp_o, + + output obi_req_t dma_master0_ch0_req_o, + input obi_resp_t dma_master0_ch0_resp_i, + + output obi_req_t dma_master1_ch0_req_o, + input obi_resp_t dma_master1_ch0_resp_i, + + input logic spi_rx_valid_i, + input logic 
spi_tx_ready_i, + input logic spi_flash_rx_valid_i, + input logic spi_flash_tx_ready_i, + + output logic dma_intr_o + +); + localparam int unsigned OBI_SLV_ID_WIDTH = OBI_ID_WIDTH; + typedef logic [OBI_ADDR_WIDTH-1:0] addr_t; + typedef logic [OBI_DATA_WIDTH-1:0] data_t; + typedef logic [(OBI_DATA_WIDTH/8)-1:0] strb_t; + typedef logic [OBI_USER_WIDTH-1:0] user_t; + typedef logic [OBI_ID_WIDTH-1:0] axi_id_t; + typedef logic [OBI_SLV_ID_WIDTH-1:0] axi_slv_id_t; + + `AXI_TYPEDEF_ALL(axi_mst, addr_t, axi_id_t, data_t, strb_t, user_t) + axi_mst_req_t axi_mst_req; + axi_mst_resp_t axi_mst_resp; + + localparam int unsigned TFLenWidth = OBI_ADDR_WIDTH; + typedef logic [TFLenWidth-1:0] tf_len_t; + + `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, axi_slv_id_t, addr_t, tf_len_t) + `IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, addr_t) + idma_req_t idma_req; + + logic valid, ready, be_trans_complete; + idma_pkg::idma_busy_t idma_busy; + + + // Frontend + idma_reg32_frontend #( + .DMAAddrWidth (OBI_ADDR_WIDTH), + .dma_regs_req_t(reg_req_t), + .dma_regs_rsp_t(reg_rsp_t), + .idma_req_t (idma_req_t) + ) i_idma_reg32_frontend ( + .clk_i, + .rst_ni, + .dma_ctrl_req_i (reg_req_i), + .dma_ctrl_rsp_o (reg_rsp_o), + .idma_req_o (idma_req), + .valid_o (valid), + .ready_i (ready), + .backend_idle_i (~|idma_busy), + .trans_complete_i(be_trans_complete) + ); + assign dma_intr_o = be_trans_complete; + // Backend + idma_backend #( + .DataWidth (OBI_DATA_WIDTH), + .AddrWidth (OBI_ADDR_WIDTH), + .UserWidth (OBI_USER_WIDTH), + .AxiIdWidth (OBI_ID_WIDTH), + .NumAxInFlight (4), + .BufferDepth (3), + .TFLenWidth (TFLenWidth), + .RAWCouplingAvail (1'b1), + .MaskInvalidData (1'b1), + .HardwareLegalizer (1'b1), + .RejectZeroTransfers(1'b1), + .MemSysDepth (32'd1), + .ErrorCap (idma_pkg::NO_ERROR_HANDLING), + .idma_req_t (idma_req_t), + .idma_rsp_t (idma_rsp_t), + .idma_eh_req_t (idma_pkg::idma_eh_req_t), + .idma_busy_t (idma_pkg::idma_busy_t), + .axi_req_t (axi_mst_req_t), + .axi_rsp_t (axi_mst_resp_t) + ) i_idma_backend 
( + .clk_i, + .rst_ni, + .testmode_i('0), + + .idma_req_i (idma_req), + .req_valid_i(valid), + .req_ready_o(ready), + + .idma_rsp_o (), + .rsp_valid_o(be_trans_complete), + .rsp_ready_i(1'b1), + + .idma_eh_req_i ('0), + .eh_req_valid_i(1'b1), + .eh_req_ready_o(), + + .axi_req_o(axi_mst_req), + .axi_rsp_i(axi_mst_resp), + .busy_o (idma_busy) + ); + + // AXI to OBI + axi_to_mem_split #( + .axi_req_t (axi_mst_req_t), + .axi_resp_t (axi_mst_resp_t), + .AddrWidth (OBI_ADDR_WIDTH), + .AxiDataWidth(OBI_DATA_WIDTH), + .IdWidth (OBI_ID_WIDTH), + .MemDataWidth(OBI_DATA_WIDTH), + .BufDepth (2), + .HideStrb (1'b1) + ) i_axi_to_mem_1 ( + .clk_i, + .rst_ni, + + .mem_req_o ({dma_master1_ch0_req_o.req, dma_master0_ch0_req_o.req}), + .mem_gnt_i ({dma_master1_ch0_resp_i.gnt, dma_master0_ch0_resp_i.gnt}), + .mem_addr_o ({dma_master1_ch0_req_o.addr, dma_master0_ch0_req_o.addr}), + .mem_wdata_o ({dma_master1_ch0_req_o.wdata, dma_master0_ch0_req_o.wdata}), + .mem_strb_o ({dma_master1_ch0_req_o.be, dma_master0_ch0_req_o.be}), + .mem_atop_o (), + .mem_we_o ({dma_master1_ch0_req_o.we, dma_master0_ch0_req_o.we}), + .mem_rvalid_i({dma_master1_ch0_resp_i.rvalid, dma_master0_ch0_resp_i.rvalid}), + .mem_rdata_i ({dma_master1_ch0_resp_i.rdata, dma_master0_ch0_resp_i.rdata}), + + + .axi_req_i (axi_mst_req), + .axi_resp_o(axi_mst_resp), + .busy_o () + ); + +endmodule : dma_reg32_wrap diff --git a/hw/ip/iDMA/include/axi/assign.svh b/hw/ip/iDMA/include/axi/assign.svh new file mode 100644 index 000000000..80667eb09 --- /dev/null +++ b/hw/ip/iDMA/include/axi/assign.svh @@ -0,0 +1,655 @@ +// Copyright (c) 2014-2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Andreas Kurth +// - Nils Wistoff + +// Macros to assign AXI Interfaces and Structs + +`ifndef AXI_ASSIGN_SVH_ +`define AXI_ASSIGN_SVH_ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Internal implementation for assigning one AXI struct or interface to another struct or interface. +// The path to the signals on each side is defined by the `__sep*` arguments. The `__opt_as` +// argument allows to use this standalone (with `__opt_as = assign`) or in assignments inside +// processes (with `__opt_as` void). +`define __AXI_TO_AW(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``addr = __rhs``__rhs_sep``addr; \ + __opt_as __lhs``__lhs_sep``len = __rhs``__rhs_sep``len; \ + __opt_as __lhs``__lhs_sep``size = __rhs``__rhs_sep``size; \ + __opt_as __lhs``__lhs_sep``burst = __rhs``__rhs_sep``burst; \ + __opt_as __lhs``__lhs_sep``lock = __rhs``__rhs_sep``lock; \ + __opt_as __lhs``__lhs_sep``cache = __rhs``__rhs_sep``cache; \ + __opt_as __lhs``__lhs_sep``prot = __rhs``__rhs_sep``prot; \ + __opt_as __lhs``__lhs_sep``qos = __rhs``__rhs_sep``qos; \ + __opt_as __lhs``__lhs_sep``region = __rhs``__rhs_sep``region; \ + __opt_as __lhs``__lhs_sep``atop = __rhs``__rhs_sep``atop; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_W(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``strb = __rhs``__rhs_sep``strb; \ + __opt_as __lhs``__lhs_sep``last = 
__rhs``__rhs_sep``last; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_B(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_AR(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``addr = __rhs``__rhs_sep``addr; \ + __opt_as __lhs``__lhs_sep``len = __rhs``__rhs_sep``len; \ + __opt_as __lhs``__lhs_sep``size = __rhs``__rhs_sep``size; \ + __opt_as __lhs``__lhs_sep``burst = __rhs``__rhs_sep``burst; \ + __opt_as __lhs``__lhs_sep``lock = __rhs``__rhs_sep``lock; \ + __opt_as __lhs``__lhs_sep``cache = __rhs``__rhs_sep``cache; \ + __opt_as __lhs``__lhs_sep``prot = __rhs``__rhs_sep``prot; \ + __opt_as __lhs``__lhs_sep``qos = __rhs``__rhs_sep``qos; \ + __opt_as __lhs``__lhs_sep``region = __rhs``__rhs_sep``region; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_R(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``id = __rhs``__rhs_sep``id; \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp; \ + __opt_as __lhs``__lhs_sep``last = __rhs``__rhs_sep``last; \ + __opt_as __lhs``__lhs_sep``user = __rhs``__rhs_sep``user; +`define __AXI_TO_REQ(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + `__AXI_TO_AW(__opt_as, __lhs.aw, __lhs_sep, __rhs.aw, __rhs_sep) \ + __opt_as __lhs.aw_valid = __rhs.aw_valid; \ + `__AXI_TO_W(__opt_as, __lhs.w, __lhs_sep, __rhs.w, __rhs_sep) \ + __opt_as __lhs.w_valid = __rhs.w_valid; \ + __opt_as __lhs.b_ready = __rhs.b_ready; \ + `__AXI_TO_AR(__opt_as, __lhs.ar, __lhs_sep, __rhs.ar, __rhs_sep) \ + __opt_as __lhs.ar_valid = __rhs.ar_valid; \ + __opt_as __lhs.r_ready = __rhs.r_ready; +`define __AXI_TO_RESP(__opt_as, __lhs, 
__lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs.aw_ready = __rhs.aw_ready; \ + __opt_as __lhs.ar_ready = __rhs.ar_ready; \ + __opt_as __lhs.w_ready = __rhs.w_ready; \ + __opt_as __lhs.b_valid = __rhs.b_valid; \ + `__AXI_TO_B(__opt_as, __lhs.b, __lhs_sep, __rhs.b, __rhs_sep) \ + __opt_as __lhs.r_valid = __rhs.r_valid; \ + `__AXI_TO_R(__opt_as, __lhs.r, __lhs_sep, __rhs.r, __rhs_sep) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning one AXI4+ATOP interface to another, as if you would do `assign slv = mst;` +// +// The channel assignments `AXI_ASSIGN_XX(dst, src)` assign all payload and the valid signal of the +// `XX` channel from the `src` to the `dst` interface and they assign the ready signal from the +// `dst` to the `src` interface. +// The interface assignment `AXI_ASSIGN(dst, src)` assigns all channels including handshakes as if +// `src` was the master of `dst`.
+// +// Usage Example: +// `AXI_ASSIGN(slv, mst) +// `AXI_ASSIGN_AW(dst, src) +// `AXI_ASSIGN_R(dst, src) +`define AXI_ASSIGN_AW(dst, src) \ + `__AXI_TO_AW(assign, dst.aw, _, src.aw, _) \ + assign dst.aw_valid = src.aw_valid; \ + assign src.aw_ready = dst.aw_ready; +`define AXI_ASSIGN_W(dst, src) \ + `__AXI_TO_W(assign, dst.w, _, src.w, _) \ + assign dst.w_valid = src.w_valid; \ + assign src.w_ready = dst.w_ready; +`define AXI_ASSIGN_B(dst, src) \ + `__AXI_TO_B(assign, dst.b, _, src.b, _) \ + assign dst.b_valid = src.b_valid; \ + assign src.b_ready = dst.b_ready; +`define AXI_ASSIGN_AR(dst, src) \ + `__AXI_TO_AR(assign, dst.ar, _, src.ar, _) \ + assign dst.ar_valid = src.ar_valid; \ + assign src.ar_ready = dst.ar_ready; +`define AXI_ASSIGN_R(dst, src) \ + `__AXI_TO_R(assign, dst.r, _, src.r, _) \ + assign dst.r_valid = src.r_valid; \ + assign src.r_ready = dst.r_ready; +`define AXI_ASSIGN(slv, mst) \ + `AXI_ASSIGN_AW(slv, mst) \ + `AXI_ASSIGN_W(slv, mst) \ + `AXI_ASSIGN_B(mst, slv) \ + `AXI_ASSIGN_AR(slv, mst) \ + `AXI_ASSIGN_R(mst, slv) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning a AXI4+ATOP interface to a monitor modport, as if you would do `assign mon = axi_if;` +// +// The channel assignment `AXI_ASSIGN_MONITOR(mon_dv, axi_if)` assigns all signals from `axi_if` +// to the `mon_dv` interface. 
+// +// Usage Example: +// `AXI_ASSIGN_MONITOR(mon_dv, axi_if) +`define AXI_ASSIGN_MONITOR(mon_dv, axi_if) \ + `__AXI_TO_AW(assign, mon_dv.aw, _, axi_if.aw, _) \ + assign mon_dv.aw_valid = axi_if.aw_valid; \ + assign mon_dv.aw_ready = axi_if.aw_ready; \ + `__AXI_TO_W(assign, mon_dv.w, _, axi_if.w, _) \ + assign mon_dv.w_valid = axi_if.w_valid; \ + assign mon_dv.w_ready = axi_if.w_ready; \ + `__AXI_TO_B(assign, mon_dv.b, _, axi_if.b, _) \ + assign mon_dv.b_valid = axi_if.b_valid; \ + assign mon_dv.b_ready = axi_if.b_ready; \ + `__AXI_TO_AR(assign, mon_dv.ar, _, axi_if.ar, _) \ + assign mon_dv.ar_valid = axi_if.ar_valid; \ + assign mon_dv.ar_ready = axi_if.ar_ready; \ + `__AXI_TO_R(assign, mon_dv.r, _, axi_if.r, _) \ + assign mon_dv.r_valid = axi_if.r_valid; \ + assign mon_dv.r_ready = axi_if.r_ready; +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting an interface from channel or request/response structs inside a process. +// +// The channel macros `AXI_SET_FROM_XX(axi_if, xx_struct)` set the payload signals of the `axi_if` +// interface from the signals in `xx_struct`. They do not set the handshake signals. +// The request macro `AXI_SET_FROM_REQ(axi_if, req_struct)` sets all request channels (AW, W, AR) +// and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the `axi_if` +// interface from the signals in `req_struct`. +// The response macro `AXI_SET_FROM_RESP(axi_if, resp_struct)` sets both response channels (B and R) +// and the response-side handshake signals (B and R valid and AW, W, and AR ready) of the `axi_if` +// interface from the signals in `resp_struct`. +// +// Usage Example: +// always_comb begin +// `AXI_SET_FROM_REQ(my_if, my_req_struct) +// end +`define AXI_SET_FROM_AW(axi_if, aw_struct) `__AXI_TO_AW(, axi_if.aw, _, aw_struct, .) 
+`define AXI_SET_FROM_W(axi_if, w_struct) `__AXI_TO_W(, axi_if.w, _, w_struct, .) +`define AXI_SET_FROM_B(axi_if, b_struct) `__AXI_TO_B(, axi_if.b, _, b_struct, .) +`define AXI_SET_FROM_AR(axi_if, ar_struct) `__AXI_TO_AR(, axi_if.ar, _, ar_struct, .) +`define AXI_SET_FROM_R(axi_if, r_struct) `__AXI_TO_R(, axi_if.r, _, r_struct, .) +`define AXI_SET_FROM_REQ(axi_if, req_struct) `__AXI_TO_REQ(, axi_if, _, req_struct, .) +`define AXI_SET_FROM_RESP(axi_if, resp_struct) `__AXI_TO_RESP(, axi_if, _, resp_struct, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning an interface from channel or request/response structs outside a process. +// +// The channel macros `AXI_ASSIGN_FROM_XX(axi_if, xx_struct)` assign the payload signals of the +// `axi_if` interface from the signals in `xx_struct`. They do not assign the handshake signals. +// The request macro `AXI_ASSIGN_FROM_REQ(axi_if, req_struct)` assigns all request channels (AW, W, +// AR) and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the +// `axi_if` interface from the signals in `req_struct`. +// The response macro `AXI_ASSIGN_FROM_RESP(axi_if, resp_struct)` assigns both response channels (B +// and R) and the response-side handshake signals (B and R valid and AW, W, and AR ready) of the +// `axi_if` interface from the signals in `resp_struct`. +// +// Usage Example: +// `AXI_ASSIGN_FROM_REQ(my_if, my_req_struct) +`define AXI_ASSIGN_FROM_AW(axi_if, aw_struct) `__AXI_TO_AW(assign, axi_if.aw, _, aw_struct, .) +`define AXI_ASSIGN_FROM_W(axi_if, w_struct) `__AXI_TO_W(assign, axi_if.w, _, w_struct, .) +`define AXI_ASSIGN_FROM_B(axi_if, b_struct) `__AXI_TO_B(assign, axi_if.b, _, b_struct, .) +`define AXI_ASSIGN_FROM_AR(axi_if, ar_struct) `__AXI_TO_AR(assign, axi_if.ar, _, ar_struct, .) 
+`define AXI_ASSIGN_FROM_R(axi_if, r_struct) `__AXI_TO_R(assign, axi_if.r, _, r_struct, .) +`define AXI_ASSIGN_FROM_REQ(axi_if, req_struct) `__AXI_TO_REQ(assign, axi_if, _, req_struct, .) +`define AXI_ASSIGN_FROM_RESP(axi_if, resp_struct) `__AXI_TO_RESP(assign, axi_if, _, resp_struct, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting channel or request/response structs from an interface inside a process. +// +// The channel macros `AXI_SET_TO_XX(xx_struct, axi_if)` set the signals of `xx_struct` to the +// payload signals of that channel in the `axi_if` interface. They do not set the handshake +// signals. +// The request macro `AXI_SET_TO_REQ(axi_if, req_struct)` sets all signals of `req_struct` (i.e., +// request channel (AW, W, AR) payload and request-side handshake signals (AW, W, and AR valid and +// B and R ready)) to the signals in the `axi_if` interface. +// The response macro `AXI_SET_TO_RESP(axi_if, resp_struct)` sets all signals of `resp_struct` +// (i.e., response channel (B and R) payload and response-side handshake signals (B and R valid and +// AW, W, and AR ready)) to the signals in the `axi_if` interface. 
+// +// Usage Example: +// always_comb begin +// `AXI_SET_TO_REQ(my_req_struct, my_if) +// end +`define AXI_SET_TO_AW(aw_struct, axi_if) `__AXI_TO_AW(, aw_struct, ., axi_if.aw, _) +`define AXI_SET_TO_W(w_struct, axi_if) `__AXI_TO_W(, w_struct, ., axi_if.w, _) +`define AXI_SET_TO_B(b_struct, axi_if) `__AXI_TO_B(, b_struct, ., axi_if.b, _) +`define AXI_SET_TO_AR(ar_struct, axi_if) `__AXI_TO_AR(, ar_struct, ., axi_if.ar, _) +`define AXI_SET_TO_R(r_struct, axi_if) `__AXI_TO_R(, r_struct, ., axi_if.r, _) +`define AXI_SET_TO_REQ(req_struct, axi_if) `__AXI_TO_REQ(, req_struct, ., axi_if, _) +`define AXI_SET_TO_RESP(resp_struct, axi_if) `__AXI_TO_RESP(, resp_struct, ., axi_if, _) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning channel or request/response structs from an interface outside a process. +// +// The channel macros `AXI_ASSIGN_TO_XX(xx_struct, axi_if)` assign the signals of `xx_struct` to the +// payload signals of that channel in the `axi_if` interface. They do not assign the handshake +// signals. +// The request macro `AXI_ASSIGN_TO_REQ(axi_if, req_struct)` assigns all signals of `req_struct` +// (i.e., request channel (AW, W, AR) payload and request-side handshake signals (AW, W, and AR +// valid and B and R ready)) to the signals in the `axi_if` interface. +// The response macro `AXI_ASSIGN_TO_RESP(axi_if, resp_struct)` assigns all signals of `resp_struct` +// (i.e., response channel (B and R) payload and response-side handshake signals (B and R valid and +// AW, W, and AR ready)) to the signals in the `axi_if` interface. 
+// +// Usage Example: +// `AXI_ASSIGN_TO_REQ(my_req_struct, my_if) +`define AXI_ASSIGN_TO_AW(aw_struct, axi_if) `__AXI_TO_AW(assign, aw_struct, ., axi_if.aw, _) +`define AXI_ASSIGN_TO_W(w_struct, axi_if) `__AXI_TO_W(assign, w_struct, ., axi_if.w, _) +`define AXI_ASSIGN_TO_B(b_struct, axi_if) `__AXI_TO_B(assign, b_struct, ., axi_if.b, _) +`define AXI_ASSIGN_TO_AR(ar_struct, axi_if) `__AXI_TO_AR(assign, ar_struct, ., axi_if.ar, _) +`define AXI_ASSIGN_TO_R(r_struct, axi_if) `__AXI_TO_R(assign, r_struct, ., axi_if.r, _) +`define AXI_ASSIGN_TO_REQ(req_struct, axi_if) `__AXI_TO_REQ(assign, req_struct, ., axi_if, _) +`define AXI_ASSIGN_TO_RESP(resp_struct, axi_if) `__AXI_TO_RESP(assign, resp_struct, ., axi_if, _) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting channel or request/response structs from another struct inside a process. +// +// The channel macros `AXI_SET_XX_STRUCT(lhs, rhs)` set the fields of the `lhs` channel struct to +// the fields of the `rhs` channel struct. They do not set the handshake signals, which are not +// part of channel structs. +// The request macro `AXI_SET_REQ_STRUCT(lhs, rhs)` sets all fields of the `lhs` request struct to +// the fields of the `rhs` request struct. This includes all request channel (AW, W, AR) payload +// and request-side handshake signals (AW, W, and AR valid and B and R ready). +// The response macro `AXI_SET_RESP_STRUCT(lhs, rhs)` sets all fields of the `lhs` response struct +// to the fields of the `rhs` response struct. This includes all response channel (B and R) payload +// and response-side handshake signals (B and R valid and AW, W, and AR ready). +// +// Usage Example: +// always_comb begin +// `AXI_SET_REQ_STRUCT(my_req_struct, another_req_struct) +// end +`define AXI_SET_AW_STRUCT(lhs, rhs) `__AXI_TO_AW(, lhs, ., rhs, .)
+`define AXI_SET_W_STRUCT(lhs, rhs) `__AXI_TO_W(, lhs, ., rhs, .) +`define AXI_SET_B_STRUCT(lhs, rhs) `__AXI_TO_B(, lhs, ., rhs, .) +`define AXI_SET_AR_STRUCT(lhs, rhs) `__AXI_TO_AR(, lhs, ., rhs, .) +`define AXI_SET_R_STRUCT(lhs, rhs) `__AXI_TO_R(, lhs, ., rhs, .) +`define AXI_SET_REQ_STRUCT(lhs, rhs) `__AXI_TO_REQ(, lhs, ., rhs, .) +`define AXI_SET_RESP_STRUCT(lhs, rhs) `__AXI_TO_RESP(, lhs, ., rhs, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning channel or request/response structs from another struct outside a process. +// +// The channel macros `AXI_ASSIGN_XX_STRUCT(lhs, rhs)` assign the fields of the `lhs` channel struct +// to the fields of the `rhs` channel struct. They do not assign the handshake signals, which are +// not part of the channel structs. +// The request macro `AXI_ASSIGN_REQ_STRUCT(lhs, rhs)` assigns all fields of the `lhs` request +// struct to the fields of the `rhs` request struct. This includes all request channel (AW, W, AR) +// payload and request-side handshake signals (AW, W, and AR valid and B and R ready). +// The response macro `AXI_ASSIGN_RESP_STRUCT(lhs, rhs)` assigns all fields of the `lhs` response +// struct to the fields of the `rhs` response struct. This includes all response channel (B and R) +// payload and response-side handshake signals (B and R valid and AW, W, and AR ready). +// +// Usage Example: +// `AXI_ASSIGN_REQ_STRUCT(my_req_struct, another_req_struct) +`define AXI_ASSIGN_AW_STRUCT(lhs, rhs) `__AXI_TO_AW(assign, lhs, ., rhs, .) +`define AXI_ASSIGN_W_STRUCT(lhs, rhs) `__AXI_TO_W(assign, lhs, ., rhs, .) +`define AXI_ASSIGN_B_STRUCT(lhs, rhs) `__AXI_TO_B(assign, lhs, ., rhs, .) +`define AXI_ASSIGN_AR_STRUCT(lhs, rhs) `__AXI_TO_AR(assign, lhs, ., rhs, .) +`define AXI_ASSIGN_R_STRUCT(lhs, rhs) `__AXI_TO_R(assign, lhs, ., rhs, .)
+`define AXI_ASSIGN_REQ_STRUCT(lhs, rhs) `__AXI_TO_REQ(assign, lhs, ., rhs, .) +`define AXI_ASSIGN_RESP_STRUCT(lhs, rhs) `__AXI_TO_RESP(assign, lhs, ., rhs, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Internal implementation for assigning one Lite struct or interface to another struct or +// interface. The path to the signals on each side is defined by the `__sep*` arguments. The +// `__opt_as` argument allows to use this standalone (with `__opt_as = assign`) or in assignments +// inside processes (with `__opt_as` void). +`define __AXI_LITE_TO_AX(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``addr = __rhs``__rhs_sep``addr; \ + __opt_as __lhs``__lhs_sep``prot = __rhs``__rhs_sep``prot; +`define __AXI_LITE_TO_W(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``strb = __rhs``__rhs_sep``strb; +`define __AXI_LITE_TO_B(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp; +`define __AXI_LITE_TO_R(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + __opt_as __lhs``__lhs_sep``data = __rhs``__rhs_sep``data; \ + __opt_as __lhs``__lhs_sep``resp = __rhs``__rhs_sep``resp; +`define __AXI_LITE_TO_REQ(__opt_as, __lhs, __lhs_sep, __rhs, __rhs_sep) \ + `__AXI_LITE_TO_AX(__opt_as, __lhs.aw, __lhs_sep, __rhs.aw, __rhs_sep) \ + __opt_as __lhs.aw_valid = __rhs.aw_valid; \ + `__AXI_LITE_TO_W(__opt_as, __lhs.w, __lhs_sep, __rhs.w, __rhs_sep) \ + __opt_as __lhs.w_valid = __rhs.w_valid; \ + __opt_as __lhs.b_ready = __rhs.b_ready; \ + `__AXI_LITE_TO_AX(__opt_as, __lhs.ar, __lhs_sep, __rhs.ar, __rhs_sep) \ + __opt_as __lhs.ar_valid = __rhs.ar_valid; \ + __opt_as __lhs.r_ready = __rhs.r_ready; +`define __AXI_LITE_TO_RESP(__opt_as, __lhs, __lhs_sep, __rhs,
__rhs_sep) \ + __opt_as __lhs.aw_ready = __rhs.aw_ready; \ + __opt_as __lhs.ar_ready = __rhs.ar_ready; \ + __opt_as __lhs.w_ready = __rhs.w_ready; \ + __opt_as __lhs.b_valid = __rhs.b_valid; \ + `__AXI_LITE_TO_B(__opt_as, __lhs.b, __lhs_sep, __rhs.b, __rhs_sep) \ + __opt_as __lhs.r_valid = __rhs.r_valid; \ + `__AXI_LITE_TO_R(__opt_as, __lhs.r, __lhs_sep, __rhs.r, __rhs_sep) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning one AXI-Lite interface to another, as if you would do `assign slv = mst;` +// +// The channel assignments `AXI_LITE_ASSIGN_XX(dst, src)` assign all payload and the valid signal of +// the `XX` channel from the `src` to the `dst` interface and they assign the ready signal from the +// `dst` to the `src` interface. +// The interface assignment `AXI_LITE_ASSIGN(dst, src)` assigns all channels including handshakes as +// if `src` was the master of `dst`.
+// +// Usage Example: +// `AXI_LITE_ASSIGN(slv, mst) +// `AXI_LITE_ASSIGN_AW(dst, src) +// `AXI_LITE_ASSIGN_R(dst, src) +`define AXI_LITE_ASSIGN_AW(dst, src) \ + `__AXI_LITE_TO_AX(assign, dst.aw, _, src.aw, _) \ + assign dst.aw_valid = src.aw_valid; \ + assign src.aw_ready = dst.aw_ready; +`define AXI_LITE_ASSIGN_W(dst, src) \ + `__AXI_LITE_TO_W(assign, dst.w, _, src.w, _) \ + assign dst.w_valid = src.w_valid; \ + assign src.w_ready = dst.w_ready; +`define AXI_LITE_ASSIGN_B(dst, src) \ + `__AXI_LITE_TO_B(assign, dst.b, _, src.b, _) \ + assign dst.b_valid = src.b_valid; \ + assign src.b_ready = dst.b_ready; +`define AXI_LITE_ASSIGN_AR(dst, src) \ + `__AXI_LITE_TO_AX(assign, dst.ar, _, src.ar, _) \ + assign dst.ar_valid = src.ar_valid; \ + assign src.ar_ready = dst.ar_ready; +`define AXI_LITE_ASSIGN_R(dst, src) \ + `__AXI_LITE_TO_R(assign, dst.r, _, src.r, _) \ + assign dst.r_valid = src.r_valid; \ + assign src.r_ready = dst.r_ready; +`define AXI_LITE_ASSIGN(slv, mst) \ + `AXI_LITE_ASSIGN_AW(slv, mst) \ + `AXI_LITE_ASSIGN_W(slv, mst) \ + `AXI_LITE_ASSIGN_B(mst, slv) \ + `AXI_LITE_ASSIGN_AR(slv, mst) \ + `AXI_LITE_ASSIGN_R(mst, slv) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting a Lite interface from channel or request/response structs inside a process. +// +// The channel macros `AXI_LITE_SET_FROM_XX(axi_if, xx_struct)` set the payload signals of the +// `axi_if` interface from the signals in `xx_struct`. They do not set the handshake signals. +// The request macro `AXI_LITE_SET_FROM_REQ(axi_if, req_struct)` sets all request channels (AW, W, +// AR) and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the +// `axi_if` interface from the signals in `req_struct`. 
+// The response macro `AXI_LITE_SET_FROM_RESP(axi_if, resp_struct)` sets both response channels (B +// and R) and the response-side handshake signals (B and R valid and AW, W, and AR ready) of the +// `axi_if` interface from the signals in `resp_struct`. +// +// Usage Example: +// always_comb begin +// `AXI_LITE_SET_FROM_REQ(my_if, my_req_struct) +// end +`define AXI_LITE_SET_FROM_AW(axi_if, aw_struct) `__AXI_LITE_TO_AX(, axi_if.aw, _, aw_struct, .) +`define AXI_LITE_SET_FROM_W(axi_if, w_struct) `__AXI_LITE_TO_W(, axi_if.w, _, w_struct, .) +`define AXI_LITE_SET_FROM_B(axi_if, b_struct) `__AXI_LITE_TO_B(, axi_if.b, _, b_struct, .) +`define AXI_LITE_SET_FROM_AR(axi_if, ar_struct) `__AXI_LITE_TO_AX(, axi_if.ar, _, ar_struct, .) +`define AXI_LITE_SET_FROM_R(axi_if, r_struct) `__AXI_LITE_TO_R(, axi_if.r, _, r_struct, .) +`define AXI_LITE_SET_FROM_REQ(axi_if, req_struct) `__AXI_LITE_TO_REQ(, axi_if, _, req_struct, .) +`define AXI_LITE_SET_FROM_RESP(axi_if, resp_struct) `__AXI_LITE_TO_RESP(, axi_if, _, resp_struct, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning a Lite interface from channel or request/response structs outside a process. +// +// The channel macros `AXI_LITE_ASSIGN_FROM_XX(axi_if, xx_struct)` assign the payload signals of the +// `axi_if` interface from the signals in `xx_struct`. They do not assign the handshake signals. +// The request macro `AXI_LITE_ASSIGN_FROM_REQ(axi_if, req_struct)` assigns all request channels +// (AW, W, AR) and the request-side handshake signals (AW, W, and AR valid and B and R ready) of the +// `axi_if` interface from the signals in `req_struct`. 
+// The response macro `AXI_LITE_ASSIGN_FROM_RESP(axi_if, resp_struct)` assigns both response +// channels (B and R) and the response-side handshake signals (B and R valid and AW, W, and AR +// ready) of the `axi_if` interface from the signals in `resp_struct`. +// +// Usage Example: +// `AXI_LITE_ASSIGN_FROM_REQ(my_if, my_req_struct) +`define AXI_LITE_ASSIGN_FROM_AW(axi_if, aw_struct) `__AXI_LITE_TO_AX(assign, axi_if.aw, _, aw_struct, .) +`define AXI_LITE_ASSIGN_FROM_W(axi_if, w_struct) `__AXI_LITE_TO_W(assign, axi_if.w, _, w_struct, .) +`define AXI_LITE_ASSIGN_FROM_B(axi_if, b_struct) `__AXI_LITE_TO_B(assign, axi_if.b, _, b_struct, .) +`define AXI_LITE_ASSIGN_FROM_AR(axi_if, ar_struct) `__AXI_LITE_TO_AX(assign, axi_if.ar, _, ar_struct, .) +`define AXI_LITE_ASSIGN_FROM_R(axi_if, r_struct) `__AXI_LITE_TO_R(assign, axi_if.r, _, r_struct, .) +`define AXI_LITE_ASSIGN_FROM_REQ(axi_if, req_struct) `__AXI_LITE_TO_REQ(assign, axi_if, _, req_struct, .) +`define AXI_LITE_ASSIGN_FROM_RESP(axi_if, resp_struct) `__AXI_LITE_TO_RESP(assign, axi_if, _, resp_struct, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting channel or request/response structs from an interface inside a process. +// +// The channel macros `AXI_LITE_SET_TO_XX(xx_struct, axi_if)` set the signals of `xx_struct` to the +// payload signals of that channel in the `axi_if` interface. They do not set the handshake +// signals. +// The request macro `AXI_LITE_SET_TO_REQ(axi_if, req_struct)` sets all signals of `req_struct` +// (i.e., request channel (AW, W, AR) payload and request-side handshake signals (AW, W, and AR +// valid and B and R ready)) to the signals in the `axi_if` interface. 
+// The response macro `AXI_LITE_SET_TO_RESP(resp_struct, axi_if)` sets all signals of `resp_struct` +// (i.e., response channel (B and R) payload and response-side handshake signals (B and R valid and +// AW, W, and AR ready)) to the signals in the `axi_if` interface. +// +// Usage Example: +// always_comb begin +// `AXI_LITE_SET_TO_REQ(my_req_struct, my_if) +// end +`define AXI_LITE_SET_TO_AW(aw_struct, axi_if) `__AXI_LITE_TO_AX(, aw_struct, ., axi_if.aw, _) +`define AXI_LITE_SET_TO_W(w_struct, axi_if) `__AXI_LITE_TO_W(, w_struct, ., axi_if.w, _) +`define AXI_LITE_SET_TO_B(b_struct, axi_if) `__AXI_LITE_TO_B(, b_struct, ., axi_if.b, _) +`define AXI_LITE_SET_TO_AR(ar_struct, axi_if) `__AXI_LITE_TO_AX(, ar_struct, ., axi_if.ar, _) +`define AXI_LITE_SET_TO_R(r_struct, axi_if) `__AXI_LITE_TO_R(, r_struct, ., axi_if.r, _) +`define AXI_LITE_SET_TO_REQ(req_struct, axi_if) `__AXI_LITE_TO_REQ(, req_struct, ., axi_if, _) +`define AXI_LITE_SET_TO_RESP(resp_struct, axi_if) `__AXI_LITE_TO_RESP(, resp_struct, ., axi_if, _) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning channel or request/response structs from an interface outside a process. +// +// The channel macros `AXI_LITE_ASSIGN_TO_XX(xx_struct, axi_if)` assign the signals of `xx_struct` +// to the payload signals of that channel in the `axi_if` interface. They do not assign the +// handshake signals. +// The request macro `AXI_LITE_ASSIGN_TO_REQ(req_struct, axi_if)` assigns all signals of +// `req_struct` (i.e., request channel (AW, W, AR) payload and request-side handshake signals (AW, +// W, and AR valid and B and R ready)) to the signals in the `axi_if` interface.
+// The response macro `AXI_LITE_ASSIGN_TO_RESP(resp_struct, axi_if)` assigns all signals of +// `resp_struct` (i.e., response channel (B and R) payload and response-side handshake signals (B +// and R valid and AW, W, and AR ready)) to the signals in the `axi_if` interface. +// +// Usage Example: +// `AXI_LITE_ASSIGN_TO_REQ(my_req_struct, my_if) +`define AXI_LITE_ASSIGN_TO_AW(aw_struct, axi_if) `__AXI_LITE_TO_AX(assign, aw_struct, ., axi_if.aw, _) +`define AXI_LITE_ASSIGN_TO_W(w_struct, axi_if) `__AXI_LITE_TO_W(assign, w_struct, ., axi_if.w, _) +`define AXI_LITE_ASSIGN_TO_B(b_struct, axi_if) `__AXI_LITE_TO_B(assign, b_struct, ., axi_if.b, _) +`define AXI_LITE_ASSIGN_TO_AR(ar_struct, axi_if) `__AXI_LITE_TO_AX(assign, ar_struct, ., axi_if.ar, _) +`define AXI_LITE_ASSIGN_TO_R(r_struct, axi_if) `__AXI_LITE_TO_R(assign, r_struct, ., axi_if.r, _) +`define AXI_LITE_ASSIGN_TO_REQ(req_struct, axi_if) `__AXI_LITE_TO_REQ(assign, req_struct, ., axi_if, _) +`define AXI_LITE_ASSIGN_TO_RESP(resp_struct, axi_if) `__AXI_LITE_TO_RESP(assign, resp_struct, ., axi_if, _) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Setting channel or request/response structs from another struct inside a process. +// +// The channel macros `AXI_LITE_SET_XX_STRUCT(lhs, rhs)` set the fields of the `lhs` channel struct +// to the fields of the `rhs` channel struct. They do not set the handshake signals, which are not +// part of channel structs. +// The request macro `AXI_LITE_SET_REQ_STRUCT(lhs, rhs)` sets all fields of the `lhs` request struct +// to the fields of the `rhs` request struct. This includes all request channel (AW, W, AR) payload +// and request-side handshake signals (AW, W, and AR valid and B and R ready).
+// The response macro `AXI_LITE_SET_RESP_STRUCT(lhs, rhs)` sets all fields of the `lhs` response +// struct to the fields of the `rhs` response struct. This includes all response channel (B and R) +// payload and response-side handshake signals (B and R valid and AW, W, and AR ready). +// +// Usage Example: +// always_comb begin +// `AXI_LITE_SET_REQ_STRUCT(my_req_struct, another_req_struct) +// end +`define AXI_LITE_SET_AW_STRUCT(lhs, rhs) `__AXI_LITE_TO_AX(, lhs, ., rhs, .) +`define AXI_LITE_SET_W_STRUCT(lhs, rhs) `__AXI_LITE_TO_W(, lhs, ., rhs, .) +`define AXI_LITE_SET_B_STRUCT(lhs, rhs) `__AXI_LITE_TO_B(, lhs, ., rhs, .) +`define AXI_LITE_SET_AR_STRUCT(lhs, rhs) `__AXI_LITE_TO_AX(, lhs, ., rhs, .) +`define AXI_LITE_SET_R_STRUCT(lhs, rhs) `__AXI_LITE_TO_R(, lhs, ., rhs, .) +`define AXI_LITE_SET_REQ_STRUCT(lhs, rhs) `__AXI_LITE_TO_REQ(, lhs, ., rhs, .) +`define AXI_LITE_SET_RESP_STRUCT(lhs, rhs) `__AXI_LITE_TO_RESP(, lhs, ., rhs, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Assigning channel or request/response structs from another struct outside a process. +// +// The channel macros `AXI_LITE_ASSIGN_XX_STRUCT(lhs, rhs)` assign the fields of the `lhs` channel +// struct to the fields of the `rhs` channel struct. They do not assign the handshake signals, +// which are not part of the channel structs. +// The request macro `AXI_LITE_ASSIGN_REQ_STRUCT(lhs, rhs)` assigns all fields of the `lhs` request +// struct to the fields of the `rhs` request struct. This includes all request channel (AW, W, AR) +// payload and request-side handshake signals (AW, W, and AR valid and B and R ready). +// The response macro `AXI_LITE_ASSIGN_RESP_STRUCT(lhs, rhs)` assigns all fields of the `lhs` +// response struct to the fields of the `rhs` response struct.
This includes all response channel +// (B and R) payload and response-side handshake signals (B and R valid and AW, W, and AR ready). +// +// Usage Example: +// `AXI_LITE_ASSIGN_REQ_STRUCT(my_req_struct, another_req_struct) +`define AXI_LITE_ASSIGN_AW_STRUCT(lhs, rhs) `__AXI_LITE_TO_AX(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_W_STRUCT(lhs, rhs) `__AXI_LITE_TO_W(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_B_STRUCT(lhs, rhs) `__AXI_LITE_TO_B(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_AR_STRUCT(lhs, rhs) `__AXI_LITE_TO_AX(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_R_STRUCT(lhs, rhs) `__AXI_LITE_TO_R(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_REQ_STRUCT(lhs, rhs) `__AXI_LITE_TO_REQ(assign, lhs, ., rhs, .) +`define AXI_LITE_ASSIGN_RESP_STRUCT(lhs, rhs) `__AXI_LITE_TO_RESP(assign, lhs, ., rhs, .) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Macros for assigning flattened AXI ports to req/resp AXI structs +// Flat AXI ports are required by the Vivado IP Integrator. Vivado naming convention is followed.
+// +// Usage Example: +// `AXI_ASSIGN_MASTER_TO_FLAT(my_bus, my_req_struct, my_rsp_struct) +`define AXI_ASSIGN_MASTER_TO_FLAT(pat, req, rsp) \ + assign m_axi_``pat``_awvalid = req.aw_valid; \ + assign m_axi_``pat``_awid = req.aw.id; \ + assign m_axi_``pat``_awaddr = req.aw.addr; \ + assign m_axi_``pat``_awlen = req.aw.len; \ + assign m_axi_``pat``_awsize = req.aw.size; \ + assign m_axi_``pat``_awburst = req.aw.burst; \ + assign m_axi_``pat``_awlock = req.aw.lock; \ + assign m_axi_``pat``_awcache = req.aw.cache; \ + assign m_axi_``pat``_awprot = req.aw.prot; \ + assign m_axi_``pat``_awqos = req.aw.qos; \ + assign m_axi_``pat``_awregion = req.aw.region; \ + assign m_axi_``pat``_awuser = req.aw.user; \ + \ + assign m_axi_``pat``_wvalid = req.w_valid; \ + assign m_axi_``pat``_wdata = req.w.data; \ + assign m_axi_``pat``_wstrb = req.w.strb; \ + assign m_axi_``pat``_wlast = req.w.last; \ + assign m_axi_``pat``_wuser = req.w.user; \ + \ + assign m_axi_``pat``_bready = req.b_ready; \ + \ + assign m_axi_``pat``_arvalid = req.ar_valid; \ + assign m_axi_``pat``_arid = req.ar.id; \ + assign m_axi_``pat``_araddr = req.ar.addr; \ + assign m_axi_``pat``_arlen = req.ar.len; \ + assign m_axi_``pat``_arsize = req.ar.size; \ + assign m_axi_``pat``_arburst = req.ar.burst; \ + assign m_axi_``pat``_arlock = req.ar.lock; \ + assign m_axi_``pat``_arcache = req.ar.cache; \ + assign m_axi_``pat``_arprot = req.ar.prot; \ + assign m_axi_``pat``_arqos = req.ar.qos; \ + assign m_axi_``pat``_arregion = req.ar.region; \ + assign m_axi_``pat``_aruser = req.ar.user; \ + \ + assign m_axi_``pat``_rready = req.r_ready; \ + \ + assign rsp.aw_ready = m_axi_``pat``_awready; \ + assign rsp.ar_ready = m_axi_``pat``_arready; \ + assign rsp.w_ready = m_axi_``pat``_wready; \ + \ + assign rsp.b_valid = m_axi_``pat``_bvalid; \ + assign rsp.b.id = m_axi_``pat``_bid; \ + assign rsp.b.resp = m_axi_``pat``_bresp; \ + assign rsp.b.user = m_axi_``pat``_buser; \ + \ + assign rsp.r_valid = m_axi_``pat``_rvalid; \ +
assign rsp.r.id = m_axi_``pat``_rid; \ + assign rsp.r.data = m_axi_``pat``_rdata; \ + assign rsp.r.resp = m_axi_``pat``_rresp; \ + assign rsp.r.last = m_axi_``pat``_rlast; \ + assign rsp.r.user = m_axi_``pat``_ruser; + +`define AXI_ASSIGN_SLAVE_TO_FLAT(pat, req, rsp) \ + assign req.aw_valid = s_axi_``pat``_awvalid; \ + assign req.aw.id = s_axi_``pat``_awid; \ + assign req.aw.addr = s_axi_``pat``_awaddr; \ + assign req.aw.len = s_axi_``pat``_awlen; \ + assign req.aw.size = s_axi_``pat``_awsize; \ + assign req.aw.burst = s_axi_``pat``_awburst; \ + assign req.aw.lock = s_axi_``pat``_awlock; \ + assign req.aw.cache = s_axi_``pat``_awcache; \ + assign req.aw.prot = s_axi_``pat``_awprot; \ + assign req.aw.qos = s_axi_``pat``_awqos; \ + assign req.aw.region = s_axi_``pat``_awregion; \ + assign req.aw.user = s_axi_``pat``_awuser; \ + \ + assign req.w_valid = s_axi_``pat``_wvalid; \ + assign req.w.data = s_axi_``pat``_wdata; \ + assign req.w.strb = s_axi_``pat``_wstrb; \ + assign req.w.last = s_axi_``pat``_wlast; \ + assign req.w.user = s_axi_``pat``_wuser; \ + \ + assign req.b_ready = s_axi_``pat``_bready; \ + \ + assign req.ar_valid = s_axi_``pat``_arvalid; \ + assign req.ar.id = s_axi_``pat``_arid; \ + assign req.ar.addr = s_axi_``pat``_araddr; \ + assign req.ar.len = s_axi_``pat``_arlen; \ + assign req.ar.size = s_axi_``pat``_arsize; \ + assign req.ar.burst = s_axi_``pat``_arburst; \ + assign req.ar.lock = s_axi_``pat``_arlock; \ + assign req.ar.cache = s_axi_``pat``_arcache; \ + assign req.ar.prot = s_axi_``pat``_arprot; \ + assign req.ar.qos = s_axi_``pat``_arqos; \ + assign req.ar.region = s_axi_``pat``_arregion; \ + assign req.ar.user = s_axi_``pat``_aruser; \ + \ + assign req.r_ready = s_axi_``pat``_rready; \ + \ + assign s_axi_``pat``_awready = rsp.aw_ready; \ + assign s_axi_``pat``_arready = rsp.ar_ready; \ + assign s_axi_``pat``_wready = rsp.w_ready; \ + \ + assign s_axi_``pat``_bvalid = rsp.b_valid; \ + assign s_axi_``pat``_bid = rsp.b.id; \ + assign 
s_axi_``pat``_bresp = rsp.b.resp; \ + assign s_axi_``pat``_buser = rsp.b.user; \ + \ + assign s_axi_``pat``_rvalid = rsp.r_valid; \ + assign s_axi_``pat``_rid = rsp.r.id; \ + assign s_axi_``pat``_rdata = rsp.r.data; \ + assign s_axi_``pat``_rresp = rsp.r.resp; \ + assign s_axi_``pat``_rlast = rsp.r.last; \ + assign s_axi_``pat``_ruser = rsp.r.user; +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +`endif diff --git a/hw/ip/iDMA/include/axi/axi_pkg.sv b/hw/ip/iDMA/include/axi/axi_pkg.sv new file mode 100644 index 000000000..92ede558c --- /dev/null +++ b/hw/ip/iDMA/include/axi/axi_pkg.sv @@ -0,0 +1,423 @@ +// Copyright (c) 2014-2020 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Andreas Kurth +// - Florian Zaruba +// - Wolfgang Roenninger +// - Fabian Schuiki +// - Matheus Cavalcante + +//! AXI Package +/// Contains all necessary type definitions, constants, and generally useful functions. +package axi_pkg; + /// AXI Transaction Burst Type. + typedef logic [1:0] burst_t; + /// AXI Transaction Response Type. + typedef logic [1:0] resp_t; + /// AXI Transaction Cacheability Type. + typedef logic [3:0] cache_t; + /// AXI Transaction Protection Type. + typedef logic [2:0] prot_t; + /// AXI Transaction Quality of Service Type. + typedef logic [3:0] qos_t; + /// AXI Transaction Region Type. 
+ typedef logic [3:0] region_t; + /// AXI Transaction Length Type. + typedef logic [7:0] len_t; + /// AXI Transaction Size Type. + typedef logic [2:0] size_t; + /// AXI5 Atomic Operation Type. + typedef logic [5:0] atop_t; // atomic operations + /// AXI5 Non-Secure Address Identifier. + typedef logic [3:0] nsaid_t; + + /// In a fixed burst: + /// - The address is the same for every transfer in the burst. + /// - The byte lanes that are valid are constant for all beats in the burst. However, within + /// those byte lanes, the actual bytes that have `wstrb` asserted can differ for each beat in + /// the burst. + /// This burst type is used for repeated accesses to the same location such as when loading or + /// emptying a FIFO. + localparam BURST_FIXED = 2'b00; + /// In an incrementing burst, the address for each transfer in the burst is an increment of the + /// address for the previous transfer. The increment value depends on the size of the transfer. + /// For example, the address for each transfer in a burst with a size of 4 bytes is the previous + /// address plus four. + /// This burst type is used for accesses to normal sequential memory. + localparam BURST_INCR = 2'b01; + /// A wrapping burst is similar to an incrementing burst, except that the address wraps around to + /// a lower address if an upper address limit is reached. + /// The following restrictions apply to wrapping bursts: + /// - The start address must be aligned to the size of each transfer. + /// - The length of the burst must be 2, 4, 8, or 16 transfers. + localparam BURST_WRAP = 2'b10; + + /// Normal access success. Indicates that a normal access has been successful. Can also indicate + /// that an exclusive access has failed. + localparam RESP_OKAY = 2'b00; + /// Exclusive access okay. Indicates that either the read or write portion of an exclusive access + /// has been successful. + localparam RESP_EXOKAY = 2'b01; + /// Slave error. 
Used when the access has reached the slave successfully, but the slave wishes to + /// return an error condition to the originating master. + localparam RESP_SLVERR = 2'b10; + /// Decode error. Generated, typically by an interconnect component, to indicate that there is no + /// slave at the transaction address. + localparam RESP_DECERR = 2'b11; + + /// When this bit is asserted, the interconnect, or any component, can delay the transaction + /// reaching its final destination for any number of cycles. + localparam CACHE_BUFFERABLE = 4'b0001; + /// When HIGH, Modifiable indicates that the characteristics of the transaction can be modified. + /// When Modifiable is LOW, the transaction is Non-modifiable. + localparam CACHE_MODIFIABLE = 4'b0010; + /// When this bit is asserted, read allocation of the transaction is recommended but is not + /// mandatory. + localparam CACHE_RD_ALLOC = 4'b0100; + /// When this bit is asserted, write allocation of the transaction is recommended but is not + /// mandatory. + localparam CACHE_WR_ALLOC = 4'b1000; + + /// Maximum number of bytes per beat, as specified by `size` (see Table A3-2). + function automatic shortint unsigned num_bytes(size_t size); + return 1 << size; + endfunction + + /// An overly long address type. + /// It lets us define functions that work generically for shorter addresses. We rely on the + /// synthesizer to optimize the unused bits away. + typedef logic [127:0] largest_addr_t; + + /// Aligned address of burst (see A3-51). + function automatic largest_addr_t aligned_addr(largest_addr_t addr, size_t size); + return (addr >> size) << size; + endfunction + + /// Wrap boundary of a `BURST_WRAP` transfer (see A3-51). + /// This is the lowest address accessed within a wrapping burst. + /// This address is aligned to the size and length of the burst. + /// The length of a `BURST_WRAP` has to be 2, 4, 8, or 16 transfers.
+ function automatic largest_addr_t wrap_boundary (largest_addr_t addr, size_t size, len_t len); + largest_addr_t wrap_addr; + + // pragma translate_off + `ifndef VERILATOR + assume (len == len_t'(4'b1) || len == len_t'(4'b11) || len == len_t'(4'b111) || + len == len_t'(4'b1111)) else + $error("AXI BURST_WRAP with not allowed len of: %0h", len); + `endif + // pragma translate_on + + // In A3-51 the wrap boundary is defined as: + // `Wrap_Boundary = (INT(Start_Address / (Number_Bytes × Burst_Length))) × + // (Number_Bytes × Burst_Length)` + // Whereas the aligned address is defined as: + // `Aligned_Address = (INT(Start_Address / Number_Bytes)) × Number_Bytes` + // This leads to the wrap boundary using the same calculation as the aligned address, difference + // being the additional dependency on the burst length. The addition in the case statement + // is equal to the multiplication with `Burst_Length` as a shift (used by `aligned_addr`) is + // equivalent with multiplication and division by a power of two, which conveniently are the + // only allowed values for `len` of a `BURST_WRAP`. + unique case (len) + 4'b1 : wrap_addr = (addr >> (unsigned'(size) + 1)) << (unsigned'(size) + 1); // multiply `Number_Bytes` by `2` + 4'b11 : wrap_addr = (addr >> (unsigned'(size) + 2)) << (unsigned'(size) + 2); // multiply `Number_Bytes` by `4` + 4'b111 : wrap_addr = (addr >> (unsigned'(size) + 3)) << (unsigned'(size) + 3); // multiply `Number_Bytes` by `8` + 4'b1111 : wrap_addr = (addr >> (unsigned'(size) + 4)) << (unsigned'(size) + 4); // multiply `Number_Bytes` by `16` + default : wrap_addr = '0; + endcase + return wrap_addr; + endfunction + + /// Address of beat (see A3-51). 
+ function automatic largest_addr_t + beat_addr(largest_addr_t addr, size_t size, len_t len, burst_t burst, shortint unsigned i_beat); + largest_addr_t ret_addr = addr; + largest_addr_t wrp_bond = '0; + if (burst == BURST_WRAP) begin + // do not trigger the function if there is no wrapping burst, to prevent assumptions firing + wrp_bond = wrap_boundary(addr, size, len); + end + if (i_beat != 0 && burst != BURST_FIXED) begin + // From A3-51: + // For an INCR burst, and for a WRAP burst for which the address has not wrapped, this + // equation determines the address of any transfer after the first transfer in a burst: + // `Address_N = Aligned_Address + (N – 1) × Number_Bytes` (N counts from 1 to len!) + ret_addr = aligned_addr(addr, size) + i_beat * num_bytes(size); + // From A3-51: + // For a WRAP burst, if Address_N = Wrap_Boundary + (Number_Bytes × Burst_Length), then: + // * Use this equation for the current transfer: + // `Address_N = Wrap_Boundary` + // * Use this equation for any subsequent transfers: + // `Address_N = Start_Address + ((N – 1) × Number_Bytes) – (Number_Bytes × Burst_Length)` + // This means that the address calculation of a `BURST_WRAP` fundamentally works the same + // as for a `BURST_INCR`, the difference is when the calculated address increments + // over the wrap threshold, the address wraps around by subtracting the accessed address + // space from the normal `BURST_INCR` address. The lower wrap boundary is equivalent to + // the wrap trigger condition minus the container size (`num_bytes(size) * (len + 1)`). + if (burst == BURST_WRAP && ret_addr >= wrp_bond + (num_bytes(size) * (len + 1))) begin + ret_addr = ret_addr - (num_bytes(size) * (len + 1)); + end + end + return ret_addr; + endfunction + + /// Index of lowest byte in beat (see A3-51).
+ function automatic shortint unsigned + beat_lower_byte(largest_addr_t addr, size_t size, len_t len, burst_t burst, + shortint unsigned strobe_width, shortint unsigned i_beat); + largest_addr_t _addr = beat_addr(addr, size, len, burst, i_beat); + return _addr - (_addr / strobe_width) * strobe_width; + endfunction + + /// Index of highest byte in beat (see A3-51). + function automatic shortint unsigned + beat_upper_byte(largest_addr_t addr, size_t size, len_t len, burst_t burst, + shortint unsigned strobe_width, shortint unsigned i_beat); + if (i_beat == 0) begin + return aligned_addr(addr, size) + (num_bytes(size) - 1) - (addr / strobe_width) * strobe_width; + end else begin + return beat_lower_byte(addr, size, len, burst, strobe_width, i_beat) + num_bytes(size) - 1; + end + endfunction + + /// Is the bufferable bit set? + function automatic logic bufferable(cache_t cache); + return |(cache & CACHE_BUFFERABLE); + endfunction + + /// Is the modifiable bit set? + function automatic logic modifiable(cache_t cache); + return |(cache & CACHE_MODIFIABLE); + endfunction + + /// Memory Type. + typedef enum logic [3:0] { + DEVICE_NONBUFFERABLE, + DEVICE_BUFFERABLE, + NORMAL_NONCACHEABLE_NONBUFFERABLE, + NORMAL_NONCACHEABLE_BUFFERABLE, + WTHRU_NOALLOCATE, + WTHRU_RALLOCATE, + WTHRU_WALLOCATE, + WTHRU_RWALLOCATE, + WBACK_NOALLOCATE, + WBACK_RALLOCATE, + WBACK_WALLOCATE, + WBACK_RWALLOCATE + } mem_type_t; + + /// Create an `AR_CACHE` field from a `mem_type_t` type. 
+ function automatic logic [3:0] get_arcache(mem_type_t mtype); + unique case (mtype) + DEVICE_NONBUFFERABLE : return 4'b0000; + DEVICE_BUFFERABLE : return 4'b0001; + NORMAL_NONCACHEABLE_NONBUFFERABLE : return 4'b0010; + NORMAL_NONCACHEABLE_BUFFERABLE : return 4'b0011; + WTHRU_NOALLOCATE : return 4'b1010; + WTHRU_RALLOCATE : return 4'b1110; + WTHRU_WALLOCATE : return 4'b1010; + WTHRU_RWALLOCATE : return 4'b1110; + WBACK_NOALLOCATE : return 4'b1011; + WBACK_RALLOCATE : return 4'b1111; + WBACK_WALLOCATE : return 4'b1011; + WBACK_RWALLOCATE : return 4'b1111; + endcase // mtype + endfunction + + /// Create an `AW_CACHE` field from a `mem_type_t` type. + function automatic logic [3:0] get_awcache(mem_type_t mtype); + unique case (mtype) + DEVICE_NONBUFFERABLE : return 4'b0000; + DEVICE_BUFFERABLE : return 4'b0001; + NORMAL_NONCACHEABLE_NONBUFFERABLE : return 4'b0010; + NORMAL_NONCACHEABLE_BUFFERABLE : return 4'b0011; + WTHRU_NOALLOCATE : return 4'b0110; + WTHRU_RALLOCATE : return 4'b0110; + WTHRU_WALLOCATE : return 4'b1110; + WTHRU_RWALLOCATE : return 4'b1110; + WBACK_NOALLOCATE : return 4'b0111; + WBACK_RALLOCATE : return 4'b0111; + WBACK_WALLOCATE : return 4'b1111; + WBACK_RWALLOCATE : return 4'b1111; + endcase // mtype + endfunction + + /// RESP precedence: DECERR > SLVERR > OKAY > EXOKAY. This is not defined in the AXI standard but + /// depends on the implementation. We consistently use the precedence above. Rationale: + /// - EXOKAY means an exclusive access was successful, whereas OKAY means it was not. Thus, if + /// OKAY and EXOKAY are to be merged, OKAY precedes because the exclusive access was not fully + /// successful. + /// - Both DECERR and SLVERR mean (part of) a transaction were unsuccessful, whereas OKAY means an + /// entire transaction was successful. Thus both DECERR and SLVERR precede OKAY. 
+ /// - DECERR means (part of) a transaction could not be routed to a slave component, whereas + /// SLVERR means the transaction reached a slave component but led to an error condition there. + /// Thus DECERR precedes SLVERR because DECERR happens earlier in the handling of a transaction. + function automatic resp_t resp_precedence(resp_t resp_a, resp_t resp_b); + unique case (resp_a) + RESP_OKAY: begin + // Any response except EXOKAY precedes OKAY. + if (resp_b == RESP_EXOKAY) begin + return resp_a; + end else begin + return resp_b; + end + end + RESP_EXOKAY: begin + // Any response precedes EXOKAY. + return resp_b; + end + RESP_SLVERR: begin + // Only DECERR precedes SLVERR. + if (resp_b == RESP_DECERR) begin + return resp_b; + end else begin + return resp_a; + end + end + RESP_DECERR: begin + // No response precedes DECERR. + return resp_a; + end + endcase + endfunction + + // ATOP[5:0] + /// - Sends a single data value with an address. + /// - The target swaps the value at the addressed location with the data value that is supplied in + /// the transaction. + /// - The original data value at the addressed location is returned. + /// - Outbound data size is 1, 2, 4, or 8 bytes. + /// - Inbound data size is the same as the outbound data size. + localparam ATOP_ATOMICSWAP = 6'b110000; + /// - Sends two data values, the compare value and the swap value, to the addressed location. + /// The compare and swap values are of equal size. + /// - The data value at the addressed location is checked against the compare value: + /// - If the values match, the swap value is written to the addressed location. + /// - If the values do not match, the swap value is not written to the addressed location. + /// - The original data value at the addressed location is returned. + /// - Outbound data size is 2, 4, 8, 16, or 32 bytes.
+ /// - Inbound data size is half of the outbound data size because the outbound data contains both + /// compare and swap values, whereas the inbound data has only the original data value. + localparam ATOP_ATOMICCMP = 6'b110001; + // ATOP[5:4] + /// Perform no atomic operation. + localparam ATOP_NONE = 2'b00; + /// - Sends a single data value with an address and the atomic operation to be performed. + /// - The target performs the operation using the sent data and value at the addressed location as + /// operands. + /// - The result is stored in the address location. + /// - A single response is given without data. + /// - Outbound data size is 1, 2, 4, or 8 bytes. + localparam ATOP_ATOMICSTORE = 2'b01; + /// Sends a single data value with an address and the atomic operation to be performed. + /// - The original data value at the addressed location is returned. + /// - The target performs the operation using the sent data and value at the addressed location as + /// operands. + /// - The result is stored in the address location. + /// - Outbound data size is 1, 2, 4, or 8 bytes. + /// - Inbound data size is the same as the outbound data size. + localparam ATOP_ATOMICLOAD = 2'b10; + // ATOP[3] + /// For AtomicStore and AtomicLoad transactions `AWATOP[3]` indicates the endianness that is + /// required for the atomic operation. The value of `AWATOP[3]` applies to arithmetic operations + /// only and is ignored for bitwise logical operations. + /// When deasserted, this bit indicates that the operation is little-endian. + localparam ATOP_LITTLE_END = 1'b0; + /// When asserted, this bit indicates that the operation is big-endian. + localparam ATOP_BIG_END = 1'b1; + // ATOP[2:0] + /// The value in memory is added to the sent data and the result stored in memory. + localparam ATOP_ADD = 3'b000; + /// Every set bit in the sent data clears the corresponding bit of the data in memory. 
+ localparam ATOP_CLR = 3'b001; + /// Bitwise exclusive OR of the sent data and value in memory. + localparam ATOP_EOR = 3'b010; + /// Every set bit in the sent data sets the corresponding bit of the data in memory. + localparam ATOP_SET = 3'b011; + /// The value stored in memory is the maximum of the existing value and sent data. This operation + /// assumes signed data. + localparam ATOP_SMAX = 3'b100; + /// The value stored in memory is the minimum of the existing value and sent data. This operation + /// assumes signed data. + localparam ATOP_SMIN = 3'b101; + /// The value stored in memory is the maximum of the existing value and sent data. This operation + /// assumes unsigned data. + localparam ATOP_UMAX = 3'b110; + /// The value stored in memory is the minimum of the existing value and sent data. This operation + /// assumes unsigned data. + localparam ATOP_UMIN = 3'b111; + // ATOP[5] == 1'b1 indicates that an atomic transaction has a read response + // Usage e.g.: if (req_i.aw.atop[axi_pkg::ATOP_R_RESP]) begin + localparam ATOP_R_RESP = 32'd5; + + // `xbar_latency_e` and `xbar_cfg_t` are documented in `doc/axi_xbar.md`. + /// Slice on Demux AW channel. + localparam logic [9:0] DemuxAw = (1 << 9); + /// Slice on Demux W channel. + localparam logic [9:0] DemuxW = (1 << 8); + /// Slice on Demux B channel. + localparam logic [9:0] DemuxB = (1 << 7); + /// Slice on Demux AR channel. + localparam logic [9:0] DemuxAr = (1 << 6); + /// Slice on Demux R channel. + localparam logic [9:0] DemuxR = (1 << 5); + /// Slice on Mux AW channel. + localparam logic [9:0] MuxAw = (1 << 4); + /// Slice on Mux W channel. + localparam logic [9:0] MuxW = (1 << 3); + /// Slice on Mux B channel. + localparam logic [9:0] MuxB = (1 << 2); + /// Slice on Mux AR channel. + localparam logic [9:0] MuxAr = (1 << 1); + /// Slice on Mux R channel. + localparam logic [9:0] MuxR = (1 << 0); + /// Latency configuration for `axi_xbar`.
+ typedef enum logic [9:0] { + NO_LATENCY = 10'b000_00_000_00, + CUT_SLV_AX = DemuxAw | DemuxAr, + CUT_MST_AX = MuxAw | MuxAr, + CUT_ALL_AX = DemuxAw | DemuxAr | MuxAw | MuxAr, + CUT_SLV_PORTS = DemuxAw | DemuxW | DemuxB | DemuxAr | DemuxR, + CUT_MST_PORTS = MuxAw | MuxW | MuxB | MuxAr | MuxR, + CUT_ALL_PORTS = 10'b111_11_111_11 + } xbar_latency_e; + + /// Configuration for `axi_xbar`. + typedef struct packed { + int unsigned NoSlvPorts; + int unsigned NoMstPorts; + int unsigned MaxMstTrans; + int unsigned MaxSlvTrans; + bit FallThrough; + xbar_latency_e LatencyMode; + int unsigned AxiIdWidthSlvPorts; + int unsigned AxiIdUsedSlvPorts; + bit UniqueIds; + int unsigned AxiAddrWidth; + int unsigned AxiDataWidth; + int unsigned NoAddrRules; + } xbar_cfg_t; + + /// Commonly used rule types for `axi_xbar` (64-bit addresses). + typedef struct packed { + int unsigned idx; + logic [63:0] start_addr; + logic [63:0] end_addr; + } xbar_rule_64_t; + + /// Commonly used rule types for `axi_xbar` (32-bit addresses). + typedef struct packed { + int unsigned idx; + logic [31:0] start_addr; + logic [31:0] end_addr; + } xbar_rule_32_t; +endpackage diff --git a/hw/ip/iDMA/include/axi/typedef.svh b/hw/ip/iDMA/include/axi/typedef.svh new file mode 100644 index 000000000..a2a860e50 --- /dev/null +++ b/hw/ip/iDMA/include/axi/typedef.svh @@ -0,0 +1,211 @@ +// Copyright (c) 2019 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License. +// +// Authors: +// - Andreas Kurth +// - Florian Zaruba +// - Wolfgang Roenninger + +// Macros to define AXI and AXI-Lite Channel and Request/Response Structs + +`ifndef AXI_TYPEDEF_SVH_ +`define AXI_TYPEDEF_SVH_ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// AXI4+ATOP Channel and Request/Response Structs +// +// Usage Example: +// `AXI_TYPEDEF_AW_CHAN_T(axi_aw_t, axi_addr_t, axi_id_t, axi_user_t) +// `AXI_TYPEDEF_W_CHAN_T(axi_w_t, axi_data_t, axi_strb_t, axi_user_t) +// `AXI_TYPEDEF_B_CHAN_T(axi_b_t, axi_id_t, axi_user_t) +// `AXI_TYPEDEF_AR_CHAN_T(axi_ar_t, axi_addr_t, axi_id_t, axi_user_t) +// `AXI_TYPEDEF_R_CHAN_T(axi_r_t, axi_data_t, axi_id_t, axi_user_t) +// `AXI_TYPEDEF_REQ_T(axi_req_t, axi_aw_t, axi_w_t, axi_ar_t) +// `AXI_TYPEDEF_RESP_T(axi_resp_t, axi_b_t, axi_r_t) +`define AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t) \ + typedef struct packed { \ + id_t id; \ + addr_t addr; \ + axi_pkg::len_t len; \ + axi_pkg::size_t size; \ + axi_pkg::burst_t burst; \ + logic lock; \ + axi_pkg::cache_t cache; \ + axi_pkg::prot_t prot; \ + axi_pkg::qos_t qos; \ + axi_pkg::region_t region; \ + axi_pkg::atop_t atop; \ + user_t user; \ + } aw_chan_t; +`define AXI_TYPEDEF_W_CHAN_T(w_chan_t, data_t, strb_t, user_t) \ + typedef struct packed { \ + data_t data; \ + strb_t strb; \ + logic last; \ + user_t user; \ + } w_chan_t; +`define AXI_TYPEDEF_B_CHAN_T(b_chan_t, id_t, user_t) \ + typedef struct packed { \ + id_t id; \ + axi_pkg::resp_t resp; \ + user_t user; \ + } b_chan_t; +`define AXI_TYPEDEF_AR_CHAN_T(ar_chan_t, addr_t, id_t, user_t) \ + typedef struct packed { \ + id_t id; \ + addr_t addr; \ + axi_pkg::len_t len; \ + axi_pkg::size_t size; \ + axi_pkg::burst_t burst; \ + logic lock; \ + axi_pkg::cache_t cache; \ + axi_pkg::prot_t prot; \ + axi_pkg::qos_t qos; \ + axi_pkg::region_t region; \ + user_t 
user; \ + } ar_chan_t; +`define AXI_TYPEDEF_R_CHAN_T(r_chan_t, data_t, id_t, user_t) \ + typedef struct packed { \ + id_t id; \ + data_t data; \ + axi_pkg::resp_t resp; \ + logic last; \ + user_t user; \ + } r_chan_t; +`define AXI_TYPEDEF_REQ_T(req_t, aw_chan_t, w_chan_t, ar_chan_t) \ + typedef struct packed { \ + aw_chan_t aw; \ + logic aw_valid; \ + w_chan_t w; \ + logic w_valid; \ + logic b_ready; \ + ar_chan_t ar; \ + logic ar_valid; \ + logic r_ready; \ + } req_t; +`define AXI_TYPEDEF_RESP_T(resp_t, b_chan_t, r_chan_t) \ + typedef struct packed { \ + logic aw_ready; \ + logic ar_ready; \ + logic w_ready; \ + logic b_valid; \ + b_chan_t b; \ + logic r_valid; \ + r_chan_t r; \ + } resp_t; +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// All AXI4+ATOP Channels and Request/Response Structs in One Macro +// +// This can be used whenever the user is not interested in "precise" control of the naming of the +// individual channels. +// +// Usage Example: +// `AXI_TYPEDEF_ALL(axi, addr_t, id_t, data_t, strb_t, user_t) +// +// This defines `axi_req_t` and `axi_resp_t` request/response structs as well as `axi_aw_chan_t`, +// `axi_w_chan_t`, `axi_b_chan_t`, `axi_ar_chan_t`, and `axi_r_chan_t` channel structs. 
+`define AXI_TYPEDEF_ALL(__name, __addr_t, __id_t, __data_t, __strb_t, __user_t) \ + `AXI_TYPEDEF_AW_CHAN_T(__name``_aw_chan_t, __addr_t, __id_t, __user_t) \ + `AXI_TYPEDEF_W_CHAN_T(__name``_w_chan_t, __data_t, __strb_t, __user_t) \ + `AXI_TYPEDEF_B_CHAN_T(__name``_b_chan_t, __id_t, __user_t) \ + `AXI_TYPEDEF_AR_CHAN_T(__name``_ar_chan_t, __addr_t, __id_t, __user_t) \ + `AXI_TYPEDEF_R_CHAN_T(__name``_r_chan_t, __data_t, __id_t, __user_t) \ + `AXI_TYPEDEF_REQ_T(__name``_req_t, __name``_aw_chan_t, __name``_w_chan_t, __name``_ar_chan_t) \ + `AXI_TYPEDEF_RESP_T(__name``_resp_t, __name``_b_chan_t, __name``_r_chan_t) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// AXI4-Lite Channel and Request/Response Structs +// +// Usage Example: +// `AXI_LITE_TYPEDEF_AW_CHAN_T(axi_lite_aw_t, axi_lite_addr_t) +// `AXI_LITE_TYPEDEF_W_CHAN_T(axi_lite_w_t, axi_lite_data_t, axi_lite_strb_t) +// `AXI_LITE_TYPEDEF_B_CHAN_T(axi_lite_b_t) +// `AXI_LITE_TYPEDEF_AR_CHAN_T(axi_lite_ar_t, axi_lite_addr_t) +// `AXI_LITE_TYPEDEF_R_CHAN_T(axi_lite_r_t, axi_lite_data_t) +// `AXI_LITE_TYPEDEF_REQ_T(axi_lite_req_t, axi_lite_aw_t, axi_lite_w_t, axi_lite_ar_t) +// `AXI_LITE_TYPEDEF_RESP_T(axi_lite_resp_t, axi_lite_b_t, axi_lite_r_t) +`define AXI_LITE_TYPEDEF_AW_CHAN_T(aw_chan_lite_t, addr_t) \ + typedef struct packed { \ + addr_t addr; \ + axi_pkg::prot_t prot; \ + } aw_chan_lite_t; +`define AXI_LITE_TYPEDEF_W_CHAN_T(w_chan_lite_t, data_t, strb_t) \ + typedef struct packed { \ + data_t data; \ + strb_t strb; \ + } w_chan_lite_t; +`define AXI_LITE_TYPEDEF_B_CHAN_T(b_chan_lite_t) \ + typedef struct packed { \ + axi_pkg::resp_t resp; \ + } b_chan_lite_t; +`define AXI_LITE_TYPEDEF_AR_CHAN_T(ar_chan_lite_t, addr_t) \ + typedef struct packed { \ + addr_t addr; \ + axi_pkg::prot_t prot; \ + } ar_chan_lite_t; +`define 
AXI_LITE_TYPEDEF_R_CHAN_T(r_chan_lite_t, data_t) \ + typedef struct packed { \ + data_t data; \ + axi_pkg::resp_t resp; \ + } r_chan_lite_t; +`define AXI_LITE_TYPEDEF_REQ_T(req_lite_t, aw_chan_lite_t, w_chan_lite_t, ar_chan_lite_t) \ + typedef struct packed { \ + aw_chan_lite_t aw; \ + logic aw_valid; \ + w_chan_lite_t w; \ + logic w_valid; \ + logic b_ready; \ + ar_chan_lite_t ar; \ + logic ar_valid; \ + logic r_ready; \ + } req_lite_t; +`define AXI_LITE_TYPEDEF_RESP_T(resp_lite_t, b_chan_lite_t, r_chan_lite_t) \ + typedef struct packed { \ + logic aw_ready; \ + logic w_ready; \ + b_chan_lite_t b; \ + logic b_valid; \ + logic ar_ready; \ + r_chan_lite_t r; \ + logic r_valid; \ + } resp_lite_t; +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// All AXI4-Lite Channels and Request/Response Structs in One Macro +// +// This can be used whenever the user is not interested in "precise" control of the naming of the +// individual channels. +// +// Usage Example: +// `AXI_LITE_TYPEDEF_ALL(axi_lite, addr_t, data_t, strb_t) +// +// This defines `axi_lite_req_t` and `axi_lite_resp_t` request/response structs as well as +// `axi_lite_aw_chan_t`, `axi_lite_w_chan_t`, `axi_lite_b_chan_t`, `axi_lite_ar_chan_t`, and +// `axi_lite_r_chan_t` channel structs. 
+`define AXI_LITE_TYPEDEF_ALL(__name, __addr_t, __data_t, __strb_t) \ + `AXI_LITE_TYPEDEF_AW_CHAN_T(__name``_aw_chan_t, __addr_t) \ + `AXI_LITE_TYPEDEF_W_CHAN_T(__name``_w_chan_t, __data_t, __strb_t) \ + `AXI_LITE_TYPEDEF_B_CHAN_T(__name``_b_chan_t) \ + `AXI_LITE_TYPEDEF_AR_CHAN_T(__name``_ar_chan_t, __addr_t) \ + `AXI_LITE_TYPEDEF_R_CHAN_T(__name``_r_chan_t, __data_t) \ + `AXI_LITE_TYPEDEF_REQ_T(__name``_req_t, __name``_aw_chan_t, __name``_w_chan_t, __name``_ar_chan_t) \ + `AXI_LITE_TYPEDEF_RESP_T(__name``_resp_t, __name``_b_chan_t, __name``_r_chan_t) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +`endif diff --git a/hw/ip/iDMA/include/idma/idma_pkg.sv b/hw/ip/iDMA/include/idma/idma_pkg.sv new file mode 100644 index 000000000..bbd87bb0f --- /dev/null +++ b/hw/ip/iDMA/include/idma/idma_pkg.sv @@ -0,0 +1,82 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Thomas Benz + +/// iDMA Package +/// Contains all static type definitions +package idma_pkg; + + /// Error Handling Capabilities + /// - `NO_ERROR_HANDLING`: No error handling hardware is present + /// - `ERROR_HANDLING`: Error handling hardware is present + typedef enum logic [0:0] { + NO_ERROR_HANDLING, + ERROR_HANDLING + } error_cap_e; + + /// Error Handling Type + typedef logic [0:0] idma_eh_req_t; + + /// Error Handling Action + /// - `CONTINUE`: The current 1D transfer will just be continued + /// - `ABORT`: The current 1D transfer will be aborted + typedef enum logic [0:0] { + CONTINUE, + ABORT + } eh_action_e; + + /// Error Type type + typedef logic [1:0] err_type_t; + + /// Error Type + /// - `BUS_READ`: Error happened during a manager bus read + /// - `BUS_WRITE`: Error happened during a manager bus write + /// - `BACKEND`: Internal error to the backend; currently only transfer length == 0 + /// - `ND_MIDEND`: Internal error to the nd-midend; currently all number of repetitions are + /// zero + typedef enum logic [1:0] { + BUS_READ, + BUS_WRITE, + BACKEND, + ND_MIDEND + } err_type_e; + + /// iDMA busy type: contains the busy fields of the various sub units + typedef struct packed { + logic buffer_busy; + logic r_dp_busy; + logic w_dp_busy; + logic r_leg_busy; + logic w_leg_busy; + logic eh_fsm_busy; + logic eh_cnt_busy; + logic raw_coupler_busy; + } idma_busy_t; + + /// AXI4 option type: contains the AXI4 options fields + typedef struct packed { + axi_pkg::burst_t burst; + axi_pkg::cache_t cache; + logic lock; + axi_pkg::prot_t prot; + axi_pkg::qos_t qos; + axi_pkg::region_t region; + } axi_options_t; + + /// Backend option type: + /// - `decouple_aw`: `AWs` will only be sent after the first corresponding `R` is received + /// - `decouple_rw`: decouples the `R` and `W` channels completely: can cause deadlocks + /// - `*_max_llen`: the maximum log length of a burst + /// - `*_reduce_len`: should bursts be 
reduced in length? + typedef struct packed { + logic decouple_aw; + logic decouple_rw; + logic [2:0] src_max_llen; + logic [2:0] dst_max_llen; + logic src_reduce_len; + logic dst_reduce_len; + } backend_options_t; + +endpackage : idma_pkg diff --git a/hw/ip/iDMA/include/idma/typedef.svh b/hw/ip/iDMA/include/idma/typedef.svh new file mode 100644 index 000000000..936421e0b --- /dev/null +++ b/hw/ip/iDMA/include/idma/typedef.svh @@ -0,0 +1,95 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Thomas Benz + +// Macros to define iDMA structs + +`ifndef IDMA_TYPEDEF_SVH_ +`define IDMA_TYPEDEF_SVH_ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// iDMA Request and Response Structs +// +// Usage Example: +// `IDMA_TYPEDEF_OPTIONS_T(options_t, axi_id_t) +// `IDMA_TYPEDEF_ERR_PAYLOAD_T(err_payload_t, axi_addr_t) +// `IDMA_TYPEDEF_REQ_T(idma_req_t, tf_len_t, axi_addr_t, options_t) +// `IDMA_TYPEDEF_RSP_T(idma_rsp_t, err_payload_t) +`define IDMA_TYPEDEF_OPTIONS_T(options_t, axi_id_t) \ + typedef struct packed { \ + axi_id_t axi_id; \ + idma_pkg::axi_options_t src; \ + idma_pkg::axi_options_t dst; \ + idma_pkg::backend_options_t beo; \ + logic last; \ + } options_t; +`define IDMA_TYPEDEF_ERR_PAYLOAD_T(err_payload_t, axi_addr_t) \ + typedef struct packed { \ + axi_pkg::resp_t cause; \ + idma_pkg::err_type_t err_type; \ + axi_addr_t burst_addr; \ + } err_payload_t; +`define IDMA_TYPEDEF_REQ_T(idma_req_t, tf_len_t, axi_addr_t, options_t) \ + typedef struct packed { \ + tf_len_t length; \ + axi_addr_t src_addr; \ + axi_addr_t dst_addr; \ + options_t opt; \ + } idma_req_t; +`define IDMA_TYPEDEF_RSP_T(idma_rsp_t, err_payload_t) \ + typedef struct packed { \ + logic last; \ + logic error; \ + err_payload_t pld; \ + } idma_rsp_t; 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// iDMA Full Request and Response Structs +// +// Usage Example: +// `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, axi_id_t, axi_addr_t, tf_len_t) +// `IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, axi_addr_t) +`define IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, axi_id_t, axi_addr_t, tf_len_t) \ + `IDMA_TYPEDEF_OPTIONS_T(options_t, axi_id_t) \ + `IDMA_TYPEDEF_REQ_T(idma_req_t, tf_len_t, axi_addr_t, options_t) +`define IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, axi_addr_t) \ + `IDMA_TYPEDEF_ERR_PAYLOAD_T(err_payload_t, axi_addr_t) \ + `IDMA_TYPEDEF_RSP_T(idma_rsp_t, err_payload_t) +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// iDMA n-dimensional Request Struct +// +// Usage Example: +// `IDMA_TYPEDEF_D_REQ_T(idma_d_req_t, reps_t, strides_t) +// `IDMA_TYPEDEF_ND_REQ_T(idma_nd_req_t, idma_req_t, idma_d_req_t) +`define IDMA_TYPEDEF_D_REQ_T(idma_d_req_t, reps_t, strides_t) \ + typedef struct packed { \ + reps_t reps; \ + strides_t src_strides; \ + strides_t dst_strides; \ + } idma_d_req_t; +`define IDMA_TYPEDEF_ND_REQ_T(idma_nd_req_t, idma_req_t, idma_d_req_t) \ + typedef struct packed { \ + idma_req_t burst_req; \ + idma_d_req_t [NumDim-2:0] d_req; \ + } idma_nd_req_t; +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// iDMA Full n-dimensional Request Struct +// +// Usage Example: +// `IDMA_TYPEDEF_FULL_ND_REQ_T(idma_nd_req_t, idma_req_t, reps_t, strides_t) +`define IDMA_TYPEDEF_FULL_ND_REQ_T(idma_nd_req_t, idma_req_t, reps_t, strides_t) \ + `IDMA_TYPEDEF_D_REQ_T(idma_d_req_t, 
reps_t, strides_t) \ + `IDMA_TYPEDEF_ND_REQ_T(idma_nd_req_t, idma_req_t, idma_d_req_t) +//////////////////////////////////////////////////////////////////////////////////////////////////// + +`endif diff --git a/hw/ip/iDMA/src/idma_axi_transport_layer.sv b/hw/ip/iDMA/src/idma_axi_transport_layer.sv new file mode 100644 index 000000000..0e02f7e8b --- /dev/null +++ b/hw/ip/iDMA/src/idma_axi_transport_layer.sv @@ -0,0 +1,469 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Thomas Benz + +`include "common_cells/registers.svh" + +/// Implementing the AXI4 transport layer in the iDMA backend. +module idma_axi_transport_layer #( + /// Data width + parameter int unsigned DataWidth = 32'd16, + /// The depth of the internal reorder buffer: + /// - '2': minimal possible configuration + /// - '3': efficiently handle misaligned transfers (recommended) + parameter int unsigned BufferDepth = 32'd3, + /// Mask invalid data on the manager interface + parameter bit MaskInvalidData = 1'b1, + /// Print the info of the FIFO configuration + parameter bit PrintFifoInfo = 1'b0, + /// `r_dp_req_t` type: + parameter type r_dp_req_t = logic, + /// `w_dp_req_t` type: + parameter type w_dp_req_t = logic, + /// `r_dp_rsp_t` type: + parameter type r_dp_rsp_t = logic, + /// `w_dp_rsp_t` type: + parameter type w_dp_rsp_t = logic, + /// AXI 4 `AW` channel type + parameter type axi_aw_chan_t = logic, + /// AXI 4 `AR` channel type + parameter type axi_ar_chan_t = logic, + /// AXI 4 Request channel type + parameter type axi_req_t = logic, + /// AXI 4 Response channel type + parameter type axi_rsp_t = logic +)( + /// Clock + input logic clk_i, + /// Asynchronous reset, active low + input logic rst_ni, + /// Testmode in + input logic testmode_i, + + /// AXI4+ATOP manager port request + output axi_req_t axi_req_o, + /// AXI4+ATOP manager port response + input axi_rsp_t
axi_rsp_i, + + /// Read datapath request + input r_dp_req_t r_dp_req_i, + /// Read datapath request valid + input logic r_dp_valid_i, + /// Read datapath request ready + output logic r_dp_ready_o, + + /// Read datapath response + output r_dp_rsp_t r_dp_rsp_o, + /// Read datapath response valid + output logic r_dp_valid_o, + /// Read datapath response ready + input logic r_dp_ready_i, + + /// Write datapath request + input w_dp_req_t w_dp_req_i, + /// Write datapath request valid + input logic w_dp_valid_i, + /// Write datapath request ready + output logic w_dp_ready_o, + + /// Write datapath response + output w_dp_rsp_t w_dp_rsp_o, + /// Write datapath response valid + output logic w_dp_valid_o, + /// Write datapath response ready + input logic w_dp_ready_i, + + /// Read meta request + input axi_ar_chan_t ar_req_i, + /// Read meta request valid + input logic ar_valid_i, + /// Read meta request ready + output logic ar_ready_o, + + /// Write meta request + input axi_aw_chan_t aw_req_i, + /// Write meta request valid + input logic aw_valid_i, + /// Write meta request ready + output logic aw_ready_o, + + /// Datapath poison signal + input logic dp_poison_i, + + /// Read part of the datapath is busy + output logic r_dp_busy_o, + /// Write part of the datapath is busy + output logic w_dp_busy_o, + /// Buffer is busy + output logic buffer_busy_o +); + + /// Strobe width + localparam int unsigned StrbWidth = DataWidth / 8; + + /// Data type + typedef logic [DataWidth-1:0] data_t; + /// Strobe type + typedef logic [StrbWidth-1:0] strb_t; + /// Byte type + typedef logic [7:0] byte_t; + + // offsets needed for masks to fill/empty buffer + strb_t r_first_mask; + strb_t r_last_mask; + strb_t w_first_mask; + strb_t w_last_mask; + + // hold one bit state: is this the first read? + logic first_r_d, first_r_q; + + // shifted data flowing into the buffer + byte_t [StrbWidth-1:0] buffer_in; + + // read aligned in mask.
needs to be shifted together with the data before + // it can be used to mask valid data flowing into the buffer + strb_t read_aligned_in_mask; + + // in mask is write aligned: it is the result of the read aligned in mask + // that is shifted together with the data in the barrel shifter + strb_t mask_in; + + // inbound control signals to the read buffer: controlled by the read process + strb_t buffer_in_valid; + strb_t buffer_in_ready; + logic in_valid; + logic in_ready; + + // corresponds to the strobe: the write aligned data that is currently valid in the buffer + strb_t mask_out; + + // write signals: is this the first / last element in a burst? + logic first_w; + logic last_w; + + // aligned and coalesced data leaving the buffer + byte_t [StrbWidth-1:0] buffer_out; + + // A temporary signal required to write the output of the buffer to before assigning it to + // the AXI bus. This is required to be compatible with some of the Questasim Versions and some + // of the parametrizations (e.g. 
DataWidth = 16) + data_t buffer_data_masked; + + // outbound control signals of the buffer: controlled by the write process + strb_t buffer_out_valid; + strb_t buffer_out_ready; + + // write happens + logic write_happening; + // buffer is ready to write the requested data + logic ready_to_write; + // first transfer is possible - this signal is used to detect + // the first write transfer in a burst + logic first_possible; + // buffer is completely empty + logic buffer_clean; + + // we require a counter to hold the current beat in the burst + logic [7:0] w_num_beats_d, w_num_beats_q; + logic w_cnt_valid_d, w_cnt_valid_q; + + + //-------------------------------------- + // Mask pre-calculation + //-------------------------------------- + // in contiguous transfers that are unaligned, there will be some + // invalid bytes at the beginning and the end of the stream + // example: 25B in 64 bit system + // iiiivvvv|vvvvvvvv|vvvvvvvv|vvvvviii + // first msk|----full mask----|last msk + + // read align masks + assign r_first_mask = '1 << r_dp_req_i.offset; + assign r_last_mask = '1 >> (StrbWidth - r_dp_req_i.tailer); + + // write align masks + assign w_first_mask = '1 << w_dp_req_i.offset; + assign w_last_mask = '1 >> (StrbWidth - w_dp_req_i.tailer); + + + //-------------------------------------- + // Read meta channel + //-------------------------------------- + // connect the ar requests to the AXI bus + assign axi_req_o.ar = ar_req_i; + assign axi_req_o.ar_valid = ar_valid_i; + assign ar_ready_o = axi_rsp_i.ar_ready; + + + //-------------------------------------- + // In mask generation + //-------------------------------------- + // in the case of unaligned reads -> not all data is valid + always_comb begin : proc_in_mask_generator + // default case: all ones + read_aligned_in_mask = '1; + // is first word: some bytes at the beginning may be invalid + read_aligned_in_mask = first_r_q ? 
+ read_aligned_in_mask & r_first_mask : read_aligned_in_mask; + // is last word in read burst: some bytes at the end may be invalid + if (r_dp_req_i.tailer != '0) begin + read_aligned_in_mask = axi_rsp_i.r.last ? + read_aligned_in_mask & r_last_mask : read_aligned_in_mask; + end + end + + + //-------------------------------------- + // Barrel shifter + //-------------------------------------- + // data arrives in chunks of length DataWidth, the buffer will be filled with + // the realigned data. StrbWidth bytes will be inserted starting from the + // provided address, overflows will naturally wrap + + // a barrel shifter is a concatenation of the same array twice and a normal + // shift. Optimized for Synopsys DesignWare. + assign buffer_in = {axi_rsp_i.r.data, axi_rsp_i.r.data} >> (r_dp_req_i.shift * 8); + assign mask_in = {read_aligned_in_mask, read_aligned_in_mask} >> r_dp_req_i.shift; + + + //-------------------------------------- + // Read control + //-------------------------------------- + // controls the next state of the read flag + always_comb begin : proc_first_read + // sticky is first bit for read + if (!axi_rsp_i.r.last & axi_rsp_i.r_valid & axi_req_o.r_ready) begin + // new transfer has started + first_r_d = 1'b0; + end else if (axi_rsp_i.r.last & axi_rsp_i.r_valid & axi_req_o.r_ready) begin + // finish read burst + first_r_d = 1'b1; + end else begin + // no change + first_r_d = first_r_q; + end + end + + // the buffer can be pushed to if all the masked FIFO buffers (mask_in) are ready. + assign in_ready = &(buffer_in_ready | ~mask_in); + // the read can accept data if the buffer is ready and the response channel is ready + assign axi_req_o.r_ready = in_ready & r_dp_ready_i; + + // once valid data is applied, it can be pushed in all the selected (mask_in) buffers + // be sure the response channel is ready + assign in_valid = axi_rsp_i.r_valid & in_ready & r_dp_ready_i; + assign buffer_in_valid = in_valid ?
mask_in : '0; + + // r_dp_ready_o is triggered by the last element arriving from the read + assign r_dp_ready_o = r_dp_valid_i & r_dp_ready_i & + axi_rsp_i.r.last & axi_rsp_i.r_valid & in_ready; + + // connect r_dp response payload + assign r_dp_rsp_o.resp = axi_rsp_i.r.resp; + assign r_dp_rsp_o.last = axi_rsp_i.r.last; + assign r_dp_rsp_o.first = first_r_q; + + // r_dp_valid_o is triggered once the last element is here or an error occurs + assign r_dp_valid_o = axi_rsp_i.r_valid & in_ready & (axi_rsp_i.r.last | (|axi_rsp_i.r.resp)); + + + //-------------------------------------- + // Write meta channel + //-------------------------------------- + // connect the aw requests to the AXI bus + assign axi_req_o.aw = aw_req_i; + assign axi_req_o.aw_valid = aw_valid_i; + assign aw_ready_o = axi_rsp_i.aw_ready; + + + //-------------------------------------- + // Out mask generation -> (wstrb mask) + //-------------------------------------- + // only pop the data actually needed for write from the buffer, + // determine valid data to pop by calculation the wstrb + always_comb begin : proc_out_mask_generator + // default case: all ones + mask_out = '1; + // is first word: some bytes at the beginning may be invalid + mask_out = first_w ? (mask_out & w_first_mask) : mask_out; + // is last word in write burst: some bytes at the end may be invalid + if (w_dp_req_i.tailer != '0 & last_w) begin + mask_out = mask_out & w_last_mask; + end + end + + + //-------------------------------------- + // Write control + //-------------------------------------- + // write is decoupled from read, due to misalignment in the read/write + // addresses, page crossing can be encountered at any time. + // To handle this efficiently, a 2-to-1 or 1-to-2 mapping of r/w beats + // is required. The write unit needs to keep track of progress through + // a counter and cannot use `r last` for that. + + // Once buffer contains a full line -> all FIFOs are non-empty push it out. 
+ + // all elements needed (defined by the mask) are in the buffer and the buffer is non-empty + assign ready_to_write = ((buffer_out_valid & mask_out) == mask_out) & (buffer_out_valid != '0); + + // data needed by the first mask is available in the buffer -> r_first happened for sure + // this signal can be high during a transfer as well, it needs to be masked + assign first_possible = ((buffer_out_valid & w_first_mask) == w_first_mask) & + (buffer_out_valid != '0); + + // the buffer is completely empty and idle + assign buffer_clean = &(~buffer_out_valid); + + // write happening: both the bus (w_ready) and the buffer (ready_to_write) is high + assign write_happening = ready_to_write & axi_rsp_i.w_ready; + + // the main buffer is conditionally to the write mask popped + assign buffer_out_ready = write_happening ? mask_out : '0; + + // signal the bus that we are ready + assign axi_req_o.w_valid = ready_to_write; + + // connect data and strobe either directly or mask invalid data + if (MaskInvalidData) begin : gen_mask_invalid_data + + // always_comb process implements masking of invalid data + always_comb begin : proc_mask + // defaults + axi_req_o.w.data = '0; + axi_req_o.w.strb = '0; + buffer_data_masked = '0; + // control the write to the bus apply data to the bus only if data should be written + if (ready_to_write == 1'b1 & !dp_poison_i) begin + // assign data from buffers, mask non valid entries + for (int i = 0; i < StrbWidth; i++) begin + buffer_data_masked[i*8 +: 8] = mask_out[i] ? buffer_out[i] : 8'b0; + end + // assign the output + axi_req_o.w.data = buffer_data_masked; + // assign the out mask to the strobe + axi_req_o.w.strb = mask_out; + end + end + + end else begin : gen_direct_connect + // not used signal + assign buffer_data_masked = '0; + // simpler: direct connection + assign axi_req_o.w.data = buffer_out; + assign axi_req_o.w.strb = dp_poison_i ? 
'0 : mask_out; + end + + // the w last signal should only be applied to the bus if an actual transfer happens + assign axi_req_o.w.last = last_w & ready_to_write; + + // we are ready for the next transfer internally, once the w last signal is applied + assign w_dp_ready_o = last_w & write_happening; + + // the write process: keeps track of remaining beats in burst + always_comb begin : proc_write_control + // defaults: + // beat counter + w_num_beats_d = w_num_beats_q; + w_cnt_valid_d = w_cnt_valid_q; + // mask control + first_w = 1'b0; + last_w = 1'b0; + + // differentiate between the burst and non-burst case. If a transfer + // consists just of one beat the counters are disabled + if (w_dp_req_i.is_single) begin + // in the single case the transfer is both first and last. + first_w = 1'b1; + last_w = 1'b1; + + // in the burst case the counters are needed to keep track of the progress of sending + // beats. The w_last_o depends on the state of the counter + end else begin + // first transfer happens as soon as a) the buffer is ready for a first transfer and b) + // the counter is currently invalid + first_w = first_possible & ~w_cnt_valid_q; + + // last happens as soon as a) the counter is valid and b) the counter is now down to 1 + last_w = w_cnt_valid_q & (w_num_beats_q == 8'h01); + + // load the counter with data in a first cycle, only modifying state if bus is ready + if (first_w && write_happening) begin + w_num_beats_d = w_dp_req_i.num_beats; + w_cnt_valid_d = 1'b1; + end + + // if we hit the last element, invalidate the counter, only modifying state + // if bus is ready + if (last_w && write_happening) begin + w_cnt_valid_d = 1'b0; + end + + // count down the beats if the counter is valid and valid data is written to the bus + if (w_cnt_valid_q && write_happening) w_num_beats_d = w_num_beats_q - 8'h01; + end + end + + + //-------------------------------------- + // Write response + //-------------------------------------- + // connect w_dp response payload 
+ assign w_dp_rsp_o.resp = axi_rsp_i.b.resp; + assign w_dp_rsp_o.user = axi_rsp_i.b.user; + + // w_dp_valid_o is triggered once the write answer is here + assign w_dp_valid_o = axi_rsp_i.b_valid; + + // create back pressure on the b channel if the higher parts of the DMA cannot accept more + // write responses + assign axi_req_o.b_ready = w_dp_ready_i; + + + //-------------------------------------- + // Write user signals + //-------------------------------------- + // in the default implementation: no need for the write user signals + assign axi_req_o.w.user = '0; + + + //-------------------------------------- + // Buffer + //-------------------------------------- + idma_buffer #( + .BufferDepth ( BufferDepth ), + .StrbWidth ( StrbWidth ), + .PrintFifoInfo ( PrintFifoInfo ), + .strb_t ( strb_t ), + .byte_t ( byte_t ) + ) i_idma_buffer ( + .clk_i, + .rst_ni, + .testmode_i, + .data_i ( buffer_in ), + .valid_i ( buffer_in_valid ), + .ready_o ( buffer_in_ready ), + .data_o ( buffer_out ), + .valid_o ( buffer_out_valid ), + .ready_i ( buffer_out_ready ) + ); + + + //-------------------------------------- + // Module Control + //-------------------------------------- + assign r_dp_busy_o = r_dp_valid_i | r_dp_ready_o; + assign w_dp_busy_o = w_dp_valid_i | w_dp_ready_o; + assign buffer_busy_o = !buffer_clean; + + + //-------------------------------------- + // State + //-------------------------------------- + `FF(first_r_q, first_r_d, '1, clk_i, rst_ni) + `FF(w_cnt_valid_q, w_cnt_valid_d, '0, clk_i, rst_ni) + `FF(w_num_beats_q, w_num_beats_d, '0, clk_i, rst_ni) + +endmodule : idma_axi_transport_layer diff --git a/hw/ip/iDMA/src/idma_backend.sv b/hw/ip/iDMA/src/idma_backend.sv new file mode 100644 index 000000000..73f6c7834 --- /dev/null +++ b/hw/ip/iDMA/src/idma_backend.sv @@ -0,0 +1,699 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Thomas Benz + +`include "../include/axi/typedef.svh" + +/// The iDMA backend implements an arbitrary 1D copy engine using the AXI4 protocol. +module idma_backend #( + /// Data width + parameter int unsigned DataWidth = 32'd16, + /// Address width + parameter int unsigned AddrWidth = 32'd24, + /// AXI user width + parameter int unsigned UserWidth = 32'd1, + /// AXI ID width + parameter int unsigned AxiIdWidth = 32'd1, + /// Number of transactions that can be in-flight concurrently + parameter int unsigned NumAxInFlight = 32'd2, + /// The depth of the internal reorder buffer: + /// - '2': minimal possible configuration + /// - '3': efficiently handle misaligned transfers (recommended) + parameter int unsigned BufferDepth = 32'd2, + /// Width of a transfer: max transfer size is `2**TFLenWidth` bytes + parameter int unsigned TFLenWidth = 32'd24, + /// The depth of the memory system the backend is attached to + parameter int unsigned MemSysDepth = 32'd0, + /// Should the `R`-`AW` coupling hardware be present? (recommended) + parameter bit RAWCouplingAvail = 1'b1, + /// Mask invalid data on the manager interface + parameter bit MaskInvalidData = 1'b1, + /// Should hardware legalization be present? (recommended) + /// If not, software legalization is required to ensure the transfers are + /// AXI4-conformal + parameter bit HardwareLegalizer = 1'b1, + /// Reject zero-length transfers + parameter bit RejectZeroTransfers = 1'b1, + /// Should the error handler be present?
+ parameter idma_pkg::error_cap_e ErrorCap = idma_pkg::ERROR_HANDLING, + /// Print the info of the FIFO configuration + parameter bit PrintFifoInfo = 1'b0, + /// 1D iDMA request type + parameter type idma_req_t = logic, + /// iDMA response type + parameter type idma_rsp_t = logic, + /// Error Handler request type + parameter type idma_eh_req_t = logic, + /// iDMA busy signal + parameter type idma_busy_t = logic, + /// AXI4+ATOP request type + parameter type axi_req_t = logic, + /// AXI4+ATOP response type + parameter type axi_rsp_t = logic, + /// Strobe Width (do not override!) + parameter int unsigned StrbWidth = DataWidth / 8, + /// Offset Width (do not override!) + parameter int unsigned OffsetWidth = $clog2(StrbWidth) +)( + /// Clock + input logic clk_i, + /// Asynchronous reset, active low + input logic rst_ni, + /// Testmode in + input logic testmode_i, + + /// 1D iDMA request + input idma_req_t idma_req_i, + /// 1D iDMA request valid + input logic req_valid_i, + /// 1D iDMA request ready + output logic req_ready_o, + + /// iDMA response + output idma_rsp_t idma_rsp_o, + /// iDMA response valid + output logic rsp_valid_o, + /// iDMA response ready + input logic rsp_ready_i, + + /// Error handler request + input idma_eh_req_t idma_eh_req_i, + /// Error handler request valid + input logic eh_req_valid_i, + /// Error handler request ready + output logic eh_req_ready_o, + + /// AXI4+ATOP manager port request + output axi_req_t axi_req_o, + /// AXI4+ATOP manager port response + input axi_rsp_t axi_rsp_i, + + /// iDMA busy flags + output idma_busy_t busy_o +); + + /// The localparam MetaFifoDepth holds the maximum number of transfers that can be + /// in-flight under any circumstances. 
+ localparam int unsigned MetaFifoDepth = BufferDepth + NumAxInFlight + MemSysDepth; + + /// Address type + typedef logic [AddrWidth-1:0] addr_t; + /// Data type + typedef logic [DataWidth-1:0] data_t; + /// Strobe type + typedef logic [StrbWidth-1:0] strb_t; + /// User type + typedef logic [UserWidth-1:0] user_t; + /// ID type + typedef logic [AxiIdWidth-1:0] id_t; + /// Offset type + typedef logic [OffsetWidth-1:0] offset_t; + /// Transfer length type + typedef logic [TFLenWidth-1:0] tf_len_t; + + // AXI4+ATOP define macros for the AX channels + `AXI_TYPEDEF_AW_CHAN_T(axi_aw_chan_t, addr_t, id_t, user_t) + `AXI_TYPEDEF_AR_CHAN_T(axi_ar_chan_t, addr_t, id_t, user_t) + + /// The datapath read request type holds all the information required to configure the read + /// part of the datapath. The type consists of: + /// - `offset`: The bus offset of the read + /// - `tailer`: How many empty bytes are required to pad the transfer to a multiple of the + /// bus width. + /// - `shift`: The amount the data needs to be shifted + /// - `decouple_aw`: If the transfer has the AW decoupled from the R + typedef struct packed { + offset_t offset; + offset_t tailer; + offset_t shift; + logic decouple_aw; + } r_dp_req_t; + + /// The datapath read response type provides feedback from the read part of the datapath: + /// - `resp`: The response from the R channel of the AXI4 manager interface + /// - `last`: The last flag from the R channel of the AXI4 manager interface + /// - `first`: Is the current item first beat in the burst + typedef struct packed { + axi_pkg::resp_t resp; + logic last; + logic first; + } r_dp_rsp_t; + + /// The datapath write request type holds all the information required to configure the write + /// part of the datapath. The type consists of: + /// - `offset`: The bus offset of the write + /// - `tailer`: How many empty bytes are required to pad the transfer to a multiple of the + /// bus width.
+ /// - `num_beats`: The number of beats this burst consists of + /// - `is_single`: Is this transfer just one beat long? `(len == 0)` + typedef struct packed { + offset_t offset; + offset_t tailer; + axi_pkg::len_t num_beats; + logic is_single; + } w_dp_req_t; + + /// The datapath write response type provides feedback from the write part of the datapath: + /// - `resp`: The response from the B channel of the AXI4 manager interface + /// - `user`: The user field from the B channel of the AXI4 manager interface + typedef struct packed { + axi_pkg::resp_t resp; + user_t user; + } w_dp_rsp_t; + + /// The iDMA read request bundles an `AR` type and a datapath read request type together. + typedef struct packed { + r_dp_req_t r_dp_req; + axi_ar_chan_t ar_req; + } idma_r_req_t; + + /// The iDMA write request bundles an `AW` type and a datapath write request type together. It + /// has additional flags: + /// - `last`: indicating the current burst is the last one of the generic 1D transfer currently + /// being processed + /// - `super_last`: The current transfer is marked by the controlling unit as last + /// - `decouple_aw`: indicates this is an R-AW decoupled transfer + typedef struct packed { + w_dp_req_t w_dp_req; + axi_aw_chan_t aw_req; + logic last; + logic super_last; + logic decouple_aw; + } idma_w_req_t; + + /// The mutable transfer options type holds important information that is mutated by the + /// `legalizer` block. + typedef struct packed { + offset_t shift; + logic decouple_rw; + logic decouple_aw; + logic [2:0] src_max_llen; + logic [2:0] dst_max_llen; + logic src_reduce_len; + logic dst_reduce_len; + id_t axi_id; + idma_pkg::axi_options_t src_axi_opt; + idma_pkg::axi_options_t dst_axi_opt; + logic super_last; + } idma_mut_tf_opt_t; + + /// The mutable transfer type holds important information that is mutated by the + /// `legalizer` block.
+ typedef struct packed { + tf_len_t length; + addr_t addr; + logic valid; + } idma_mut_tf_t; + + + // datapath busy indicates the datapath is actively working on a transfer. It is composed of + // the activity of the buffer as well as both the read and write machines + logic dp_busy; + // blanks invalid data + logic dp_poison; + + // read and write requests and their handshaking signals + idma_r_req_t r_req; + idma_w_req_t w_req; + logic r_valid, w_valid; + logic r_ready, w_ready; + + // Is the current transfer the last burst in the 1D transfer? + logic w_last_burst; + logic w_last_ready; + + // Super last flag: The current transfer is indicated as the last one by the controlling + // unit; e.g. by a midend + logic w_super_last; + + // Datapath FIFO signals -> used to decouple legalizer and datapath + logic r_dp_req_in_ready , w_dp_req_in_ready; + logic r_dp_req_out_valid, w_dp_req_out_valid; + logic r_dp_req_out_ready, w_dp_req_out_ready; + r_dp_req_t r_dp_req_out; + w_dp_req_t w_dp_req_out; + + // datapath responses + r_dp_rsp_t r_dp_rsp; + w_dp_rsp_t w_dp_rsp; + logic r_dp_rsp_valid, w_dp_rsp_valid; + logic r_dp_rsp_ready, w_dp_rsp_ready; + + // Ax handshaking + logic ar_ready, ar_ready_dp; + logic aw_ready, aw_ready_dp; + logic aw_valid_dp, ar_valid_dp; + + // Ax request from R-AW coupler to datapath + axi_aw_chan_t aw_req_dp; + + // Ax request from the decoupling stage to the datapath + axi_ar_chan_t ar_req_dp; + + // flush and preemptively empty the legalizer + logic legalizer_flush, legalizer_kill; + + /// intermediate signals to reject zero length transfers + logic is_length_zero; + logic req_valid; + idma_rsp_t idma_rsp; + logic rsp_valid; + logic rsp_ready; + + + //-------------------------------------- + // Reject Zero Length Transfers + //-------------------------------------- + if (RejectZeroTransfers) begin : gen_reject_zero_transfers + // is the current transfer length 0?
+ assign is_length_zero = idma_req_i.length == '0; + + // bypass valid as long as length is not zero, otherwise suppress it + assign req_valid = is_length_zero ? 1'b0 : req_valid_i; + + // modify response + always_comb begin : proc_modify_response_zero_length + // default: bypass + idma_rsp_o = idma_rsp; + rsp_ready = rsp_ready_i; + rsp_valid_o = rsp_valid; + + // a zero transfer happens + if (is_length_zero & req_valid_i & req_ready_o) begin + // block backend + rsp_ready = 1'b0; + // generate new response + rsp_valid_o = 1'b1; + idma_rsp_o = '0; + idma_rsp_o.error = 1'b1; + idma_rsp_o.pld.err_type = idma_pkg::BACKEND; + end + end + + // just bypass signals + end else begin : gen_bypass_zero_transfers + // bypass + assign req_valid = req_valid_i; + assign idma_rsp_o = idma_rsp; + assign rsp_ready = rsp_ready_i; + assign rsp_valid_o = rsp_valid; + end + + + //-------------------------------------- + // Legalization + //-------------------------------------- + if (HardwareLegalizer) begin : gen_hw_legalizer + // hardware legalizer is present + idma_legalizer #( + .DataWidth ( DataWidth ), + .AddrWidth ( AddrWidth ), + .idma_req_t ( idma_req_t ), + .idma_r_req_t ( idma_r_req_t ), + .idma_w_req_t ( idma_w_req_t ), + .idma_mut_tf_t ( idma_mut_tf_t ), + .idma_mut_tf_opt_t ( idma_mut_tf_opt_t ) + ) i_idma_legalizer ( + .clk_i, + .rst_ni, + .req_i ( idma_req_i ), + .valid_i ( req_valid ), + .ready_o ( req_ready_o ), + .r_req_o ( r_req ), + .w_req_o ( w_req ), + .r_valid_o ( r_valid ), + .w_valid_o ( w_valid ), + .r_ready_i ( r_ready ), + .w_ready_i ( w_ready ), + .flush_i ( legalizer_flush ), + .kill_i ( legalizer_kill ), + .r_busy_o ( busy_o.r_leg_busy ), + .w_busy_o ( busy_o.w_leg_busy ) + ); + + end else begin : gen_no_hw_legalizer + // stream fork is used to synchronize the two decoupled channels without the need for a + // FIFO here. 
+ stream_fork #( + .N_OUP ( 32'd2 ) + ) i_stream_fork ( + .clk_i, + .rst_ni, + .valid_i ( req_valid ), + .ready_o ( req_ready_o ), + .valid_o ( { r_valid, w_valid } ), + .ready_i ( { r_ready, w_ready } ) + ); + + // local signal holding the length -> explicitly only doing the computation once + axi_pkg::len_t len; + assign len = ((idma_req_i.length + idma_req_i.src_addr[OffsetWidth-1:0] - + 'd1) >> OffsetWidth); + + // assemble AR request + assign r_req.ar_req = '{ + id: idma_req_i.opt.axi_id, + addr: { idma_req_i.src_addr[AddrWidth-1:OffsetWidth], {{OffsetWidth}{1'b0}} }, + len: len, + size: axi_pkg::size_t'(OffsetWidth), + burst: idma_req_i.opt.src.burst, + lock: idma_req_i.opt.src.lock, + cache: idma_req_i.opt.src.cache, + prot: idma_req_i.opt.src.prot, + qos: idma_req_i.opt.src.qos, + region: idma_req_i.opt.src.region, + user: '0 + }; + + // assemble AW request + assign w_req.aw_req = '{ + id: idma_req_i.opt.axi_id, + addr: { idma_req_i.dst_addr[AddrWidth-1:OffsetWidth], {{OffsetWidth}{1'b0}} }, + len: len, + size: axi_pkg::size_t'(OffsetWidth), + burst: idma_req_i.opt.dst.burst, + lock: idma_req_i.opt.dst.lock, + cache: idma_req_i.opt.dst.cache, + prot: idma_req_i.opt.dst.prot, + qos: idma_req_i.opt.dst.qos, + region: idma_req_i.opt.dst.region, + user: '0, + atop: '0 + }; + + // assemble read datapath request + assign r_req.r_dp_req = '{ + offset: idma_req_i.src_addr[OffsetWidth-1:0], + tailer: OffsetWidth'(idma_req_i.length + idma_req_i.src_addr[OffsetWidth-1:0]), + shift: OffsetWidth'(idma_req_i.src_addr[OffsetWidth-1:0] - + idma_req_i.dst_addr[OffsetWidth-1:0]), + decouple_aw: idma_req_i.opt.beo.decouple_aw + }; + + // assemble write datapath request + assign w_req.w_dp_req = '{ + offset: idma_req_i.dst_addr[OffsetWidth-1:0], + tailer: OffsetWidth'(idma_req_i.length + idma_req_i.dst_addr[OffsetWidth-1:0]), + num_beats: len, + is_single: len == '0 + }; + + // if the legalizer is bypassed; every burst is the last of the 1D transfer + assign w_req.last = 1'b1; 
+ + // assign the last flag of the controlling unit + assign w_req.super_last = idma_req_i.opt.last; + + // bypass decouple signal + assign w_req.decouple_aw = idma_req_i.opt.beo.decouple_aw; + + // there is no unit to be busy + assign busy_o.r_leg_busy = 1'b0; + assign busy_o.w_leg_busy = 1'b0; + end + + // data path, meta channels, and last queues have to be ready for the legalizer to be ready + assign r_ready = r_dp_req_in_ready & ar_ready; + assign w_ready = w_dp_req_in_ready & aw_ready & w_last_ready; + + + //-------------------------------------- + // Error handler + //-------------------------------------- + if (ErrorCap == idma_pkg::ERROR_HANDLING) begin : gen_error_handler + idma_error_handler #( + .MetaFifoDepth ( MetaFifoDepth ), + .PrintFifoInfo ( PrintFifoInfo ), + .idma_rsp_t ( idma_rsp_t ), + .idma_eh_req_t ( idma_eh_req_t ), + .addr_t ( addr_t ), + .r_dp_rsp_t ( r_dp_rsp_t ), + .w_dp_rsp_t ( w_dp_rsp_t ) + ) i_idma_error_handler ( + .clk_i, + .rst_ni, + .testmode_i, + .rsp_o ( idma_rsp ), + .rsp_valid_o ( rsp_valid ), + .rsp_ready_i ( rsp_ready ), + .req_valid_i ( req_valid ), + .req_ready_i ( req_ready_o ), + .eh_i ( idma_eh_req_i ), + .eh_valid_i ( eh_req_valid_i ), + .eh_ready_o ( eh_req_ready_o ), + .r_addr_i ( r_req.ar_req.addr ), + .r_consume_i ( r_valid & r_ready ), + .w_addr_i ( w_req.aw_req.addr ), + .w_consume_i ( w_valid & w_ready ), + .legalizer_flush_o ( legalizer_flush ), + .legalizer_kill_o ( legalizer_kill ), + .dp_busy_i ( dp_busy ), + .dp_poison_o ( dp_poison ), + .r_dp_rsp_i ( r_dp_rsp ), + .r_dp_valid_i ( r_dp_rsp_valid ), + .r_dp_ready_o ( r_dp_rsp_ready ), + .w_dp_rsp_i ( w_dp_rsp ), + .w_dp_valid_i ( w_dp_rsp_valid ), + .w_dp_ready_o ( w_dp_rsp_ready ), + .w_last_burst_i ( w_last_burst ), + .w_super_last_i ( w_super_last ), + .fsm_busy_o ( busy_o.eh_fsm_busy ), + .cnt_busy_o ( busy_o.eh_cnt_busy ) + ); + end else if (ErrorCap == idma_pkg::NO_ERROR_HANDLING) begin : gen_no_error_handler + // bypass the signals, assign their 
neutral values + assign idma_rsp.error = 1'b0; + assign idma_rsp.pld = 1'b0; + assign idma_rsp.last = w_super_last; + assign rsp_valid = w_dp_rsp_valid & w_last_burst; + assign eh_req_ready_o = 1'b0; + assign legalizer_flush = 1'b0; + assign legalizer_kill = 1'b0; + assign dp_poison = 1'b0; + assign r_dp_rsp_ready = rsp_ready; + assign w_dp_rsp_ready = rsp_ready; + assign busy_o.eh_fsm_busy = 1'b0; + assign busy_o.eh_cnt_busy = 1'b0; + + end else begin : gen_param_error + $fatal(1, "Unexpected Error Capability"); + end + + + //-------------------------------------- + // Datapath busy signal + //-------------------------------------- + assign dp_busy = busy_o.buffer_busy | + busy_o.r_dp_busy | + busy_o.w_dp_busy; + + + //-------------------------------------- + // Datapath decoupling + //-------------------------------------- + idma_stream_fifo #( + .Depth ( NumAxInFlight ), + .type_t ( r_dp_req_t ), + .PrintInfo ( PrintFifoInfo ) + ) i_r_dp_req ( + .clk_i, + .rst_ni, + .testmode_i, + .flush_i ( 1'b0 ), + .usage_o ( /* NOT CONNECTED */ ), + .data_i ( r_req.r_dp_req ), + .valid_i ( r_valid ), + .ready_o ( r_dp_req_in_ready ), + .data_o ( r_dp_req_out ), + .valid_o ( r_dp_req_out_valid ), + .ready_i ( r_dp_req_out_ready ) + ); + + idma_stream_fifo #( + .Depth ( NumAxInFlight ), + .type_t ( w_dp_req_t ), + .PrintInfo ( PrintFifoInfo ) + ) i_w_dp_req ( + .clk_i, + .rst_ni, + .testmode_i, + .flush_i ( 1'b0 ), + .usage_o ( /* NOT CONNECTED */ ), + .data_i ( w_req.w_dp_req ), + .valid_i ( w_valid ), + .ready_o ( w_dp_req_in_ready ), + .data_o ( w_dp_req_out ), + .valid_o ( w_dp_req_out_valid ), + .ready_i ( w_dp_req_out_ready ) + ); + + // Add fall-through register to allow the input to be ready if the output is not. 
This + // does not add a cycle of delay + fall_through_register #( + .T ( axi_ar_chan_t ) + ) i_ar_fall_through_register ( + .clk_i, + .rst_ni, + .testmode_i, + .clr_i ( 1'b0 ), + .valid_i ( r_valid ), + .ready_o ( ar_ready ), + .data_i ( r_req.ar_req ), + .valid_o ( ar_valid_dp ), + .ready_i ( ar_ready_dp ), + .data_o ( ar_req_dp ) + ); + + + //-------------------------------------- + // Last flag store + //-------------------------------------- + idma_stream_fifo #( + .Depth ( MetaFifoDepth ), + .type_t ( logic [1:0] ), + .PrintInfo ( PrintFifoInfo ) + ) i_w_last ( + .clk_i, + .rst_ni, + .testmode_i, + .flush_i ( 1'b0 ), + .usage_o ( /* NOT CONNECTED */ ), + .data_i ( {w_req.super_last, w_req.last} ), + .valid_i ( w_valid & w_ready ), + .ready_o ( w_last_ready ), + .data_o ( {w_super_last, w_last_burst} ), + .valid_o ( /* NOT CONNECTED */ ), + .ready_i ( w_dp_rsp_valid & w_dp_rsp_ready ) + ); + + + //-------------------------------------- + // Transport Layer / Datapath + //-------------------------------------- + idma_axi_transport_layer #( + .DataWidth ( DataWidth ), + .BufferDepth ( BufferDepth ), + .MaskInvalidData ( MaskInvalidData ), + .PrintFifoInfo ( PrintFifoInfo ), + .r_dp_req_t ( r_dp_req_t ), + .w_dp_req_t ( w_dp_req_t ), + .r_dp_rsp_t ( r_dp_rsp_t ), + .w_dp_rsp_t ( w_dp_rsp_t ), + .axi_aw_chan_t ( axi_aw_chan_t ), + .axi_ar_chan_t ( axi_ar_chan_t ), + .axi_req_t ( axi_req_t ), + .axi_rsp_t ( axi_rsp_t ) + ) i_idma_axi_transport_layer ( + .clk_i, + .rst_ni, + .testmode_i, + .axi_req_o, + .axi_rsp_i, + .r_dp_req_i ( r_dp_req_out ), + .r_dp_valid_i ( r_dp_req_out_valid ), + .r_dp_ready_o ( r_dp_req_out_ready ), + .r_dp_rsp_o ( r_dp_rsp ), + .r_dp_valid_o ( r_dp_rsp_valid ), + .r_dp_ready_i ( r_dp_rsp_ready ), + .w_dp_req_i ( w_dp_req_out ), + .w_dp_valid_i ( w_dp_req_out_valid ), + .w_dp_ready_o ( w_dp_req_out_ready ), + .w_dp_rsp_o ( w_dp_rsp ), + .w_dp_valid_o ( w_dp_rsp_valid ), + .w_dp_ready_i ( w_dp_rsp_ready ), + .ar_req_i ( ar_req_dp ), + 
.ar_valid_i ( ar_valid_dp ), + .ar_ready_o ( ar_ready_dp ), + .aw_req_i ( aw_req_dp ), + .aw_valid_i ( aw_valid_dp ), + .aw_ready_o ( aw_ready_dp ), + .dp_poison_i ( dp_poison ), + .r_dp_busy_o ( busy_o.r_dp_busy ), + .w_dp_busy_o ( busy_o.w_dp_busy ), + .buffer_busy_o ( busy_o.buffer_busy ) + ); + + + //-------------------------------------- + // R-AW channel coupler + //-------------------------------------- + if (RAWCouplingAvail) begin : gen_r_aw_coupler + // instantiate the channel coupler + idma_channel_coupler #( + .NumAxInFlight ( NumAxInFlight ), + .AddrWidth ( AddrWidth ), + .UserWidth ( UserWidth ), + .AxiIdWidth ( AxiIdWidth ), + .PrintFifoInfo ( PrintFifoInfo ), + .axi_aw_chan_t ( axi_aw_chan_t ) + ) i_idma_channel_coupler ( + .clk_i, + .rst_ni, + .testmode_i, + .r_rsp_valid_i ( axi_rsp_i.r_valid ), + .r_rsp_ready_i ( axi_req_o.r_ready ), + .r_rsp_first_i ( r_dp_rsp.first ), + .r_decouple_aw_i ( r_dp_req_out.decouple_aw ), + .aw_decouple_aw_i ( w_req.decouple_aw ), + .aw_req_i ( w_req.aw_req ), + .aw_valid_i ( w_valid ), + .aw_ready_o ( aw_ready ), + .aw_req_o ( aw_req_dp ), + .aw_valid_o ( aw_valid_dp ), + .aw_ready_i ( aw_ready_dp ), + .busy_o ( busy_o.raw_coupler_busy ) + ); + end else begin : gen_r_aw_bypass + + // Add fall-through register to allow the input to be ready if the output is not. 
This + // does not add a cycle of delay + fall_through_register #( + .T ( axi_aw_chan_t ) + ) i_aw_fall_through_register ( + .clk_i, + .rst_ni, + .testmode_i, + .clr_i ( 1'b0 ), + .valid_i ( w_valid ), + .ready_o ( aw_ready ), + .data_i ( w_req.aw_req ), + .valid_o ( aw_valid_dp ), + .ready_i ( aw_ready_dp ), + .data_o ( aw_req_dp ) + ); + + // no unit: not busy + assign busy_o.raw_coupler_busy = 1'b0; + end + + + //-------------------------------------- + // Assertions + //-------------------------------------- + // pragma translate_off + `ifndef VERILATOR + initial begin : proc_assert_params + axi_addr_width : assert(AddrWidth >= 32'd12) else + $fatal(1, "Parameter `AddrWidth` has to be >= 12!"); + axi_id_width : assert(AxiIdWidth > 32'd0) else + $fatal(1, "Parameter `AxiIdWidth` has to be > 0!"); + axi_data_width : assert(DataWidth inside {32'd16, 32'd32, 32'd64, 32'd128, 32'd256, + 32'd512, 32'd1028}) else + $fatal(1, "Parameter `DataWidth` has to be at least 16 and inside the AXI4 spec!"); + axi_user_width : assert(UserWidth > 32'd0) else + $fatal(1, "Parameter `UserWidth` has to be > 0!"); + num_ax_in_flight : assert(NumAxInFlight > 32'd1) else + $fatal(1, "Parameter `NumAxInFlight` has to be > 1!"); + buffer_depth : assert(BufferDepth > 32'd1) else + $fatal(1, "Parameter `BufferDepth` has to be > 1!"); + tf_len_width : assert(TFLenWidth >= 32'd12) else + $fatal(1, "Parameter `BufferDepth` has to be >= 12!"); + tf_len_width_max : assert(TFLenWidth <= AddrWidth) else + $fatal(1, "Parameter `TFLenWidth` has to be <= `AddrWidth`!"); + end + `endif + // pragma translate_on + +endmodule : idma_backend diff --git a/hw/ip/iDMA/src/idma_buffer.sv b/hw/ip/iDMA/src/idma_buffer.sv new file mode 100644 index 000000000..c3eadeea7 --- /dev/null +++ b/hw/ip/iDMA/src/idma_buffer.sv @@ -0,0 +1,54 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51
+//
+// Thomas Benz
+
+/// Staging buffer for the copied data, built from one independent stream FIFO per byte lane.
+module idma_buffer #(
+    /// Depth handed to every per-byte FIFO
+    parameter int unsigned BufferDepth = 32'd3,
+    /// Number of byte lanes, i.e. the buffer width in bytes
+    parameter int unsigned StrbWidth = 32'd1,
+    /// Forwarded to the `PrintInfo` parameter of every lane FIFO
+    parameter bit PrintFifoInfo = 1'b0,
+    /// Type of the per-lane handshake vectors (indexed by lane number below)
+    parameter type strb_t = logic,
+    /// Payload type of a single lane
+    parameter type byte_t = logic [7:0]
+)(
+    input  logic clk_i,
+    input  logic rst_ni,
+    input  logic testmode_i,
+
+    input  byte_t [StrbWidth-1:0] data_i,
+    input  strb_t valid_i,
+    output strb_t ready_o,
+
+    output byte_t [StrbWidth-1:0] data_o,
+    output strb_t valid_o,
+    input  strb_t ready_i
+);
+
+    // every lane gets its own FIFO and only sees element `lane` of each data/handshake port
+    for (genvar lane = 0; lane < StrbWidth; lane = lane + 1) begin : gen_fifo_buffer
+        idma_stream_fifo #(
+            .Depth ( BufferDepth ),
+            .PrintInfo ( PrintFifoInfo ),
+            .type_t ( byte_t )
+        ) i_byte_buffer (
+            .clk_i,
+            .rst_ni,
+            .testmode_i,
+            .usage_o ( /* NOT CONNECTED */ ),
+            .flush_i ( 1'b0 ),
+            .valid_i ( valid_i [lane] ),
+            .ready_o ( ready_o [lane] ),
+            .data_i ( data_i [lane] ),
+            .valid_o ( valid_o [lane] ),
+            .ready_i ( ready_i [lane] ),
+            .data_o ( data_o [lane] )
+        );
+    end : gen_fifo_buffer
+
+endmodule : idma_buffer
diff --git a/hw/ip/iDMA/src/idma_channel_coupler.sv b/hw/ip/iDMA/src/idma_channel_coupler.sv
new file mode 100644
index 000000000..b59c6673a
--- /dev/null
+++ b/hw/ip/iDMA/src/idma_channel_coupler.sv
@@ -0,0 +1,183 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Thomas Benz
+
+`include "common_cells/registers.svh"
+`include "../include/axi/typedef.svh"
+
+/// Couples the `R` to the `AW` channel by keeping writes back until the corresponding
+/// reads arrive at the DMA. This reduces the congestion in the memory system.
+module idma_channel_coupler #(
+    /// Number of transactions that can be in-flight concurrently
+    parameter int unsigned NumAxInFlight = 32'd2,
+    /// Address width
+    parameter int unsigned AddrWidth = 32'd24,
+    /// AXI user width
+    parameter int unsigned UserWidth = 32'd1,
+    /// AXI ID width
+    parameter int unsigned AxiIdWidth = 32'd1,
+    /// Print the info of the FIFO configuration
+    parameter bit PrintFifoInfo = 1'b0,
+    /// AXI 4 `AW` channel type
+    parameter type axi_aw_chan_t = logic
+)(
+    /// Clock
+    input  logic clk_i,
+    /// Asynchronous reset, active low
+    input  logic rst_ni,
+    /// Testmode in
+    input  logic testmode_i,
+
+    /// R response valid
+    input  logic r_rsp_valid_i,
+    /// R response ready
+    input  logic r_rsp_ready_i,
+    /// First R response
+    input  logic r_rsp_first_i,
+    /// Did the read originate from a decoupled request
+    input  logic r_decouple_aw_i,
+    /// Is the `AW` in the queue a decoupled request?
+    input  logic aw_decouple_aw_i,
+
+    /// Original meta request
+    input  axi_aw_chan_t aw_req_i,
+    /// Original meta request valid
+    input  logic aw_valid_i,
+    /// Original meta request ready
+    output logic aw_ready_o,
+
+    /// Modified meta request
+    output axi_aw_chan_t aw_req_o,
+    /// Modified meta request valid
+    output logic aw_valid_o,
+    /// Modified meta request ready
+    input  logic aw_ready_i,
+    /// Busy signal: an `AW` is pending at the output of the internal store
+    output logic busy_o
+);
+
+    /// The width of the credit counter keeping track of the transfers
+    localparam int unsigned CounterWidth = cf_math_pkg::idx_width(NumAxInFlight);
+
+    /// Credit counter type
+    typedef logic [CounterWidth-1:0] cnt_t;
+    /// Address type
+    typedef logic [AddrWidth-1:0] addr_t;
+    /// User type
+    typedef logic [UserWidth-1:0] user_t;
+    /// ID type
+    typedef logic [AxiIdWidth-1:0] id_t;
+
+    // AXI4+ATOP define macros for the AX channels -> recreate the type locally to increase
+    // compatibility with more versions of questasim
+    `AXI_TYPEDEF_AW_CHAN_T(aw_chan_t, addr_t, id_t, user_t)
+
+    /// Combination of the regular `AW` type and the decoupled flag
+    typedef struct packed {
+        aw_chan_t aw;
+        logic decoupled;
+    } aw_ext_t;
+
+    // FIFO input/output payloads and the handshake on the FIFO output side
+    aw_ext_t aw_req_in, aw_req_out;
+    logic aw_ready, aw_valid;
+
+    // first R arrives -> AW can be sent
+    logic first;
+
+    // ready of the fall-through register that decouples `aw_valid_o` from `aw_ready_i`
+    logic aw_ready_decoupled;
+
+    // aw is being sent
+    logic aw_sent;
+
+    // credit counter: number of AWs that still have to be sent
+    cnt_t aw_to_send_d, aw_to_send_q;
+
+    // first signal -> an R has arrived that needs to free an AW
+    assign first = r_rsp_valid_i & r_rsp_ready_i & r_rsp_first_i & !r_decouple_aw_i;
+
+    // stream fifo to hold AWs back
+    idma_stream_fifo #(
+        .Depth ( NumAxInFlight ),
+        .type_t ( aw_ext_t ),
+        .PrintInfo ( PrintFifoInfo )
+    ) i_aw_store (
+        .clk_i,
+        .rst_ni,
+        .testmode_i,
+        .flush_i ( 1'b0 ),
+        .usage_o ( /* NOT CONNECTED */ ),
+        .data_i ( aw_req_in ),
+        .valid_i ( aw_valid_i ),
+        .ready_o ( aw_ready_o ),
+        .data_o ( aw_req_out ),
+        .valid_o ( aw_valid ),
+        .ready_i ( aw_ready )
+    );
+
+    // fifo input is assembled
+    assign aw_req_in.aw = aw_req_i;
+    assign aw_req_in.decoupled = aw_decouple_aw_i;
+
+    // aw payload is just connected to fifo
+    assign aw_req_o = aw_req_out.aw;
+
+    // use a credit counter to keep track of AWs to send
+    always_comb begin : proc_credit_cnt
+
+        // defaults
+        aw_to_send_d = aw_to_send_q;
+
+        // default: a valid, decoupled AW at the FIFO output bypasses the credit logic
+        aw_sent = aw_req_out.decoupled & aw_valid;
+
+        // first is asserted and aw is ready -> just send AW out
+        // without changing the credit counter value
+        if (aw_ready_decoupled & first) begin
+            aw_sent = 1'b1;
+        end
+
+        // if first is asserted and aw is not ready -> increment
+        // credit counter
+        else if (!aw_ready_decoupled & first) begin
+            aw_to_send_d = aw_to_send_q + 1;
+        end
+
+        // if not first, aw is ready and we have credit -> count down
+        else if (aw_ready_decoupled & !first & aw_to_send_q != '0) begin
+            aw_sent = 1'b1;
+            aw_to_send_d = aw_to_send_q - 1;
+        end
+    end
+
+    // pop the stored AW once it is handshaked on the output port
+    assign aw_ready = aw_valid_o & aw_ready_i;
+
+    // fall through register to decouple the aw valid signal from the aw ready
+    // no payload is required; just the decoupling of the handshaking signals
+    fall_through_register #(
+        .T ( logic [0:0] )
+    ) i_fall_through_register_decouple_aw_valid (
+        .clk_i,
+        .rst_ni,
+        .testmode_i,
+        .clr_i ( 1'b0 ),
+        .valid_i ( aw_sent ),
+        .ready_o ( aw_ready_decoupled ),
+        .data_i ( 1'b0 ),
+        .valid_o ( aw_valid_o ),
+        .ready_i ( aw_ready_i ),
+        .data_o ( /* NOT CONNECTED */ )
+    );
+
+    // busy as long as the AW store presents a valid entry at its output
+    assign busy_o = aw_valid;
+
+    // state
+    `FF(aw_to_send_q, aw_to_send_d, '0, clk_i, rst_ni)
+
+
+endmodule : idma_channel_coupler
diff --git a/hw/ip/iDMA/src/idma_legalizer.sv b/hw/ip/iDMA/src/idma_legalizer.sv
new file mode 100644
index 000000000..0eb853e8e
--- /dev/null
+++ b/hw/ip/iDMA/src/idma_legalizer.sv
@@ -0,0 +1,411 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Thomas Benz
+
+`include "common_cells/registers.svh"
+`include "common_cells/assertions.svh"
+
+/// Legalizes a generic 1D transfer according to the rules given by the
+/// AXI4 protocol. Bursts are cut at 4kiB boundaries and are a maximum of
+/// 256 beats long.
+module idma_legalizer #( + /// Data width + parameter int unsigned DataWidth = 32'd16, + /// Address width + parameter int unsigned AddrWidth = 32'd24, + /// 1D iDMA request type: + /// - `length`: the length of the transfer in bytes + /// - `*_addr`: the source / target byte addresses of the transfer + /// - `opt`: the options field + parameter type idma_req_t = logic, + /// Read request type + parameter type idma_r_req_t = logic, + /// Write request type + parameter type idma_w_req_t = logic, + /// Mutable transfer type + parameter type idma_mut_tf_t = logic, + /// Mutable options type + parameter type idma_mut_tf_opt_t = logic +)( + /// Clock + input logic clk_i, + /// Asynchronous reset, active low + input logic rst_ni, + + /// 1D request + input idma_req_t req_i, + /// 1D request valid + input logic valid_i, + /// 1D request ready + output logic ready_o, + + /// Read request; contains datapath and meta information + output idma_r_req_t r_req_o, + /// Read request valid + output logic r_valid_o, + /// Read request ready + input logic r_ready_i, + + /// Write request; contains datapath and meta information + output idma_w_req_t w_req_o, + /// Write request valid + output logic w_valid_o, + /// Write request ready + input logic w_ready_i, + + /// Invalidate the current burst transfer, stops emission of requests + input logic flush_i, + /// Kill the active 1D transfer; reload a new transfer + input logic kill_i, + + /// Read machine of the legalizer is busy + output logic r_busy_o, + /// Write machine of the legalizer is busy + output logic w_busy_o +); + + /// Stobe width + localparam int unsigned StrbWidth = DataWidth / 8; + /// Offset width + localparam int unsigned OffsetWidth = $clog2(StrbWidth); + /// The size of a page in byte + localparam int unsigned PageSize = (256 * StrbWidth > 4096) ? 
4096 : 256 * StrbWidth; + /// The width of page offset byte addresses + localparam int unsigned PageAddrWidth = $clog2(PageSize); + + /// Offset type + typedef logic [ OffsetWidth-1:0] offset_t; + /// Address type + typedef logic [ AddrWidth-1:0] addr_t; + /// Page address type + typedef logic [PageAddrWidth-1:0] page_addr_t; + /// Page length type + typedef logic [ PageAddrWidth:0] page_len_t; + + + // state: internally hold one transfer, this is mutated + idma_mut_tf_t r_tf_d, r_tf_q; + idma_mut_tf_t w_tf_d, w_tf_q; + idma_mut_tf_opt_t opt_tf_d, opt_tf_q; + + // enable signals for next mutable transfer storage + logic r_tf_ena; + logic w_tf_ena; + + // page boundaries + page_addr_t r_page_offset; + page_len_t r_num_bytes_to_pb; + page_addr_t w_page_offset; + page_len_t w_num_bytes_to_pb; + page_len_t c_num_bytes_to_pb; + + logic [3:0] r_page_addr_width; + logic [3:0] w_page_addr_width; + page_len_t r_page_size; + page_len_t w_page_size; + + // read process + page_len_t r_num_bytes_possible; + page_len_t r_num_bytes; + offset_t r_addr_offset; + logic r_done; + + // write process + page_len_t w_num_bytes_possible; + page_len_t w_num_bytes; + offset_t w_addr_offset; + logic w_done; + + + //-------------------------------------- + // read page boundary check + //-------------------------------------- + // calculate the page with in bits + always_comb begin : proc_read_addr_width + // should the "virtual" page be reduced? e.g. the transfers split into + // smaller chunks than the AXI page size? + r_page_addr_width = OffsetWidth + (opt_tf_q.src_reduce_len ? opt_tf_q.src_max_llen : 'd8); + // a page can be a maximum of 4kB (12 bit) + r_page_addr_width = r_page_addr_width > 'd12 ? 'd12 : r_page_addr_width; + end + // calculate the page size in byte + assign r_page_size = (1 << r_page_addr_width); + + // this is written very confusing due to system verilog not allowing variable + // length ranges. 
+ // the goal is to get 'r_tf_q.addr[PageAddrWidth-1:0]' where PageAddrWidth is + // r_page_addr_width and dynamically changing + always_comb begin : proc_read_range_select + r_page_offset = '0; + for (int i = 0; i < PageAddrWidth; i++) begin + r_page_offset[i] = r_page_addr_width > i ? r_tf_q.addr[i] : 1'b0; + end + end + + // calculate the number of bytes left in the page (number of bytes until + // we reach the page boundary (bp) + assign r_num_bytes_to_pb = r_page_size - r_page_offset; + + + //-------------------------------------- + // write page boundary check + //-------------------------------------- + // calculate the page with in bits + always_comb begin : proc_write_addr_width + // should the "virtual" page be reduced? e.g. the transfers split into + // smaller chunks than the AXI page size? + w_page_addr_width = OffsetWidth + (opt_tf_q.dst_reduce_len ? opt_tf_q.dst_max_llen : 'd8); + // a page can be a maximum of 4kB (12 bit) + w_page_addr_width = w_page_addr_width > 'd12 ? 'd12 : w_page_addr_width; + end + // calculate the page size in byte + assign w_page_size = (1 << w_page_addr_width); + + // this is written very confusing due to system verilog not allowing variable + // length ranges. + // the goal is to get 'r_tf_q.addr[PageAddrWidth-1:0]' where PageAddrWidth is + // r_page_addr_width and dynamically changing + always_comb begin : proc_write_range_select + w_page_offset = '0; + for (int i = 0; i < PageAddrWidth; i++) begin + w_page_offset[i] = w_page_addr_width > i ? w_tf_q.addr[i] : 1'b0; + end + end + + // calculate the number of bytes left in the page (number of bytes until + // we reach the page boundary (bp) + assign w_num_bytes_to_pb = w_page_size - w_page_offset; + + + //-------------------------------------- + // page boundary check + //-------------------------------------- + // how many transfers are remaining when concerning both r/w pages? 
+ // take the boundary that is closer + assign c_num_bytes_to_pb = (r_num_bytes_to_pb > w_num_bytes_to_pb) ? + w_num_bytes_to_pb : r_num_bytes_to_pb; + + + //-------------------------------------- + // Synchronized R/W process + //-------------------------------------- + // max num bytes readable in page + assign r_num_bytes_possible = opt_tf_q.decouple_rw ? + r_num_bytes_to_pb : c_num_bytes_to_pb; + + // max num bytes writable in page + assign w_num_bytes_possible = opt_tf_q.decouple_rw ? + w_num_bytes_to_pb : c_num_bytes_to_pb; + + // calculate the address offsets aligned to transfer sizes. + assign r_addr_offset = r_tf_q.addr[OffsetWidth-1:0]; + assign w_addr_offset = w_tf_q.addr[OffsetWidth-1:0]; + + // legalization process -> read and write is coupled together + always_comb begin : proc_read_write_transaction + + // default: keep state + r_tf_d = r_tf_q; + w_tf_d = w_tf_q; + opt_tf_d = opt_tf_q; + + // default: not done + r_done = 1'b0; + w_done = 1'b0; + + //-------------------------------------- + // Legalize read transaction + //-------------------------------------- + // more bytes remaining than we can read + if (r_tf_q.length > r_num_bytes_possible) begin + r_num_bytes = r_num_bytes_possible; + // calculate remainder + r_tf_d.length = r_tf_q.length - r_num_bytes_possible; + // next address + r_tf_d.addr = r_tf_q.addr + r_num_bytes; + + // remaining bytes fit in one burst + end else begin + r_num_bytes = r_tf_q.length[PageAddrWidth:0]; + // finished + r_tf_d.valid = 1'b0; + r_done = 1'b1; + end + + //-------------------------------------- + // Legalize write transaction + //-------------------------------------- + // more bytes remaining than we can write + if (w_tf_q.length > w_num_bytes_possible) begin + w_num_bytes = w_num_bytes_possible; + // calculate remainder + w_tf_d.length = w_tf_q.length - w_num_bytes_possible; + // next address + w_tf_d.addr = w_tf_q.addr + w_num_bytes; + + // remaining bytes fit in one burst + end else begin + w_num_bytes = 
w_tf_q.length[PageAddrWidth:0]; + // finished + w_tf_d.valid = 1'b0; + w_done = 1'b1; + end + + //-------------------------------------- + // Kill + //-------------------------------------- + if (kill_i) begin + // kill the current state + r_tf_d = '0; + r_done = 1'b1; + w_tf_d = '0; + w_done = 1'b1; + end + + //-------------------------------------- + // Refill + //-------------------------------------- + // new request is taken in if both r and w machines are ready. + if (ready_o & valid_i) begin + + // load all three mutable objects (source, destination, option) + // source or read + r_tf_d = '{ + length: req_i.length, + addr: req_i.src_addr, + valid: 1'b1 + }; + // destination or write + w_tf_d = '{ + length: req_i.length, + addr: req_i.dst_addr, + valid: 1'b1 + }; + // options + opt_tf_d = '{ + shift: req_i.src_addr[OffsetWidth-1:0] - req_i.dst_addr[OffsetWidth-1:0], + decouple_rw: req_i.opt.beo.decouple_rw, + decouple_aw: req_i.opt.beo.decouple_aw, + src_max_llen: req_i.opt.beo.src_max_llen, + dst_max_llen: req_i.opt.beo.dst_max_llen, + src_reduce_len: req_i.opt.beo.src_reduce_len, + dst_reduce_len: req_i.opt.beo.dst_reduce_len, + axi_id: req_i.opt.axi_id, + src_axi_opt: req_i.opt.src, + dst_axi_opt: req_i.opt.dst, + super_last: req_i.opt.last + }; + end + end + + + //-------------------------------------- + // Connect outputs + //-------------------------------------- + // assign the signals for the read meta channel + assign r_req_o.ar_req = '{ + id: opt_tf_q.axi_id, + addr: { r_tf_q.addr[AddrWidth-1:OffsetWidth], {{OffsetWidth}{1'b0}} }, + len: ((r_num_bytes + r_addr_offset - 'd1) >> OffsetWidth), + size: axi_pkg::size_t'(OffsetWidth), + burst: opt_tf_q.src_axi_opt.burst, + lock: opt_tf_q.src_axi_opt.lock, + cache: opt_tf_q.src_axi_opt.cache, + prot: opt_tf_q.src_axi_opt.prot, + qos: opt_tf_q.src_axi_opt.qos, + region: opt_tf_q.src_axi_opt.region, + user: '0 + }; + + // assign the signals for the write meta channel + assign w_req_o.aw_req = '{ + id: 
opt_tf_q.axi_id, + addr: { w_tf_q.addr[AddrWidth-1:OffsetWidth], {{OffsetWidth}{1'b0}} }, + len: ((w_num_bytes + w_addr_offset - 'd1) >> OffsetWidth), + size: axi_pkg::size_t'(OffsetWidth), + burst: opt_tf_q.dst_axi_opt.burst, + lock: opt_tf_q.dst_axi_opt.lock, + cache: opt_tf_q.dst_axi_opt.cache, + prot: opt_tf_q.dst_axi_opt.prot, + qos: opt_tf_q.dst_axi_opt.qos, + region: opt_tf_q.dst_axi_opt.region, + user: '0, + atop: '0 + }; + + // assign the signals needed to set-up the read data path + assign r_req_o.r_dp_req = '{ + offset: r_addr_offset, + tailer: OffsetWidth'(r_num_bytes + r_addr_offset), + shift: opt_tf_q.shift, + decouple_aw: opt_tf_q.decouple_aw + }; + + // assign the signals needed to set-up the write data path + assign w_req_o.w_dp_req = '{ + offset: w_addr_offset, + tailer: OffsetWidth'(w_num_bytes + w_addr_offset), + num_beats: w_req_o.aw_req.len, + is_single: w_req_o.aw_req.len == '0 + }; + + // last burst in generic 1D transfer? + assign w_req_o.last = w_done; + + // last burst indicated by midend + assign w_req_o.super_last = opt_tf_q.super_last; + + // assign aw decouple flag + assign w_req_o.decouple_aw = opt_tf_q.decouple_aw; + + // busy output + assign r_busy_o = r_tf_q.valid; + assign w_busy_o = w_tf_q.valid; + + + //-------------------------------------- + // Flow Control + //-------------------------------------- + // only advance to next state if: + // * rw_coupled: both machines advance + // * rw_decoupled: either machine advances + always_comb begin : proc_legalizer_flow_control + if (opt_tf_q.decouple_rw) begin + r_tf_ena = (r_ready_i & !flush_i) | kill_i; + w_tf_ena = (w_ready_i & !flush_i) | kill_i; + + r_valid_o = r_tf_q.valid & r_ready_i & !flush_i; + w_valid_o = w_tf_q.valid & w_ready_i & !flush_i; + end else begin + r_tf_ena = (r_ready_i & w_ready_i & !flush_i) | kill_i; + w_tf_ena = (r_ready_i & w_ready_i & !flush_i) | kill_i; + + r_valid_o = r_tf_q.valid & w_ready_i & r_ready_i & !flush_i; + w_valid_o = w_tf_q.valid & r_ready_i 
& w_ready_i & !flush_i; + end + end + + // load next idma request: if both machines are done! + assign ready_o = r_done & w_done & r_ready_i & w_ready_i & !flush_i; + + + //-------------------------------------- + // State + //-------------------------------------- + `FF(opt_tf_q, opt_tf_d, '0, clk_i, rst_ni) + `FFL(r_tf_q, r_tf_d, r_tf_ena, '0, clk_i, rst_ni) + `FFL(w_tf_q, w_tf_d, w_tf_ena, '0, clk_i, rst_ni) + + + //-------------------------------------- + // Assertions + //-------------------------------------- + // only support the decomposition of incremental bursts + `ASSERT_NEVER(OnlyIncrementalBurstsSRC, (ready_o & valid_i & + req_i.opt.src.burst != axi_pkg::BURST_INCR), clk_i, !rst_ni) + `ASSERT_NEVER(OnlyIncrementalBurstsDST, (ready_o & valid_i & + req_i.opt.dst.burst != axi_pkg::BURST_INCR), clk_i, !rst_ni) + +endmodule : idma_legalizer diff --git a/hw/ip/iDMA/src/idma_stream_fifo.sv b/hw/ip/iDMA/src/idma_stream_fifo.sv new file mode 100644 index 000000000..b6de92d90 --- /dev/null +++ b/hw/ip/iDMA/src/idma_stream_fifo.sv @@ -0,0 +1,127 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Thomas Benz + +`include "common_cells/assertions.svh" + +/// Stream FIFO built from the common cells modules: a flushable spill register for Depth == 2, a `stream_fifo` for Depth > 2. +module idma_stream_fifo #( + /// Depth of the FIFO; can be arbitrary from 2 to 2**32 (0 and 1 are rejected with $fatal in simulation) + parameter int unsigned Depth = 32'd8, + /// Type of the elements carried on data_i / data_o + parameter type type_t = logic, + /// Print information when the simulation launches + parameter bit PrintInfo = 1'b0, + // DO NOT OVERWRITE THIS PARAMETER: derived from Depth, sets the width of usage_o + parameter int unsigned AddrDepth = (Depth > 32'd1) ? 
$clog2(Depth) : 32'd1 +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush the fifo + input logic testmode_i, // test_mode to bypass clock gating (only forwarded to stream_fifo, i.e. Depth > 2) + output logic [AddrDepth-1:0] usage_o, // fill pointer ('x when Depth == 2) + // input interface + input type_t data_i, // data to push into the fifo + input logic valid_i, // input data valid + output logic ready_o, // fifo is not full + // output interface + output type_t data_o, // output data + output logic valid_o, // fifo is not empty + input logic ready_i // pop head from fifo +); + + //-------------------------------------- + // Prevent Depth 0 and 1 + //-------------------------------------- + // Throw an error if depth is 0 or 1 + // pragma translate off + if (Depth < 32'd2) begin : gen_fatal + initial begin + $fatal(1, "FIFO of depth %d does not make any sense!", Depth); + end + end + // pragma translate on + + //-------------------------------------- + // Spill register (depth 2) + //-------------------------------------- + // Instantiate a spill register for depth 2 + if (Depth == 32'd2) begin : gen_spill + + // print info + // pragma translate off + if (PrintInfo) begin : gen_info + initial begin + $display("[%m] Instantiate spill register (of depth %d)", Depth); + end + end + // pragma translate on + + // spill register + spill_register_flushable #( + .T ( type_t ), + .Bypass ( 1'b0 ) + ) i_spill_register_flushable ( + .clk_i, + .rst_ni, + .flush_i, + .valid_i, + .ready_o, + .data_i, + .valid_o, + .ready_i, + .data_o + ); + + // usage is not supported in the spill register variant: usage_o is driven with 'x + assign usage_o = 'x; + + // no full push: valid_i must never be asserted while ready_o is low + `ASSERT_NEVER(CheckFullPush, (!ready_o & valid_i), clk_i, !rst_ni) + // no empty pop: ready_i must never be asserted while valid_o is low + `ASSERT_NEVER(CheckEmptyPop, (!valid_o & ready_i), clk_i, !rst_ni) + end + + + 
//-------------------------------------- + // FIFO register (depth 3+) + //-------------------------------------- + // default to stream fifo + if (Depth > 32'd2) begin : gen_fifo + + // print info + // 
pragma translate off + if (PrintInfo) begin : gen_info + initial begin + $info("[%m] Instantiate stream FIFO of depth %d", Depth); + end + end + // pragma translate on + + // stream fifo + stream_fifo #( + .DEPTH ( Depth ), + .T ( type_t ) + ) i_stream_fifo ( + .clk_i, + .rst_ni, + .flush_i, + .testmode_i, + .usage_o, + .data_i, + .valid_i, + .ready_o, + .data_o, + .valid_o, + .ready_i + ); + + // no full push: valid_i must never be asserted while ready_o is low + `ASSERT_NEVER(CheckFullPush, (!ready_o & valid_i), clk_i, !rst_ni) + // no empty pop: ready_i must never be asserted while valid_o is low + `ASSERT_NEVER(CheckEmptyPop, (!valid_o & ready_i), clk_i, !rst_ni) + end + +endmodule : idma_stream_fifo diff --git a/sw/device/lib/drivers/dma/dma.c b/sw/device/lib/drivers/dma/dma.c index 44f6db6f6..fc9c78762 100644 --- a/sw/device/lib/drivers/dma/dma.c +++ b/sw/device/lib/drivers/dma/dma.c @@ -10,33 +10,29 @@ #include "dma_regs.h" // Generated. void dma_set_read_ptr(const dma_t *dma, uint32_t read_ptr) { - mmio_region_write32(dma->base_addr, (ptrdiff_t)(DMA_PTR_IN_REG_OFFSET), read_ptr); + mmio_region_write32(dma->base_addr, (ptrdiff_t)(IDMA_REG32_FRONTEND_SRC_ADDR_REG_OFFSET), read_ptr); } void dma_set_write_ptr(const dma_t *dma, uint32_t write_ptr) { - mmio_region_write32(dma->base_addr, (ptrdiff_t)(DMA_PTR_OUT_REG_OFFSET), write_ptr); + mmio_region_write32(dma->base_addr, (ptrdiff_t)(IDMA_REG32_FRONTEND_DST_ADDR_REG_OFFSET), write_ptr); } -void dma_set_cnt_start(const dma_t *dma, uint32_t copy_size) { - mmio_region_write32(dma->base_addr, (ptrdiff_t)(DMA_DMA_START_REG_OFFSET), copy_size); +void dma_set_cnt(const dma_t *dma, uint32_t copy_size) { + mmio_region_write32(dma->base_addr, (ptrdiff_t)(IDMA_REG32_FRONTEND_NUM_BYTES_REG_OFFSET), copy_size); } -int32_t dma_get_done(const dma_t *dma) { - return mmio_region_read32(dma->base_addr, (ptrdiff_t)(DMA_DONE_REG_OFFSET)); -} - -void dma_set_read_ptr_inc(const dma_t *dma, uint32_t read_ptr_inc){ - 
mmio_region_write32(dma->base_addr, (ptrdiff_t)(DMA_SRC_PTR_INC_REG_OFFSET), read_ptr_inc); +void dma_set_config(const dma_t 
*dma, uint32_t config) { + mmio_region_write32(dma->base_addr, (ptrdiff_t)(IDMA_REG32_FRONTEND_CONF_REG_OFFSET), config); } -void dma_set_write_ptr_inc(const dma_t *dma, uint32_t write_ptr_inc){ - mmio_region_write32(dma->base_addr, (ptrdiff_t)(DMA_DST_PTR_INC_REG_OFFSET), write_ptr_inc); +int32_t dma_get_status(const dma_t *dma) { + return mmio_region_read32(dma->base_addr, (ptrdiff_t)(IDMA_REG32_FRONTEND_STATUS_REG_OFFSET)); } -void dma_set_spi_mode(const dma_t *dma, uint32_t spi_mode){ - mmio_region_write32(dma->base_addr, (ptrdiff_t)(DMA_SPI_MODE_REG_OFFSET), spi_mode); +int32_t dma_get_nextid(const dma_t *dma) { + return mmio_region_read32(dma->base_addr, (ptrdiff_t)(IDMA_REG32_FRONTEND_NEXT_ID_REG_OFFSET)); } -void dma_set_data_type(const dma_t *dma, uint32_t data_type){ - mmio_region_write32(dma->base_addr, (ptrdiff_t)(DMA_DATA_TYPE_REG_OFFSET), data_type); +int32_t dma_get_done(const dma_t *dma) { + return mmio_region_read32(dma->base_addr, (ptrdiff_t)(IDMA_REG32_FRONTEND_DONE_REG_OFFSET)); } diff --git a/sw/device/lib/drivers/dma/dma.h b/sw/device/lib/drivers/dma/dma.h index 4d16a3cf5..b8778af32 100644 --- a/sw/device/lib/drivers/dma/dma.h +++ b/sw/device/lib/drivers/dma/dma.h @@ -26,60 +26,53 @@ typedef struct dma { } dma_t; /** - * Write to read_ptr register of the DMA + * Write to read_ptr register of the MEMCOPY PERIPHERAL. * @param dma Pointer to dma_t represting the target MEMCOPY PERIPHERAL. * @param read_ptr Any valid memory address. */ void dma_set_read_ptr(const dma_t *dma, uint32_t read_ptr); /** - * Write to write_ptr register of the DMA + * Write to write_ptr register of the MEMCOPY PERIPHERAL. * @param dma Pointer to dma_t represting the target MEMCOPY PERIPHERAL. * @param write_ptr Any valid memory address. */ void dma_set_write_ptr(const dma_t *dma, uint32_t write_ptr); /** - * Write to cnt_start register of the DMA + * Write to num_bytes register of the MEMCOPY PERIPHERAL. * @param dma Pointer to dma_t represting the target MEMCOPY PERIPHERAL. 
- * @param copy_size Number of bytes to be copied from read_ptr to write_ptr. + * @param copy_size Number of bytes to be copied from read_ptr to write_ptr. */ -void dma_set_cnt_start(const dma_t *dma, uint32_t copy_size); +void dma_set_cnt(const dma_t *dma, uint32_t copy_size); /** - * Read from done register of the DMA + * Write to configuration register of the MEMCOPY PERIPHERAL. * @param dma Pointer to dma_t represting the target MEMCOPY PERIPHERAL. - * @return done value (0: data are being copied - 1: copy done/peripheral idle) - */ -int32_t dma_get_done(const dma_t *dma); - -/** - * Write to src_ptr_inc register of the DMA. - * @param dma Pointer to dma_t represting the target DMA. - * @param read_ptr_inc Increment of source pointer (Default: 4). + * @param config Configurations of the iDMA */ -void dma_set_read_ptr_inc(const dma_t *dma, uint32_t read_ptr_inc); +void dma_set_config(const dma_t *dma, uint32_t config); /** - * Write to dst_ptr_inc register of the DMA. - * @param dma Pointer to dma_t represting the target DMA. - * @param write_ptr_inc Increment of destination pointer (Default: 4). + * Read from the status register of the MEMCOPY PERIPHERAL. + * @param dma Pointer to dma_t represting the target MEMCOPY PERIPHERAL. + * @return status value (bit 0 - 1: busy, 0: not busy) */ -void dma_set_write_ptr_inc(const dma_t *dma, uint32_t write_ptr_inc); +int32_t dma_get_status(const dma_t *dma); /** - * Sets the DMA data transfer modes when used with the SPI. - * @param dma Pointer to dma_t represting the target DMA. - * @param spi_mode 0: mem to mem - 1: spi_rx to mem (Default: 0) - 2: mem to spi_tx. + * Read from the next_id register of the MEMCOPY PERIPHERAL. + * @param dma Pointer to dma_t represting the target MEMCOPY PERIPHERAL. + * @return next ID; this register access launches the transfer (0 if the transfer is not set up properly) */ -void dma_set_spi_mode(const dma_t *dma, uint32_t spi_mode); +int32_t dma_get_nextid(const dma_t *dma); /** - * Sets the DMA data type. 
- * @param dma Pointer to dma_t represting the target DMA. - * @param data_type Data type to transfer: 32-bit word(0), 16-bit half word (1), 8-bit byte(2,3). + * Read from done register of the MEMCOPY PERIPHERAL. + * @param dma Pointer to dma_t represting the target MEMCOPY PERIPHERAL. + * @return done value (0: data are being copied - 1: copy done/peripheral idle) */ -void dma_set_data_type(const dma_t *dma, uint32_t data_type); +int32_t dma_get_done(const dma_t *dma); #ifdef __cplusplus } diff --git a/sw/device/lib/drivers/dma/dma_regs.h b/sw/device/lib/drivers/dma/dma_regs.h index a11c59765..c3da9ddc7 100644 --- a/sw/device/lib/drivers/dma/dma_regs.h +++ b/sw/device/lib/drivers/dma/dma_regs.h @@ -14,43 +14,31 @@ extern "C" { #endif // Register width -#define DMA_PARAM_REG_WIDTH 32 - -// Input data pointer (word aligned) -#define DMA_PTR_IN_REG_OFFSET 0x0 - -// Output data pointer (word aligned) -#define DMA_PTR_OUT_REG_OFFSET 0x4 - -// Number of bytes to copy - Once a value is written, the copy starts -#define DMA_DMA_START_REG_OFFSET 0x8 - -// Register set to 1 when copy is done -#define DMA_DONE_REG_OFFSET 0xc - -// Increment number of source pointer every time a word is copied from source -#define DMA_SRC_PTR_INC_REG_OFFSET 0x10 - -// Increment number of source pointer every time a word is copied to -// destination -#define DMA_DST_PTR_INC_REG_OFFSET 0x14 - -// SPI mode selection: disable(0), receive from SPI (1), and send to SPI (2), -// receive from SPI FLASH (3), and send to SPI FLASH (4). It waits for TX and -// RX FIFO in modes 1 and 2, respectively. -#define DMA_SPI_MODE_REG_OFFSET 0x18 -#define DMA_SPI_MODE_SPI_MODE_MASK 0x7 -#define DMA_SPI_MODE_SPI_MODE_OFFSET 0 -#define DMA_SPI_MODE_SPI_MODE_FIELD \ - ((bitfield_field32_t) { .mask = DMA_SPI_MODE_SPI_MODE_MASK, .index = DMA_SPI_MODE_SPI_MODE_OFFSET }) - -// Data type to transfer: 32-bit word(0), 16-bit half word(1), 8-bit -// byte(2,3). 
-#define DMA_DATA_TYPE_REG_OFFSET 0x1c -#define DMA_DATA_TYPE_DATA_TYPE_MASK 0x3 -#define DMA_DATA_TYPE_DATA_TYPE_OFFSET 0 -#define DMA_DATA_TYPE_DATA_TYPE_FIELD \ - ((bitfield_field32_t) { .mask = DMA_DATA_TYPE_DATA_TYPE_MASK, .index = DMA_DATA_TYPE_DATA_TYPE_OFFSET }) +#define IDMA_REG32_FRONTEND_PARAM_REG_WIDTH 32 + +// Source Address +#define IDMA_REG32_FRONTEND_SRC_ADDR_REG_OFFSET 0x0 + +// Destination Address +#define IDMA_REG32_FRONTEND_DST_ADDR_REG_OFFSET 0x4 + +// Number of bytes +#define IDMA_REG32_FRONTEND_NUM_BYTES_REG_OFFSET 0x8 + +// Configuration Register for DMA settings +#define IDMA_REG32_FRONTEND_CONF_REG_OFFSET 0xc +#define IDMA_REG32_FRONTEND_CONF_DECOUPLE_BIT 0 +#define IDMA_REG32_FRONTEND_CONF_DEBURST_BIT 1 + +// DMA Status +#define IDMA_REG32_FRONTEND_STATUS_REG_OFFSET 0x10 +#define IDMA_REG32_FRONTEND_STATUS_BUSY_BIT 0 + +// Next ID, launches transfer, returns 0 if transfer not set up properly. +#define IDMA_REG32_FRONTEND_NEXT_ID_REG_OFFSET 0x14 + +// Get ID of finished transactions. +#define IDMA_REG32_FRONTEND_DONE_REG_OFFSET 0x18 #ifdef __cplusplus } // extern "C"