Skip to content

Commit

Permalink
CGRA configurable size and instruction update (#21)
Browse files Browse the repository at this point in the history
* Reconfigurable for CGRA size

* Adding, renaming, removing many files to make the cgra peripheral registers configurable and the software driver compatible.

* Removing unwanted files being tracked.

* Folder renaming.

* FFT example updated and CGRA rtl debugged.

* python script for bitstream generation updated with template.

* CGRA app code cleaning.

* Added manual control over incrementation for LWD and SWD instructions on the CGRA.

* CGRA vendor update.

* CGRA vendor update for regtool.py execution.

* Weird error solved by changing the printf.

* Save commit

* Save commit

* Save commit.

* Simple check app working with any CGRA size.

* Example CGRA check size cleaned.

* Updating lint rules to be compatible with any CGRA size.

* Added error check on cgra_size app.

* File renaming and CGRA driver updated for any CGRA size.

* Added X extension parameter to testharness and top module.

* Renaming all cgra_x_heep reference to heepsilon.

* Removed dummy header file not needed anymore with new xheep flow.

* run verible.

* Few remaining old name changed.

* testharness and top modules updated.

* Put default 4x4 CGRA size.

* Code cleaning.

* Modified CGRA to be compatible with single row or column.

* Added CGRA size to debug printf.

* Removing changes to x-heep vendor repo.

* Updated tb_util for new memory bank organization.

* CGRA vendor version updated.

---------

Co-authored-by: Benoît Denkinger <benoit.denkinger@epfl.ch>
  • Loading branch information
benoitdenkinger and Benoît Denkinger authored Oct 1, 2024
1 parent 8d1534b commit b55b14f
Show file tree
Hide file tree
Showing 128 changed files with 15,651 additions and 3,400 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ build
*.log
*.do
.venv/
*__pycache__*
# sw files
*.map
*.hex
Expand All @@ -12,7 +13,10 @@ build
*.dump

# ignore automatically generated files
hw/rtl/heepsilon_pkg.sv
tb/tb_util.svh
sw/external/drivers/cgra/cgra.h

*.vscode*
*vivado*
*__pycache__*
32 changes: 21 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# SPDX-License-Identifier: Apache-2.0


# Makefile to generates cgra-x-heep files and build the design with fusesoc
# Makefile to generate heepsilon files and build the design with fusesoc

.PHONY: clean help

Expand All @@ -22,19 +22,29 @@ PROJECT ?= hello_world
export HEEP_DIR = hw/vendor/esl_epfl_x_heep/
include $(HEEP_DIR)Makefile.venv

HEEPSILON_CFG ?= heepsilon_cfg.hjson

# Generates the CGRA-size-dependent files from their templates, using the
# values found in $(HEEPSILON_CFG). One heepsilon_gen.py call per template:
#   - CGRA package, peripheral registers and bitstream generator (vendored CGRA)
#   - heepsilon package (hw/rtl)
#   - CGRA C driver header (sw/external/drivers/cgra)
#   - CGRA register description (cgra_regs.hjson)
# Finally cgra_reg_gen.sh is run from the CGRA data directory.
#
# The last step chains with '&&' so that a failing 'cd' or a failing
# cgra_reg_gen.sh makes the recipe (and therefore make) fail instead of being
# hidden behind the exit status of a trailing command.
#
# heepsilon-gen is a command, not a file: declare it phony so that a stray file
# with the same name can never make the target look up to date.
.PHONY: heepsilon-gen
heepsilon-gen:
	$(PYTHON) util/heepsilon_gen.py --cfg $(HEEPSILON_CFG) --outdir hw/vendor/esl_epfl_cgra/hw/rtl --pkg-sv hw/vendor/esl_epfl_cgra/hw/rtl/cgra_pkg.sv.tpl
	$(PYTHON) util/heepsilon_gen.py --cfg $(HEEPSILON_CFG) --outdir hw/vendor/esl_epfl_cgra/hw/rtl --tpl-sv hw/vendor/esl_epfl_cgra/hw/rtl/peripheral_regs.sv.tpl
	$(PYTHON) util/heepsilon_gen.py --cfg $(HEEPSILON_CFG) --outdir hw/vendor/esl_epfl_cgra/util --tpl-sv hw/vendor/esl_epfl_cgra/util/cgra_bitstream_gen.py.tpl
	$(PYTHON) util/heepsilon_gen.py --cfg $(HEEPSILON_CFG) --outdir hw/rtl --pkg-sv hw/rtl/heepsilon_pkg.sv.tpl
	$(PYTHON) util/heepsilon_gen.py --cfg $(HEEPSILON_CFG) --outdir sw/external/drivers/cgra --header-c sw/external/drivers/cgra/cgra.h.tpl
	$(PYTHON) util/heepsilon_gen.py --cfg $(HEEPSILON_CFG) --outdir hw/vendor/esl_epfl_cgra/data --pkg-sv hw/vendor/esl_epfl_cgra/data/cgra_regs.hjson.tpl
	bash -c "cd hw/vendor/esl_epfl_cgra/data && source cgra_reg_gen.sh"

# Generates mcu files. First the mcu-gen from X-HEEP is called.
# This is needed to be done after the X-HEEP mcu-gen because the test-bench to be used is the one from CGRA-X-HEEP, not the one from X-HEEP.
mcu-gen:
# This is needed to be done after the X-HEEP mcu-gen because the test-bench to be used is the one from heepsilon, not the one from X-HEEP.
mcu-gen: heepsilon-gen
$(MAKE) -f $(XHEEP_MAKE) EXTERNAL_DOMAINS=${EXTERNAL_DOMAINS} MEMORY_BANKS=${MEMORY_BANKS} $(MAKECMDGOALS)
cd hw/vendor/esl_epfl_x_heep &&\
python util/mcu_gen.py --cfg mcu_cfg.hjson --pads_cfg pad_cfg.hjson --outdir ../../../tb/ --memorybanks $(MEMORY_BANKS) --tpl-sv ../../../tb/tb_util.svh.tpl
$(PYTHON) util/mcu_gen.py --cfg mcu_cfg.hjson --pads_cfg pad_cfg.hjson --outdir ../../../tb/ --memorybanks $(MEMORY_BANKS) --tpl-sv ../../../tb/tb_util.svh.tpl

## Builds (synthesis and implementation) the bitstream for the FPGA version using Vivado
## @param FPGA_BOARD=nexys-a7-100t,pynq-z2
## @param FUSESOC_FLAGS=--flag=<flagname>
vivado-fpga: |venv
fusesoc --cores-root . run --no-export --target=$(FPGA_BOARD) $(FUSESOC_FLAGS) --setup --build eslepfl:systems:cgra-x-heep 2>&1 | tee buildvivado.log
fusesoc --cores-root . run --no-export --target=$(FPGA_BOARD) $(FUSESOC_FLAGS) --setup --build eslepfl:systems:heepsilon 2>&1 | tee buildvivado.log


# Runs verible formatting
Expand All @@ -43,24 +53,24 @@ verible:

# Simulation
verilator-sim:
fusesoc --cores-root . run --no-export --target=sim --tool=verilator $(FUSESOC_FLAGS) --setup --build eslepfl:systems:cgra-x-heep 2>&1 | tee buildsim.log
fusesoc --cores-root . run --no-export --target=sim --tool=verilator $(FUSESOC_FLAGS) --setup --build eslepfl:systems:heepsilon 2>&1 | tee buildsim.log

questasim-sim:
fusesoc --cores-root . run --no-export --target=sim --tool=modelsim $(FUSESOC_FLAGS) --setup --build eslepfl:systems:cgra-x-heep 2>&1 | tee buildsim.log
fusesoc --cores-root . run --no-export --target=sim --tool=modelsim $(FUSESOC_FLAGS) --setup --build eslepfl:systems:heepsilon 2>&1 | tee buildsim.log

questasim-sim-opt: questasim-sim
$(MAKE) -C build/eslepfl_systems_cgra-x-heep_0/sim-modelsim opt
$(MAKE) -C build/eslepfl_systems_heepsilon_0/sim-modelsim opt

vcs-sim:
fusesoc --cores-root . run --no-export --target=sim --tool=vcs $(FUSESOC_FLAGS) --setup --build eslepfl:systems:cgra-x-heep 2>&1 | tee buildsim.log
fusesoc --cores-root . run --no-export --target=sim --tool=vcs $(FUSESOC_FLAGS) --setup --build eslepfl:systems:heepsilon 2>&1 | tee buildsim.log


## Generates the build output for a given application
## Uses verilator to simulate the HW model and run the FW
## UART Dumping in uart0.log to show recollected results
run-verilator:
$(MAKE) app PROJECT=$(PROJECT)
cd ./build/eslepfl_systems_cgra-x-heep_0/sim-verilator; \
cd ./build/eslepfl_systems_heepsilon_0/sim-verilator; \
./Vtestharness +firmware=../../../sw/build/main.hex; \
cat uart0.log; \
cd ../../..;
Expand All @@ -70,7 +80,7 @@ run-verilator:
## UART Dumping in uart0.log to show recollected results
run-questasim:
$(MAKE) app PROJECT=$(PROJECT)
cd ./build/eslepfl_systems_cgra-x-heep_0/sim-modelsim; \
cd ./build/eslepfl_systems_heepsilon_0/sim-modelsim; \
make run PLUSARGS="c firmware=../../../sw/build/main.hex"; \
cat uart0.log; \
cd ../../..;
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Due to its modular design, HEEPsilon respects the X-HEEP workflow. As such, you

Although the HEEPsilon team will try to keep the latest version of X-HEEP available, changes in the X-HEEP setup might not be reflected immediately in this repository.

👉 For the most accurate set-up instructions please refer to the documentation of the [vendorized X-HEEP](https://github.com/esl-epfl/cgra_x_heep/tree/main/hw/vendor/esl_epfl_x_heep).
👉 For the most accurate set-up instructions please refer to the documentation of the [vendorized X-HEEP](https://github.com/esl-epfl/heepsilon/tree/main/hw/vendor/esl_epfl_x_heep).


# Behavioural simulations
Expand Down
14 changes: 7 additions & 7 deletions cgra_x_heep.core → heepsilon.core
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ CAPI=2:
# Solderpad Hardware License, Version 2.1, see LICENSE.md for details.
# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1

name: eslepfl:systems:cgra-x-heep
description: CGRA X-HEEP Top.
name: eslepfl:systems:heepsilon
description: HEEPsilon (X-HEEP + CGRA) Top.

filesets:
files_rtl_generic:
Expand All @@ -14,8 +14,8 @@ filesets:
- openhwgroup.org:systems:core-v-mini-mcu
- eslepfl::cgra
files:
- hw/rtl/cgra_x_heep_pkg.sv
- hw/rtl/cgra_x_heep_top.sv
- hw/rtl/heepsilon_pkg.sv
- hw/rtl/heepsilon_top.sv
file_type: systemVerilogSource

x_heep_system:
Expand Down Expand Up @@ -102,7 +102,7 @@ filesets:
- hw/fpga/sram_wrapper.sv
- hw/fpga_cgra/cgra_sram_wrapper.sv
- hw/fpga_cgra/cgra_clock_gate.sv
- hw/fpga_cgra/xilinx_cgra_x_heep_wrapper.sv
- hw/fpga_cgra/xilinx_heepsilon_wrapper.sv
file_type: systemVerilogSource

ip-fpga:
Expand Down Expand Up @@ -179,7 +179,7 @@ targets:
default: &default_target
filesets:
- files_rtl_generic
toplevel: [cgra_x_heep_top]
toplevel: [heepsilon_top]

sim:
<<: *default_target
Expand Down Expand Up @@ -277,4 +277,4 @@ targets:
tools:
vivado:
part: xc7z020clg400-1
toplevel: [xilinx_cgra_x_heep_wrapper]
toplevel: [xilinx_heepsilon_wrapper]
32 changes: 32 additions & 0 deletions heepsilon_cfg.hjson
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
// Derived from Occamy: https://github.com/pulp-platform/snitch/blob/master/hw/system/occamy/src/occamy_cfg.hjson
// CGRA configuration for HEEPsilon.
{
cgra: {
// Main impact: more columns equals more master ports connected to the bus
// There is a maximum limit because of the kernel configuration size fixed to the bus width of 32 bits
// This limit can be increased but manual changes are required
// Maximum number of columns: 32-log2(rcs_num_instr)-log2(max_columns*rcs_num_instr)
// Default settings enable: 32-log2(32)-log2(4*32) = 20 columns
num_columns: 4
// Main impact: more rows means more context memory banks (i.e., one per row)
// There should be no limit on the number of rows, unlike for the number of columns
num_rows: 4
// It is possible to limit the maximum number of columns a kernel can use (this saves a bit of resources)
// The default value is the same as num_columns; put a number to change it
max_columns: default
// Number of instructions each RC can contain (usually a power of 2)
rcs_num_instr: 32
// Context memory bank depth
// This parameter is by default set to max_columns*rcs_num_instr
// The default depth corresponds to the minimum required in case a kernel uses max_columns and rcs_num_instr instructions per RC
// The depth can be increased in case more kernels need to be stored inside the context memory
cmem_bk_depth: default
// This value fixes the maximum number of kernel configuration words that can be stored
// It does not mean that the context memory can hold that many kernels, it depends on the size of each kernel
// You probably don't need to change this value
kmem_depth: 16
},
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Solderpad Hardware License, Version 2.1, see LICENSE.md for details.
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1

module xilinx_cgra_x_heep_wrapper
module xilinx_heepsilon_wrapper
import obi_pkg::*;
import reg_pkg::*;
#(
Expand Down Expand Up @@ -89,11 +89,11 @@ module xilinx_cgra_x_heep_wrapper
.clk_out1_0(clk_gen)
);

cgra_x_heep_top #(
heepsilon_top #(
.COREV_PULP (0),
.FPU (0),
.ZFINX (0)
) cgra_x_heep_top_i (
) heepsilon_top_i (
.clk_i(clk_gen),
.rst_ni(rst_n),
.boot_select_i,
Expand Down
2 changes: 1 addition & 1 deletion hw/rtl/cgra_top.vlt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

`verilator_config

lint_off -rule UNUSED -file "*hw/rtl/cgra_x_heep_top.sv" -match "Signal is not used: 'external_subsystem_powergate_iso'*"
lint_off -rule UNUSED -file "*hw/rtl/heepsilon_top.sv" -match "Signal is not used: 'external_subsystem_powergate_iso'*"
6 changes: 3 additions & 3 deletions hw/rtl/cgra_x_heep_pkg.sv → hw/rtl/heepsilon_pkg.sv.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
// Solderpad Hardware License, Version 2.1, see LICENSE.md for details.
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1

package cgra_x_heep_pkg;
package heepsilon_pkg;

import addr_map_rule_pkg::*;
import core_v_mini_mcu_pkg::*;

// One master port per CGRA column
localparam CGRA_XBAR_NMASTER = 4;
localparam CGRA_XBAR_NMASTER = ${cgra_num_columns};
// One slave port to the CGRA context memory
localparam EXT_XBAR_NSLAVE = 1;

Expand Down Expand Up @@ -49,4 +49,4 @@ package cgra_x_heep_pkg;
EXT_SYSTEM_NPERIPHERALS
) : 32'd1;

endpackage // cgra_x_heep_pkg
endpackage // heepsilon_pkg
64 changes: 33 additions & 31 deletions hw/rtl/cgra_x_heep_top.sv → hw/rtl/heepsilon_top.sv
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
// Solderpad Hardware License, Version 2.1, see LICENSE.md for details.
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1

module cgra_x_heep_top #(
module heepsilon_top #(
parameter COREV_PULP = 0,
parameter FPU = 0,
parameter ZFINX = 0
parameter ZFINX = 0,
parameter X_EXT = 0
) (
inout logic clk_i,
inout logic rst_ni,
Expand Down Expand Up @@ -48,17 +49,17 @@ module cgra_x_heep_top #(

import obi_pkg::*;
import reg_pkg::*;
import cgra_x_heep_pkg::*;
import heepsilon_pkg::*;

// External xbar master/slave and peripheral ports
obi_req_t ext_xbar_slave_req;
obi_resp_t ext_xbar_slave_resp;
reg_req_t ext_periph_slave_req;
reg_rsp_t ext_periph_slave_resp;
obi_req_t [cgra_x_heep_pkg::CGRA_XBAR_NMASTER-1:0] ext_master_req;
obi_req_t [cgra_x_heep_pkg::CGRA_XBAR_NMASTER-1:0] heep_slave_req;
obi_resp_t [cgra_x_heep_pkg::CGRA_XBAR_NMASTER-1:0] ext_master_resp;
obi_resp_t [cgra_x_heep_pkg::CGRA_XBAR_NMASTER-1:0] heep_slave_resp;
obi_req_t [heepsilon_pkg::CGRA_XBAR_NMASTER-1:0] ext_master_req;
obi_req_t [heepsilon_pkg::CGRA_XBAR_NMASTER-1:0] heep_slave_req;
obi_resp_t [heepsilon_pkg::CGRA_XBAR_NMASTER-1:0] ext_master_resp;
obi_resp_t [heepsilon_pkg::CGRA_XBAR_NMASTER-1:0] heep_slave_resp;
obi_req_t heep_core_instr_req;
obi_resp_t heep_core_instr_resp;
obi_req_t heep_core_data_req;
Expand All @@ -80,21 +81,21 @@ module cgra_x_heep_top #(
logic cgra_int;
logic cgra_enable;
logic cgra_logic_rst_n;
logic cgra_ram_banks_set_retentive;
logic cgra_ram_banks_set_retentive_n;

// External subsystems
logic external_subsystem_rst_n;
logic external_ram_banks_set_retentive;
logic external_ram_banks_set_retentive_n;
/* verilator lint_off unused */
logic external_subsystem_clkgate_en_n;
logic external_subsystem_powergate_switch;
logic external_subsystem_powergate_switch_ack;
logic external_subsystem_powergate_iso;
logic external_subsystem_powergate_switch_n;
logic external_subsystem_powergate_switch_ack_n;
logic external_subsystem_powergate_iso_n;

// CGRA logic clock gating unit enable (always-on in this case)
assign cgra_enable = 1'b1;
assign cgra_logic_rst_n = external_subsystem_rst_n;
assign cgra_ram_banks_set_retentive = external_ram_banks_set_retentive;
assign cgra_enable = 1'b1;
assign cgra_logic_rst_n = external_subsystem_rst_n;
assign cgra_ram_banks_set_retentive_n = external_ram_banks_set_retentive_n;

always_comb begin
// All interrupt lines set to zero by default
Expand All @@ -114,10 +115,10 @@ module cgra_x_heep_top #(
.EXT_XBAR_NMASTER(CGRA_XBAR_NMASTER),
.EXT_XBAR_NSLAVE (1)
) ext_bus_i (
.clk_i (clk_i),
.rst_ni (rst_ni),
.addr_map_i (EXT_XBAR_ADDR_RULES),
.default_idx_i ('0),
.clk_i (clk_i),
.rst_ni (rst_ni),
.addr_map_i (EXT_XBAR_ADDR_RULES),
.default_idx_i('0),

.heep_core_instr_req_i (heep_core_instr_req),
.heep_core_instr_resp_o (heep_core_instr_resp),
Expand All @@ -132,12 +133,12 @@ module cgra_x_heep_top #(
.heep_dma_addr_ch0_req_i (heep_dma_addr_ch0_req),
.heep_dma_addr_ch0_resp_o (heep_dma_addr_ch0_resp),

.ext_master_req_i (ext_master_req),
.ext_master_resp_o (ext_master_resp),
.heep_slave_req_o (heep_slave_req),
.heep_slave_resp_i (heep_slave_resp),
.ext_slave_req_o (ext_xbar_slave_req),
.ext_slave_resp_i (ext_xbar_slave_resp)
.ext_master_req_i (ext_master_req),
.ext_master_resp_o(ext_master_resp),
.heep_slave_req_o (heep_slave_req),
.heep_slave_resp_i(heep_slave_resp),
.ext_slave_req_o (ext_xbar_slave_req),
.ext_slave_resp_i (ext_xbar_slave_resp)
);

cgra_top_wrapper cgra_top_wrapper_i (
Expand All @@ -151,7 +152,7 @@ module cgra_x_heep_top #(
.reg_rsp_o(ext_periph_slave_resp),
.slave_req_i(ext_xbar_slave_req),
.slave_resp_o(ext_xbar_slave_resp),
.cmem_set_retentive_i(cgra_ram_banks_set_retentive),
.cmem_set_retentive_ni(cgra_ram_banks_set_retentive_n),
.cgra_int_o(cgra_int)
);

Expand All @@ -162,6 +163,7 @@ module cgra_x_heep_top #(
.COREV_PULP(COREV_PULP),
.FPU(FPU),
.ZFINX(ZFINX),
.X_EXT(X_EXT),
.EXT_XBAR_NMASTER(CGRA_XBAR_NMASTER)
) x_heep_system_i (
.clk_i,
Expand Down Expand Up @@ -252,12 +254,12 @@ module cgra_x_heep_top #(
.external_subsystem_clkgate_en_no(external_subsystem_clkgate_en_n),
.ext_peripheral_slave_req_o(ext_periph_slave_req),
.ext_peripheral_slave_resp_i(ext_periph_slave_resp),
.external_subsystem_powergate_switch_no(external_subsystem_powergate_switch),
.external_subsystem_powergate_switch_ack_ni(external_subsystem_powergate_switch_ack),
.external_subsystem_powergate_iso_no(external_subsystem_powergate_iso),
.external_subsystem_powergate_switch_no(external_subsystem_powergate_switch_n),
.external_subsystem_powergate_switch_ack_ni(external_subsystem_powergate_switch_ack_n),
.external_subsystem_powergate_iso_no(external_subsystem_powergate_iso_n),

.external_subsystem_rst_no(external_subsystem_rst_n),
.external_ram_banks_set_retentive_no(external_ram_banks_set_retentive)
.external_ram_banks_set_retentive_no(external_ram_banks_set_retentive_n)
);

endmodule // cgra_x_heep_pkg
endmodule // heepsilon_top
12 changes: 12 additions & 0 deletions hw/vendor/esl_epfl_cgra/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,15 @@
build
*_app
bitstream
*__pycache__*

# Generated hardware files
hw/rtl/cgra_pkg.sv
hw/rtl/cgra_reg_pkg.sv
hw/rtl/cgra_reg_top.sv
hw/rtl/peripheral_regs.sv
data/cgra_regs.hjson

# Generated software files
sw/cgra_regs.h
util/cgra_bitstream_gen.py
Loading

0 comments on commit b55b14f

Please sign in to comment.